summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author Jason Ekstrand <[email protected]> 2015-11-03 15:45:04 -0800
committer Jason Ekstrand <[email protected]> 2015-11-03 15:45:04 -0800
commit b00e3f221b3f6dd0e87697c53331fd033b6e8676 (patch)
tree a59dfeca8fd404c65da59a663e0abda301e893a2 /src/gallium/drivers
parent a1e7b8701a4687f29b013364a852aa773c80f960 (diff)
parent 5d4b019d2a6d4deb4db11780618515cf1fa8a4fc (diff)
Merge remote-tracking branch 'mesa-public/master' into vulkan
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_draw.c10
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c2
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_draw.c12
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.c2
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c1
-rw-r--r--src/gallium/drivers/i915/i915_screen.c1
-rw-r--r--src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h33
-rw-r--r--src/gallium/drivers/ilo/core/ilo_builder_3d_top.h99
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_compute.c95
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_compute.h12
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_shader.c82
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_shader.h43
-rw-r--r--src/gallium/drivers/ilo/core/ilo_state_shader_ps.c52
-rw-r--r--src/gallium/drivers/ilo/ilo_blit.h6
-rw-r--r--src/gallium/drivers/ilo/ilo_draw.c8
-rw-r--r--src/gallium/drivers/ilo/ilo_render.c39
-rw-r--r--src/gallium/drivers/ilo/ilo_render.h6
-rw-r--r--src/gallium/drivers/ilo/ilo_render_gen.h5
-rw-r--r--src/gallium/drivers/ilo/ilo_render_gen6.c27
-rw-r--r--src/gallium/drivers/ilo/ilo_render_gen7.c35
-rw-r--r--src/gallium/drivers/ilo/ilo_render_gen8.c2
-rw-r--r--src/gallium/drivers/ilo/ilo_screen.c1
-rw-r--r--src/gallium/drivers/ilo/ilo_shader.c48
-rw-r--r--src/gallium/drivers/ilo/ilo_shader.h6
-rw-r--r--src/gallium/drivers/ilo/shader/ilo_shader_internal.h1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_sampler.c8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.c21
-rw-r--r--src/gallium/drivers/nouveau/Makefile.sources3
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir.cpp1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h4
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h12
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp31
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp26
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp28
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp30
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp28
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp39
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_target.h21
-rw-r--r--src/gallium/drivers/nouveau/nouveau_heap.c130
-rw-r--r--src/gallium/drivers/nouveau/nouveau_heap.h12
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c304
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.h82
-rw-r--r--src/gallium/drivers/nouveau/nouveau_statebuf.h6
-rw-r--r--src/gallium/drivers/nouveau/nouveau_video.c2
-rw-r--r--src/gallium/drivers/nouveau/nouveau_video.h2
-rw-r--r--src/gallium/drivers/nouveau/nouveau_vp3_video.h32
-rw-r--r--src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c142
-rw-r--r--src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c262
-rw-r--r--src/gallium/drivers/nouveau/nouveau_winsys.h24
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_screen.c1
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.h12
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.c10
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.h3
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query.c397
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query.h33
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw.c406
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw.h40
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_resource.c8
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c5
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_shader_state.c23
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c24
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state_validate.c2
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_surface.c18
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_vbo.c4
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv98_video_vp.c48
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c35
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.h7
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query.c14
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query.h3
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_resource.c3
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.c63
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.h1
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c48
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state.c6
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c5
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_surface.c118
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c40
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c48
-rw-r--r--src/gallium/drivers/r300/r300_screen.c1
-rw-r--r--src/gallium/drivers/r600/r600_asm.c8
-rw-r--r--src/gallium/drivers/r600/r600_asm.h2
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c1
-rw-r--r--src/gallium/drivers/r600/r600_shader.c6
-rw-r--r--src/gallium/drivers/radeon/r600_buffer_common.c21
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c8
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h9
-rw-r--r--src/gallium/drivers/radeon/r600_texture.c131
-rw-r--r--src/gallium/drivers/radeon/r600d_common.h1
-rw-r--r--src/gallium/drivers/radeon/radeon_uvd.c7
-rw-r--r--src/gallium/drivers/radeon/radeon_uvd.h9
-rw-r--r--src/gallium/drivers/radeon/radeon_video.c50
-rw-r--r--src/gallium/drivers/radeon/radeon_winsys.h5
-rw-r--r--src/gallium/drivers/radeonsi/cik_sdma.c3
-rw-r--r--src/gallium/drivers/radeonsi/si_blit.c11
-rw-r--r--src/gallium/drivers/radeonsi/si_descriptors.c8
-rw-r--r--src/gallium/drivers/radeonsi/si_dma.c3
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c33
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h6
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c47
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_state_draw.c67
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c95
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c1
-rw-r--r--src/gallium/drivers/softpipe/sp_state_sampler.c8
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c32
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.c18
-rw-r--r--src/gallium/drivers/svga/svga_cmd.c2
-rw-r--r--src/gallium/drivers/svga/svga_cmd_vgpu10.c6
-rw-r--r--src/gallium/drivers/svga/svga_draw_arrays.c29
-rw-r--r--src/gallium/drivers/svga/svga_screen.c1
-rw-r--r--src/gallium/drivers/svga/svga_shader.h3
-rw-r--r--src/gallium/drivers/svga/svga_tgsi.c7
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_emit.h3
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_insn.c27
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_vgpu10.c43
-rw-r--r--src/gallium/drivers/svga/svga_winsys.h7
-rw-r--r--src/gallium/drivers/vc4/vc4_cl_dump.c49
-rw-r--r--src/gallium/drivers/vc4/vc4_context.h11
-rw-r--r--src/gallium/drivers/vc4/vc4_draw.c53
-rw-r--r--src/gallium/drivers/vc4/vc4_job.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_blend.c286
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_io.c1
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_algebraic.c1
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_copy_propagation.c85
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_cse.c1
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_small_immediates.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_vpm_writes.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c114
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c86
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h74
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu.h3
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_defines.h4
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_disasm.c16
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c213
-rw-r--r--src/gallium/drivers/vc4/vc4_register_allocate.c30
-rw-r--r--src/gallium/drivers/vc4/vc4_resource.c18
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.c3
-rw-r--r--src/gallium/drivers/vc4/vc4_simulator.c12
-rw-r--r--src/gallium/drivers/vc4/vc4_state.c6
-rw-r--r--src/gallium/drivers/vc4/vc4_uniforms.c30
-rw-r--r--src/gallium/drivers/virgl/Automake.inc11
-rw-r--r--src/gallium/drivers/virgl/Makefile.am32
-rw-r--r--src/gallium/drivers/virgl/Makefile.sources18
-rw-r--r--src/gallium/drivers/virgl/virgl_buffer.c172
-rw-r--r--src/gallium/drivers/virgl/virgl_context.c963
-rw-r--r--src/gallium/drivers/virgl/virgl_context.h115
-rw-r--r--src/gallium/drivers/virgl/virgl_encode.c867
-rw-r--r--src/gallium/drivers/virgl/virgl_encode.h247
-rw-r--r--src/gallium/drivers/virgl/virgl_hw.h286
-rw-r--r--src/gallium/drivers/virgl/virgl_protocol.h468
-rw-r--r--src/gallium/drivers/virgl/virgl_public.h31
-rw-r--r--src/gallium/drivers/virgl/virgl_query.c175
-rw-r--r--src/gallium/drivers/virgl/virgl_resource.c90
-rw-r--r--src/gallium/drivers/virgl/virgl_resource.h146
-rw-r--r--src/gallium/drivers/virgl/virgl_screen.c553
-rw-r--r--src/gallium/drivers/virgl/virgl_screen.h47
-rw-r--r--src/gallium/drivers/virgl/virgl_streamout.c88
-rw-r--r--src/gallium/drivers/virgl/virgl_texture.c351
-rw-r--r--src/gallium/drivers/virgl/virgl_tgsi.c66
-rw-r--r--src/gallium/drivers/virgl/virgl_winsys.h113
164 files changed, 8171 insertions, 1808 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index a9498835011..3906c9b996e 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -81,7 +81,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
- if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
+ if (ctx->rasterizer->point_size_per_vertex &&
(info->mode == PIPE_PRIM_POINTS))
primtype = DI_PT_POINTLIST_PSIZE;
@@ -137,7 +137,7 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.key = {
/* do binning pass first: */
.binning_pass = true,
- .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
+ .color_two_side = ctx->rasterizer->light_twoside,
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
@@ -149,9 +149,9 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.fsaturate_t = fd3_ctx->fsaturate_t,
.fsaturate_r = fd3_ctx->fsaturate_r,
},
- .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
- .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : 0,
- .sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false,
+ .rasterflat = ctx->rasterizer->flatshade,
+ .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
+ .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
unsigned dirty;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 411f5b76329..8f9c8b0623c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -627,7 +627,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
ctx->prog.dirty = 0;
}
- if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) {
+ if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend);
uint32_t i;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index 025753c037e..7bd5163529a 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -118,12 +118,12 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.key = {
/* do binning pass first: */
.binning_pass = true,
- .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
- .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
+ .color_two_side = ctx->rasterizer->light_twoside,
+ .rasterflat = ctx->rasterizer->flatshade,
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
- .ucp_enables = ctx->rasterizer ? ctx->rasterizer->clip_plane_enable : 0,
+ .ucp_enables = ctx->rasterizer->clip_plane_enable,
.has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate),
.vsaturate_s = fd4_ctx->vsaturate_s,
.vsaturate_t = fd4_ctx->vsaturate_t,
@@ -132,9 +132,9 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.fsaturate_t = fd4_ctx->fsaturate_t,
.fsaturate_r = fd4_ctx->fsaturate_r,
},
- .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
- .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : false,
- .sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false,
+ .rasterflat = ctx->rasterizer->flatshade,
+ .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
+ .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
unsigned dirty;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index c7ed1d2e379..cf5dd7b0f17 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -594,7 +594,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
ctx->prog.dirty = 0;
}
- if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
+ if ((dirty & FD_DIRTY_BLEND)) {
struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
uint32_t i;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 50d140fe903..9f8c33263fb 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -238,6 +238,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 5812af626cb..2d2fd375656 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -252,6 +252,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
index 5efe9da2d22..2e9470e66e9 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
@@ -202,14 +202,16 @@ static inline void
gen6_3DSTATE_WM(struct ilo_builder *builder,
const struct ilo_state_raster *rs,
const struct ilo_state_ps *ps,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 6);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -221,6 +223,11 @@ gen6_3DSTATE_WM(struct ilo_builder *builder,
dw[6] = rs->wm[2] | ps->ps[4];
dw[7] = 0; /* kernel 1 */
dw[8] = 0; /* kernel 2 */
+
+ if (ilo_state_ps_get_scratch_size(ps)) {
+ ilo_builder_batch_reloc(builder, pos + 2, scratch_bo,
+ ps->ps[0], 0);
+ }
}
static inline void
@@ -329,14 +336,16 @@ gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder)
static inline void
gen7_3DSTATE_PS(struct ilo_builder *builder,
const struct ilo_state_ps *ps,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 8;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -347,19 +356,26 @@ gen7_3DSTATE_PS(struct ilo_builder *builder,
dw[5] = ps->ps[5];
dw[6] = 0; /* kernel 1 */
dw[7] = 0; /* kernel 2 */
+
+ if (ilo_state_ps_get_scratch_size(ps)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ ps->ps[3], 0);
+ }
}
static inline void
gen8_3DSTATE_PS(struct ilo_builder *builder,
const struct ilo_state_ps *ps,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 12;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -374,6 +390,11 @@ gen8_3DSTATE_PS(struct ilo_builder *builder,
dw[9] = 0;
dw[10] = 0; /* kernel 2 */
dw[11] = 0;
+
+ if (ilo_state_ps_get_scratch_size(ps)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ ps->ps[1], 0);
+ }
}
static inline void
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
index 6e94fb25f1f..3a448719c15 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
@@ -477,14 +477,16 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
static inline void
gen6_3DSTATE_VS(struct ilo_builder *builder,
const struct ilo_state_vs *vs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 6;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -493,19 +495,26 @@ gen6_3DSTATE_VS(struct ilo_builder *builder,
dw[3] = vs->vs[1];
dw[4] = vs->vs[2];
dw[5] = vs->vs[3];
+
+ if (ilo_state_vs_get_scratch_size(vs)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ vs->vs[1], 0);
+ }
}
static inline void
gen8_3DSTATE_VS(struct ilo_builder *builder,
const struct ilo_state_vs *vs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -517,19 +526,26 @@ gen8_3DSTATE_VS(struct ilo_builder *builder,
dw[6] = vs->vs[2];
dw[7] = vs->vs[3];
dw[8] = vs->vs[4];
+
+ if (ilo_state_vs_get_scratch_size(vs)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ vs->vs[1], 0);
+ }
}
static inline void
gen7_3DSTATE_HS(struct ilo_builder *builder,
const struct ilo_state_hs *hs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
/* see hs_set_gen7_3DSTATE_HS() */
@@ -539,19 +555,26 @@ gen7_3DSTATE_HS(struct ilo_builder *builder,
dw[4] = hs->hs[2];
dw[5] = hs->hs[3];
dw[6] = 0;
+
+ if (ilo_state_hs_get_scratch_size(hs)) {
+ ilo_builder_batch_reloc(builder, pos + 4, scratch_bo,
+ hs->hs[2], 0);
+ }
}
static inline void
gen8_3DSTATE_HS(struct ilo_builder *builder,
const struct ilo_state_hs *hs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
/* see hs_set_gen7_3DSTATE_HS() */
@@ -563,6 +586,11 @@ gen8_3DSTATE_HS(struct ilo_builder *builder,
dw[6] = 0;
dw[7] = hs->hs[3];
dw[8] = 0;
+
+ if (ilo_state_hs_get_scratch_size(hs)) {
+ ilo_builder_batch_reloc64(builder, pos + 5, scratch_bo,
+ hs->hs[2], 0);
+ }
}
static inline void
@@ -586,14 +614,16 @@ gen7_3DSTATE_TE(struct ilo_builder *builder,
static inline void
gen7_3DSTATE_DS(struct ilo_builder *builder,
const struct ilo_state_ds *ds,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 6;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
/* see ds_set_gen7_3DSTATE_DS() */
@@ -602,19 +632,26 @@ gen7_3DSTATE_DS(struct ilo_builder *builder,
dw[3] = ds->ds[1];
dw[4] = ds->ds[2];
dw[5] = ds->ds[3];
+
+ if (ilo_state_ds_get_scratch_size(ds)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ ds->ds[1], 0);
+ }
}
static inline void
gen8_3DSTATE_DS(struct ilo_builder *builder,
const struct ilo_state_ds *ds,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
/* see ds_set_gen7_3DSTATE_DS() */
@@ -626,19 +663,26 @@ gen8_3DSTATE_DS(struct ilo_builder *builder,
dw[6] = ds->ds[2];
dw[7] = ds->ds[3];
dw[8] = ds->ds[4];
+
+ if (ilo_state_ds_get_scratch_size(ds)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ ds->ds[1], 0);
+ }
}
static inline void
gen6_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 6);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -648,6 +692,11 @@ gen6_3DSTATE_GS(struct ilo_builder *builder,
dw[4] = gs->gs[2];
dw[5] = gs->gs[3];
dw[6] = gs->gs[4];
+
+ if (ilo_state_gs_get_scratch_size(gs)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ gs->gs[1], 0);
+ }
}
static inline void
@@ -677,14 +726,16 @@ gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
static inline void
gen7_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -694,19 +745,26 @@ gen7_3DSTATE_GS(struct ilo_builder *builder,
dw[4] = gs->gs[2];
dw[5] = gs->gs[3];
dw[6] = 0;
+
+ if (ilo_state_gs_get_scratch_size(gs)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ gs->gs[1], 0);
+ }
}
static inline void
gen8_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 10;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -719,6 +777,11 @@ gen8_3DSTATE_GS(struct ilo_builder *builder,
dw[7] = gs->gs[3];
dw[8] = 0;
dw[9] = gs->gs[4];
+
+ if (ilo_state_gs_get_scratch_size(gs)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ gs->gs[1], 0);
+ }
}
static inline void
diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.c b/src/gallium/drivers/ilo/core/ilo_state_compute.c
index a5fe5e1a6b0..ba3ff9001e1 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_compute.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_compute.c
@@ -158,7 +158,8 @@ compute_interface_get_gen6_read_end(const struct ilo_dev *dev,
*/
assert(per_thread_read <= 63);
- /* From the Haswell PRM, volume 2d, page 199:
+ /*
+ * From the Haswell PRM, volume 2d, page 199:
*
* "(Cross-Thread Constant Data Read Length) [0,127]"
*/
@@ -210,38 +211,68 @@ compute_validate_gen6(const struct ilo_dev *dev,
return true;
}
-static uint8_t
-compute_get_gen6_scratch_space(const struct ilo_dev *dev,
- const struct ilo_state_compute_info *info)
+static uint32_t
+compute_get_gen6_per_thread_scratch_size(const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info,
+ uint8_t *per_thread_space)
{
- uint32_t scratch_size = 0;
- uint8_t i;
+ ILO_DEV_ASSERT(dev, 6, 7);
- ILO_DEV_ASSERT(dev, 6, 8);
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 2, page 30:
+ *
+ * "(Per Thread Scratch Space)
+ * Range = [0,11] indicating [1k bytes, 12k bytes] [DevSNB]"
+ */
+ assert(info->per_thread_scratch_size <= 12 * 1024);
- for (i = 0; i < info->interface_count; i++) {
- if (scratch_size < info->interfaces[i].scratch_size)
- scratch_size = info->interfaces[i].scratch_size;
+ if (!info->per_thread_scratch_size) {
+ *per_thread_space = 0;
+ return 0;
}
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- assert(scratch_size <= 2 * 1024 * 1024);
+ *per_thread_space = (info->per_thread_scratch_size > 1024) ?
+ (info->per_thread_scratch_size - 1) / 1024 : 0;
+
+ return 1024 * (1 + *per_thread_space);
+}
- /* next power of two, starting from 1KB */
- return (scratch_size > 1024) ?
- (util_last_bit(scratch_size - 1) - 10): 0;
- } else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
- assert(scratch_size <= 2 * 1024 * 1024);
+static uint32_t
+compute_get_gen75_per_thread_scratch_size(const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info,
+ uint8_t *per_thread_space)
+{
+ ILO_DEV_ASSERT(dev, 7.5, 8);
- /* next power of two, starting from 2KB */
- return (scratch_size > 2048) ?
- (util_last_bit(scratch_size - 1) - 11): 0;
- } else {
- assert(scratch_size <= 12 * 1024);
+ /*
+ * From the Haswell PRM, volume 2b, page 407:
+ *
+ * "(Per Thread Scratch Space)
+ * [0,10] Indicating [2k bytes, 2 Mbytes]"
+ *
+ * "Note: The scratch space should be declared as 2x the desired
+ * scratch space. The stack will start at the half-way point instead
+ * of the end. The upper half of scratch space will not be accessed
+ * and so does not have to be allocated in memory."
+ *
+ * From the Broadwell PRM, volume 2a, page 450:
+ *
+ * "(Per Thread Scratch Space)
+ * [0,11] indicating [1k bytes, 2 Mbytes]"
+ */
+ assert(info->per_thread_scratch_size <=
+ ((ilo_dev_gen(dev) >= ILO_GEN(8)) ? 2 : 1) * 1024 * 1024);
- return (scratch_size > 1024) ?
- (scratch_size - 1) / 1024 : 0;
+ if (!info->per_thread_scratch_size) {
+ *per_thread_space = 0;
+ return 0;
}
+
+ /* next power of two, starting from 1KB */
+ *per_thread_space = (info->per_thread_scratch_size > 1024) ?
+ (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
+
+ return 1 << (10 + *per_thread_space);
}
static bool
@@ -250,7 +281,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
const struct ilo_state_compute_info *info)
{
struct compute_urb_configuration urb;
- uint8_t scratch_space;
+ uint32_t per_thread_size;
+ uint8_t per_thread_space;
uint32_t dw1, dw2, dw4;
@@ -260,9 +292,16 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
!compute_validate_gen6(dev, info, &urb))
return false;
- scratch_space = compute_get_gen6_scratch_space(dev, info);
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+ per_thread_size = compute_get_gen75_per_thread_scratch_size(dev,
+ info, &per_thread_space);
+ } else {
+ per_thread_size = compute_get_gen6_per_thread_scratch_size(dev,
+ info, &per_thread_space);
+ }
+
+ dw1 = per_thread_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
- dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
@@ -281,6 +320,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
compute->vfe[1] = dw2;
compute->vfe[2] = dw4;
+ compute->scratch_size = per_thread_size * dev->thread_count;
+
return true;
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.h b/src/gallium/drivers/ilo/core/ilo_state_compute.h
index 346f7b617f4..bd56bba4369 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_compute.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_compute.h
@@ -45,8 +45,6 @@ struct ilo_state_compute_interface_info {
/* usually 0 unless there are multiple interfaces */
uint32_t kernel_offset;
- uint32_t scratch_size;
-
uint8_t sampler_count;
uint8_t surface_count;
@@ -65,6 +63,8 @@ struct ilo_state_compute_info {
const struct ilo_state_compute_interface_info *interfaces;
uint8_t interface_count;
+ uint32_t per_thread_scratch_size;
+
uint32_t cv_urb_alloc_size;
uint32_t curbe_alloc_size;
};
@@ -74,6 +74,8 @@ struct ilo_state_compute {
uint32_t (*idrt)[6];
uint8_t idrt_count;
+
+ uint32_t scratch_size;
};
static inline size_t
@@ -89,4 +91,10 @@ ilo_state_compute_init(struct ilo_state_compute *compute,
const struct ilo_dev *dev,
const struct ilo_state_compute_info *info);
+static inline uint32_t
+ilo_state_compute_get_scratch_size(const struct ilo_state_compute *compute)
+{
+ return compute->scratch_size;
+}
+
#endif /* ILO_STATE_COMPUTE_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.c b/src/gallium/drivers/ilo/core/ilo_state_shader.c
index f67326c7f10..aec4fd6d8a6 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_shader.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader.c
@@ -37,7 +37,9 @@ enum vertex_stage {
struct vertex_ff {
uint8_t grf_start;
- uint8_t scratch_space;
+
+ uint8_t per_thread_scratch_space;
+ uint32_t per_thread_scratch_size;
uint8_t sampler_count;
uint8_t surface_count;
@@ -59,13 +61,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev,
* others.
*/
const uint8_t max_grf_start = (stage == STAGE_GS) ? 16 : 32;
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 134:
- *
- * "(Per-Thread Scratch Space)
- * Range [0,11] indicating [1K Bytes, 2M Bytes]"
- */
- const uint32_t max_scratch_size = 2 * 1024 * 1024;
ILO_DEV_ASSERT(dev, 6, 8);
@@ -73,7 +68,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev,
assert(!kernel->offset);
assert(kernel->grf_start < max_grf_start);
- assert(kernel->scratch_size <= max_scratch_size);
return true;
}
@@ -112,18 +106,33 @@ vertex_get_gen6_ff(const struct ilo_dev *dev,
const struct ilo_state_shader_kernel_info *kernel,
const struct ilo_state_shader_resource_info *resource,
const struct ilo_state_shader_urb_info *urb,
+ uint32_t per_thread_scratch_size,
struct vertex_ff *ff)
{
ILO_DEV_ASSERT(dev, 6, 8);
+ memset(ff, 0, sizeof(*ff));
+
if (!vertex_validate_gen6_kernel(dev, stage, kernel) ||
!vertex_validate_gen6_urb(dev, stage, urb))
return false;
ff->grf_start = kernel->grf_start;
- /* next power of two, starting from 1KB */
- ff->scratch_space = (kernel->scratch_size > 1024) ?
- (util_last_bit(kernel->scratch_size - 1) - 10): 0;
+
+ if (per_thread_scratch_size) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 134:
+ *
+ * "(Per-Thread Scratch Space)
+ * Range [0,11] indicating [1K Bytes, 2M Bytes]"
+ */
+ assert(per_thread_scratch_size <= 2 * 1024 * 1024);
+
+ /* next power of two, starting from 1KB */
+ ff->per_thread_scratch_space = (per_thread_scratch_size > 1024) ?
+ (util_last_bit(per_thread_scratch_size - 1) - 10) : 0;
+ ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
+ }
ff->sampler_count = (resource->sampler_count <= 12) ?
(resource->sampler_count + 3) / 4 : 4;
@@ -192,8 +201,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
ILO_DEV_ASSERT(dev, 6, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel,
- &info->resource, &info->urb, &ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = vs_get_gen6_thread_count(dev, info);
@@ -207,7 +216,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
@@ -234,6 +244,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
if (ilo_dev_gen(dev) >= ILO_GEN(8))
vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
+ vs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
@@ -273,8 +285,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
ILO_DEV_ASSERT(dev, 7, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel,
- &info->resource, &info->urb, &ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = hs_get_gen7_thread_count(dev, info);
@@ -282,19 +294,22 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
dw1 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
- if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+ dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(8))
+ dw2 |= thread_count << GEN8_HS_DW2_MAX_THREADS__SHIFT;
+ else if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
dw1 |= thread_count << GEN75_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
else
dw1 |= thread_count << GEN7_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
- dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT;
-
if (info->dispatch_enable)
dw2 |= GEN7_HS_DW2_HS_ENABLE;
if (info->stats_enable)
dw2 |= GEN7_HS_DW2_STATISTICS;
- dw4 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw4 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES |
ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT |
@@ -310,6 +325,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
hs->hs[2] = dw4;
hs->hs[3] = dw5;
+ hs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
@@ -373,8 +390,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
ILO_DEV_ASSERT(dev, 7, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel,
- &info->resource, &info->urb, &ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = ds_get_gen7_thread_count(dev, info);
@@ -385,7 +402,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT |
ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT |
@@ -412,6 +430,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
if (ilo_dev_gen(dev) >= ILO_GEN(8))
ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT;
+ ds->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
@@ -425,8 +445,8 @@ gs_get_gen6_ff(const struct ilo_dev *dev,
ILO_DEV_ASSERT(dev, 6, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel,
- &info->resource, &info->urb, ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, ff))
return false;
/*
@@ -510,7 +530,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
@@ -550,6 +571,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
gs->gs[3] = dw5;
gs->gs[4] = dw6;
+ gs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
@@ -588,7 +611,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT |
0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT |
@@ -618,6 +642,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
if (ilo_dev_gen(dev) >= ILO_GEN(8))
gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT;
+ gs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.h b/src/gallium/drivers/ilo/core/ilo_state_shader.h
index 44690c5b0bb..35651090d66 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_shader.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader.h
@@ -42,8 +42,6 @@ struct ilo_state_shader_kernel_info {
uint8_t grf_start;
uint8_t pcb_attr_count;
-
- uint32_t scratch_size;
};
/**
@@ -77,6 +75,7 @@ struct ilo_state_vs_info {
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@@ -86,6 +85,7 @@ struct ilo_state_hs_info {
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@@ -95,6 +95,7 @@ struct ilo_state_ds_info {
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@@ -119,6 +120,7 @@ struct ilo_state_gs_info {
struct ilo_state_gs_sol_info sol;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@@ -158,6 +160,8 @@ struct ilo_state_ps_info {
struct ilo_state_ps_io_info io;
struct ilo_state_ps_params_info params;
+ uint32_t per_thread_scratch_size;
+
/* bitmask of GEN6_PS_DISPATCH_x */
uint8_t valid_kernels;
bool per_sample_dispatch;
@@ -173,23 +177,28 @@ struct ilo_state_ps_info {
struct ilo_state_vs {
uint32_t vs[5];
+ uint32_t scratch_size;
};
struct ilo_state_hs {
uint32_t hs[4];
+ uint32_t scratch_size;
};
struct ilo_state_ds {
uint32_t te[3];
uint32_t ds[5];
+ uint32_t scratch_size;
};
struct ilo_state_gs {
uint32_t gs[5];
+ uint32_t scratch_size;
};
struct ilo_state_ps {
uint32_t ps[8];
+ uint32_t scratch_size;
struct ilo_state_ps_dispatch_conds {
bool ps_valid;
@@ -211,6 +220,12 @@ bool
ilo_state_vs_init_disabled(struct ilo_state_vs *vs,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_vs_get_scratch_size(const struct ilo_state_vs *vs)
+{
+ return vs->scratch_size;
+}
+
bool
ilo_state_hs_init(struct ilo_state_hs *hs,
const struct ilo_dev *dev,
@@ -221,6 +236,12 @@ ilo_state_hs_init_disabled(struct ilo_state_hs *hs,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_hs_get_scratch_size(const struct ilo_state_hs *hs)
+{
+ return hs->scratch_size;
+}
+
bool
ilo_state_ds_init(struct ilo_state_ds *ds,
const struct ilo_dev *dev,
@@ -230,6 +251,12 @@ bool
ilo_state_ds_init_disabled(struct ilo_state_ds *ds,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_ds_get_scratch_size(const struct ilo_state_ds *ds)
+{
+ return ds->scratch_size;
+}
+
bool
ilo_state_gs_init(struct ilo_state_gs *gs,
const struct ilo_dev *dev,
@@ -239,6 +266,12 @@ bool
ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_gs_get_scratch_size(const struct ilo_state_gs *gs)
+{
+ return gs->scratch_size;
+}
+
bool
ilo_state_ps_init(struct ilo_state_ps *ps,
const struct ilo_dev *dev,
@@ -253,4 +286,10 @@ ilo_state_ps_set_params(struct ilo_state_ps *ps,
const struct ilo_dev *dev,
const struct ilo_state_ps_params_info *params);
+static inline uint32_t
+ilo_state_ps_get_scratch_size(const struct ilo_state_ps *ps)
+{
+ return ps->scratch_size;
+}
+
#endif /* ILO_STATE_SHADER_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
index ceeb68a460e..5c3ca1ebe37 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
@@ -34,7 +34,8 @@ struct pixel_ff {
uint32_t kernel_offsets[3];
uint8_t grf_starts[3];
bool pcb_enable;
- uint8_t scratch_space;
+ uint8_t per_thread_scratch_space;
+ uint32_t per_thread_scratch_size;
uint8_t sampler_count;
uint8_t surface_count;
@@ -56,13 +57,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev,
{
/* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */
const uint8_t max_grf_start = 128;
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 271:
- *
- * "(Per-Thread Scratch Space)
- * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
- */
- const uint32_t max_scratch_size = 2 * 1024 * 1024;
ILO_DEV_ASSERT(dev, 6, 8);
@@ -70,7 +64,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev,
assert(kernel->offset % 64 == 0);
assert(kernel->grf_start < max_grf_start);
- assert(kernel->scratch_size <= max_scratch_size);
return true;
}
@@ -325,7 +318,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
- uint32_t scratch_size;
ILO_DEV_ASSERT(dev, 6, 8);
@@ -363,21 +355,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
kernel_32->pcb_attr_count));
- scratch_size = 0;
- if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
- scratch_size < kernel_8->scratch_size)
- scratch_size = kernel_8->scratch_size;
- if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
- scratch_size < kernel_16->scratch_size)
- scratch_size = kernel_16->scratch_size;
- if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
- scratch_size < kernel_32->scratch_size)
- scratch_size = kernel_32->scratch_size;
-
- /* next power of two, starting from 1KB */
- ff->scratch_space = (scratch_size > 1024) ?
- (util_last_bit(scratch_size - 1) - 10): 0;
-
/* GPU hangs on Haswell if none of the dispatch mode bits is set */
if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
ff->dispatch_modes |= GEN6_PS_DISPATCH_8;
@@ -401,6 +378,21 @@ ps_get_gen6_ff(const struct ilo_dev *dev,
if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff))
return false;
+ if (info->per_thread_scratch_size) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 271:
+ *
+ * "(Per-Thread Scratch Space)
+ * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
+ */
+ assert(info->per_thread_scratch_size <= 2 * 1024 * 1024);
+
+ /* next power of two, starting from 1KB */
+ ff->per_thread_scratch_space = (info->per_thread_scratch_size > 1024) ?
+ (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
+ ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
+ }
+
ff->sampler_count = (resource->sampler_count <= 12) ?
(resource->sampler_count + 3) / 4 : 4;
ff->surface_count = resource->surface_count;
@@ -441,7 +433,8 @@ ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps,
if (false)
dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
- dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff->per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
@@ -539,7 +532,8 @@ ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps,
if (false)
dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
- dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff->per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT |
ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
@@ -603,7 +597,8 @@ ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps,
if (false)
dw3 |= GEN6_THREADDISP_FP_MODE_ALT;
- dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw4 = ff->per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT |
io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT |
@@ -705,6 +700,7 @@ ilo_state_ps_init(struct ilo_state_ps *ps,
ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff);
}
+ ps->scratch_size = ff.per_thread_scratch_size * ff.thread_count;
/* save conditions */
ps->conds = ff.conds;
diff --git a/src/gallium/drivers/ilo/ilo_blit.h b/src/gallium/drivers/ilo/ilo_blit.h
index da0bfe9c4c9..bad4dab8404 100644
--- a/src/gallium/drivers/ilo/ilo_blit.h
+++ b/src/gallium/drivers/ilo/ilo_blit.h
@@ -58,10 +58,12 @@ ilo_blit_resolve_slices(struct ilo_context *ilo,
* As it is only used to resolve HiZ right now, return early when there is
* no HiZ.
*/
- if (!ilo_image_can_enable_aux(&tex->image, level))
+ if (tex->image.aux.type != ILO_IMAGE_AUX_HIZ ||
+ !ilo_image_can_enable_aux(&tex->image, level))
return;
- if (ilo_image_can_enable_aux(&tex->image, level)) {
+ if (tex->image.aux.type == ILO_IMAGE_AUX_HIZ &&
+ ilo_image_can_enable_aux(&tex->image, level)) {
ilo_blit_resolve_slices_for_hiz(ilo, res, level,
first_slice, num_slices, resolve_flags);
}
diff --git a/src/gallium/drivers/ilo/ilo_draw.c b/src/gallium/drivers/ilo/ilo_draw.c
index 433348d9326..69f36ae5df6 100644
--- a/src/gallium/drivers/ilo/ilo_draw.c
+++ b/src/gallium/drivers/ilo/ilo_draw.c
@@ -547,6 +547,7 @@ static void
ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
struct ilo_context *ilo = ilo_context(pipe);
+ int vs_scratch_size, gs_scratch_size, fs_scratch_size;
if (ilo_debug & ILO_DEBUG_DRAW) {
if (info->indexed) {
@@ -574,8 +575,15 @@ ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
ilo_finalize_3d_states(ilo, info);
+ /* upload kernels */
ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder);
+ /* prepare scratch spaces */
+ ilo_shader_cache_get_max_scratch_sizes(ilo->shader_cache,
+ &vs_scratch_size, &gs_scratch_size, &fs_scratch_size);
+ ilo_render_prepare_scratch_spaces(ilo->render,
+ vs_scratch_size, gs_scratch_size, fs_scratch_size);
+
ilo_blit_resolve_framebuffer(ilo);
/* If draw_vbo ever fails, return immediately. */
diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c
index 21f75de11a0..8bc04df4fab 100644
--- a/src/gallium/drivers/ilo/ilo_render.c
+++ b/src/gallium/drivers/ilo/ilo_render.c
@@ -67,10 +67,49 @@ ilo_render_create(struct ilo_builder *builder)
void
ilo_render_destroy(struct ilo_render *render)
{
+ intel_bo_unref(render->vs_scratch.bo);
+ intel_bo_unref(render->gs_scratch.bo);
+ intel_bo_unref(render->fs_scratch.bo);
+
intel_bo_unref(render->workaround_bo);
FREE(render);
}
+static bool
+resize_scratch_space(struct ilo_render *render,
+ struct ilo_render_scratch_space *scratch,
+ const char *name, int new_size)
+{
+ struct intel_bo *bo;
+
+ if (scratch->size >= new_size)
+ return true;
+
+ bo = intel_winsys_alloc_bo(render->builder->winsys, name, new_size, false);
+ if (!bo)
+ return false;
+
+ intel_bo_unref(scratch->bo);
+ scratch->bo = bo;
+ scratch->size = new_size;
+
+ return true;
+}
+
+bool
+ilo_render_prepare_scratch_spaces(struct ilo_render *render,
+ int vs_scratch_size,
+ int gs_scratch_size,
+ int fs_scratch_size)
+{
+ return (resize_scratch_space(render, &render->vs_scratch,
+ "vs scratch", vs_scratch_size) &&
+ resize_scratch_space(render, &render->gs_scratch,
+ "gs scratch", gs_scratch_size) &&
+ resize_scratch_space(render, &render->fs_scratch,
+ "fs scratch", fs_scratch_size));
+}
+
void
ilo_render_get_sample_position(const struct ilo_render *render,
unsigned sample_count,
diff --git a/src/gallium/drivers/ilo/ilo_render.h b/src/gallium/drivers/ilo/ilo_render.h
index 098af73ec9b..31fd1e6f859 100644
--- a/src/gallium/drivers/ilo/ilo_render.h
+++ b/src/gallium/drivers/ilo/ilo_render.h
@@ -43,6 +43,12 @@ ilo_render_create(struct ilo_builder *builder);
void
ilo_render_destroy(struct ilo_render *render);
+bool
+ilo_render_prepare_scratch_spaces(struct ilo_render *render,
+ int vs_scratch_size,
+ int gs_scratch_size,
+ int fs_scratch_size);
+
void
ilo_render_get_sample_position(const struct ilo_render *render,
unsigned sample_count,
diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h
index 6b133750043..f227d6bf4da 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen.h
+++ b/src/gallium/drivers/ilo/ilo_render_gen.h
@@ -51,6 +51,11 @@ struct ilo_render {
struct intel_bo *workaround_bo;
+ struct ilo_render_scratch_space {
+ struct intel_bo *bo;
+ int size;
+ } vs_scratch, gs_scratch, fs_scratch;
+
struct ilo_state_sample_pattern sample_pattern;
bool hw_ctx_changed;
diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c
index c1f759f3043..910e6c0fb7a 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen6.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen6.c
@@ -475,10 +475,13 @@ gen6_draw_vs(struct ilo_render *r,
gen6_wa_pre_3dstate_vs_toggle(r);
if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
- ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO))
- gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset);
- else
- gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+ ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
+ gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs,
+ kernel_offset, r->vs_scratch.bo);
+ } else {
+ gen6_3DSTATE_VS(r->builder, &cso->vs,
+ kernel_offset, r->vs_scratch.bo);
+ }
}
}
@@ -501,7 +504,8 @@ gen6_draw_gs(struct ilo_render *r,
cso = ilo_shader_get_kernel_cso(vec->gs);
kernel_offset = ilo_shader_get_kernel_offset(vec->gs);
- gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset);
+ gen6_3DSTATE_GS(r->builder, &cso->gs,
+ kernel_offset, r->gs_scratch.bo);
} else if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
const int verts_per_prim =
@@ -524,9 +528,10 @@ gen6_draw_gs(struct ilo_render *r,
kernel_offset = ilo_shader_get_kernel_offset(vec->vs) +
ilo_shader_get_kernel_param(vec->vs, param);
- gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset);
+ gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol,
+ kernel_offset, r->gs_scratch.bo);
} else {
- gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0);
+ gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0, NULL);
}
}
}
@@ -672,7 +677,7 @@ gen6_draw_wm(struct ilo_render *r,
gen6_wa_pre_3dstate_wm_max_threads(r);
gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs,
- &cso->ps, kernel_offset);
+ &cso->ps, kernel_offset, r->fs_scratch.bo);
}
}
@@ -817,10 +822,10 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r,
gen6_wa_post_3dstate_constant_vs(r);
gen6_wa_pre_3dstate_vs_toggle(r);
- gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
+ gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
- gen6_3DSTATE_GS(r->builder, &blitter->gs, 0);
+ gen6_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe);
@@ -833,7 +838,7 @@ gen6_rectlist_wm(struct ilo_render *r,
gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
gen6_wa_pre_3dstate_wm_max_threads(r);
- gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0);
+ gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0, NULL);
}
static void
diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c
index 6623a8bcb43..330ba6c88d6 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen7.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen7.c
@@ -318,10 +318,13 @@ gen7_draw_vs(struct ilo_render *r,
const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->vs);
const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs);
- if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
- else
- gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+ if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) {
+ gen8_3DSTATE_VS(r->builder, &cso->vs,
+ kernel_offset, r->vs_scratch.bo);
+ } else {
+ gen6_3DSTATE_VS(r->builder, &cso->vs,
+ kernel_offset, r->vs_scratch.bo);
+ }
}
}
@@ -338,9 +341,9 @@ gen7_draw_hs(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_HS(r->builder, hs, kernel_offset);
+ gen8_3DSTATE_HS(r->builder, hs, kernel_offset, NULL);
else
- gen7_3DSTATE_HS(r->builder, hs, kernel_offset);
+ gen7_3DSTATE_HS(r->builder, hs, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_HS */
@@ -373,9 +376,9 @@ gen7_draw_ds(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_DS(r->builder, ds, kernel_offset);
+ gen8_3DSTATE_DS(r->builder, ds, kernel_offset, NULL);
else
- gen7_3DSTATE_DS(r->builder, ds, kernel_offset);
+ gen7_3DSTATE_DS(r->builder, ds, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_DS */
@@ -397,9 +400,9 @@ gen7_draw_gs(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_GS(r->builder, gs, kernel_offset);
+ gen8_3DSTATE_GS(r->builder, gs, kernel_offset, NULL);
else
- gen7_3DSTATE_GS(r->builder, gs, kernel_offset);
+ gen7_3DSTATE_GS(r->builder, gs, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_GS */
@@ -534,7 +537,7 @@ gen7_draw_wm(struct ilo_render *r,
if (r->hw_ctx_changed)
gen7_wa_pre_3dstate_ps_max_threads(r);
- gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
+ gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, r->fs_scratch.bo);
}
/* 3DSTATE_SCISSOR_STATE_POINTERS */
@@ -678,18 +681,18 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
- gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
+ gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0);
- gen7_3DSTATE_HS(r->builder, &blitter->hs, 0);
+ gen7_3DSTATE_HS(r->builder, &blitter->hs, 0, NULL);
gen7_3DSTATE_TE(r->builder, &blitter->ds);
gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0);
- gen7_3DSTATE_DS(r->builder, &blitter->ds, 0);
+ gen7_3DSTATE_DS(r->builder, &blitter->ds, 0, NULL);
gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
- gen7_3DSTATE_GS(r->builder, &blitter->gs, 0);
+ gen7_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol);
@@ -711,7 +714,7 @@ gen7_rectlist_wm(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
gen7_wa_pre_3dstate_ps_max_threads(r);
- gen7_3DSTATE_PS(r->builder, &blitter->ps, 0);
+ gen7_3DSTATE_PS(r->builder, &blitter->ps, 0, NULL);
}
static void
diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c
index 65494b4058a..efe0e0d501b 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen8.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen8.c
@@ -125,7 +125,7 @@ gen8_draw_wm(struct ilo_render *r,
/* 3DSTATE_PS */
if (DIRTY(FS) || r->instruction_bo_changed)
- gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
+ gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, r->fs_scratch.bo);
/* 3DSTATE_PS_EXTRA */
if (DIRTY(FS))
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index e1a7dc56685..888f7aa6782 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -474,6 +474,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c
index 73b625e9de4..c61716dc791 100644
--- a/src/gallium/drivers/ilo/ilo_shader.c
+++ b/src/gallium/drivers/ilo/ilo_shader.c
@@ -37,6 +37,10 @@
struct ilo_shader_cache {
struct list_head shaders;
struct list_head changed;
+
+ int max_vs_scratch_size;
+ int max_gs_scratch_size;
+ int max_fs_scratch_size;
};
/**
@@ -121,6 +125,8 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc,
struct ilo_shader *sh;
LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) {
+ int scratch_size, *cur_max;
+
if (sh->uploaded)
continue;
@@ -128,6 +134,29 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc,
sh->kernel_size, sh->kernel);
sh->uploaded = true;
+
+ switch (shader->info.type) {
+ case PIPE_SHADER_VERTEX:
+ scratch_size = ilo_state_vs_get_scratch_size(&sh->cso.vs);
+ cur_max = &shc->max_vs_scratch_size;
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ scratch_size = ilo_state_gs_get_scratch_size(&sh->cso.gs);
+ cur_max = &shc->max_gs_scratch_size;
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ scratch_size = ilo_state_ps_get_scratch_size(&sh->cso.ps);
+ cur_max = &shc->max_fs_scratch_size;
+ break;
+ default:
+ assert(!"unknown shader type");
+ scratch_size = 0;
+ cur_max = &shc->max_vs_scratch_size;
+ break;
+ }
+
+ if (*cur_max < scratch_size)
+ *cur_max = scratch_size;
}
list_del(&shader->list);
@@ -155,6 +184,21 @@ ilo_shader_cache_invalidate(struct ilo_shader_cache *shc)
LIST_FOR_EACH_ENTRY(sh, &shader->variants, list)
sh->uploaded = false;
}
+
+ shc->max_vs_scratch_size = 0;
+ shc->max_gs_scratch_size = 0;
+ shc->max_fs_scratch_size = 0;
+}
+
+void
+ilo_shader_cache_get_max_scratch_sizes(const struct ilo_shader_cache *shc,
+ int *vs_scratch_size,
+ int *gs_scratch_size,
+ int *fs_scratch_size)
+{
+ *vs_scratch_size = shc->max_vs_scratch_size;
+ *gs_scratch_size = shc->max_gs_scratch_size;
+ *fs_scratch_size = shc->max_fs_scratch_size;
}
/**
@@ -578,7 +622,6 @@ init_shader_kernel(const struct ilo_shader *kernel,
kern->grf_start = kernel->in.start_grf;
kern->pcb_attr_count =
(kernel->pcb.cbuf0_size + kernel->pcb.clip_state_size + 15) / 16;
- kern->scratch_size = 0;
}
static void
@@ -602,6 +645,7 @@ init_vs(struct ilo_shader *kernel,
init_shader_urb(kernel, state, &info.urb);
init_shader_kernel(kernel, state, &info.kernel);
init_shader_resource(kernel, state, &info.resource);
+ info.per_thread_scratch_size = kernel->per_thread_scratch_size;
info.dispatch_enable = true;
info.stats_enable = true;
@@ -640,6 +684,7 @@ init_gs(struct ilo_shader *kernel,
init_shader_urb(kernel, state, &info.urb);
init_shader_kernel(kernel, state, &info.kernel);
init_shader_resource(kernel, state, &info.resource);
+ info.per_thread_scratch_size = kernel->per_thread_scratch_size;
info.dispatch_enable = true;
info.stats_enable = true;
@@ -664,6 +709,7 @@ init_ps(struct ilo_shader *kernel,
init_shader_kernel(kernel, state, &info.kernel_8);
init_shader_resource(kernel, state, &info.resource);
+ info.per_thread_scratch_size = kernel->per_thread_scratch_size;
info.io.has_rt_write = true;
info.io.posoffset = GEN6_POSOFFSET_NONE;
info.io.attr_count = kernel->in.count;
diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h
index 01de54146b1..10dcf739430 100644
--- a/src/gallium/drivers/ilo/ilo_shader.h
+++ b/src/gallium/drivers/ilo/ilo_shader.h
@@ -120,6 +120,12 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc,
void
ilo_shader_cache_invalidate(struct ilo_shader_cache *shc);
+void
+ilo_shader_cache_get_max_scratch_sizes(const struct ilo_shader_cache *shc,
+ int *vs_scratch_size,
+ int *gs_scratch_size,
+ int *fs_scratch_size);
+
struct ilo_shader_state *
ilo_shader_create_vs(const struct ilo_dev *dev,
const struct pipe_shader_state *state,
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
index 01c86675202..1f0cda174e8 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
@@ -139,6 +139,7 @@ struct ilo_shader {
void *kernel;
int kernel_size;
+ int per_thread_scratch_size;
struct ilo_kernel_routing routing;
struct ilo_state_ps_params_info ps_params;
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index e2ed267da78..d1c50aefc84 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -299,6 +299,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 4c8167a9e7d..1778b13f9dd 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -854,10 +854,10 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
jit_tex->img_stride[j] = lp_tex->img_stride[j];
}
- if (view->target == PIPE_TEXTURE_1D_ARRAY ||
- view->target == PIPE_TEXTURE_2D_ARRAY ||
- view->target == PIPE_TEXTURE_CUBE ||
- view->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ if (res->target == PIPE_TEXTURE_1D_ARRAY ||
+ res->target == PIPE_TEXTURE_2D_ARRAY ||
+ res->target == PIPE_TEXTURE_CUBE ||
+ res->target == PIPE_TEXTURE_CUBE_ARRAY) {
/*
* For array textures, we don't have first_layer, instead
* adjust last_layer (stored as depth) plus the mip level offsets
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index b205f02fdba..1e055878f7c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -275,10 +275,10 @@ prepare_shader_sampling(
row_stride[j] = lp_tex->row_stride[j];
img_stride[j] = lp_tex->img_stride[j];
}
- if (view->target == PIPE_TEXTURE_1D_ARRAY ||
- view->target == PIPE_TEXTURE_2D_ARRAY ||
- view->target == PIPE_TEXTURE_CUBE ||
- view->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ if (tex->target == PIPE_TEXTURE_1D_ARRAY ||
+ tex->target == PIPE_TEXTURE_2D_ARRAY ||
+ tex->target == PIPE_TEXTURE_CUBE ||
+ tex->target == PIPE_TEXTURE_CUBE_ARRAY) {
num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1;
for (j = first_level; j <= last_level; j++) {
mip_offsets[j] += view->u.tex.first_layer *
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index af46342fdf2..7862ac8f217 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -200,7 +200,8 @@ llvmpipe_can_create_resource(struct pipe_screen *screen,
static boolean
llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
- struct llvmpipe_resource *lpr)
+ struct llvmpipe_resource *lpr,
+ const void *map_front_private)
{
struct sw_winsys *winsys = screen->winsys;
@@ -215,12 +216,13 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
lpr->base.format,
width, height,
64,
+ map_front_private,
&lpr->row_stride[0] );
if (lpr->dt == NULL)
return FALSE;
- {
+ if (!map_front_private) {
void *map = winsys->displaytarget_map(winsys, lpr->dt,
PIPE_TRANSFER_WRITE);
@@ -235,8 +237,9 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
static struct pipe_resource *
-llvmpipe_resource_create(struct pipe_screen *_screen,
- const struct pipe_resource *templat)
+llvmpipe_resource_create_front(struct pipe_screen *_screen,
+ const struct pipe_resource *templat,
+ const void *map_front_private)
{
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource);
@@ -254,7 +257,7 @@ llvmpipe_resource_create(struct pipe_screen *_screen,
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED)) {
/* displayable surface */
- if (!llvmpipe_displaytarget_layout(screen, lpr))
+ if (!llvmpipe_displaytarget_layout(screen, lpr, map_front_private))
goto fail;
}
else {
@@ -300,7 +303,12 @@ llvmpipe_resource_create(struct pipe_screen *_screen,
FREE(lpr);
return NULL;
}
-
+static struct pipe_resource *
+llvmpipe_resource_create(struct pipe_screen *_screen,
+ const struct pipe_resource *templat)
+{
+ return llvmpipe_resource_create_front(_screen, templat, NULL);
+}
static void
llvmpipe_resource_destroy(struct pipe_screen *pscreen,
@@ -797,6 +805,7 @@ llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen)
#endif
screen->resource_create = llvmpipe_resource_create;
+ screen->resource_create_front = llvmpipe_resource_create_front;
screen->resource_destroy = llvmpipe_resource_destroy;
screen->resource_from_handle = llvmpipe_resource_from_handle;
screen->resource_get_handle = llvmpipe_resource_get_handle;
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index c18e9f5b435..83f81135590 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -73,6 +73,9 @@ NV50_C_SOURCES := \
nv50/nv50_program.h \
nv50/nv50_push.c \
nv50/nv50_query.c \
+ nv50/nv50_query.h \
+ nv50/nv50_query_hw.c \
+ nv50/nv50_query_hw.h \
nv50/nv50_resource.c \
nv50/nv50_resource.h \
nv50/nv50_screen.c \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index cce60550ae5..6ad9dd31681 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1128,7 +1128,6 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
info->prop.gp.instanceCount = 1;
info->prop.gp.maxVertices = 1;
}
- info->io.clipDistance = 0xff;
info->io.pointSize = 0xff;
info->io.instanceId = 0xff;
info->io.vertexId = 0xff;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
index a610c773f55..0d544581697 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
@@ -73,8 +73,8 @@ public:
Instruction *mkCvt(operation, DataType, Value *, DataType, Value *);
CmpInstruction *mkCmp(operation, CondCode, DataType,
- Value *,
- DataType, Value *, Value *, Value * = NULL);
+ Value *,
+ DataType, Value *, Value *, Value * = NULL);
TexInstruction *mkTex(operation, TexTarget,
uint16_t tic, uint16_t tsc,
const std::vector<Value *> &def,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 2b9edcf9172..c0cab3299b5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -99,6 +99,7 @@ struct nv50_ir_prog_info
uint8_t sourceRep; /* NV50_PROGRAM_IR */
const void *source;
void *relocData;
+ void *interpData;
struct nv50_ir_prog_symbol *syms;
uint16_t numSyms;
} bin;
@@ -143,6 +144,7 @@ struct nv50_ir_prog_info
bool earlyFragTests;
bool separateFragData;
bool usesDiscard;
+ bool sampleInterp; /* perform sample interp on all fp inputs */
} fp;
struct {
uint32_t inputOffset; /* base address for user args */
@@ -154,9 +156,8 @@ struct nv50_ir_prog_info
uint8_t numBarriers;
struct {
- uint8_t clipDistance; /* index of first clip distance output */
- uint8_t clipDistanceMask; /* mask of clip distances defined */
- uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */
+ uint8_t clipDistances; /* number of clip distance outputs */
+ uint8_t cullDistances; /* number of cull distance outputs */
int8_t genUserClip; /* request user clip planes for ClipVertex */
uint16_t ucpBase; /* base address for UCPs */
uint8_t ucpCBSlot; /* constant buffer index of UCP data */
@@ -168,7 +169,6 @@ struct nv50_ir_prog_info
int8_t viewportId; /* output index of ViewportIndex */
uint8_t fragDepth; /* output index of FragDepth */
uint8_t sampleMask; /* output index of SampleMask */
- bool sampleInterp; /* perform sample interp on all fp inputs */
uint8_t backFaceColor[2]; /* input/output indices of back face colour */
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
bool fp64; /* program uses fp64 math */
@@ -198,6 +198,10 @@ extern void nv50_ir_relocate_code(void *relocData, uint32_t *code,
uint32_t libPos,
uint32_t dataPos);
+extern void
+nv50_ir_change_interp(void *interpData, uint32_t *code,
+ bool force_per_sample, bool flatshade);
+
/* obtain code that will be shared among programs */
extern void nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 8f1542959c9..d712c9c300a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1437,6 +1437,30 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i)
code[1] |= (i->ipa & 0xc) << (19 - 2);
}
+static void
+interpApply(const InterpEntry *entry, uint32_t *code,
+ bool force_persample_interp, bool flatshade)
+{
+ int ipa = entry->ipa;
+ int reg = entry->reg;
+ int loc = entry->loc;
+
+ if (flatshade &&
+ (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
+ ipa = NV50_IR_INTERP_FLAT;
+ reg = 0xff;
+ } else if (force_persample_interp &&
+ (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
+ (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
+ ipa |= NV50_IR_INTERP_CENTROID;
+ }
+ code[loc + 1] &= ~(0xf << 19);
+ code[loc + 1] |= (ipa & 0x3) << 21;
+ code[loc + 1] |= (ipa & 0xc) << (19 - 2);
+ code[loc + 0] &= ~(0xff << 23);
+ code[loc + 0] |= reg << 23;
+}
+
void
CodeEmitterGK110::emitINTERP(const Instruction *i)
{
@@ -1448,10 +1472,13 @@ CodeEmitterGK110::emitINTERP(const Instruction *i)
if (i->saturate)
code[1] |= 1 << 18;
- if (i->op == OP_PINTERP)
+ if (i->op == OP_PINTERP) {
srcId(i->src(1), 23);
- else
+ addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
+ } else {
code[0] |= 0xff << 23;
+ addInterp(i->ipa, 0xff, interpApply);
+ }
srcId(i->src(0).getIndirect(0), 10);
emitInterpMode(i);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 6e22788341f..a327d572470 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -2217,6 +2217,30 @@ CodeEmitterGM107::emitAL2P()
emitGPR (0x00, insn->def(0));
}
+static void
+interpApply(const InterpEntry *entry, uint32_t *code,
+ bool force_persample_interp, bool flatshade)
+{
+ int ipa = entry->ipa;
+ int reg = entry->reg;
+ int loc = entry->loc;
+
+ if (flatshade &&
+ (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
+ ipa = NV50_IR_INTERP_FLAT;
+ reg = 0xff;
+ } else if (force_persample_interp &&
+ (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
+ (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
+ ipa |= NV50_IR_INTERP_CENTROID;
+ }
+ code[loc + 1] &= ~(0xf << 0x14);
+ code[loc + 1] |= (ipa & 0x3) << 0x16;
+ code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
+ code[loc + 0] &= ~(0xff << 0x14);
+ code[loc + 0] |= reg << 0x14;
+}
+
void
CodeEmitterGM107::emitIPA()
{
@@ -2255,10 +2279,12 @@ CodeEmitterGM107::emitIPA()
emitGPR(0x14, insn->src(1));
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
emitGPR(0x27, insn->src(2));
+ addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
} else {
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
emitGPR(0x27, insn->src(1));
emitGPR(0x14);
+ addInterp(insn->ipa, 0xff, interpApply);
}
if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 90147668c91..9f1e4b803d5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -372,7 +372,7 @@ CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
mode |= 3 << (s * 2);
break;
default:
- ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
+ ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
assert(0);
break;
}
@@ -876,6 +876,30 @@ CodeEmitterNV50::emitPFETCH(const Instruction *i)
emitFlagsRd(i);
}
+static void
+interpApply(const InterpEntry *entry, uint32_t *code,
+ bool force_persample_interp, bool flatshade)
+{
+ int ipa = entry->ipa;
+ int encSize = entry->reg;
+ int loc = entry->loc;
+
+ if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
+ (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
+ if (force_persample_interp) {
+ if (encSize == 8)
+ code[loc + 1] |= 1 << 16;
+ else
+ code[loc + 0] |= 1 << 24;
+ } else {
+ if (encSize == 8)
+ code[loc + 1] &= ~(1 << 16);
+ else
+ code[loc + 0] &= ~(1 << 24);
+ }
+ }
+}
+
void
CodeEmitterNV50::emitINTERP(const Instruction *i)
{
@@ -904,6 +928,8 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
code[0] |= 1;
emitFlagsRd(i);
}
+
+ addInterp(i->ipa, i->encSize, interpApply);
}
void
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 6bf5219d346..fd103146c72 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1618,6 +1618,29 @@ CodeEmitterNVC0::emitInterpMode(const Instruction *i)
}
}
+static void
+interpApply(const InterpEntry *entry, uint32_t *code,
+ bool force_persample_interp, bool flatshade)
+{
+ int ipa = entry->ipa;
+ int reg = entry->reg;
+ int loc = entry->loc;
+
+ if (flatshade &&
+ (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
+ ipa = NV50_IR_INTERP_FLAT;
+ reg = 0x3f;
+ } else if (force_persample_interp &&
+ (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
+ (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
+ ipa |= NV50_IR_INTERP_CENTROID;
+ }
+ code[loc + 0] &= ~(0xf << 6);
+ code[loc + 0] |= ipa << 6;
+ code[loc + 0] &= ~(0x3f << 26);
+ code[loc + 0] |= reg << 26;
+}
+
void
CodeEmitterNVC0::emitINTERP(const Instruction *i)
{
@@ -1630,10 +1653,13 @@ CodeEmitterNVC0::emitINTERP(const Instruction *i)
if (i->saturate)
code[0] |= 1 << 5;
- if (i->op == OP_PINTERP)
+ if (i->op == OP_PINTERP) {
srcId(i->src(1), 26);
- else
+ addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
+ } else {
code[0] |= 0x3f << 26;
+ addInterp(i->ipa, 0x3f, interpApply);
+ }
srcId(i->src(0).getIndirect(0), 20);
} else {
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index c8efaf5947a..6a7cb4224f4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -910,7 +910,7 @@ bool Source::scanSource()
info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
if (info->io.genUserClip > 0) {
- info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1;
+ info->io.clipDistances = info->io.genUserClip;
const unsigned int nOut = (info->io.genUserClip + 3) / 4;
@@ -919,7 +919,7 @@ bool Source::scanSource()
info->out[i].id = i;
info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
info->out[i].si = n;
- info->out[i].mask = info->io.clipDistanceMask >> (n * 4);
+ info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
}
}
@@ -969,6 +969,12 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
else
info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
break;
+ case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
+ info->io.clipDistances = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
+ info->io.cullDistances = prop->u[0].Data;
+ break;
default:
INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
break;
@@ -1054,7 +1060,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
default:
break;
}
- if (decl->Interp.Location || info->io.sampleInterp)
+ if (decl->Interp.Location)
info->in[i].centroid = 1;
}
@@ -1086,8 +1092,6 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
clipVertexOutput = i;
break;
case TGSI_SEMANTIC_CLIPDIST:
- info->io.clipDistanceMask |=
- decl->Declaration.UsageMask << (si * 4);
info->io.genUserClip = -1;
break;
case TGSI_SEMANTIC_SAMPLEMASK:
@@ -1119,6 +1123,10 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_SEMANTIC_VERTEXID:
info->io.vertexId = first;
break;
+ case TGSI_SEMANTIC_SAMPLEID:
+ case TGSI_SEMANTIC_SAMPLEPOS:
+ info->prop.fp.sampleInterp = 1;
+ break;
default:
break;
}
@@ -1338,6 +1346,8 @@ private:
void handleINTERP(Value *dst0[4]);
+ uint8_t translateInterpMode(const struct nv50_ir_varying *var,
+ operation& op);
Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
@@ -1451,8 +1461,8 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
return sym;
}
-static inline uint8_t
-translateInterpMode(const struct nv50_ir_varying *var, operation& op)
+uint8_t
+Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
{
uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
@@ -1468,7 +1478,7 @@ translateInterpMode(const struct nv50_ir_varying *var, operation& op)
op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
? OP_PINTERP : OP_LINTERP;
- if (var->centroid)
+ if (var->centroid || info->prop.fp.sampleInterp)
mode |= NV50_IR_INTERP_CENTROID;
return mode;
@@ -1628,7 +1638,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
// don't load masked inputs, won't be assigned a slot
if (!ptr && !(info->in[idx].mask & (1 << swz)))
return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
- if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
+ if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
return interpolate(src, c, shiftAddress(ptr));
} else
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index fe530c76b62..afc8ff1374f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -166,7 +166,7 @@ void Target::destroy(Target *targ)
delete targ;
}
-CodeEmitter::CodeEmitter(const Target *target) : targ(target)
+CodeEmitter::CodeEmitter(const Target *target) : targ(target), interpInfo(NULL)
{
}
@@ -388,6 +388,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
}
}
info->bin.relocData = emit->getRelocInfo();
+ info->bin.interpData = emit->getInterpInfo();
emitSymbolTable(info);
@@ -428,6 +429,29 @@ CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
return true;
}
+bool
+CodeEmitter::addInterp(int ipa, int reg, InterpApply apply)
+{
+ unsigned int n = interpInfo ? interpInfo->count : 0;
+
+ if (!(n % RELOC_ALLOC_INCREMENT)) {
+ size_t size = sizeof(InterpInfo) + n * sizeof(InterpEntry);
+ interpInfo = reinterpret_cast<InterpInfo *>(
+ REALLOC(interpInfo, n ? size : 0,
+ size + RELOC_ALLOC_INCREMENT * sizeof(InterpEntry)));
+ if (!interpInfo)
+ return false;
+ if (n == 0)
+ memset(interpInfo, 0, sizeof(InterpInfo));
+ }
+ ++interpInfo->count;
+
+ interpInfo->entry[n] = InterpEntry(ipa, reg, codeSize >> 2);
+ interpInfo->apply = apply;
+
+ return true;
+}
+
void
RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
{
@@ -472,6 +496,19 @@ nv50_ir_relocate_code(void *relocData, uint32_t *code,
}
void
+nv50_ir_change_interp(void *interpData, uint32_t *code,
+ bool force_persample_interp, bool flatshade)
+{
+ nv50_ir::InterpInfo *info = reinterpret_cast<nv50_ir::InterpInfo *>(
+ interpData);
+
+ // force_persample_interp: all non-flat -> per-sample
+ // flatshade: all color -> flat
+ for (unsigned i = 0; i < info->count; ++i)
+ info->apply(&info->entry[i], code, force_persample_interp, flatshade);
+}
+
+void
nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size)
{
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
index 591916eb412..4e33997e1c1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
@@ -58,6 +58,23 @@ struct RelocInfo
RelocEntry entry[0];
};
+struct InterpEntry
+{
+ InterpEntry(int ipa, int reg, int loc) : ipa(ipa), reg(reg), loc(loc) {}
+ uint32_t ipa:4; // SC mode used to identify colors
+ uint32_t reg:8; // The reg used for perspective division
+ uint32_t loc:20; // Let's hope we don't have more than 1M-sized shaders
+};
+
+typedef void (*InterpApply)(const InterpEntry*, uint32_t*, bool, bool);
+
+struct InterpInfo
+{
+ uint32_t count;
+ InterpApply apply;
+ InterpEntry entry[0];
+};
+
class CodeEmitter
{
public:
@@ -78,6 +95,9 @@ public:
inline void *getRelocInfo() const { return relocInfo; }
+ bool addInterp(int ipa, int reg, InterpApply apply);
+ inline void *getInterpInfo() const { return interpInfo; }
+
virtual void prepareEmission(Program *);
virtual void prepareEmission(Function *);
virtual void prepareEmission(BasicBlock *);
@@ -92,6 +112,7 @@ protected:
uint32_t codeSizeLimit;
RelocInfo *relocInfo;
+ InterpInfo *interpInfo;
};
diff --git a/src/gallium/drivers/nouveau/nouveau_heap.c b/src/gallium/drivers/nouveau/nouveau_heap.c
index f4aa5081dfe..3d415a5f30e 100644
--- a/src/gallium/drivers/nouveau/nouveau_heap.c
+++ b/src/gallium/drivers/nouveau/nouveau_heap.c
@@ -29,95 +29,95 @@ int
nouveau_heap_init(struct nouveau_heap **heap,
unsigned start, unsigned size)
{
- struct nouveau_heap *r;
+ struct nouveau_heap *r;
- r = calloc(1, sizeof(struct nouveau_heap));
- if (!r)
- return 1;
+ r = calloc(1, sizeof(struct nouveau_heap));
+ if (!r)
+ return 1;
- r->start = start;
- r->size = size;
- *heap = r;
- return 0;
+ r->start = start;
+ r->size = size;
+ *heap = r;
+ return 0;
}
void
nouveau_heap_destroy(struct nouveau_heap **heap)
{
- if (!*heap)
- return;
- free(*heap);
- *heap = NULL;
+ if (!*heap)
+ return;
+ free(*heap);
+ *heap = NULL;
}
int
nouveau_heap_alloc(struct nouveau_heap *heap, unsigned size, void *priv,
struct nouveau_heap **res)
{
- struct nouveau_heap *r;
+ struct nouveau_heap *r;
- if (!heap || !size || !res || *res)
- return 1;
+ if (!heap || !size || !res || *res)
+ return 1;
- while (heap) {
- if (!heap->in_use && heap->size >= size) {
- r = calloc(1, sizeof(struct nouveau_heap));
- if (!r)
- return 1;
+ while (heap) {
+ if (!heap->in_use && heap->size >= size) {
+ r = calloc(1, sizeof(struct nouveau_heap));
+ if (!r)
+ return 1;
- r->start = (heap->start + heap->size) - size;
- r->size = size;
- r->in_use = 1;
- r->priv = priv;
+ r->start = (heap->start + heap->size) - size;
+ r->size = size;
+ r->in_use = 1;
+ r->priv = priv;
- heap->size -= size;
+ heap->size -= size;
- r->next = heap->next;
- if (heap->next)
- heap->next->prev = r;
- r->prev = heap;
- heap->next = r;
+ r->next = heap->next;
+ if (heap->next)
+ heap->next->prev = r;
+ r->prev = heap;
+ heap->next = r;
- *res = r;
- return 0;
- }
+ *res = r;
+ return 0;
+ }
- heap = heap->next;
- }
+ heap = heap->next;
+ }
- return 1;
+ return 1;
}
void
nouveau_heap_free(struct nouveau_heap **res)
{
- struct nouveau_heap *r;
-
- if (!res || !*res)
- return;
- r = *res;
- *res = NULL;
-
- r->in_use = 0;
-
- if (r->next && !r->next->in_use) {
- struct nouveau_heap *new = r->next;
-
- new->prev = r->prev;
- if (r->prev)
- r->prev->next = new;
- new->size += r->size;
- new->start = r->start;
-
- free(r);
- r = new;
- }
-
- if (r->prev && !r->prev->in_use) {
- r->prev->next = r->next;
- if (r->next)
- r->next->prev = r->prev;
- r->prev->size += r->size;
- free(r);
- }
+ struct nouveau_heap *r;
+
+ if (!res || !*res)
+ return;
+ r = *res;
+ *res = NULL;
+
+ r->in_use = 0;
+
+ if (r->next && !r->next->in_use) {
+ struct nouveau_heap *new = r->next;
+
+ new->prev = r->prev;
+ if (r->prev)
+ r->prev->next = new;
+ new->size += r->size;
+ new->start = r->start;
+
+ free(r);
+ r = new;
+ }
+
+ if (r->prev && !r->prev->in_use) {
+ r->prev->next = r->next;
+ if (r->next)
+ r->next->prev = r->prev;
+ r->prev->size += r->size;
+ free(r);
+ }
}
diff --git a/src/gallium/drivers/nouveau/nouveau_heap.h b/src/gallium/drivers/nouveau/nouveau_heap.h
index a3d64a65623..99f610ed4c8 100644
--- a/src/gallium/drivers/nouveau/nouveau_heap.h
+++ b/src/gallium/drivers/nouveau/nouveau_heap.h
@@ -44,15 +44,15 @@
* full size of the heap.
*/
struct nouveau_heap {
- struct nouveau_heap *prev;
- struct nouveau_heap *next;
+ struct nouveau_heap *prev;
+ struct nouveau_heap *next;
- void *priv;
+ void *priv;
- unsigned start;
- unsigned size;
+ unsigned start;
+ unsigned size;
- int in_use;
+ int in_use;
};
int
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index b2290e7e784..47603b0b7fd 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -30,211 +30,211 @@ int nouveau_mesa_debug = 0;
static const char *
nouveau_screen_get_name(struct pipe_screen *pscreen)
{
- struct nouveau_device *dev = nouveau_screen(pscreen)->device;
- static char buffer[128];
+ struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+ static char buffer[128];
- util_snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
- return buffer;
+ util_snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+ return buffer;
}
static const char *
nouveau_screen_get_vendor(struct pipe_screen *pscreen)
{
- return "nouveau";
+ return "nouveau";
}
static const char *
nouveau_screen_get_device_vendor(struct pipe_screen *pscreen)
{
- return "NVIDIA";
+ return "NVIDIA";
}
static uint64_t
nouveau_screen_get_timestamp(struct pipe_screen *pscreen)
{
- int64_t cpu_time = os_time_get() * 1000;
+ int64_t cpu_time = os_time_get() * 1000;
- /* getparam of PTIMER_TIME takes about x10 as long (several usecs) */
+ /* getparam of PTIMER_TIME takes about x10 as long (several usecs) */
- return cpu_time + nouveau_screen(pscreen)->cpu_gpu_time_delta;
+ return cpu_time + nouveau_screen(pscreen)->cpu_gpu_time_delta;
}
static void
nouveau_screen_fence_ref(struct pipe_screen *pscreen,
- struct pipe_fence_handle **ptr,
- struct pipe_fence_handle *pfence)
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *pfence)
{
- nouveau_fence_ref(nouveau_fence(pfence), (struct nouveau_fence **)ptr);
+ nouveau_fence_ref(nouveau_fence(pfence), (struct nouveau_fence **)ptr);
}
static boolean
nouveau_screen_fence_finish(struct pipe_screen *screen,
- struct pipe_fence_handle *pfence,
+ struct pipe_fence_handle *pfence,
uint64_t timeout)
{
- if (!timeout)
- return nouveau_fence_signalled(nouveau_fence(pfence));
+ if (!timeout)
+ return nouveau_fence_signalled(nouveau_fence(pfence));
- return nouveau_fence_wait(nouveau_fence(pfence));
+ return nouveau_fence_wait(nouveau_fence(pfence));
}
struct nouveau_bo *
nouveau_screen_bo_from_handle(struct pipe_screen *pscreen,
- struct winsys_handle *whandle,
- unsigned *out_stride)
+ struct winsys_handle *whandle,
+ unsigned *out_stride)
{
- struct nouveau_device *dev = nouveau_screen(pscreen)->device;
- struct nouveau_bo *bo = 0;
- int ret;
-
- if (whandle->type != DRM_API_HANDLE_TYPE_SHARED &&
- whandle->type != DRM_API_HANDLE_TYPE_FD) {
- debug_printf("%s: attempt to import unsupported handle type %d\n",
- __FUNCTION__, whandle->type);
- return NULL;
- }
-
- if (whandle->type == DRM_API_HANDLE_TYPE_SHARED)
- ret = nouveau_bo_name_ref(dev, whandle->handle, &bo);
- else
- ret = nouveau_bo_prime_handle_ref(dev, whandle->handle, &bo);
-
- if (ret) {
- debug_printf("%s: ref name 0x%08x failed with %d\n",
- __FUNCTION__, whandle->handle, ret);
- return NULL;
- }
-
- *out_stride = whandle->stride;
- return bo;
+ struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+ struct nouveau_bo *bo = 0;
+ int ret;
+
+ if (whandle->type != DRM_API_HANDLE_TYPE_SHARED &&
+ whandle->type != DRM_API_HANDLE_TYPE_FD) {
+ debug_printf("%s: attempt to import unsupported handle type %d\n",
+ __FUNCTION__, whandle->type);
+ return NULL;
+ }
+
+ if (whandle->type == DRM_API_HANDLE_TYPE_SHARED)
+ ret = nouveau_bo_name_ref(dev, whandle->handle, &bo);
+ else
+ ret = nouveau_bo_prime_handle_ref(dev, whandle->handle, &bo);
+
+ if (ret) {
+ debug_printf("%s: ref name 0x%08x failed with %d\n",
+ __FUNCTION__, whandle->handle, ret);
+ return NULL;
+ }
+
+ *out_stride = whandle->stride;
+ return bo;
}
bool
nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
- struct nouveau_bo *bo,
- unsigned stride,
- struct winsys_handle *whandle)
+ struct nouveau_bo *bo,
+ unsigned stride,
+ struct winsys_handle *whandle)
{
- whandle->stride = stride;
-
- if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
- return nouveau_bo_name_get(bo, &whandle->handle) == 0;
- } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
- whandle->handle = bo->handle;
- return true;
- } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
- return nouveau_bo_set_prime(bo, (int *)&whandle->handle) == 0;
- } else {
- return false;
- }
+ whandle->stride = stride;
+
+ if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
+ return nouveau_bo_name_get(bo, &whandle->handle) == 0;
+ } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
+ whandle->handle = bo->handle;
+ return true;
+ } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
+ return nouveau_bo_set_prime(bo, (int *)&whandle->handle) == 0;
+ } else {
+ return false;
+ }
}
int
nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
{
- struct pipe_screen *pscreen = &screen->base;
- struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 };
- struct nvc0_fifo nvc0_data = { };
- uint64_t time;
- int size, ret;
- void *data;
- union nouveau_bo_config mm_config;
-
- char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG");
- if (nv_dbg)
- nouveau_mesa_debug = atoi(nv_dbg);
-
- /*
- * this is initialized to 1 in nouveau_drm_screen_create after screen
- * is fully constructed and added to the global screen list.
- */
- screen->refcount = -1;
-
- if (dev->chipset < 0xc0) {
- data = &nv04_data;
- size = sizeof(nv04_data);
- } else {
- data = &nvc0_data;
- size = sizeof(nvc0_data);
- }
-
- /*
- * Set default VRAM domain if not overridden
- */
- if (!screen->vram_domain) {
- if (dev->vram_size > 0)
- screen->vram_domain = NOUVEAU_BO_VRAM;
- else
- screen->vram_domain = NOUVEAU_BO_GART;
- }
-
- ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS,
- data, size, &screen->channel);
- if (ret)
- return ret;
- screen->device = dev;
-
- ret = nouveau_client_new(screen->device, &screen->client);
- if (ret)
- return ret;
- ret = nouveau_pushbuf_new(screen->client, screen->channel,
- 4, 512 * 1024, 1,
- &screen->pushbuf);
- if (ret)
- return ret;
-
- /* getting CPU time first appears to be more accurate */
- screen->cpu_gpu_time_delta = os_time_get();
-
- ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_PTIMER_TIME, &time);
- if (!ret)
- screen->cpu_gpu_time_delta = time - screen->cpu_gpu_time_delta * 1000;
-
- pscreen->get_name = nouveau_screen_get_name;
- pscreen->get_vendor = nouveau_screen_get_vendor;
- pscreen->get_device_vendor = nouveau_screen_get_device_vendor;
-
- pscreen->get_timestamp = nouveau_screen_get_timestamp;
-
- pscreen->fence_reference = nouveau_screen_fence_ref;
- pscreen->fence_finish = nouveau_screen_fence_finish;
-
- util_format_s3tc_init();
-
- screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* gallium limit */
- screen->vidmem_bindings =
- PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL |
- PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
- PIPE_BIND_CURSOR |
- PIPE_BIND_SAMPLER_VIEW |
- PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE |
- PIPE_BIND_COMPUTE_RESOURCE |
- PIPE_BIND_GLOBAL;
- screen->sysmem_bindings =
- PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT |
- PIPE_BIND_COMMAND_ARGS_BUFFER;
-
- memset(&mm_config, 0, sizeof(mm_config));
-
- screen->mm_GART = nouveau_mm_create(dev,
- NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
- &mm_config);
- screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config);
- return 0;
+ struct pipe_screen *pscreen = &screen->base;
+ struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 };
+ struct nvc0_fifo nvc0_data = { };
+ uint64_t time;
+ int size, ret;
+ void *data;
+ union nouveau_bo_config mm_config;
+
+ char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG");
+ if (nv_dbg)
+ nouveau_mesa_debug = atoi(nv_dbg);
+
+ /*
+ * this is initialized to 1 in nouveau_drm_screen_create after screen
+ * is fully constructed and added to the global screen list.
+ */
+ screen->refcount = -1;
+
+ if (dev->chipset < 0xc0) {
+ data = &nv04_data;
+ size = sizeof(nv04_data);
+ } else {
+ data = &nvc0_data;
+ size = sizeof(nvc0_data);
+ }
+
+ /*
+ * Set default VRAM domain if not overridden
+ */
+ if (!screen->vram_domain) {
+ if (dev->vram_size > 0)
+ screen->vram_domain = NOUVEAU_BO_VRAM;
+ else
+ screen->vram_domain = NOUVEAU_BO_GART;
+ }
+
+ ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS,
+ data, size, &screen->channel);
+ if (ret)
+ return ret;
+ screen->device = dev;
+
+ ret = nouveau_client_new(screen->device, &screen->client);
+ if (ret)
+ return ret;
+ ret = nouveau_pushbuf_new(screen->client, screen->channel,
+ 4, 512 * 1024, 1,
+ &screen->pushbuf);
+ if (ret)
+ return ret;
+
+ /* getting CPU time first appears to be more accurate */
+ screen->cpu_gpu_time_delta = os_time_get();
+
+ ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_PTIMER_TIME, &time);
+ if (!ret)
+ screen->cpu_gpu_time_delta = time - screen->cpu_gpu_time_delta * 1000;
+
+ pscreen->get_name = nouveau_screen_get_name;
+ pscreen->get_vendor = nouveau_screen_get_vendor;
+ pscreen->get_device_vendor = nouveau_screen_get_device_vendor;
+
+ pscreen->get_timestamp = nouveau_screen_get_timestamp;
+
+ pscreen->fence_reference = nouveau_screen_fence_ref;
+ pscreen->fence_finish = nouveau_screen_fence_finish;
+
+ util_format_s3tc_init();
+
+ screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* gallium limit */
+ screen->vidmem_bindings =
+ PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL |
+ PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
+ PIPE_BIND_CURSOR |
+ PIPE_BIND_SAMPLER_VIEW |
+ PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE |
+ PIPE_BIND_COMPUTE_RESOURCE |
+ PIPE_BIND_GLOBAL;
+ screen->sysmem_bindings =
+ PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT |
+ PIPE_BIND_COMMAND_ARGS_BUFFER;
+
+ memset(&mm_config, 0, sizeof(mm_config));
+
+ screen->mm_GART = nouveau_mm_create(dev,
+ NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
+ &mm_config);
+ screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config);
+ return 0;
}
void
nouveau_screen_fini(struct nouveau_screen *screen)
{
- nouveau_mm_destroy(screen->mm_GART);
- nouveau_mm_destroy(screen->mm_VRAM);
+ nouveau_mm_destroy(screen->mm_GART);
+ nouveau_mm_destroy(screen->mm_VRAM);
- nouveau_pushbuf_del(&screen->pushbuf);
+ nouveau_pushbuf_del(&screen->pushbuf);
- nouveau_client_del(&screen->client);
- nouveau_object_del(&screen->channel);
+ nouveau_client_del(&screen->client);
+ nouveau_object_del(&screen->channel);
- nouveau_device_del(&screen->device);
+ nouveau_device_del(&screen->device);
}
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index 4fdde9fbf3d..328646fe3ce 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -16,47 +16,47 @@ extern int nouveau_mesa_debug;
struct nouveau_bo;
struct nouveau_screen {
- struct pipe_screen base;
- struct nouveau_device *device;
- struct nouveau_object *channel;
- struct nouveau_client *client;
- struct nouveau_pushbuf *pushbuf;
+ struct pipe_screen base;
+ struct nouveau_device *device;
+ struct nouveau_object *channel;
+ struct nouveau_client *client;
+ struct nouveau_pushbuf *pushbuf;
- int refcount;
+ int refcount;
- unsigned vidmem_bindings; /* PIPE_BIND_* where VRAM placement is desired */
- unsigned sysmem_bindings; /* PIPE_BIND_* where GART placement is desired */
- unsigned lowmem_bindings; /* PIPE_BIND_* that require an address < 4 GiB */
- /*
- * For bindings with (vidmem & sysmem) bits set, PIPE_USAGE_* decides
- * placement.
- */
+ unsigned vidmem_bindings; /* PIPE_BIND_* where VRAM placement is desired */
+ unsigned sysmem_bindings; /* PIPE_BIND_* where GART placement is desired */
+ unsigned lowmem_bindings; /* PIPE_BIND_* that require an address < 4 GiB */
+ /*
+ * For bindings with (vidmem & sysmem) bits set, PIPE_USAGE_* decides
+ * placement.
+ */
- uint16_t class_3d;
+ uint16_t class_3d;
- struct {
- struct nouveau_fence *head;
- struct nouveau_fence *tail;
- struct nouveau_fence *current;
- u32 sequence;
- u32 sequence_ack;
- void (*emit)(struct pipe_screen *, u32 *sequence);
- u32 (*update)(struct pipe_screen *);
- } fence;
+ struct {
+ struct nouveau_fence *head;
+ struct nouveau_fence *tail;
+ struct nouveau_fence *current;
+ u32 sequence;
+ u32 sequence_ack;
+ void (*emit)(struct pipe_screen *, u32 *sequence);
+ u32 (*update)(struct pipe_screen *);
+ } fence;
- struct nouveau_mman *mm_VRAM;
- struct nouveau_mman *mm_GART;
+ struct nouveau_mman *mm_VRAM;
+ struct nouveau_mman *mm_GART;
- int64_t cpu_gpu_time_delta;
+ int64_t cpu_gpu_time_delta;
- bool hint_buf_keep_sysmem_copy;
+ bool hint_buf_keep_sysmem_copy;
- unsigned vram_domain;
+ unsigned vram_domain;
- struct {
- unsigned profiles_checked;
- unsigned profiles_present;
- } firmware_info;
+ struct {
+ unsigned profiles_checked;
+ unsigned profiles_present;
+ } firmware_info;
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
union {
@@ -100,10 +100,10 @@ struct nouveau_screen {
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
# define NOUVEAU_DRV_STAT(s, n, v) do { \
- (s)->stats.named.n += (v); \
+ (s)->stats.named.n += (v); \
} while(0)
-# define NOUVEAU_DRV_STAT_RES(r, n, v) do { \
- nouveau_screen((r)->base.screen)->stats.named.n += (v); \
+# define NOUVEAU_DRV_STAT_RES(r, n, v) do { \
+ nouveau_screen((r)->base.screen)->stats.named.n += (v); \
} while(0)
# define NOUVEAU_DRV_STAT_IFD(x) x
#else
@@ -115,20 +115,20 @@ struct nouveau_screen {
static inline struct nouveau_screen *
nouveau_screen(struct pipe_screen *pscreen)
{
- return (struct nouveau_screen *)pscreen;
+ return (struct nouveau_screen *)pscreen;
}
bool nouveau_drm_screen_unref(struct nouveau_screen *screen);
bool
nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
- struct nouveau_bo *bo,
- unsigned stride,
- struct winsys_handle *whandle);
+ struct nouveau_bo *bo,
+ unsigned stride,
+ struct winsys_handle *whandle);
struct nouveau_bo *
nouveau_screen_bo_from_handle(struct pipe_screen *pscreen,
- struct winsys_handle *whandle,
- unsigned *out_stride);
+ struct winsys_handle *whandle,
+ unsigned *out_stride);
int nouveau_screen_init(struct nouveau_screen *, struct nouveau_device *);
diff --git a/src/gallium/drivers/nouveau/nouveau_statebuf.h b/src/gallium/drivers/nouveau/nouveau_statebuf.h
index f38014091ba..da5d7972d9c 100644
--- a/src/gallium/drivers/nouveau/nouveau_statebuf.h
+++ b/src/gallium/drivers/nouveau/nouveau_statebuf.h
@@ -6,9 +6,9 @@
struct nouveau_statebuf_builder
{
- uint32_t* p;
+ uint32_t* p;
#ifdef DEBUG
- uint32_t* pend;
+ uint32_t* pend;
#endif
};
@@ -22,7 +22,7 @@ struct nouveau_statebuf_builder
static inline uint32_t sb_header(unsigned subc, unsigned mthd, unsigned size)
{
- return (size << 18) | (subc << 13) | mthd;
+ return (size << 18) | (subc << 13) | mthd;
}
#define sb_method(sb, v, n) sb_data(sb, sb_header(SUBC_3D(v), n));
diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c
index e414a534418..8bb12b22ac1 100644
--- a/src/gallium/drivers/nouveau/nouveau_video.c
+++ b/src/gallium/drivers/nouveau/nouveau_video.c
@@ -831,7 +831,7 @@ error:
static int
nouveau_screen_get_video_param(struct pipe_screen *pscreen,
enum pipe_video_profile profile,
- enum pipe_video_entrypoint entrypoint,
+ enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param)
{
switch (param) {
diff --git a/src/gallium/drivers/nouveau/nouveau_video.h b/src/gallium/drivers/nouveau/nouveau_video.h
index fd1bd527deb..3ef6f89ce28 100644
--- a/src/gallium/drivers/nouveau/nouveau_video.h
+++ b/src/gallium/drivers/nouveau/nouveau_video.h
@@ -83,7 +83,7 @@ BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
static inline void
PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd,
struct nouveau_bo *bo, uint32_t offset,
- struct nouveau_bufctx *ctx, int bin, uint32_t rw)
+ struct nouveau_bufctx *ctx, int bin, uint32_t rw)
{
nouveau_bufctx_mthd(ctx, bin, NV04_FIFO_PKHDR(subc, mthd, 1),
bo, offset,
diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video.h b/src/gallium/drivers/nouveau/nouveau_vp3_video.h
index 33e3bef3df3..58df5ee847f 100644
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video.h
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video.h
@@ -117,22 +117,22 @@ struct nouveau_vp3_decoder {
};
struct comm {
- uint32_t bsp_cur_index; // 000
- uint32_t byte_ofs; // 004
- uint32_t status[0x10]; // 008
- uint32_t pos[0x10]; // 048
- uint8_t pad[0x100 - 0x88]; // 0a0 bool comm_encrypted
-
- uint32_t pvp_cur_index; // 100
- uint32_t acked_byte_ofs; // 104
- uint32_t status_vp[0x10]; // 108
- uint16_t mb_y[0x10]; //148
- uint32_t pvp_stage; // 168 0xeeXX
- uint16_t parse_endpos_index; // 16c
- uint16_t irq_index; // 16e
- uint8_t irq_470[0x10]; // 170
- uint32_t irq_pos[0x10]; // 180
- uint32_t parse_endpos[0x10]; // 1c0
+ uint32_t bsp_cur_index; // 000
+ uint32_t byte_ofs; // 004
+ uint32_t status[0x10]; // 008
+ uint32_t pos[0x10]; // 048
+ uint8_t pad[0x100 - 0x88]; // 0a0 bool comm_encrypted
+
+ uint32_t pvp_cur_index; // 100
+ uint32_t acked_byte_ofs; // 104
+ uint32_t status_vp[0x10]; // 108
+ uint16_t mb_y[0x10]; //148
+ uint32_t pvp_stage; // 168 0xeeXX
+ uint16_t parse_endpos_index; // 16c
+ uint16_t irq_index; // 16e
+ uint8_t irq_470[0x10]; // 170
+ uint32_t irq_pos[0x10]; // 180
+ uint32_t parse_endpos[0x10]; // 1c0
};
static inline uint32_t nouveau_vp3_video_align(uint32_t h)
diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
index 6d968c18399..692772e49d1 100644
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
@@ -23,90 +23,90 @@
#include "nouveau_vp3_video.h"
struct strparm_bsp {
- uint32_t w0[4]; // bits 0-23 length, bits 24-31 addr_hi
- uint32_t w1[4]; // bit 8-24 addr_lo
- uint32_t unk20; // should be idx * 0x8000000, bitstream offset
- uint32_t do_crypto_crap; // set to 0
+ uint32_t w0[4]; // bits 0-23 length, bits 24-31 addr_hi
+ uint32_t w1[4]; // bit 8-24 addr_lo
+ uint32_t unk20; // should be idx * 0x8000000, bitstream offset
+ uint32_t do_crypto_crap; // set to 0
};
struct mpeg12_picparm_bsp {
- uint16_t width;
- uint16_t height;
- uint8_t picture_structure;
- uint8_t picture_coding_type;
- uint8_t intra_dc_precision;
- uint8_t frame_pred_frame_dct;
- uint8_t concealment_motion_vectors;
- uint8_t intra_vlc_format;
- uint16_t pad;
- uint8_t f_code[2][2];
+ uint16_t width;
+ uint16_t height;
+ uint8_t picture_structure;
+ uint8_t picture_coding_type;
+ uint8_t intra_dc_precision;
+ uint8_t frame_pred_frame_dct;
+ uint8_t concealment_motion_vectors;
+ uint8_t intra_vlc_format;
+ uint16_t pad;
+ uint8_t f_code[2][2];
};
struct mpeg4_picparm_bsp {
- uint16_t width;
- uint16_t height;
- uint8_t vop_time_increment_size;
- uint8_t interlaced;
- uint8_t resync_marker_disable;
+ uint16_t width;
+ uint16_t height;
+ uint8_t vop_time_increment_size;
+ uint8_t interlaced;
+ uint8_t resync_marker_disable;
};
struct vc1_picparm_bsp {
- uint16_t width;
- uint16_t height;
- uint8_t profile; // 04 0 simple, 1 main, 2 advanced
- uint8_t postprocflag; // 05
- uint8_t pulldown; // 06
- uint8_t interlaced; // 07
- uint8_t tfcntrflag; // 08
- uint8_t finterpflag; // 09
- uint8_t psf; // 0a
- uint8_t pad; // 0b
- uint8_t multires; // 0c
- uint8_t syncmarker; // 0d
- uint8_t rangered; // 0e
- uint8_t maxbframes; // 0f
- uint8_t dquant; // 10
- uint8_t panscan_flag; // 11
- uint8_t refdist_flag; // 12
- uint8_t quantizer; // 13
- uint8_t extended_mv; // 14
- uint8_t extended_dmv; // 15
- uint8_t overlap; // 16
- uint8_t vstransform; // 17
+ uint16_t width;
+ uint16_t height;
+ uint8_t profile; // 04 0 simple, 1 main, 2 advanced
+ uint8_t postprocflag; // 05
+ uint8_t pulldown; // 06
+ uint8_t interlaced; // 07
+ uint8_t tfcntrflag; // 08
+ uint8_t finterpflag; // 09
+ uint8_t psf; // 0a
+ uint8_t pad; // 0b
+ uint8_t multires; // 0c
+ uint8_t syncmarker; // 0d
+ uint8_t rangered; // 0e
+ uint8_t maxbframes; // 0f
+ uint8_t dquant; // 10
+ uint8_t panscan_flag; // 11
+ uint8_t refdist_flag; // 12
+ uint8_t quantizer; // 13
+ uint8_t extended_mv; // 14
+ uint8_t extended_dmv; // 15
+ uint8_t overlap; // 16
+ uint8_t vstransform; // 17
};
struct h264_picparm_bsp {
- // 00
- uint32_t unk00;
- // 04
- uint32_t log2_max_frame_num_minus4; // 04 checked
- uint32_t pic_order_cnt_type; // 08 checked
- uint32_t log2_max_pic_order_cnt_lsb_minus4; // 0c checked
- uint32_t delta_pic_order_always_zero_flag; // 10, or unknown
+ // 00
+ uint32_t unk00;
+ // 04
+ uint32_t log2_max_frame_num_minus4; // 04 checked
+ uint32_t pic_order_cnt_type; // 08 checked
+ uint32_t log2_max_pic_order_cnt_lsb_minus4; // 0c checked
+ uint32_t delta_pic_order_always_zero_flag; // 10, or unknown
- uint32_t frame_mbs_only_flag; // 14, always 1?
- uint32_t direct_8x8_inference_flag; // 18, always 1?
- uint32_t width_mb; // 1c checked
- uint32_t height_mb; // 20 checked
- // 24
- //struct picparm2
- uint32_t entropy_coding_mode_flag; // 00, checked
- uint32_t pic_order_present_flag; // 04 checked
- uint32_t unk; // 08 seems to be 0?
- uint32_t pad1; // 0c seems to be 0?
- uint32_t pad2; // 10 always 0 ?
- uint32_t num_ref_idx_l0_active_minus1; // 14 always 0?
- uint32_t num_ref_idx_l1_active_minus1; // 18 always 0?
- uint32_t weighted_pred_flag; // 1c checked
- uint32_t weighted_bipred_idc; // 20 checked
- uint32_t pic_init_qp_minus26; // 24 checked
- uint32_t deblocking_filter_control_present_flag; // 28 always 1?
- uint32_t redundant_pic_cnt_present_flag; // 2c always 0?
- uint32_t transform_8x8_mode_flag; // 30 checked
- uint32_t mb_adaptive_frame_field_flag; // 34 checked-ish
- uint8_t field_pic_flag; // 38 checked
- uint8_t bottom_field_flag; // 39 checked
- uint8_t real_pad[0x1b]; // XX why?
+ uint32_t frame_mbs_only_flag; // 14, always 1?
+ uint32_t direct_8x8_inference_flag; // 18, always 1?
+ uint32_t width_mb; // 1c checked
+ uint32_t height_mb; // 20 checked
+ // 24
+ //struct picparm2
+ uint32_t entropy_coding_mode_flag; // 00, checked
+ uint32_t pic_order_present_flag; // 04 checked
+ uint32_t unk; // 08 seems to be 0?
+ uint32_t pad1; // 0c seems to be 0?
+ uint32_t pad2; // 10 always 0 ?
+ uint32_t num_ref_idx_l0_active_minus1; // 14 always 0?
+ uint32_t num_ref_idx_l1_active_minus1; // 18 always 0?
+ uint32_t weighted_pred_flag; // 1c checked
+ uint32_t weighted_bipred_idc; // 20 checked
+ uint32_t pic_init_qp_minus26; // 24 checked
+ uint32_t deblocking_filter_control_present_flag; // 28 always 1?
+ uint32_t redundant_pic_cnt_present_flag; // 2c always 0?
+ uint32_t transform_8x8_mode_flag; // 30 checked
+ uint32_t mb_adaptive_frame_field_flag; // 34 checked-ish
+ uint8_t field_pic_flag; // 38 checked
+ uint8_t bottom_field_flag; // 39 checked
+ uint8_t real_pad[0x1b]; // XX why?
};
static uint32_t
diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c
index 25283b79952..53f5db0003d 100644
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c
@@ -23,147 +23,147 @@
#include "nouveau_vp3_video.h"
struct mpeg12_picparm_vp {
- uint16_t width; // 00 in mb units
- uint16_t height; // 02 in mb units
-
- uint32_t unk04; // 04 stride for Y?
- uint32_t unk08; // 08 stride for CbCr?
-
- uint32_t ofs[6]; // 1c..20 ofs
- uint32_t bucket_size; // 24
- uint32_t inter_ring_data_size; // 28
- uint16_t unk2c; // 2c
- uint16_t alternate_scan; // 2e
- uint16_t unk30; // 30 not seen set yet
- uint16_t picture_structure; // 32
- uint16_t pad2[3];
- uint16_t unk3a; // 3a set on I frame?
-
- uint32_t f_code[4]; // 3c
- uint32_t picture_coding_type; // 4c
- uint32_t intra_dc_precision; // 50
- uint32_t q_scale_type; // 54
- uint32_t top_field_first; // 58
- uint32_t full_pel_forward_vector; // 5c
- uint32_t full_pel_backward_vector; // 60
- uint8_t intra_quantizer_matrix[0x40]; // 64
- uint8_t non_intra_quantizer_matrix[0x40]; // a4
+ uint16_t width; // 00 in mb units
+ uint16_t height; // 02 in mb units
+
+ uint32_t unk04; // 04 stride for Y?
+ uint32_t unk08; // 08 stride for CbCr?
+
+ uint32_t ofs[6]; // 1c..20 ofs
+ uint32_t bucket_size; // 24
+ uint32_t inter_ring_data_size; // 28
+ uint16_t unk2c; // 2c
+ uint16_t alternate_scan; // 2e
+ uint16_t unk30; // 30 not seen set yet
+ uint16_t picture_structure; // 32
+ uint16_t pad2[3];
+ uint16_t unk3a; // 3a set on I frame?
+
+ uint32_t f_code[4]; // 3c
+ uint32_t picture_coding_type; // 4c
+ uint32_t intra_dc_precision; // 50
+ uint32_t q_scale_type; // 54
+ uint32_t top_field_first; // 58
+ uint32_t full_pel_forward_vector; // 5c
+ uint32_t full_pel_backward_vector; // 60
+ uint8_t intra_quantizer_matrix[0x40]; // 64
+ uint8_t non_intra_quantizer_matrix[0x40]; // a4
};
struct mpeg4_picparm_vp {
- uint32_t width; // 00 in normal units
- uint32_t height; // 04 in normal units
- uint32_t unk08; // stride 1
- uint32_t unk0c; // stride 2
- uint32_t ofs[6]; // 10..24 ofs
- uint32_t bucket_size; // 28
- uint32_t pad1; // 2c, pad
- uint32_t pad2; // 30
- uint32_t inter_ring_data_size; // 34
-
- uint32_t trd[2]; // 38, 3c
- uint32_t trb[2]; // 40, 44
- uint32_t u48; // XXX codec selection? Should test with different values of VdpDecoderProfile
- uint16_t f_code_fw; // 4c
- uint16_t f_code_bw; // 4e
- uint8_t interlaced; // 50
-
- uint8_t quant_type; // bool, written to 528
- uint8_t quarter_sample; // bool, written to 548
- uint8_t short_video_header; // bool, negated written to 528 shifted by 1
- uint8_t u54; // bool, written to 0x740
- uint8_t vop_coding_type; // 55
- uint8_t rounding_control; // 56
- uint8_t alternate_vertical_scan_flag; // 57 bool
- uint8_t top_field_first; // bool, written to vuc
-
- uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob
-
- uint32_t intra[0x10]; // 5c
- uint32_t non_intra[0x10]; // 9c
- uint32_t pad5[0x10]; // bc what does this do?
- // udc..uff pad?
+ uint32_t width; // 00 in normal units
+ uint32_t height; // 04 in normal units
+ uint32_t unk08; // stride 1
+ uint32_t unk0c; // stride 2
+ uint32_t ofs[6]; // 10..24 ofs
+ uint32_t bucket_size; // 28
+ uint32_t pad1; // 2c, pad
+ uint32_t pad2; // 30
+ uint32_t inter_ring_data_size; // 34
+
+ uint32_t trd[2]; // 38, 3c
+ uint32_t trb[2]; // 40, 44
+ uint32_t u48; // XXX codec selection? Should test with different values of VdpDecoderProfile
+ uint16_t f_code_fw; // 4c
+ uint16_t f_code_bw; // 4e
+ uint8_t interlaced; // 50
+
+ uint8_t quant_type; // bool, written to 528
+ uint8_t quarter_sample; // bool, written to 548
+ uint8_t short_video_header; // bool, negated written to 528 shifted by 1
+ uint8_t u54; // bool, written to 0x740
+ uint8_t vop_coding_type; // 55
+ uint8_t rounding_control; // 56
+ uint8_t alternate_vertical_scan_flag; // 57 bool
+ uint8_t top_field_first; // bool, written to vuc
+
+ uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob
+
+ uint32_t intra[0x10]; // 5c
+ uint32_t non_intra[0x10]; // 9c
+ uint32_t pad5[0x10]; // bc what does this do?
+ // udc..uff pad?
};
// Full version, with data pumped from BSP
struct vc1_picparm_vp {
- uint32_t bucket_size; // 00
- uint32_t pad; // 04
-
- uint32_t inter_ring_data_size; // 08
- uint32_t unk0c; // stride 1
- uint32_t unk10; // stride 2
- uint32_t ofs[6]; // 14..28 ofs
-
- uint16_t width; // 2c
- uint16_t height; // 2e
-
- uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced
- uint8_t loopfilter; // 31 written into vuc
- uint8_t fastuvmc; // 32, written into vuc
- uint8_t dquant; // 33
-
- uint8_t overlap; // 34
- uint8_t quantizer; // 35
- uint8_t u36; // 36, bool
- uint8_t pad2; // 37, to align to 0x38
+ uint32_t bucket_size; // 00
+ uint32_t pad; // 04
+
+ uint32_t inter_ring_data_size; // 08
+ uint32_t unk0c; // stride 1
+ uint32_t unk10; // stride 2
+ uint32_t ofs[6]; // 14..28 ofs
+
+ uint16_t width; // 2c
+ uint16_t height; // 2e
+
+ uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced
+ uint8_t loopfilter; // 31 written into vuc
+ uint8_t fastuvmc; // 32, written into vuc
+ uint8_t dquant; // 33
+
+ uint8_t overlap; // 34
+ uint8_t quantizer; // 35
+ uint8_t u36; // 36, bool
+ uint8_t pad2; // 37, to align to 0x38
};
struct h264_picparm_vp { // 700..a00
- uint16_t width, height;
- uint32_t stride1, stride2; // 04 08
- uint32_t ofs[6]; // 0c..24 in-image offset
-
- uint32_t tmp_stride;
- uint32_t bucket_size; // 28 bucket size
- uint32_t inter_ring_data_size; // 2c
-
- unsigned mb_adaptive_frame_field_flag : 1; // 0
- unsigned direct_8x8_inference_flag : 1; // 1 0x02: into vuc ofs 56
- unsigned weighted_pred_flag : 1; // 2 0x04
- unsigned constrained_intra_pred_flag : 1; // 3 0x08: into vuc ofs 68
- unsigned is_reference : 1; // 4
- unsigned interlace : 1; // 5 field_pic_flag
- unsigned bottom_field_flag : 1; // 6
- unsigned second_field : 1; // 7 0x80: nfi yet
-
- signed log2_max_frame_num_minus4 : 4; // 31 0..3
- unsigned chroma_format_idc : 2; // 31 4..5
- unsigned pic_order_cnt_type : 2; // 31 6..7
- signed pic_init_qp_minus26 : 6; // 32 0..5
- signed chroma_qp_index_offset : 5; // 32 6..10
- signed second_chroma_qp_index_offset : 5; // 32 11..15
-
- unsigned weighted_bipred_idc : 2; // 34 0..1
- unsigned fifo_dec_index : 7; // 34 2..8
- unsigned tmp_idx : 5; // 34 9..13
- unsigned frame_number : 16; // 34 14..29
- unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30]
- unsigned u34_3131 : 1; // 34 31..31 pad?
-
- uint32_t field_order_cnt[2]; // 38, 3c
-
- struct { // 40
- unsigned fifo_idx : 7; // 00 0..6
- unsigned tmp_idx : 5; // 00 7..11
- unsigned top_is_reference : 1; // 00 12
- unsigned bottom_is_reference : 1; // 00 13
- unsigned is_long_term : 1; // 00 14
- unsigned notseenyet : 1; // 00 15 pad?
- unsigned field_pic_flag : 1; // 00 16
- unsigned top_field_marking : 4; // 00 17..20
- unsigned bottom_field_marking : 4; // 00 21..24
- unsigned pad : 7; // 00 d25..31
-
- uint32_t field_order_cnt[2]; // 04,08
- uint32_t frame_idx; // 0c
- } refs[0x10];
-
- uint8_t m4x4[6][16]; // 140
- uint8_t m8x8[2][64]; // 1a0
- uint32_t u220; // 220 number of extra reorder_list to append?
- uint8_t u224[0x20]; // 224..244 reorder_list append ?
- uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read
+ uint16_t width, height;
+ uint32_t stride1, stride2; // 04 08
+ uint32_t ofs[6]; // 0c..24 in-image offset
+
+ uint32_t tmp_stride;
+ uint32_t bucket_size; // 28 bucket size
+ uint32_t inter_ring_data_size; // 2c
+
+ unsigned mb_adaptive_frame_field_flag : 1; // 0
+ unsigned direct_8x8_inference_flag : 1; // 1 0x02: into vuc ofs 56
+ unsigned weighted_pred_flag : 1; // 2 0x04
+ unsigned constrained_intra_pred_flag : 1; // 3 0x08: into vuc ofs 68
+ unsigned is_reference : 1; // 4
+ unsigned interlace : 1; // 5 field_pic_flag
+ unsigned bottom_field_flag : 1; // 6
+ unsigned second_field : 1; // 7 0x80: nfi yet
+
+ signed log2_max_frame_num_minus4 : 4; // 31 0..3
+ unsigned chroma_format_idc : 2; // 31 4..5
+ unsigned pic_order_cnt_type : 2; // 31 6..7
+ signed pic_init_qp_minus26 : 6; // 32 0..5
+ signed chroma_qp_index_offset : 5; // 32 6..10
+ signed second_chroma_qp_index_offset : 5; // 32 11..15
+
+ unsigned weighted_bipred_idc : 2; // 34 0..1
+ unsigned fifo_dec_index : 7; // 34 2..8
+ unsigned tmp_idx : 5; // 34 9..13
+ unsigned frame_number : 16; // 34 14..29
+ unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30]
+ unsigned u34_3131 : 1; // 34 31..31 pad?
+
+ uint32_t field_order_cnt[2]; // 38, 3c
+
+ struct { // 40
+ unsigned fifo_idx : 7; // 00 0..6
+ unsigned tmp_idx : 5; // 00 7..11
+ unsigned top_is_reference : 1; // 00 12
+ unsigned bottom_is_reference : 1; // 00 13
+ unsigned is_long_term : 1; // 00 14
+ unsigned notseenyet : 1; // 00 15 pad?
+ unsigned field_pic_flag : 1; // 00 16
+ unsigned top_field_marking : 4; // 00 17..20
+ unsigned bottom_field_marking : 4; // 00 21..24
+ unsigned pad : 7; // 00 d25..31
+
+ uint32_t field_order_cnt[2]; // 04,08
+ uint32_t frame_idx; // 0c
+ } refs[0x10];
+
+ uint8_t m4x4[6][16]; // 140
+ uint8_t m8x8[2][64]; // 1a0
+ uint32_t u220; // 220 number of extra reorder_list to append?
+ uint8_t u224[0x20]; // 224..244 reorder_list append ?
+ uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read
};
static void
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index a44fd3efcf7..1319c3290cf 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -65,18 +65,18 @@ PUSH_KICK(struct nouveau_pushbuf *push)
static inline uint32_t
nouveau_screen_transfer_flags(unsigned pipe)
{
- uint32_t flags = 0;
-
- if (!(pipe & PIPE_TRANSFER_UNSYNCHRONIZED)) {
- if (pipe & PIPE_TRANSFER_READ)
- flags |= NOUVEAU_BO_RD;
- if (pipe & PIPE_TRANSFER_WRITE)
- flags |= NOUVEAU_BO_WR;
- if (pipe & PIPE_TRANSFER_DONTBLOCK)
- flags |= NOUVEAU_BO_NOBLOCK;
- }
-
- return flags;
+ uint32_t flags = 0;
+
+ if (!(pipe & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+ if (pipe & PIPE_TRANSFER_READ)
+ flags |= NOUVEAU_BO_RD;
+ if (pipe & PIPE_TRANSFER_WRITE)
+ flags |= NOUVEAU_BO_WR;
+ if (pipe & PIPE_TRANSFER_DONTBLOCK)
+ flags |= NOUVEAU_BO_NOBLOCK;
+ }
+
+ return flags;
}
extern struct pipe_screen *
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 03301649e38..bdecb0a32b3 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -172,6 +172,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 69c121274a9..fb74a9748a3 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -16,6 +16,7 @@
#include "nv50/nv50_program.h"
#include "nv50/nv50_resource.h"
#include "nv50/nv50_transfer.h"
+#include "nv50/nv50_query.h"
#include "nouveau_context.h"
#include "nouveau_debug.h"
@@ -195,17 +196,6 @@ void nv50_default_kick_notify(struct nouveau_pushbuf *);
/* nv50_draw.c */
extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
-/* nv50_query.c */
-void nv50_init_query_functions(struct nv50_context *);
-void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method,
- struct pipe_query *, unsigned result_offset);
-void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
-void nva0_so_target_save_offset(struct pipe_context *,
- struct pipe_stream_output_target *,
- unsigned index, bool seralize);
-
-#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
-
/* nv50_shader_state.c */
void nv50_vertprog_validate(struct nv50_context *);
void nv50_gmtyprog_validate(struct nv50_context *);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index eff4477472c..299629b6438 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -336,7 +336,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
info->io.ucpCBSlot = 15;
info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
info->io.genUserClip = prog->vp.clpd_nr;
- info->io.sampleInterp = prog->fp.sample_interp;
info->io.resInfoCBSlot = 15;
info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
@@ -374,6 +373,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
prog->code = info->bin.code;
prog->code_size = info->bin.codeSize;
prog->fixups = info->bin.relocData;
+ prog->interps = info->bin.interpData;
prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
prog->tls_space = info->bin.tlsSpace;
@@ -420,8 +420,8 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
switch (prog->type) {
case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break;
- case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break;
- case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break;
+ case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
+ case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
default:
assert(!"invalid program type");
return false;
@@ -456,6 +456,10 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
if (prog->fixups)
nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
+ if (prog->interps)
+ nv50_ir_change_interp(prog->interps, prog->code,
+ prog->fp.force_persample_interp,
+ false /* flatshade */);
nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
(prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index f4e8e9402ca..24cc96567d7 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -86,7 +86,7 @@ struct nv50_program {
uint32_t interp; /* 0x1988 */
uint32_t colors; /* 0x1904 */
uint8_t has_samplemask;
- uint8_t sample_interp;
+ uint8_t force_persample_interp;
} fp;
struct {
@@ -99,6 +99,7 @@ struct nv50_program {
} gp;
void *fixups; /* relocation records */
+ void *interps; /* interpolation records */
struct nouveau_heap *mem;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 5368ee73750..dd9b85b7208 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -25,356 +25,46 @@
#define NV50_PUSH_EXPLICIT_SPACE_CHECKING
#include "nv50/nv50_context.h"
-#include "nv_object.xml.h"
-
-#define NV50_QUERY_STATE_READY 0
-#define NV50_QUERY_STATE_ACTIVE 1
-#define NV50_QUERY_STATE_ENDED 2
-#define NV50_QUERY_STATE_FLUSHED 3
-
-/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
- * (since we use only a single GPU channel per screen) will not work properly.
- *
- * The first is not that big of an issue because OpenGL does not allow nested
- * queries anyway.
- */
-
-struct nv50_query {
- uint32_t *data;
- uint16_t type;
- uint16_t index;
- uint32_t sequence;
- struct nouveau_bo *bo;
- uint32_t base;
- uint32_t offset; /* base + i * 32 */
- uint8_t state;
- bool is64bit;
- int nesting; /* only used for occlusion queries */
- struct nouveau_mm_allocation *mm;
- struct nouveau_fence *fence;
-};
-
-#define NV50_QUERY_ALLOC_SPACE 256
-
-static inline struct nv50_query *
-nv50_query(struct pipe_query *pipe)
-{
- return (struct nv50_query *)pipe;
-}
-
-static bool
-nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
-{
- struct nv50_screen *screen = nv50->screen;
- int ret;
-
- if (q->bo) {
- nouveau_bo_ref(NULL, &q->bo);
- if (q->mm) {
- if (q->state == NV50_QUERY_STATE_READY)
- nouveau_mm_free(q->mm);
- else
- nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work,
- q->mm);
- }
- }
- if (size) {
- q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
- if (!q->bo)
- return false;
- q->offset = q->base;
-
- ret = nouveau_bo_map(q->bo, 0, screen->base.client);
- if (ret) {
- nv50_query_allocate(nv50, q, 0);
- return false;
- }
- q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
- }
- return true;
-}
-
-static void
-nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
-{
- nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
- nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
- FREE(nv50_query(pq));
-}
+#include "nv50/nv50_query.h"
+#include "nv50/nv50_query_hw.h"
static struct pipe_query *
-nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
+nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nv50_query *q;
- q = CALLOC_STRUCT(nv50_query);
- if (!q)
- return NULL;
-
- if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) {
- FREE(q);
- return NULL;
- }
-
- q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
- type == PIPE_QUERY_PRIMITIVES_EMITTED ||
- type == PIPE_QUERY_SO_STATISTICS ||
- type == PIPE_QUERY_PIPELINE_STATISTICS);
- q->type = type;
-
- if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
- q->offset -= 32;
- q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */
- }
-
+ q = nv50_hw_create_query(nv50, type, index);
return (struct pipe_query *)q;
}
static void
-nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
- unsigned offset, uint32_t get)
+nv50_destroy_query(struct pipe_context *pipe, struct pipe_query *pq)
{
- offset += q->offset;
-
- PUSH_SPACE(push, 5);
- PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
- BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
- PUSH_DATAh(push, q->bo->offset + offset);
- PUSH_DATA (push, q->bo->offset + offset);
- PUSH_DATA (push, q->sequence);
- PUSH_DATA (push, get);
+ struct nv50_query *q = nv50_query(pq);
+ q->funcs->destroy_query(nv50_context(pipe), q);
}
static boolean
-nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+nv50_begin_query(struct pipe_context *pipe, struct pipe_query *pq)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_query *q = nv50_query(pq);
-
- /* For occlusion queries we have to change the storage, because a previous
- * query might set the initial render conition to false even *after* we re-
- * initialized it to true.
- */
- if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
- q->offset += 32;
- q->data += 32 / sizeof(*q->data);
- if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE)
- nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE);
-
- /* XXX: can we do this with the GPU, and sync with respect to a previous
- * query ?
- */
- q->data[0] = q->sequence; /* initialize sequence */
- q->data[1] = 1; /* initial render condition = true */
- q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
- q->data[5] = 0;
- }
- if (!q->is64bit)
- q->data[0] = q->sequence++; /* the previously used one */
-
- switch (q->type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- q->nesting = nv50->screen->num_occlusion_queries_active++;
- if (q->nesting) {
- nv50_query_get(push, q, 0x10, 0x0100f002);
- } else {
- PUSH_SPACE(push, 4);
- BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
- PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
- BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
- PUSH_DATA (push, 1);
- }
- break;
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- nv50_query_get(push, q, 0x10, 0x06805002);
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- nv50_query_get(push, q, 0x10, 0x05805002);
- break;
- case PIPE_QUERY_SO_STATISTICS:
- nv50_query_get(push, q, 0x20, 0x05805002);
- nv50_query_get(push, q, 0x30, 0x06805002);
- break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- nv50_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
- nv50_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
- nv50_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
- nv50_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
- nv50_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
- nv50_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
- nv50_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
- nv50_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
- break;
- case PIPE_QUERY_TIME_ELAPSED:
- nv50_query_get(push, q, 0x10, 0x00005002);
- break;
- default:
- break;
- }
- q->state = NV50_QUERY_STATE_ACTIVE;
- return true;
+ return q->funcs->begin_query(nv50_context(pipe), q);
}
static void
-nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+nv50_end_query(struct pipe_context *pipe, struct pipe_query *pq)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_query *q = nv50_query(pq);
-
- q->state = NV50_QUERY_STATE_ENDED;
-
- switch (q->type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- nv50_query_get(push, q, 0, 0x0100f002);
- if (--nv50->screen->num_occlusion_queries_active == 0) {
- PUSH_SPACE(push, 2);
- BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
- PUSH_DATA (push, 0);
- }
- break;
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- nv50_query_get(push, q, 0, 0x06805002);
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- nv50_query_get(push, q, 0, 0x05805002);
- break;
- case PIPE_QUERY_SO_STATISTICS:
- nv50_query_get(push, q, 0x00, 0x05805002);
- nv50_query_get(push, q, 0x10, 0x06805002);
- break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- nv50_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
- nv50_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
- nv50_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
- nv50_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
- nv50_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
- nv50_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
- nv50_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
- nv50_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
- break;
- case PIPE_QUERY_TIMESTAMP:
- q->sequence++;
- /* fall through */
- case PIPE_QUERY_TIME_ELAPSED:
- nv50_query_get(push, q, 0, 0x00005002);
- break;
- case PIPE_QUERY_GPU_FINISHED:
- q->sequence++;
- nv50_query_get(push, q, 0, 0x1000f010);
- break;
- case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
- q->sequence++;
- nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
- break;
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- /* This query is not issued on GPU because disjoint is forced to false */
- q->state = NV50_QUERY_STATE_READY;
- break;
- default:
- assert(0);
- break;
- }
-
- if (q->is64bit)
- nouveau_fence_ref(nv50->screen->base.fence.current, &q->fence);
-}
-
-static inline void
-nv50_query_update(struct nv50_query *q)
-{
- if (q->is64bit) {
- if (nouveau_fence_signalled(q->fence))
- q->state = NV50_QUERY_STATE_READY;
- } else {
- if (q->data[0] == q->sequence)
- q->state = NV50_QUERY_STATE_READY;
- }
+ q->funcs->end_query(nv50_context(pipe), q);
}
static boolean
-nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
- boolean wait, union pipe_query_result *result)
+nv50_get_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, union pipe_query_result *result)
{
- struct nv50_context *nv50 = nv50_context(pipe);
struct nv50_query *q = nv50_query(pq);
- uint64_t *res64 = (uint64_t *)result;
- uint32_t *res32 = (uint32_t *)result;
- uint8_t *res8 = (uint8_t *)result;
- uint64_t *data64 = (uint64_t *)q->data;
- int i;
-
- if (q->state != NV50_QUERY_STATE_READY)
- nv50_query_update(q);
-
- if (q->state != NV50_QUERY_STATE_READY) {
- if (!wait) {
- /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
- if (q->state != NV50_QUERY_STATE_FLUSHED) {
- q->state = NV50_QUERY_STATE_FLUSHED;
- PUSH_KICK(nv50->base.pushbuf);
- }
- return false;
- }
- if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
- return false;
- }
- q->state = NV50_QUERY_STATE_READY;
-
- switch (q->type) {
- case PIPE_QUERY_GPU_FINISHED:
- res8[0] = true;
- break;
- case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
- res64[0] = q->data[1] - q->data[5];
- break;
- case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
- case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
- res64[0] = data64[0] - data64[2];
- break;
- case PIPE_QUERY_SO_STATISTICS:
- res64[0] = data64[0] - data64[4];
- res64[1] = data64[2] - data64[6];
- break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- for (i = 0; i < 8; ++i)
- res64[i] = data64[i * 2] - data64[16 + i * 2];
- break;
- case PIPE_QUERY_TIMESTAMP:
- res64[0] = data64[1];
- break;
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- res64[0] = 1000000000;
- res8[8] = false;
- break;
- case PIPE_QUERY_TIME_ELAPSED:
- res64[0] = data64[1] - data64[3];
- break;
- case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
- res32[0] = q->data[1];
- break;
- default:
- return false;
- }
-
- return true;
-}
-
-void
-nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
-{
- struct nv50_query *q = nv50_query(pq);
- unsigned offset = q->offset;
-
- PUSH_SPACE(push, 5);
- PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
- PUSH_DATAh(push, q->bo->offset + offset);
- PUSH_DATA (push, q->bo->offset + offset);
- PUSH_DATA (push, q->sequence);
- PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
+ return q->funcs->get_query_result(nv50_context(pipe), q, wait, result);
}
static void
@@ -384,7 +74,8 @@ nv50_render_condition(struct pipe_context *pipe,
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
- struct nv50_query *q;
+ struct nv50_query *q = nv50_query(pq);
+ struct nv50_hw_query *hq = nv50_hw_query(q);
uint32_t cond;
bool wait =
mode != PIPE_RENDER_COND_NO_WAIT &&
@@ -394,7 +85,6 @@ nv50_render_condition(struct pipe_context *pipe,
cond = NV50_3D_COND_MODE_ALWAYS;
}
else {
- q = nv50_query(pq);
/* NOTE: comparison of 2 queries only works if both have completed */
switch (q->type) {
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
@@ -405,7 +95,7 @@ nv50_render_condition(struct pipe_context *pipe,
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
if (likely(!condition)) {
- if (unlikely(q->nesting))
+ if (unlikely(hq->nesting))
cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL :
NV50_3D_COND_MODE_ALWAYS;
else
@@ -440,48 +130,15 @@ nv50_render_condition(struct pipe_context *pipe,
PUSH_DATA (push, 0);
}
- PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3);
- PUSH_DATAh(push, q->bo->offset + q->offset);
- PUSH_DATA (push, q->bo->offset + q->offset);
+ PUSH_DATAh(push, hq->bo->offset + hq->offset);
+ PUSH_DATA (push, hq->bo->offset + hq->offset);
PUSH_DATA (push, cond);
BEGIN_NV04(push, NV50_2D(COND_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, q->bo->offset + q->offset);
- PUSH_DATA (push, q->bo->offset + q->offset);
-}
-
-void
-nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
- struct pipe_query *pq, unsigned result_offset)
-{
- struct nv50_query *q = nv50_query(pq);
-
- nv50_query_update(q);
- if (q->state != NV50_QUERY_STATE_READY)
- nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client);
- q->state = NV50_QUERY_STATE_READY;
-
- BEGIN_NV04(push, SUBC_3D(method), 1);
- PUSH_DATA (push, q->data[result_offset / 4]);
-}
-
-void
-nva0_so_target_save_offset(struct pipe_context *pipe,
- struct pipe_stream_output_target *ptarg,
- unsigned index, bool serialize)
-{
- struct nv50_so_target *targ = nv50_so_target(ptarg);
-
- if (serialize) {
- struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
- PUSH_SPACE(push, 2);
- BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
- PUSH_DATA (push, 0);
- }
-
- nv50_query(targ->pq)->index = index;
- nv50_query_end(pipe, targ->pq);
+ PUSH_DATAh(push, hq->bo->offset + hq->offset);
+ PUSH_DATA (push, hq->bo->offset + hq->offset);
}
void
@@ -489,10 +146,10 @@ nv50_init_query_functions(struct nv50_context *nv50)
{
struct pipe_context *pipe = &nv50->base.pipe;
- pipe->create_query = nv50_query_create;
- pipe->destroy_query = nv50_query_destroy;
- pipe->begin_query = nv50_query_begin;
- pipe->end_query = nv50_query_end;
- pipe->get_query_result = nv50_query_result;
+ pipe->create_query = nv50_create_query;
+ pipe->destroy_query = nv50_destroy_query;
+ pipe->begin_query = nv50_begin_query;
+ pipe->end_query = nv50_end_query;
+ pipe->get_query_result = nv50_get_query_result;
pipe->render_condition = nv50_render_condition;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h b/src/gallium/drivers/nouveau/nv50/nv50_query.h
new file mode 100644
index 00000000000..d990285c857
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h
@@ -0,0 +1,33 @@
+#ifndef __NV50_QUERY_H__
+#define __NV50_QUERY_H__
+
+#include "pipe/p_context.h"
+
+#include "nouveau_context.h"
+
+struct nv50_context;
+struct nv50_query;
+
+/* Dispatch table a query implementation installs in nv50_query::funcs.
+ * Every hook receives the nv50_context and the nv50_query being operated on.
+ */
+struct nv50_query_funcs {
+ void (*destroy_query)(struct nv50_context *, struct nv50_query *);
+ boolean (*begin_query)(struct nv50_context *, struct nv50_query *);
+ void (*end_query)(struct nv50_context *, struct nv50_query *);
+ /* the boolean is the "wait" flag, the union receives the result */
+ boolean (*get_query_result)(struct nv50_context *, struct nv50_query *,
+ boolean, union pipe_query_result *);
+};
+
+/* Common part of every nv50 query object; nv50_query() casts a pipe_query
+ * pointer straight to this type.
+ */
+struct nv50_query {
+ const struct nv50_query_funcs *funcs; /* implementation hooks */
+ uint16_t type; /* PIPE_QUERY_* or a driver-specific query type */
+ uint16_t index;
+};
+
+/* Reinterpret a gallium pipe_query handle as the driver's nv50_query. */
+static inline struct nv50_query *
+nv50_query(struct pipe_query *pipe)
+{
+   struct nv50_query *q = (struct nv50_query *)pipe;
+
+   return q;
+}
+
+void nv50_init_query_functions(struct nv50_context *);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
new file mode 100644
index 00000000000..945ce7abe50
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -0,0 +1,406 @@
+/*
+ * Copyright 2011 Christoph Bumiller
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define NV50_PUSH_EXPLICIT_SPACE_CHECKING
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_query_hw.h"
+#include "nv_object.xml.h"
+
+#define NV50_HW_QUERY_STATE_READY 0
+#define NV50_HW_QUERY_STATE_ACTIVE 1
+#define NV50_HW_QUERY_STATE_ENDED 2
+#define NV50_HW_QUERY_STATE_FLUSHED 3
+
+/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
+ * (since we use only a single GPU channel per screen) will not work properly.
+ *
+ * The first is not that big of an issue because OpenGL does not allow nested
+ * queries anyway.
+ */
+
+#define NV50_HW_QUERY_ALLOC_SPACE 256
+
+/* (Re)allocate the result storage of a hardware query.
+ *
+ * Any buffer currently attached to the query is released first. When size is
+ * non-zero a new suballocation is taken from the screen's mm_GART and mapped
+ * for CPU access; size == 0 therefore only releases the storage.
+ *
+ * Returns false when the allocation or the mapping fails, true otherwise.
+ */
+static bool
+nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,
+ int size)
+{
+ struct nv50_screen *screen = nv50->screen;
+ struct nv50_hw_query *hq = nv50_hw_query(q);
+ int ret;
+
+ if (hq->bo) {
+ nouveau_bo_ref(NULL, &hq->bo);
+ if (hq->mm) {
+ /* a READY query frees its suballocation right away; otherwise the
+ * free is queued as work on the screen's current fence */
+ if (hq->state == NV50_HW_QUERY_STATE_READY)
+ nouveau_mm_free(hq->mm);
+ else
+ nouveau_fence_work(screen->base.fence.current,
+ nouveau_mm_free_work, hq->mm);
+ }
+ }
+ if (size) {
+ hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size,
+ &hq->bo, &hq->base_offset);
+ if (!hq->bo)
+ return false;
+ hq->offset = hq->base_offset;
+
+ ret = nouveau_bo_map(hq->bo, 0, screen->base.client);
+ if (ret) {
+ /* mapping failed: release what was just allocated */
+ nv50_hw_query_allocate(nv50, q, 0);
+ return false;
+ }
+ /* CPU view of the first slot of this query's allocation */
+ hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset);
+ }
+ return true;
+}
+
+/* Emit one QUERY_ADDRESS_HIGH method carrying four words: the high and low
+ * parts of the destination address, the query's current sequence value and
+ * the caller-supplied get word.
+ *
+ * offset is relative to the query's current slot (hq->offset) inside hq->bo.
+ */
+static void
+nv50_hw_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
+ unsigned offset, uint32_t get)
+{
+ struct nv50_hw_query *hq = nv50_hw_query(q);
+
+ offset += hq->offset;
+
+ /* 1 method header + 4 data words */
+ PUSH_SPACE(push, 5);
+ PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, hq->bo->offset + offset);
+ PUSH_DATA (push, hq->bo->offset + offset);
+ PUSH_DATA (push, hq->sequence);
+ PUSH_DATA (push, get);
+}
+
+/* Promote the query to READY once its result has landed.
+ *
+ * 64-bit queries are considered complete when their fence has signalled;
+ * all others when the first word of the result slot equals the query's
+ * sequence value. The state is left untouched otherwise.
+ */
+static inline void
+nv50_hw_query_update(struct nv50_query *q)
+{
+   struct nv50_hw_query *hq = nv50_hw_query(q);
+   bool done;
+
+   if (hq->is64bit)
+      done = nouveau_fence_signalled(hq->fence);
+   else
+      done = hq->data[0] == hq->sequence;
+
+   if (done)
+      hq->state = NV50_HW_QUERY_STATE_READY;
+}
+
+/* Destroy a hardware query: release its result storage (allocate with
+ * size 0), drop the fence reference, then free the object itself.
+ */
+static void
+nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q)
+{
+ struct nv50_hw_query *hq = nv50_hw_query(q);
+ nv50_hw_query_allocate(nv50, q, 0);
+ nouveau_fence_ref(NULL, &hq->fence);
+ FREE(hq);
+}
+
+/* Begin a hardware query.
+ *
+ * Rotating queries first advance to their next result slot and initialize
+ * it from the CPU; then the per-type "begin" commands are emitted. The begin
+ * reports are written above the offsets nv50_hw_end_query() uses (e.g. 0x10
+ * here versus 0 there).
+ *
+ * Returns true and marks the query ACTIVE on success; asserts and returns
+ * false for query types that have no begin handling.
+ */
+static boolean
+nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_hw_query *hq = nv50_hw_query(q);
+
+ /* For occlusion queries we have to change the storage, because a previous
+ * query might set the initial render condition to false even *after* we re-
+ * initialized it to true.
+ */
+ if (hq->rotate) {
+ hq->offset += hq->rotate;
+ hq->data += hq->rotate / sizeof(*hq->data);
+ /* all slots of the current allocation used up: get a fresh one.
+ * NOTE(review): the return value is not checked here - confirm what
+ * should happen when the reallocation fails. */
+ if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
+ nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);
+
+ /* XXX: can we do this with the GPU, and sync with respect to a previous
+ * query ?
+ */
+ hq->data[0] = hq->sequence; /* initialize sequence */
+ hq->data[1] = 1; /* initial render condition = true */
+ hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */
+ hq->data[5] = 0;
+ }
+ if (!hq->is64bit)
+ hq->data[0] = hq->sequence++; /* the previously used one */
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ /* nesting = number of occlusion queries already active on the screen */
+ hq->nesting = nv50->screen->num_occlusion_queries_active++;
+ if (hq->nesting) {
+ nv50_hw_query_get(push, q, 0x10, 0x0100f002);
+ } else {
+ /* first active occlusion query: reset and enable the sample counter */
+ PUSH_SPACE(push, 4);
+ BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
+ PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ }
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nv50_hw_query_get(push, q, 0x10, 0x06805002);
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nv50_hw_query_get(push, q, 0x10, 0x05805002);
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nv50_hw_query_get(push, q, 0x20, 0x05805002);
+ nv50_hw_query_get(push, q, 0x30, 0x06805002);
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
+ nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
+ nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
+ nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
+ nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
+ nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
+ nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
+ nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ nv50_hw_query_get(push, q, 0x10, 0x00005002);
+ break;
+ default:
+ assert(0);
+ return false;
+ }
+ hq->state = NV50_HW_QUERY_STATE_ACTIVE;
+ return true;
+}
+
+/* End a hardware query.
+ *
+ * Marks the query ENDED and emits the per-type "end" reports at the start of
+ * the result slot. Types that have no begin counterpart in
+ * nv50_hw_begin_query() bump the sequence value here before emitting.
+ * 64-bit queries additionally take a reference on the screen's current
+ * fence, which nv50_hw_query_update() polls to detect completion.
+ */
+static void
+nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_hw_query *hq = nv50_hw_query(q);
+
+ hq->state = NV50_HW_QUERY_STATE_ENDED;
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ nv50_hw_query_get(push, q, 0, 0x0100f002);
+ /* last active occlusion query gone: disable the sample counter */
+ if (--nv50->screen->num_occlusion_queries_active == 0) {
+ PUSH_SPACE(push, 2);
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ }
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nv50_hw_query_get(push, q, 0, 0x06805002);
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nv50_hw_query_get(push, q, 0, 0x05805002);
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nv50_hw_query_get(push, q, 0x00, 0x05805002);
+ nv50_hw_query_get(push, q, 0x10, 0x06805002);
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
+ nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
+ nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
+ nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
+ nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
+ nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
+ nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+ nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ hq->sequence++;
+ /* fall through */
+ case PIPE_QUERY_TIME_ELAPSED:
+ nv50_hw_query_get(push, q, 0, 0x00005002);
+ break;
+ case PIPE_QUERY_GPU_FINISHED:
+ hq->sequence++;
+ nv50_hw_query_get(push, q, 0, 0x1000f010);
+ break;
+ case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+ hq->sequence++;
+ /* the query's index is folded into the get word */
+ nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ /* This query is not issued on GPU because disjoint is forced to false */
+ hq->state = NV50_HW_QUERY_STATE_READY;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ if (hq->is64bit)
+ nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence);
+}
+
+/* Fetch the result of a hardware query into *result.
+ *
+ * If the query is not READY and wait is false, the pushbuf is kicked once
+ * (state becomes FLUSHED) and false is returned. With wait set, the call
+ * waits on the result buffer and returns false if that wait fails.
+ *
+ * Counter-style results are computed from two values in the result slot,
+ * the one at the lower offset minus the one at the higher offset.
+ * Returns true on success; asserts and returns false for unknown types.
+ */
+static boolean
+nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
+ boolean wait, union pipe_query_result *result)
+{
+ struct nv50_hw_query *hq = nv50_hw_query(q);
+ /* differently sized views of the caller's result union */
+ uint64_t *res64 = (uint64_t *)result;
+ uint32_t *res32 = (uint32_t *)result;
+ uint8_t *res8 = (uint8_t *)result;
+ /* 64-bit view of the current result slot */
+ uint64_t *data64 = (uint64_t *)hq->data;
+ int i;
+
+ if (hq->state != NV50_HW_QUERY_STATE_READY)
+ nv50_hw_query_update(q);
+
+ if (hq->state != NV50_HW_QUERY_STATE_READY) {
+ if (!wait) {
+ /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
+ if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) {
+ hq->state = NV50_HW_QUERY_STATE_FLUSHED;
+ PUSH_KICK(nv50->base.pushbuf);
+ }
+ return false;
+ }
+ if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
+ return false;
+ }
+ hq->state = NV50_HW_QUERY_STATE_READY;
+
+ switch (q->type) {
+ case PIPE_QUERY_GPU_FINISHED:
+ res8[0] = true;
+ break;
+ case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
+ res64[0] = hq->data[1] - hq->data[5];
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
+ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
+ res64[0] = data64[0] - data64[2];
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ res64[0] = data64[0] - data64[4];
+ res64[1] = data64[2] - data64[6];
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ /* 8 counters, 16 bytes apart; second set starts 0x80 bytes in */
+ for (i = 0; i < 8; ++i)
+ res64[i] = data64[i * 2] - data64[16 + i * 2];
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ res64[0] = data64[1];
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ res64[0] = 1000000000;
+ res8[8] = false;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ res64[0] = data64[1] - data64[3];
+ break;
+ case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+ res32[0] = hq->data[1];
+ break;
+ default:
+ assert(0);
+ return false;
+ }
+
+ return true;
+}
+
+/* Hook table installed into every query made by nv50_hw_create_query(). */
+static const struct nv50_query_funcs hw_query_funcs = {
+ .destroy_query = nv50_hw_destroy_query,
+ .begin_query = nv50_hw_begin_query,
+ .end_query = nv50_hw_end_query,
+ .get_query_result = nv50_hw_get_query_result,
+};
+
+/* Create a hardware-backed query of the given type.
+ *
+ * Allocates an nv50_hw_query with CALLOC_STRUCT, installs the hardware hook
+ * table and reserves NV50_HW_QUERY_ALLOC_SPACE bytes of result storage.
+ *
+ * Returns the embedded nv50_query, or NULL when an allocation fails or the
+ * type is not one handled here. The index argument is not used by this
+ * function.
+ */
+struct nv50_query *
+nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
+{
+   struct nv50_hw_query *hq;
+   struct nv50_query *q;
+
+   hq = CALLOC_STRUCT(nv50_hw_query);
+   if (!hq)
+      return NULL;
+
+   q = &hq->base;
+   q->funcs = &hw_query_funcs;
+   q->type = type;
+
+   switch (q->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+      /* occlusion queries move to a new 32-byte slot on every begin */
+      hq->rotate = 32;
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      hq->is64bit = true;
+      break;
+   case PIPE_QUERY_TIME_ELAPSED:
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   case PIPE_QUERY_GPU_FINISHED:
+   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+      break;
+   default:
+      debug_printf("invalid query type: %u\n", type);
+      /* free through the pointer that owns the allocation, as the other
+       * failure path below does, instead of the embedded base pointer */
+      FREE(hq);
+      return NULL;
+   }
+
+   if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
+      FREE(hq);
+      return NULL;
+   }
+
+   if (hq->rotate) {
+      /* we advance before query_begin ! */
+      hq->offset -= hq->rotate;
+      hq->data -= hq->rotate / sizeof(*hq->data);
+   }
+
+   return q;
+}
+
+/* Emit `method` with one data word read by the CPU from the query's result
+ * slot, at byte offset result_offset.
+ *
+ * If the query is not READY yet, this waits on the result buffer first; the
+ * return value of that wait is not checked.
+ * NOTE(review): no PUSH_SPACE is issued here - presumably callers reserve
+ * the two words; confirm.
+ */
+void
+nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
+ struct nv50_query *q, unsigned result_offset)
+{
+ struct nv50_hw_query *hq = nv50_hw_query(q);
+
+ nv50_hw_query_update(q);
+ if (hq->state != NV50_HW_QUERY_STATE_READY)
+ nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client);
+ hq->state = NV50_HW_QUERY_STATE_READY;
+
+ BEGIN_NV04(push, SUBC_3D(method), 1);
+ /* data is a uint32_t array, hence the byte offset / 4 */
+ PUSH_DATA (push, hq->data[result_offset / 4]);
+}
+
+/* Emit an NV84 semaphore ACQUIRE_EQUAL on the query's current result slot,
+ * using the query's sequence value as the comparison value.
+ * Presumably this stalls the channel until the query has been written -
+ * confirm against the hardware documentation.
+ */
+void
+nv84_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q)
+{
+ struct nv50_hw_query *hq = nv50_hw_query(q);
+ unsigned offset = hq->offset;
+
+ /* 1 method header + 4 data words */
+ PUSH_SPACE(push, 5);
+ PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, hq->bo->offset + offset);
+ PUSH_DATA (push, hq->bo->offset + offset);
+ PUSH_DATA (push, hq->sequence);
+ PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
new file mode 100644
index 00000000000..294c67de9a4
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
@@ -0,0 +1,40 @@
+#ifndef __NV50_QUERY_HW_H__
+#define __NV50_QUERY_HW_H__
+
+#include "nouveau_fence.h"
+#include "nouveau_mm.h"
+
+#include "nv50_query.h"
+
+#define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
+
+/* Hardware query object: the common nv50_query plus its result storage. */
+struct nv50_hw_query {
+ struct nv50_query base; /* first member: nv50_hw_query() casts from it */
+ uint32_t *data; /* CPU pointer to the current result slot */
+ uint32_t sequence; /* emitted with each report, compared to data[0] */
+ struct nouveau_bo *bo; /* buffer object holding the results */
+ uint32_t base_offset; /* offset of this query's allocation within bo */
+ uint32_t offset; /* base + i * rotate */
+ uint8_t state; /* one of NV50_HW_QUERY_STATE_* */
+ bool is64bit; /* completion is tracked via fence, not data[0] */
+ uint8_t rotate; /* bytes to advance per begin, 0 = no rotation */
+ int nesting; /* only used for occlusion queries */
+ struct nouveau_mm_allocation *mm; /* from nouveau_mm_allocate() */
+ struct nouveau_fence *fence; /* referenced at end_query when is64bit */
+};
+
+/* Reinterpret a generic nv50_query as the hardware query that embeds it. */
+static inline struct nv50_hw_query *
+nv50_hw_query(struct nv50_query *q)
+{
+   struct nv50_hw_query *hq = (struct nv50_hw_query *)q;
+
+   return hq;
+}
+
+struct nv50_query *
+nv50_hw_create_query(struct nv50_context *, unsigned, unsigned);
+void
+nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t,
+ struct nv50_query *, unsigned);
+void
+nv84_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nv50_query *);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.c b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
index d289b4a24e8..325c19fb80c 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_resource.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
@@ -32,8 +32,8 @@ nv50_resource_from_handle(struct pipe_screen * screen,
struct pipe_surface *
nv50_surface_from_buffer(struct pipe_context *pipe,
- struct pipe_resource *pbuf,
- const struct pipe_surface *templ)
+ struct pipe_resource *pbuf,
+ const struct pipe_surface *templ)
{
struct nv50_surface *sf = CALLOC_STRUCT(nv50_surface);
if (!sf)
@@ -65,8 +65,8 @@ nv50_surface_from_buffer(struct pipe_context *pipe,
static struct pipe_surface *
nv50_surface_create(struct pipe_context *pipe,
- struct pipe_resource *pres,
- const struct pipe_surface *templ)
+ struct pipe_resource *pres,
+ const struct pipe_surface *templ)
{
/* surfaces are assumed to be miptrees all over the place. */
assert(pres->target != PIPE_BUFFER);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index ec51d00f266..a9e0c478322 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -180,6 +180,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -191,6 +193,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return class_3d >= NVA3_3D_CLASS;
/* unsupported caps */
@@ -215,8 +218,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
- case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
- case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index 941555ffbf8..9b911043132 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -27,6 +27,7 @@
#include "util/u_inlines.h"
#include "nv50/nv50_context.h"
+#include "nv50/nv50_query_hw.h"
void
nv50_constbufs_validate(struct nv50_context *nv50)
@@ -168,11 +169,23 @@ nv50_fragprog_validate(struct nv50_context *nv50)
{
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_program *fp = nv50->fragprog;
+ struct pipe_rasterizer_state *rast = &nv50->rast->pipe;
- fp->fp.sample_interp = nv50->min_samples > 1;
+ if (fp->fp.force_persample_interp != rast->force_persample_interp) {
+ /* Force the program to be reuploaded, which will trigger interp fixups
+ * to get applied
+ */
+ if (fp->mem)
+ nouveau_heap_free(&fp->mem);
+
+ fp->fp.force_persample_interp = rast->force_persample_interp;
+ }
+
+ if (fp->mem && !(nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_MIN_SAMPLES)))
+ return;
if (!nv50_program_validate(nv50, fp))
- return;
+ return;
nv50_program_update_context_state(nv50, fp, 1);
BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1);
@@ -629,7 +642,7 @@ nv50_stream_output_validate(struct nv50_context *nv50)
const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;
if (n == 4 && !targ->clean)
- nv84_query_fifo_wait(push, targ->pq);
+ nv84_hw_query_fifo_wait(push, nv50_query(targ->pq));
BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
@@ -638,8 +651,8 @@ nv50_stream_output_validate(struct nv50_context *nv50)
PUSH_DATA(push, targ->pipe.buffer_size);
if (!targ->clean) {
assert(targ->pq);
- nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),
- targ->pq, 0x4);
+ nv50_hw_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),
+ nv50_query(targ->pq), 0x4);
} else {
BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
PUSH_DATA(push, 0);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index 410e6311e60..6c8c9f0b4e6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -30,6 +30,7 @@
#include "nv50/nv50_stateobj.h"
#include "nv50/nv50_context.h"
+#include "nv50/nv50_query_hw.h"
#include "nv50/nv50_3d.xml.h"
#include "nv50/nv50_texture.xml.h"
@@ -725,6 +726,9 @@ nv50_sp_state_create(struct pipe_context *pipe,
if (cso->stream_output.num_outputs)
prog->pipe.stream_output = cso->stream_output;
+ prog->translated = nv50_program_translate(
+ prog, nv50_context(pipe)->screen->base.device->chipset);
+
return (void *)prog;
}
@@ -1033,7 +1037,7 @@ nv50_so_target_create(struct pipe_context *pipe,
if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) {
targ->pq = pipe->create_query(pipe,
- NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, 0);
+ NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, 0);
if (!targ->pq) {
FREE(targ);
return NULL;
@@ -1057,6 +1061,24 @@ nv50_so_target_create(struct pipe_context *pipe,
}
static void
+nva0_so_target_save_offset(struct pipe_context *pipe,
+ struct pipe_stream_output_target *ptarg,
+ unsigned index, bool serialize)
+{
+ struct nv50_so_target *targ = nv50_so_target(ptarg);
+
+ if (serialize) {
+ struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
+ PUSH_SPACE(push, 2);
+ BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ nv50_query(targ->pq)->index = index;
+ pipe->end_query(pipe, targ->pq);
+}
+
+static void
nv50_so_target_destroy(struct pipe_context *pipe,
struct pipe_stream_output_target *ptarg)
{
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 66dcf43533b..b6181edf24f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -487,7 +487,7 @@ static struct state_validate {
{ nv50_validate_viewport, NV50_NEW_VIEWPORT },
{ nv50_vertprog_validate, NV50_NEW_VERTPROG },
{ nv50_gmtyprog_validate, NV50_NEW_GMTYPROG },
- { nv50_fragprog_validate, NV50_NEW_FRAGPROG |
+ { nv50_fragprog_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
NV50_NEW_MIN_SAMPLES },
{ nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER },
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 64348b3c378..237d76d6adb 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -220,10 +220,14 @@ nv50_resource_copy_region(struct pipe_context *pipe,
nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
if (m2mf) {
+ struct nv50_miptree *src_mt = nv50_miptree(src);
+ struct nv50_miptree *dst_mt = nv50_miptree(dst);
struct nv50_m2mf_rect drect, srect;
unsigned i;
- unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
- unsigned ny = util_format_get_nblocksy(src->format, src_box->height);
+ unsigned nx = util_format_get_nblocksx(src->format, src_box->width)
+ << src_mt->ms_x;
+ unsigned ny = util_format_get_nblocksy(src->format, src_box->height)
+ << src_mt->ms_y;
nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz);
nv50_m2mf_rect_setup(&srect, src, src_level,
@@ -232,15 +236,15 @@ nv50_resource_copy_region(struct pipe_context *pipe,
for (i = 0; i < src_box->depth; ++i) {
nv50_m2mf_transfer_rect(nv50, &drect, &srect, nx, ny);
- if (nv50_miptree(dst)->layout_3d)
+ if (dst_mt->layout_3d)
drect.z++;
else
- drect.base += nv50_miptree(dst)->layer_stride;
+ drect.base += dst_mt->layer_stride;
- if (nv50_miptree(src)->layout_3d)
+ if (src_mt->layout_3d)
srect.z++;
else
- srect.base += nv50_miptree(src)->layer_stride;
+ srect.base += src_mt->layer_stride;
}
return;
}
@@ -270,7 +274,7 @@ nv50_resource_copy_region(struct pipe_context *pipe,
static void
nv50_clear_render_target(struct pipe_context *pipe,
struct pipe_surface *dst,
- const union pipe_color_union *color,
+ const union pipe_color_union *color,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height)
{
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index f5f47087bef..9fa6fceeefa 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -27,6 +27,7 @@
#include "translate/translate.h"
#include "nv50/nv50_context.h"
+#include "nv50/nv50_query_hw.h"
#include "nv50/nv50_resource.h"
#include "nv50/nv50_3d.xml.h"
@@ -745,7 +746,8 @@ nva0_draw_stream_output(struct nv50_context *nv50,
PUSH_DATA (push, 0);
BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
PUSH_DATA (push, so->stride);
- nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, so->pq, 0x4);
+ nv50_hw_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES,
+ nv50_query(so->pq), 0x4);
BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
PUSH_DATA (push, 0);
diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c
index 7780a179399..d13480c21d5 100644
--- a/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c
@@ -27,33 +27,33 @@
static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq,
struct nouveau_bo *inter_bo, unsigned slice_size)
{
- unsigned i, idx = comm->pvp_cur_index & 0xf;
- debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
+ unsigned i, idx = comm->pvp_cur_index & 0xf;
+ debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
#if 0
- debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
- debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);
+ debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
+ debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);
- for (i = 0; i != comm->irq_index; ++i)
- debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
- for (i = 0; i != comm->parse_endpos_index; ++i)
- debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
+ for (i = 0; i != comm->irq_index; ++i)
+ debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
+ for (i = 0; i != comm->parse_endpos_index; ++i)
+ debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
#endif
- debug_printf("mb_y = %u\n", comm->mb_y[idx]);
- if (comm->status_vp[idx] <= 1)
- return;
-
- if ((comm->pvp_stage & 0xff) != 0xff) {
- unsigned *map;
- int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client);
- assert(ret >= 0);
- map = inter_bo->map;
- for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
- debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
- }
- munmap(inter_bo->map, inter_bo->size);
- inter_bo->map = NULL;
- }
- assert((comm->pvp_stage & 0xff) == 0xff);
+ debug_printf("mb_y = %u\n", comm->mb_y[idx]);
+ if (comm->status_vp[idx] <= 1)
+ return;
+
+ if ((comm->pvp_stage & 0xff) != 0xff) {
+ unsigned *map;
+ int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client);
+ assert(ret >= 0);
+ map = inter_bo->map;
+ for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
+ debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
+ }
+ munmap(inter_bo->map, inter_bo->size);
+ inter_bo->map = NULL;
+ }
+ assert((comm->pvp_stage & 0xff) == 0xff);
}
#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index a168dd684ab..68048f9d6c0 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -252,10 +252,10 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
}
}
- vp->vp.clip_enable = info->io.clipDistanceMask;
- for (i = 0; i < 8; ++i)
- if (info->io.cullDistanceMask & (1 << i))
- vp->vp.clip_mode |= 1 << (i * 4);
+ vp->vp.clip_enable =
+ (1 << (info->io.clipDistances + info->io.cullDistances)) - 1;
+ for (i = 0; i < info->io.cullDistances; ++i)
+ vp->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4);
if (info->io.genUserClip < 0)
vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */
@@ -269,8 +269,6 @@ nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
vp->hdr[0] = 0x20061 | (1 << 10);
vp->hdr[4] = 0xff000;
- vp->hdr[18] = info->io.clipDistanceMask;
-
return nvc0_vtgp_gen_header(vp, info);
}
@@ -424,6 +422,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
for (i = 0; i < info->numInputs; ++i) {
m = nvc0_hdr_interp_mode(&info->in[i]);
+ if (info->in[i].sn == TGSI_SEMANTIC_COLOR) {
+ fp->fp.colors |= 1 << info->in[i].si;
+ if (info->in[i].sc)
+ fp->fp.color_interp[info->in[i].si] = m | (info->in[i].mask << 4);
+ }
for (c = 0; c < 4; ++c) {
if (!(info->in[i].mask & (1 << c)))
continue;
@@ -531,7 +534,6 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
info->io.genUserClip = prog->vp.num_ucps;
info->io.ucpBase = 256;
info->io.ucpCBSlot = 15;
- info->io.sampleInterp = prog->fp.sample_interp;
if (prog->type == PIPE_SHADER_COMPUTE) {
if (chipset >= NVISA_GK104_CHIPSET) {
@@ -575,6 +577,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
prog->immd_data = info->immd.buf;
prog->immd_size = info->immd.bufSize;
prog->relocs = info->bin.relocData;
+ prog->interps = info->bin.interpData;
prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
prog->num_barriers = info->numBarriers;
@@ -713,6 +716,23 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
if (prog->relocs)
nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
+ if (prog->interps) {
+ nv50_ir_change_interp(prog->interps, prog->code,
+ prog->fp.force_persample_interp,
+ prog->fp.flatshade);
+ for (int i = 0; i < 2; i++) {
+ unsigned mask = prog->fp.color_interp[i] >> 4;
+ unsigned interp = prog->fp.color_interp[i] & 3;
+ if (!mask)
+ continue;
+ prog->hdr[14] &= ~(0xff << (8 * i));
+ if (prog->fp.flatshade)
+ interp = NVC0_INTERP_FLAT;
+ for (int c = 0; c < 4; c++)
+ if (mask & (1 << c))
+ prog->hdr[14] |= interp << (2 * (4 * i + c));
+ }
+ }
#ifdef DEBUG
if (debug_get_bool_option("NV50_PROG_DEBUG", false))
@@ -773,6 +793,7 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
FREE(prog->code); /* may be 0 for hardcoded shaders */
FREE(prog->immd_data);
FREE(prog->relocs);
+ FREE(prog->interps);
if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms)
FREE(prog->cp.syms);
if (prog->tfb) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index 390e0c7a4f0..9c45e7b3e31 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -45,8 +45,10 @@ struct nvc0_program {
} vp;
struct {
uint8_t early_z;
- uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
- uint8_t sample_interp;
+ uint8_t colors;
+ uint8_t color_interp[2];
+ bool force_persample_interp;
+ bool flatshade;
} fp;
struct {
uint32_t tess_mode; /* ~0 if defined by the other stage */
@@ -61,6 +63,7 @@ struct nvc0_program {
uint8_t num_barriers;
void *relocs;
+ void *interps;
struct nvc0_transform_feedback_state *tfb;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index e4752e2dbc5..f53921092a5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -28,6 +28,7 @@
#include "nvc0/nvc0_query.h"
#include "nvc0/nvc0_query_sw.h"
#include "nvc0/nvc0_query_hw.h"
+#include "nvc0/nvc0_query_hw_metric.h"
#include "nvc0/nvc0_query_hw_sm.h"
static struct pipe_query *
@@ -188,7 +189,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
count++;
} else
if (screen->base.class_3d < NVE4_3D_CLASS) {
- count++;
+ count += 2;
}
}
}
@@ -218,6 +219,17 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
return 1;
}
}
+ } else
+ if (id == NVC0_HW_METRIC_QUERY_GROUP) {
+ if (screen->compute) {
+ if (screen->base.class_3d < NVE4_3D_CLASS) {
+ info->name = "Performance metrics";
+ info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
+ info->max_active_queries = 1;
+ info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
+ return 1;
+ }
+ }
}
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
index 6883ab6ab9d..c46361c31aa 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
@@ -32,7 +32,8 @@ nvc0_query(struct pipe_query *pipe)
* Driver queries groups:
*/
#define NVC0_HW_SM_QUERY_GROUP 0
-#define NVC0_SW_QUERY_DRV_STAT_GROUP 1
+#define NVC0_HW_METRIC_QUERY_GROUP 1
+#define NVC0_SW_QUERY_DRV_STAT_GROUP 2
void nvc0_init_query_functions(struct nvc0_context *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
index 25aa09be42a..fb2806a805e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -431,7 +431,7 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
id = nvc0_hw_metric_get_next_query_id(queries, id);
info->name = nvc0_hw_metric_names[id];
info->query_type = NVC0_HW_METRIC_QUERY(id);
- info->group_id = -1;
+ info->group_id = NVC0_HW_METRIC_QUERY_GROUP;
return 1;
}
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
index 12b5a025064..15c803c4307 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
@@ -26,7 +26,8 @@ nvc0_resource_from_handle(struct pipe_screen * screen,
} else {
struct pipe_resource *res = nv50_miptree_from_handle(screen,
templ, whandle);
- nv04_resource(res)->vtbl = &nvc0_miptree_vtbl;
+ if (res)
+ nv04_resource(res)->vtbl = &nvc0_miptree_vtbl;
return res;
}
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index af8e5f72670..6ad3980911d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -179,6 +179,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -201,8 +204,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
- case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
- case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -352,45 +353,51 @@ static int
nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
enum pipe_compute_cap param, void *data)
{
- uint64_t *data64 = (uint64_t *)data;
- uint32_t *data32 = (uint32_t *)data;
- const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+ const uint16_t obj_class = screen->compute->oclass;
+
+#define RET(x) do { \
+ if (data) \
+ memcpy(data, x, sizeof(x)); \
+ return sizeof(x); \
+} while (0)
switch (param) {
case PIPE_COMPUTE_CAP_GRID_DIMENSION:
- data64[0] = 3;
- return 8;
+ RET((uint64_t []) { 3 });
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
- data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fffffff : 65535;
- data64[1] = 65535;
- data64[2] = 65535;
- return 24;
+ if (obj_class >= NVE4_COMPUTE_CLASS) {
+ RET(((uint64_t []) { 0x7fffffff, 65535, 65535 }));
+ } else {
+ RET(((uint64_t []) { 65535, 65535, 65535 }));
+ }
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
- data64[0] = 1024;
- data64[1] = 1024;
- data64[2] = 64;
- return 24;
+ RET(((uint64_t []) { 1024, 1024, 64 }));
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
- data64[0] = 1024;
- return 8;
+ RET((uint64_t []) { 1024 });
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
- data64[0] = (uint64_t)1 << 40;
- return 8;
+ RET((uint64_t []) { 1ULL << 40 });
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
- data64[0] = 48 << 10;
- return 8;
+ RET((uint64_t []) { 48 << 10 });
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
- data64[0] = 512 << 10;
- return 8;
+ RET((uint64_t []) { 512 << 10 });
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
- data64[0] = 4096;
- return 8;
+ RET((uint64_t []) { 4096 });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
- data32[0] = 32;
- return 4;
+ RET((uint32_t []) { 32 });
+ case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+ RET((uint64_t []) { 1ULL << 40 });
+ case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+ RET((uint32_t []) { 0 });
+ case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+ RET((uint32_t []) { screen->mp_count_compute });
+ case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+ RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
default:
return 0;
}
+
+#undef RET
}
static void
@@ -827,6 +834,8 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, 1);
BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1);
PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
+ PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH);
if (screen->eng3d->oclass < NVE4_3D_CLASS) {
BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1);
PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index 857eb0316c7..8b73102b98b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -38,6 +38,7 @@ struct nvc0_graph_state {
uint32_t constant_elts;
int32_t index_bias;
uint16_t scissor;
+ bool flatshade;
uint8_t patch_vertices;
uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
uint8_t num_vtxbufs;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index af837fc4a33..8595800592c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -107,8 +107,54 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_program *fp = nvc0->fragprog;
+ struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;
- fp->fp.sample_interp = nvc0->min_samples > 1;
+ if (fp->fp.force_persample_interp != rast->force_persample_interp) {
+ /* Force the program to be reuploaded, which will trigger interp fixups
+ * to get applied
+ */
+ if (fp->mem)
+ nouveau_heap_free(&fp->mem);
+
+ fp->fp.force_persample_interp = rast->force_persample_interp;
+ }
+
+ /* Shade model works well enough when both colors follow it. However if one
+ * (or both) is explicitly set, then we have to go the patching route.
+ */
+ bool has_explicit_color = fp->fp.colors &&
+ (((fp->fp.colors & 1) && !fp->fp.color_interp[0]) ||
+ ((fp->fp.colors & 2) && !fp->fp.color_interp[1]));
+ bool hwflatshade = false;
+ if (has_explicit_color && fp->fp.flatshade != rast->flatshade) {
+ /* Force re-upload */
+ if (fp->mem)
+ nouveau_heap_free(&fp->mem);
+
+ fp->fp.flatshade = rast->flatshade;
+
+ /* Always smooth-shade in this mode, the shader will decide on its own
+ * when to flat-shade.
+ */
+ } else if (!has_explicit_color) {
+ hwflatshade = rast->flatshade;
+
+ /* No need to binary-patch the shader each time, make sure that it's set
+ * up for the default behaviour.
+ */
+ fp->fp.flatshade = 0;
+ }
+
+ if (hwflatshade != nvc0->state.flatshade) {
+ nvc0->state.flatshade = hwflatshade;
+ BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
+ PUSH_DATA (push, hwflatshade ? NVC0_3D_SHADE_MODEL_FLAT :
+ NVC0_3D_SHADE_MODEL_SMOOTH);
+ }
+
+ if (fp->mem && !(nvc0->dirty & NVC0_NEW_FRAGPROG)) {
+ return;
+ }
if (!nvc0_program_validate(nvc0, fp))
return;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 742bef39247..ba1714da010 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -212,9 +212,6 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe,
* always emit 16 commands, one for each scissor rectangle, here.
*/
- SB_BEGIN_3D(so, SHADE_MODEL, 1);
- SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT :
- NVC0_3D_SHADE_MODEL_SMOOTH);
SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first);
SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside);
@@ -683,6 +680,9 @@ nvc0_sp_state_create(struct pipe_context *pipe,
if (cso->stream_output.num_outputs)
prog->pipe.stream_output = cso->stream_output;
+ prog->translated = nvc0_program_translate(
+ prog, nvc0_context(pipe)->screen->base.device->chipset);
+
return (void *)prog;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index aec06097bbd..205e7dc6ae9 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -606,6 +606,9 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1;
}
+ /* Reset tfb as the shader that owns it may have been deleted. */
+ ctx_to->state.tfb = NULL;
+
if (!ctx_to->vertex)
ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS);
if (!ctx_to->idxbuf.buffer)
@@ -645,7 +648,7 @@ static struct state_validate {
{ nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
{ nvc0_validate_tess_state, NVC0_NEW_TESSFACTOR },
{ nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
- { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
+ { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG | NVC0_NEW_RASTERIZER },
{ nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
NVC0_NEW_RASTERIZER },
{ nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER },
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
index 8bc33c6a0e0..f9680f5a90f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
@@ -23,7 +23,7 @@ struct nvc0_blend_stateobj {
struct nvc0_rasterizer_stateobj {
struct pipe_rasterizer_state pipe;
int size;
- uint32_t state[44];
+ uint32_t state[42];
};
struct nvc0_zsa_stateobj {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index dbdf292c862..be123349148 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -225,10 +225,14 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
if (m2mf) {
+ struct nv50_miptree *src_mt = nv50_miptree(src);
+ struct nv50_miptree *dst_mt = nv50_miptree(dst);
struct nv50_m2mf_rect drect, srect;
unsigned i;
- unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
- unsigned ny = util_format_get_nblocksy(src->format, src_box->height);
+ unsigned nx = util_format_get_nblocksx(src->format, src_box->width)
+ << src_mt->ms_x;
+ unsigned ny = util_format_get_nblocksy(src->format, src_box->height)
+ << src_mt->ms_y;
nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz);
nv50_m2mf_rect_setup(&srect, src, src_level,
@@ -237,15 +241,15 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
for (i = 0; i < src_box->depth; ++i) {
nvc0->m2mf_copy_rect(nvc0, &drect, &srect, nx, ny);
- if (nv50_miptree(dst)->layout_3d)
+ if (dst_mt->layout_3d)
drect.z++;
else
- drect.base += nv50_miptree(dst)->layer_stride;
+ drect.base += dst_mt->layer_stride;
- if (nv50_miptree(src)->layout_3d)
+ if (src_mt->layout_3d)
srect.z++;
else
- srect.base += nv50_miptree(src)->layer_stride;
+ srect.base += src_mt->layer_stride;
}
return;
}
@@ -493,57 +497,57 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height)
{
- struct nvc0_context *nvc0 = nvc0_context(pipe);
- struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- struct nv50_miptree *mt = nv50_miptree(dst->texture);
- struct nv50_surface *sf = nv50_surface(dst);
- uint32_t mode = 0;
- int unk = mt->base.base.target == PIPE_TEXTURE_2D;
- unsigned z;
-
- if (!PUSH_SPACE(push, 32 + sf->depth))
- return;
-
- PUSH_REFN (push, mt->base.bo, mt->base.domain | NOUVEAU_BO_WR);
-
- if (clear_flags & PIPE_CLEAR_DEPTH) {
- BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1);
- PUSH_DATAf(push, depth);
- mode |= NVC0_3D_CLEAR_BUFFERS_Z;
- }
-
- if (clear_flags & PIPE_CLEAR_STENCIL) {
- BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1);
- PUSH_DATA (push, stencil & 0xff);
- mode |= NVC0_3D_CLEAR_BUFFERS_S;
- }
-
- BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
- PUSH_DATA (push, ( width << 16) | dstx);
- PUSH_DATA (push, (height << 16) | dsty);
-
- BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5);
- PUSH_DATAh(push, mt->base.address + sf->offset);
- PUSH_DATA (push, mt->base.address + sf->offset);
- PUSH_DATA (push, nvc0_format_table[dst->format].rt);
- PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
- PUSH_DATA (push, mt->layer_stride >> 2);
- BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
- PUSH_DATA (push, 1);
- BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3);
- PUSH_DATA (push, sf->width);
- PUSH_DATA (push, sf->height);
- PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth));
- BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
- PUSH_DATA (push, dst->u.tex.first_layer);
-
- BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
- for (z = 0; z < sf->depth; ++z) {
- PUSH_DATA (push, mode |
- (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
- }
-
- nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv50_miptree *mt = nv50_miptree(dst->texture);
+ struct nv50_surface *sf = nv50_surface(dst);
+ uint32_t mode = 0;
+ int unk = mt->base.base.target == PIPE_TEXTURE_2D;
+ unsigned z;
+
+ if (!PUSH_SPACE(push, 32 + sf->depth))
+ return;
+
+ PUSH_REFN (push, mt->base.bo, mt->base.domain | NOUVEAU_BO_WR);
+
+ if (clear_flags & PIPE_CLEAR_DEPTH) {
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1);
+ PUSH_DATAf(push, depth);
+ mode |= NVC0_3D_CLEAR_BUFFERS_Z;
+ }
+
+ if (clear_flags & PIPE_CLEAR_STENCIL) {
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1);
+ PUSH_DATA (push, stencil & 0xff);
+ mode |= NVC0_3D_CLEAR_BUFFERS_S;
+ }
+
+ BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
+ PUSH_DATA (push, ( width << 16) | dstx);
+ PUSH_DATA (push, (height << 16) | dsty);
+
+ BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5);
+ PUSH_DATAh(push, mt->base.address + sf->offset);
+ PUSH_DATA (push, mt->base.address + sf->offset);
+ PUSH_DATA (push, nvc0_format_table[dst->format].rt);
+ PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA (push, mt->layer_stride >> 2);
+ BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3);
+ PUSH_DATA (push, sf->width);
+ PUSH_DATA (push, sf->height);
+ PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth));
+ BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
+ PUSH_DATA (push, dst->u.tex.first_layer);
+
+ BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
+ for (z = 0; z < sf->depth; ++z) {
+ PUSH_DATA (push, mode |
+ (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
+ }
+
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
}
void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
index 8b23a4887da..9c19ba20a7e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
@@ -27,6 +27,7 @@ struct push_context {
struct {
bool enabled;
bool value;
+ uint8_t width;
unsigned stride;
const uint8_t *data;
} edgeflag;
@@ -53,6 +54,7 @@ nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx)
/* silence warnings */
ctx->edgeflag.data = NULL;
ctx->edgeflag.stride = 0;
+ ctx->edgeflag.width = 0;
}
static inline void
@@ -100,6 +102,7 @@ nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0,
struct nv04_resource *buf = nv04_resource(vb->buffer);
ctx->edgeflag.stride = vb->stride;
+ ctx->edgeflag.width = util_format_get_blocksize(ve->src_format);
if (buf) {
unsigned offset = vb->buffer_offset + ve->src_offset;
ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base,
@@ -137,10 +140,17 @@ prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index)
}
static inline bool
-ef_value(const struct push_context *ctx, uint32_t index)
+ef_value_8(const struct push_context *ctx, uint32_t index)
{
- float *pf = (float *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
- return *pf ? true : false;
+ uint8_t *pf = (uint8_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
+ return !!*pf;
+}
+
+static inline bool
+ef_value_32(const struct push_context *ctx, uint32_t index)
+{
+ uint32_t *pf = (uint32_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
+ return !!*pf;
}
static inline bool
@@ -154,7 +164,11 @@ static inline unsigned
ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
{
unsigned i;
- for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+ bool ef = ctx->edgeflag.value;
+ if (ctx->edgeflag.width == 1)
+ for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
+ else
+ for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
return i;
}
@@ -162,7 +176,11 @@ static inline unsigned
ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
{
unsigned i;
- for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+ bool ef = ctx->edgeflag.value;
+ if (ctx->edgeflag.width == 1)
+ for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
+ else
+ for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
return i;
}
@@ -170,7 +188,11 @@ static inline unsigned
ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
{
unsigned i;
- for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+ bool ef = ctx->edgeflag.value;
+ if (ctx->edgeflag.width == 1)
+ for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
+ else
+ for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
return i;
}
@@ -178,7 +200,11 @@ static inline unsigned
ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
{
unsigned i;
- for (i = 0; i < n && ef_value(ctx, start++) == ctx->edgeflag.value; ++i);
+ bool ef = ctx->edgeflag.value;
+ if (ctx->edgeflag.width == 1)
+ for (i = 0; i < n && ef_value_8(ctx, start++) == ef; ++i);
+ else
+ for (i = 0; i < n && ef_value_32(ctx, start++) == ef; ++i);
return i;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c
index 28bcb629e43..91543782dfc 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c
@@ -27,33 +27,33 @@
static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq,
struct nouveau_bo *inter_bo, unsigned slice_size)
{
- unsigned i, idx = comm->pvp_cur_index & 0xf;
- debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
+ unsigned i, idx = comm->pvp_cur_index & 0xf;
+ debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
#if 0
- debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
- debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);
+ debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
+ debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);
- for (i = 0; i != comm->irq_index; ++i)
- debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
- for (i = 0; i != comm->parse_endpos_index; ++i)
- debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
+ for (i = 0; i != comm->irq_index; ++i)
+ debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
+ for (i = 0; i != comm->parse_endpos_index; ++i)
+ debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
#endif
- debug_printf("mb_y = %u\n", comm->mb_y[idx]);
- if (comm->status_vp[idx] <= 1)
- return;
-
- if ((comm->pvp_stage & 0xff) != 0xff) {
- unsigned *map;
- int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client);
- assert(ret >= 0);
- map = inter_bo->map;
- for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
- debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
- }
- munmap(inter_bo->map, inter_bo->size);
- inter_bo->map = NULL;
- }
- assert((comm->pvp_stage & 0xff) == 0xff);
+ debug_printf("mb_y = %u\n", comm->mb_y[idx]);
+ if (comm->status_vp[idx] <= 1)
+ return;
+
+ if ((comm->pvp_stage & 0xff) != 0xff) {
+ unsigned *map;
+ int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client);
+ assert(ret >= 0);
+ map = inter_bo->map;
+ for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
+ debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
+ }
+ munmap(inter_bo->map, inter_bo->size);
+ inter_bo->map = NULL;
+ }
+ assert((comm->pvp_stage & 0xff) == 0xff);
}
#endif
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index a576abdfaf2..d5981248a86 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -198,6 +198,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
/* SWTCL-only features. */
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index bc6980660a5..ee7beee3001 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -635,7 +635,7 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
return 0;
}
-void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg)
+void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg, unsigned abs)
{
switch(value) {
case 0:
@@ -655,11 +655,11 @@ void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *ne
break;
case 0xBF800000: /* -1.0f */
*sel = V_SQ_ALU_SRC_1;
- *neg ^= 1;
+ *neg ^= !abs;
break;
case 0xBF000000: /* -0.5f */
*sel = V_SQ_ALU_SRC_0_5;
- *neg ^= 1;
+ *neg ^= !abs;
break;
default:
*sel = V_SQ_ALU_SRC_LITERAL;
@@ -1208,7 +1208,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
}
if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
r600_bytecode_special_constants(nalu->src[i].value,
- &nalu->src[i].sel, &nalu->src[i].neg);
+ &nalu->src[i].sel, &nalu->src[i].neg, nalu->src[i].abs);
}
if (nalu->dst.sel >= bc->ngpr) {
bc->ngpr = nalu->dst.sel + 1;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 7cf3a090908..d48ad1ebf01 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -255,7 +255,7 @@ int r600_bytecode_add_cfinst(struct r600_bytecode *bc,
int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
const struct r600_bytecode_alu *alu, unsigned type);
void r600_bytecode_special_constants(uint32_t value,
- unsigned *sel, unsigned *neg);
+ unsigned *sel, unsigned *neg, unsigned abs);
void r600_bytecode_disasm(struct r600_bytecode *bc);
void r600_bytecode_alu_read(struct r600_bytecode *bc,
struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 9a97de9965e..9f4cda2c142 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -344,6 +344,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
/* Stream output. */
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 8efe902a329..fc6335ae8bc 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -162,10 +162,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
goto error;
}
- /* disable SB for geom shaders on R6xx/R7xx due to some mysterious gs piglit regressions with it enabled. */
- if (rctx->b.chip_class <= R700) {
- use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
- }
/* disable SB for shaders using doubles */
use_sb &= !shader->shader.uses_doubles;
@@ -1008,7 +1004,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
- r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
+ r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs);
if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
return;
}
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index f341ecb41a5..0dc6c918331 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -443,6 +443,27 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
return &rbuffer->b.b;
}
+/**
+ * Convenience wrapper around r600_buffer_create() for callers that only
+ * need a plain buffer and do not want to fill in a template themselves.
+ *
+ * \param screen     screen handed through to r600_buffer_create()
+ * \param bind       value stored in the template's bind field
+ * \param usage      value stored in the template's usage field
+ * \param size       value stored in the template's width0 field
+ * \param alignment  forwarded unchanged to r600_buffer_create()
+ * \return the result of r600_buffer_create()
+ */
+struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
+						 unsigned bind,
+						 unsigned usage,
+						 unsigned size,
+						 unsigned alignment)
+{
+	struct pipe_resource templ;
+
+	/* Zero the whole template first so unlisted fields are 0. */
+	memset(&templ, 0, sizeof(templ));
+
+	/* Caller-controlled properties. */
+	templ.bind = bind;
+	templ.usage = usage;
+	templ.width0 = size;
+
+	/* Fixed properties: a PIPE_BUFFER of R8_UNORM elements with
+	 * height, depth and array size of 1 and no flags. */
+	templ.target = PIPE_BUFFER;
+	templ.format = PIPE_FORMAT_R8_UNORM;
+	templ.flags = 0;
+	templ.height0 = 1;
+	templ.depth0 = 1;
+	templ.array_size = 1;
+
+	return r600_buffer_create(screen, &templ, alignment);
+}
+
struct pipe_resource *
r600_buffer_from_user_memory(struct pipe_screen *screen,
const struct pipe_resource *templ,
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 7ac94caad9f..0ad36849645 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -360,6 +360,8 @@ static const struct debug_named_value common_debug_options[] = {
{ "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
{ "nowc", DBG_NO_WC, "Disable GTT write combining" },
{ "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
+ { "nodcc", DBG_NO_DCC, "Disable DCC." },
+ { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
DEBUG_NAMED_VALUE_END /* must be last */
};
@@ -416,6 +418,7 @@ static const char* r600_get_chip_name(struct r600_common_screen *rscreen)
case CHIP_ICELAND: return "AMD ICELAND";
case CHIP_CARRIZO: return "AMD CARRIZO";
case CHIP_FIJI: return "AMD FIJI";
+ case CHIP_STONEY: return "AMD STONEY";
default: return "AMD unknown";
}
}
@@ -540,6 +543,11 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
case CHIP_ICELAND: return "iceland";
case CHIP_CARRIZO: return "carrizo";
case CHIP_FIJI: return "fiji";
+#if HAVE_LLVM <= 0x0307
+ case CHIP_STONEY: return "carrizo";
+#else
+ case CHIP_STONEY: return "stoney";
+#endif
default: return "";
}
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index b58b500bd76..c300c0b3332 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -99,6 +99,8 @@
#define DBG_INFO (1llu << 40)
#define DBG_NO_WC (1llu << 41)
#define DBG_CHECK_VM (1llu << 42)
+#define DBG_NO_DCC (1llu << 43)
+#define DBG_NO_DCC_CLEAR (1llu << 44)
#define R600_MAP_BUFFER_ALIGNMENT 64
@@ -214,6 +216,7 @@ struct r600_texture {
struct r600_fmask_info fmask;
struct r600_cmask_info cmask;
struct r600_resource *cmask_buffer;
+ struct r600_resource *dcc_buffer;
unsigned cb_color_info; /* fast clear enable bit */
unsigned color_clear_value[2];
@@ -243,6 +246,7 @@ struct r600_surface {
unsigned cb_color_dim; /* EG only */
unsigned cb_color_pitch; /* EG and later */
unsigned cb_color_slice; /* EG and later */
+ unsigned cb_dcc_base; /* VI and later */
unsigned cb_color_attrib; /* EG and later */
unsigned cb_dcc_control; /* VI and later */
unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
@@ -489,6 +493,11 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
unsigned alignment);
+struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
+ unsigned bind,
+ unsigned usage,
+ unsigned size,
+ unsigned alignment);
struct pipe_resource *
r600_buffer_from_user_memory(struct pipe_screen *screen,
const struct pipe_resource *templ,
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index fc69f48bb70..edfdfe33187 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -268,6 +268,7 @@ static void r600_texture_destroy(struct pipe_screen *screen,
if (rtex->cmask_buffer != &rtex->resource) {
pipe_resource_reference((struct pipe_resource**)&rtex->cmask_buffer, NULL);
}
+ pipe_resource_reference((struct pipe_resource**)&rtex->dcc_buffer, NULL);
pb_reference(&resource->buf, NULL);
FREE(rtex);
}
@@ -482,6 +483,25 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
}
+/**
+ * Allocate a separate DCC buffer for \p rtex and enable DCC on it.
+ *
+ * Does nothing when the DBG_NO_DCC debug flag is set. If the allocation
+ * fails, rtex->dcc_buffer is left NULL and DCC stays disabled.
+ *
+ * \param rscreen  screen used for the debug flags, allocation and clear
+ * \param rtex     texture whose surface.dcc_size / surface.dcc_alignment
+ *                 describe the buffer to allocate
+ */
+static void vi_texture_alloc_dcc_separate(struct r600_common_screen *rscreen,
+					  struct r600_texture *rtex)
+{
+	struct pipe_resource *dcc;
+
+	if (rscreen->debug_flags & DBG_NO_DCC)
+		return;
+
+	dcc = r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
+					 PIPE_USAGE_DEFAULT,
+					 rtex->surface.dcc_size,
+					 rtex->surface.dcc_alignment);
+	rtex->dcc_buffer = (struct r600_resource *)dcc;
+	if (!rtex->dcc_buffer)
+		return;
+
+	/* Fill the whole buffer with 0xFFFFFFFF before first use.
+	 * NOTE(review): presumably this is the "uncompressed" DCC
+	 * encoding - confirm against the hardware documentation. */
+	r600_screen_clear_buffer(rscreen, &rtex->dcc_buffer->b.b, 0,
+				 rtex->surface.dcc_size, 0xFFFFFFFF, true);
+
+	/* Only advertise DCC once the buffer exists and is initialized. */
+	rtex->cb_color_info |= VI_S_028C70_DCC_ENABLE(1);
+}
+
static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
@@ -621,6 +641,8 @@ r600_texture_create_object(struct pipe_screen *screen,
return NULL;
}
}
+ if (rtex->surface.dcc_size)
+ vi_texture_alloc_dcc_separate(rscreen, rtex);
}
/* Now create the backing buffer. */
@@ -1219,6 +1241,81 @@ static void evergreen_set_clear_color(struct r600_texture *rtex,
memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
}
+/**
+ * Work out the value to fill a DCC buffer with for a fast color clear.
+ *
+ * Each used channel of the clear color is classified as "zero" or
+ * "non-zero". One channel (the first or the last, depending on the
+ * color swap) is the "extra" channel; all others are "main" channels.
+ *
+ * \param surface_format      format of the surface being cleared
+ * \param color               requested clear color
+ * \param reset_value         out: 32-bit pattern to write to the DCC
+ *                            buffer. Always has 0x20202020 set;
+ *                            0x80808080 is added when the main channels
+ *                            are non-zero and 0x40404040 when the extra
+ *                            channel is non-zero.
+ * \param clear_words_needed  out: false only when every used channel is
+ *                            0 or 1 (0 or INT32_MAX / UINT32_MAX for
+ *                            pure integer formats) and all main channels
+ *                            agree; true in every other case, in which
+ *                            *reset_value is left at 0x20202020.
+ */
+static void vi_get_fast_clear_parameters(enum pipe_format surface_format,
+					 const union pipe_color_union *color,
+					 uint32_t* reset_value,
+					 bool* clear_words_needed)
+{
+	/* {false} rather than {}: an empty initializer is not valid ISO C
+	 * before C23. */
+	bool values[4] = {false};
+	int i;
+	bool main_value = false;
+	bool extra_value = false;
+	int extra_channel;
+	const struct util_format_description *desc = util_format_description(surface_format);
+
+	/* Defaults, kept on every early return below. */
+	*clear_words_needed = true;
+	*reset_value = 0x20202020U;
+
+	/* If we want to clear without needing a fast clear eliminate step, we
+	 * can set each channel to 0 or 1 (or 0/max for integer formats). We
+	 * have two sets of flags, one for the last or first channel(extra) and
+	 * one for the other channels(main).
+	 */
+
+	if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT ||
+	    surface_format == PIPE_FORMAT_B5G6R5_UNORM ||
+	    surface_format == PIPE_FORMAT_B5G6R5_SRGB) {
+		/* No channel is treated as "extra" for these formats. */
+		extra_channel = -1;
+	} else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
+		if (r600_translate_colorswap(surface_format) <= 1)
+			extra_channel = desc->nr_channels - 1;
+		else
+			extra_channel = 0;
+	} else {
+		/* Other layouts are not handled. */
+		return;
+	}
+
+	for (i = 0; i < 4; ++i) {
+		int index = desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X;
+
+		/* Skip components that do not map to a real channel. */
+		if (desc->swizzle[i] < UTIL_FORMAT_SWIZZLE_X ||
+		    desc->swizzle[i] > UTIL_FORMAT_SWIZZLE_W)
+			continue;
+
+		if (util_format_is_pure_sint(surface_format)) {
+			values[i] = color->i[i] != 0;
+			if (color->i[i] != 0 && color->i[i] != INT32_MAX)
+				return;
+		} else if (util_format_is_pure_uint(surface_format)) {
+			values[i] = color->ui[i] != 0U;
+			if (color->ui[i] != 0U && color->ui[i] != UINT32_MAX)
+				return;
+		} else {
+			values[i] = color->f[i] != 0.0F;
+			if (color->f[i] != 0.0F && color->f[i] != 1.0F)
+				return;
+		}
+
+		if (index == extra_channel)
+			extra_value = values[i];
+		else
+			main_value = values[i];
+	}
+
+	/* All main channels must share one value; reuse the outer index
+	 * instead of declaring a second, shadowing one. */
+	for (i = 0; i < 4; ++i) {
+		if (values[i] != main_value &&
+		    desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X != extra_channel &&
+		    desc->swizzle[i] >= UTIL_FORMAT_SWIZZLE_X &&
+		    desc->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W)
+			return;
+	}
+
+	*clear_words_needed = false;
+	if (main_value)
+		*reset_value |= 0x80808080U;
+
+	if (extra_value)
+		*reset_value |= 0x40404040U;
+}
+
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
@@ -1272,18 +1369,36 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
continue;
}
- /* ensure CMASK is enabled */
- r600_texture_alloc_cmask_separate(rctx->screen, tex);
- if (tex->cmask.size == 0) {
- continue;
+ if (tex->dcc_buffer) {
+ uint32_t reset_value;
+ bool clear_words_needed;
+
+ if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
+ continue;
+
+ vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
+
+ rctx->clear_buffer(&rctx->b, &tex->dcc_buffer->b.b,
+ 0, tex->surface.dcc_size, reset_value, true);
+
+ if (clear_words_needed)
+ tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+ } else {
+ /* ensure CMASK is enabled */
+ r600_texture_alloc_cmask_separate(rctx->screen, tex);
+ if (tex->cmask.size == 0) {
+ continue;
+ }
+
+ /* Do the fast clear. */
+ rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
+ tex->cmask.offset, tex->cmask.size, 0, true);
+
+ tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
}
- /* Do the fast clear. */
evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
- rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
- tex->cmask.offset, tex->cmask.size, 0, true);
- tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
if (dirty_cbufs)
*dirty_cbufs |= 1 << i;
rctx->set_atom_dirty(rctx, fb_state, true);
diff --git a/src/gallium/drivers/radeon/r600d_common.h b/src/gallium/drivers/radeon/r600d_common.h
index 115042d153e..a3d182cd30f 100644
--- a/src/gallium/drivers/radeon/r600d_common.h
+++ b/src/gallium/drivers/radeon/r600d_common.h
@@ -202,6 +202,7 @@
#define EG_S_028C70_FAST_CLEAR(x) (((x) & 0x1) << 17)
#define SI_S_028C70_FAST_CLEAR(x) (((x) & 0x1) << 13)
+#define VI_S_028C70_DCC_ENABLE(x) (((x) & 0x1) << 28)
/*CIK+*/
#define R_0300FC_CP_STRMOUT_CNTL 0x0300FC
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index c3ac7e7f2ef..33b01361aa5 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -478,6 +478,8 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video
result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO)
result.sps_info_flags |= 1 << 9;
+ if (pic->UseRefPicList == true)
+ result.sps_info_flags |= 1 << 10;
result.chroma_format = pic->pps->sps->chroma_format_idc;
result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
@@ -586,6 +588,11 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video
memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
+ for (i = 0 ; i < 2 ; i++) {
+ for (int j = 0 ; j < 15 ; j++)
+ result.direct_reflist[i][j] = pic->RefPicList[i][j];
+ }
+
/* TODO
result.highestTid;
result.isNonRef;
diff --git a/src/gallium/drivers/radeon/radeon_uvd.h b/src/gallium/drivers/radeon/radeon_uvd.h
index 452fbd60880..9cc0a694c30 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.h
+++ b/src/gallium/drivers/radeon/radeon_uvd.h
@@ -233,6 +233,15 @@ struct ruvd_h265 {
uint8_t highestTid;
uint8_t isNonRef;
+
+ uint8_t p010_mode;
+ uint8_t msb_mode;
+ uint8_t luma_10to8;
+ uint8_t chroma_10to8;
+ uint8_t sclr_luma10to8;
+ uint8_t sclr_chroma10to8;
+
+ uint8_t direct_reflist[2][15];
};
struct ruvd_vc1 {
diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
index 3a1834b948f..32bfc32073b 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -205,11 +205,12 @@ int rvid_get_video_param(struct pipe_screen *screen,
enum pipe_video_cap param)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+ enum pipe_video_format codec = u_reduce_video_profile(profile);
if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
- return u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
+ return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
rvce_is_fw_version_supported(rscreen);
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
@@ -232,38 +233,19 @@ int rvid_get_video_param(struct pipe_screen *screen,
}
}
- /* UVD 2.x limits */
- if (rscreen->family < CHIP_PALM) {
- enum pipe_video_format codec = u_reduce_video_profile(profile);
- switch (param) {
- case PIPE_VIDEO_CAP_SUPPORTED:
- /* no support for MPEG4 */
- return codec != PIPE_VIDEO_FORMAT_MPEG4 &&
- /* FIXME: VC-1 simple/main profile is broken */
- profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE &&
- profile != PIPE_VIDEO_PROFILE_VC1_MAIN;
- case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
- case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
- /* MPEG2 only with shaders and no support for
- interlacing on R6xx style UVD */
- return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
- rscreen->family > CHIP_RV770;
- default:
- break;
- }
- }
-
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
- switch (u_reduce_video_profile(profile)) {
+ switch (codec) {
case PIPE_VIDEO_FORMAT_MPEG12:
case PIPE_VIDEO_FORMAT_MPEG4:
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
- return entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
+ if (rscreen->family < CHIP_PALM)
+ /* no support for MPEG4 */
+ return codec != PIPE_VIDEO_FORMAT_MPEG4;
+ return true;
case PIPE_VIDEO_FORMAT_VC1:
/* FIXME: VC-1 simple/main profile is broken */
- return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED &&
- entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
+ return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED;
case PIPE_VIDEO_FORMAT_HEVC:
/* Carrizo only supports HEVC Main */
return rscreen->family >= CHIP_CARRIZO &&
@@ -280,13 +262,17 @@ int rvid_get_video_param(struct pipe_screen *screen,
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
- if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
- return false; //The hardware doesn't support interlaced HEVC.
- return true;
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
- if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
- return false; //The hardware doesn't support interlaced HEVC.
- return true;
+ if (rscreen->family < CHIP_PALM) {
+ /* MPEG2 only with shaders and no support for
+ interlacing on R6xx style UVD */
+ return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
+ rscreen->family > CHIP_RV770;
+ } else {
+ if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
+ return false; //The firmware doesn't support interlaced HEVC.
+ return true;
+ }
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
case PIPE_VIDEO_CAP_MAX_LEVEL:
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index b91e1adf41d..8bf1e15f3be 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -137,6 +137,7 @@ enum radeon_family {
CHIP_ICELAND,
CHIP_CARRIZO,
CHIP_FIJI,
+ CHIP_STONEY,
CHIP_LAST,
};
@@ -331,6 +332,7 @@ struct radeon_surf_level {
uint32_t nblk_z;
uint32_t pitch_bytes;
uint32_t mode;
+ uint64_t dcc_offset;
};
struct radeon_surf {
@@ -366,6 +368,9 @@ struct radeon_surf {
uint32_t stencil_tiling_index[RADEON_SURF_MAX_LEVEL];
uint32_t pipe_config;
uint32_t num_banks;
+
+ uint64_t dcc_size;
+ uint64_t dcc_alignment;
};
struct radeon_bo_list_item {
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index 6454b8ce8c0..e53af1dd6b5 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -242,7 +242,8 @@ void cik_sdma_copy(struct pipe_context *ctx,
if (src->format != dst->format ||
rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 ||
- (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level)) {
+ (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) ||
+ rdst->dcc_buffer || rsrc->dcc_buffer) {
goto fallback;
}
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 082ea850675..fce014a1e6b 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -326,7 +326,7 @@ void si_decompress_color_textures(struct si_context *sctx,
assert(view);
tex = (struct r600_texture *)view->texture;
- assert(tex->cmask.size || tex->fmask.size);
+ assert(tex->cmask.size || tex->fmask.size || tex->dcc_buffer);
si_blit_decompress_color(&sctx->b.b, tex,
view->u.tex.first_level, view->u.tex.last_level,
@@ -455,7 +455,7 @@ static void si_decompress_subresource(struct pipe_context *ctx,
si_blit_decompress_depth_in_place(sctx, rtex, true,
level, level,
first_layer, last_layer);
- } else if (rtex->fmask.size || rtex->cmask.size) {
+ } else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_buffer) {
si_blit_decompress_color(ctx, rtex, level, level,
first_layer, last_layer);
}
@@ -507,7 +507,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
util_blitter_default_src_texture(&src_templ, src, src_level);
- if (util_format_is_compressed(src->format) &&
+ if (util_format_is_compressed(src->format) ||
util_format_is_compressed(dst->format)) {
unsigned blocksize = util_format_get_blocksize(src->format);
@@ -536,7 +536,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
src_force_level = src_level;
} else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src) ||
/* also *8_SNORM has precision issues, use UNORM instead */
- util_format_is_snorm(src->format)) {
+ util_format_is_snorm8(src->format)) {
if (util_format_is_subsampled_422(src->format)) {
src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
@@ -675,7 +675,8 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
info->src.box.depth == 1 &&
dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D &&
!(dst->surface.flags & RADEON_SURF_SCANOUT) &&
- (!dst->cmask.size || !dst->dirty_level_mask) /* dst cannot be fast-cleared */) {
+ (!dst->cmask.size || !dst->dirty_level_mask) && /* dst cannot be fast-cleared */
+ !dst->dcc_buffer) {
si_blitter_begin(ctx, SI_COLOR_RESOLVE |
(info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
util_blitter_custom_resolve_color(sctx->blitter,
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 13738da5e2c..a8ff6f27319 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -181,6 +181,11 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
rview->resource, RADEON_USAGE_READ,
r600_get_sampler_view_priority(rview->resource));
+ if (rview->dcc_buffer && rview->dcc_buffer != rview->resource)
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ rview->dcc_buffer, RADEON_USAGE_READ,
+ RADEON_PRIO_DCC);
+
pipe_sampler_view_reference(&views->views[slot], view);
memcpy(views->desc.list + slot*8, view_desc, 8*4);
views->desc.enabled_mask |= 1llu << slot;
@@ -229,7 +234,8 @@ static void si_set_sampler_views(struct pipe_context *ctx,
} else {
samplers->depth_texture_mask &= ~(1 << slot);
}
- if (rtex->cmask.size || rtex->fmask.size) {
+ if (rtex->cmask.size || rtex->fmask.size ||
+ (rtex->dcc_buffer && rtex->dirty_level_mask)) {
samplers->compressed_colortex_mask |= 1 << slot;
} else {
samplers->compressed_colortex_mask &= ~(1 << slot);
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index 31b0b41e5a4..581e89f42d8 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -248,7 +248,8 @@ void si_dma_copy(struct pipe_context *ctx,
if (src->format != dst->format || src_box->depth > 1 ||
(rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) ||
rdst->cmask.size || rdst->fmask.size ||
- rsrc->cmask.size || rsrc->fmask.size) {
+ rsrc->cmask.size || rsrc->fmask.size ||
+ rdst->dcc_buffer || rsrc->dcc_buffer) {
goto fallback;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 5f910c95ef3..60baad3d13c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -55,8 +55,6 @@ static void si_destroy_context(struct pipe_context *context)
if (sctx->pstipple_sampler_state)
sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state);
- if (sctx->dummy_pixel_shader)
- sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader);
if (sctx->fixed_func_tcs_shader.cso)
sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso);
if (sctx->custom_dsa_flush)
@@ -300,6 +298,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 1;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -578,6 +577,33 @@ static bool si_initialize_pipe_config(struct si_screen *sscreen)
return true;
}
+/**
+ * Set sscreen->gs_table_depth from the chip family.
+ *
+ * \param sscreen  screen whose b.family selects the value
+ * \return true when the family is listed and gs_table_depth was set;
+ *         false for any other family, leaving gs_table_depth untouched
+ */
+static bool si_init_gs_info(struct si_screen *sscreen)
+{
+	unsigned depth;
+
+	switch (sscreen->b.family) {
+	/* Families that use a table depth of 16. */
+	case CHIP_OLAND:
+	case CHIP_HAINAN:
+	case CHIP_KAVERI:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+	case CHIP_ICELAND:
+	case CHIP_CARRIZO:
+	case CHIP_STONEY:
+		depth = 16;
+		break;
+	/* Families that use a table depth of 32. */
+	case CHIP_TAHITI:
+	case CHIP_PITCAIRN:
+	case CHIP_VERDE:
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+	case CHIP_TONGA:
+	case CHIP_FIJI:
+		depth = 32;
+		break;
+	default:
+		/* Unlisted family: report failure to the caller. */
+		return false;
+	}
+
+	sscreen->gs_table_depth = depth;
+	return true;
+}
+
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
{
struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
@@ -595,7 +621,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
sscreen->b.b.resource_create = r600_resource_create_common;
if (!r600_common_screen_init(&sscreen->b, ws) ||
- !si_initialize_pipe_config(sscreen)) {
+ !si_initialize_pipe_config(sscreen) ||
+ !si_init_gs_info(sscreen)) {
FREE(sscreen);
return NULL;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index d7a2282952a..42cd8803c36 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -42,6 +42,7 @@
#define SI_BASE_VERTEX_UNKNOWN INT_MIN
#define SI_RESTART_INDEX_UNKNOWN INT_MIN
#define SI_NUM_SMOOTH_AA_SAMPLES 8
+#define SI_GS_PER_ES 128
/* Instruction cache. */
#define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
@@ -85,6 +86,7 @@ struct si_compute;
struct si_screen {
struct r600_common_screen b;
+ unsigned gs_table_depth;
};
struct si_blend_color {
@@ -96,6 +98,7 @@ struct si_sampler_view {
struct pipe_sampler_view base;
struct list_head list;
struct r600_resource *resource;
+ struct r600_resource *dcc_buffer;
/* [0..7] = image descriptor
* [4..7] = buffer descriptor */
uint32_t state[8];
@@ -203,9 +206,6 @@ struct si_context {
struct si_pm4_state *init_config;
bool init_config_has_vgt_flush;
struct si_pm4_state *vgt_shader_config[4];
- /* With rasterizer discard, there doesn't have to be a pixel shader.
- * In that case, we bind this one: */
- void *dummy_pixel_shader;
/* shaders */
struct si_shader_ctx_state ps_shader;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 243bdc6e6d7..18b64056bc7 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -266,6 +266,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at
* Reproducible with Unigine Heaven 4.0 and drirc missing.
*/
if (blend->dual_src_blend &&
+ sctx->ps_shader.cso &&
(sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3)
mask = 0;
@@ -697,6 +698,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
rs->clamp_fragment_color = state->clamp_fragment_color;
rs->flatshade = state->flatshade;
rs->sprite_coord_enable = state->sprite_coord_enable;
+ rs->rasterizer_discard = state->rasterizer_discard;
rs->pa_sc_line_stipple = state->line_stipple_enable ?
S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
@@ -1924,8 +1926,21 @@ static void si_initialize_color_surface(struct si_context *sctx,
surf->cb_color_info = color_info;
surf->cb_color_attrib = color_attrib;
- if (sctx->b.chip_class >= VI)
- surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1);
+ if (sctx->b.chip_class >= VI && rtex->dcc_buffer) {
+ unsigned max_uncompressed_block_size = 2;
+ uint64_t dcc_offset = rtex->surface.level[level].dcc_offset;
+
+ if (rtex->surface.nsamples > 1) {
+ if (rtex->surface.bpe == 1)
+ max_uncompressed_block_size = 0;
+ else if (rtex->surface.bpe == 2)
+ max_uncompressed_block_size = 1;
+ }
+
+ surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
+ S_028C78_INDEPENDENT_64B_BLOCKS(1);
+ surf->cb_dcc_base = (rtex->dcc_buffer->gpu_address + dcc_offset) >> 8;
+ }
if (rtex->fmask.size) {
surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
@@ -2249,6 +2264,12 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
RADEON_PRIO_CMASK);
}
+ if (tex->dcc_buffer && tex->dcc_buffer != &tex->resource) {
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ tex->dcc_buffer, RADEON_USAGE_READWRITE,
+ RADEON_PRIO_DCC);
+ }
+
radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
sctx->b.chip_class >= VI ? 14 : 13);
radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */
@@ -2266,7 +2287,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */
if (sctx->b.chip_class >= VI)
- radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */
+ radeon_emit(cs, cb->cb_dcc_base); /* R_028C94_CB_COLOR0_DCC_BASE */
}
/* set CB_COLOR1_INFO for possible dual-src blending */
if (i == 1 && state->cbufs[0] &&
@@ -2633,8 +2654,18 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
S_008F24_LAST_ARRAY(last_layer));
- view->state[6] = 0;
- view->state[7] = 0;
+
+ if (tmp->dcc_buffer) {
+ uint64_t dcc_offset = surflevel[base_level].dcc_offset;
+ unsigned swap = r600_translate_colorswap(pipe_format);
+
+ view->state[6] = S_008F28_COMPRESSION_EN(1) | S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
+ view->state[7] = (tmp->dcc_buffer->gpu_address + dcc_offset) >> 8;
+ view->dcc_buffer = tmp->dcc_buffer;
+ } else {
+ view->state[6] = 0;
+ view->state[7] = 0;
+ }
/* Initialize the sampler view for FMASK. */
if (tmp->fmask.size) {
@@ -3262,7 +3293,7 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
/* FIXME calculate these values somehow ??? */
- si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80);
+ si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
@@ -3336,6 +3367,7 @@ static void si_init_config(struct si_context *sctx)
break;
case CHIP_KABINI:
case CHIP_MULLINS:
+ case CHIP_STONEY:
raster_config = 0x00000000;
raster_config_1 = 0x00000000;
break;
@@ -3406,7 +3438,8 @@ static void si_init_config(struct si_context *sctx)
if (sctx->b.chip_class >= VI) {
si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
- S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1));
+ S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
+ S_028424_OVERWRITE_COMBINER_WATERMARK(4));
si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
}
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index fba6619d2fd..8b9a311cd3f 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -61,6 +61,7 @@ struct si_state_rasterizer {
bool poly_smooth;
bool uses_poly_offset;
bool clamp_fragment_color;
+ bool rasterizer_discard;
};
struct si_dsa_stencil_ref_part {
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index ce6c98c3124..cf0891a2ab7 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -223,6 +223,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
unsigned prim = info->mode;
unsigned primgroup_size = 128; /* recommended without a GS */
+ unsigned max_primgroup_in_wave = 2;
/* SWITCH_ON_EOP(0) is always preferable. */
bool wd_switch_on_eop = false;
@@ -246,13 +247,10 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
/* primgroup_size must be set to a multiple of NUM_PATCHES */
primgroup_size = (primgroup_size / num_patches) * num_patches;
- /* SWITCH_ON_EOI must be set if PrimID is used.
- * If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
+ /* SWITCH_ON_EOI must be set if PrimID is used. */
if ((sctx->tcs_shader.cso && sctx->tcs_shader.cso->info.uses_primid) ||
- sctx->tes_shader.cso->info.uses_primid) {
+ sctx->tes_shader.cso->info.uses_primid)
ia_switch_on_eoi = true;
- partial_es_wave = true;
- }
/* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
if ((sctx->b.family == CHIP_TAHITI ||
@@ -269,10 +267,6 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
wd_switch_on_eop = true;
}
- if (sctx->b.streamout.streamout_enabled ||
- sctx->b.streamout.prims_gen_query_enabled)
- partial_vs_wave = true;
-
if (sctx->b.chip_class >= CIK) {
/* WD_SWITCH_ON_EOP has no effect on GPUs with less than
* 4 shader engines. Set 1 to pass the assertion below.
@@ -282,7 +276,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
prim == PIPE_PRIM_LINE_LOOP ||
prim == PIPE_PRIM_TRIANGLE_FAN ||
prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY ||
- info->primitive_restart)
+ info->primitive_restart ||
+ info->count_from_stream_output)
wd_switch_on_eop = true;
/* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0.
@@ -292,14 +287,34 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
(info->indirect || info->instance_count > 1))
wd_switch_on_eop = true;
- /* USE_OPAQUE doesn't work when WD_SWITCH_ON_EOP is 0. */
- if (info->count_from_stream_output)
- wd_switch_on_eop = true;
+ /* Required on CIK and later. */
+ if (sctx->b.screen->info.max_se > 2 && !wd_switch_on_eop)
+ ia_switch_on_eoi = true;
+
+ /* Required by Hawaii and, for some special cases, by VI. */
+ if (ia_switch_on_eoi &&
+ (sctx->b.family == CHIP_HAWAII ||
+ (sctx->b.chip_class == VI &&
+ (sctx->gs_shader.cso || max_primgroup_in_wave != 2))))
+ partial_vs_wave = true;
+
+ /* Instancing bug on Bonaire. */
+ if (sctx->b.family == CHIP_BONAIRE && ia_switch_on_eoi &&
+ (info->indirect || info->instance_count > 1))
+ partial_vs_wave = true;
/* If the WD switch is false, the IA switch must be false too. */
assert(wd_switch_on_eop || !ia_switch_on_eop);
}
+ /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
+ if (ia_switch_on_eoi)
+ partial_es_wave = true;
+
+ /* GS requirement. */
+ if (SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3)
+ partial_es_wave = true;
+
/* Hw bug with single-primitive instances and SWITCH_ON_EOI
* on multi-SE chips. */
if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi &&
@@ -308,18 +323,14 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
u_prims_for_vertices(info->mode, info->count) <= 1)))
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
- /* Instancing bug on 2 SE chips. */
- if (sctx->b.screen->info.max_se == 2 && ia_switch_on_eoi &&
- (info->indirect || info->instance_count > 1))
- partial_vs_wave = true;
-
return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) |
- S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 2 : 0);
+ S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ?
+ max_primgroup_in_wave : 0);
}
static unsigned si_get_ls_hs_config(struct si_context *sctx,
@@ -636,6 +647,17 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
S_0085F0_CB5_DEST_BASE_ENA(1) |
S_0085F0_CB6_DEST_BASE_ENA(1) |
S_0085F0_CB7_DEST_BASE_ENA(1);
+
+ /* Necessary for DCC */
+ if (sctx->chip_class >= VI) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0) | compute);
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) |
+ EVENT_INDEX(5));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ }
}
if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
@@ -728,6 +750,7 @@ static void si_get_draw_start_count(struct si_context *sctx,
void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
struct si_context *sctx = (struct si_context *)ctx;
+ struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
struct pipe_index_buffer ib = {};
unsigned mask;
@@ -735,7 +758,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
(info->indexed || !info->count_from_stream_output))
return;
- if (!sctx->ps_shader.cso || !sctx->vs_shader.cso) {
+ if (!sctx->vs_shader.cso) {
+ assert(0);
+ return;
+ }
+ if (!sctx->ps_shader.cso && (!rs || !rs->rasterizer_discard)) {
assert(0);
return;
}
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index eea00e0fafc..4a3a04caa52 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -799,11 +799,11 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state;
- if (sctx->vs_shader.cso == sel || !sel)
+ if (sctx->vs_shader.cso == sel)
return;
sctx->vs_shader.cso = sel;
- sctx->vs_shader.current = sel->first_variant;
+ sctx->vs_shader.current = sel ? sel->first_variant : NULL;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
si_update_viewports_and_scissors(sctx);
}
@@ -864,16 +864,6 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
si_update_viewports_and_scissors(sctx);
}
-static void si_make_dummy_ps(struct si_context *sctx)
-{
- if (!sctx->dummy_pixel_shader) {
- sctx->dummy_pixel_shader =
- util_make_fragment_cloneinput_shader(&sctx->b.b, 0,
- TGSI_SEMANTIC_GENERIC,
- TGSI_INTERPOLATE_CONSTANT);
- }
-}
-
static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
@@ -883,14 +873,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
if (sctx->ps_shader.cso == sel)
return;
- /* use a dummy shader if binding a NULL shader */
- if (!sel) {
- si_make_dummy_ps(sctx);
- sel = sctx->dummy_pixel_shader;
- }
-
sctx->ps_shader.cso = sel;
- sctx->ps_shader.current = sel->first_variant;
+ sctx->ps_shader.current = sel ? sel->first_variant : NULL;
si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
}
@@ -956,13 +940,15 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
struct si_shader *ps = sctx->ps_shader.current;
struct si_shader *vs = si_get_vs_state(sctx);
- struct tgsi_shader_info *psinfo = &ps->selector->info;
+ struct tgsi_shader_info *psinfo;
struct tgsi_shader_info *vsinfo = &vs->selector->info;
unsigned i, j, tmp, num_written = 0;
- if (!ps->nparam)
+ if (!ps || !ps->nparam)
return;
+ psinfo = &ps->selector->info;
+
radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps->nparam);
for (i = 0; i < psinfo->num_inputs; i++) {
@@ -1025,7 +1011,12 @@ static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
struct si_shader *ps = sctx->ps_shader.current;
- unsigned input_ena = ps->spi_ps_input_ena;
+ unsigned input_ena;
+
+ if (!ps)
+ return;
+
+ input_ena = ps->spi_ps_input_ena;
/* we need to enable at least one of them, otherwise we hang the GPU */
assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
@@ -1531,23 +1522,38 @@ bool si_update_shaders(struct si_context *sctx)
si_update_vgt_shader_config(sctx);
- r = si_shader_select(ctx, &sctx->ps_shader);
- if (r)
- return false;
- si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
-
- if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
- sctx->sprite_coord_enable != rs->sprite_coord_enable ||
- sctx->flatshade != rs->flatshade) {
- sctx->sprite_coord_enable = rs->sprite_coord_enable;
- sctx->flatshade = rs->flatshade;
- si_mark_atom_dirty(sctx, &sctx->spi_map);
- }
+ if (sctx->ps_shader.cso) {
+ r = si_shader_select(ctx, &sctx->ps_shader);
+ if (r)
+ return false;
+ si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
+
+ if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
+ sctx->sprite_coord_enable != rs->sprite_coord_enable ||
+ sctx->flatshade != rs->flatshade) {
+ sctx->sprite_coord_enable = rs->sprite_coord_enable;
+ sctx->flatshade = rs->flatshade;
+ si_mark_atom_dirty(sctx, &sctx->spi_map);
+ }
+
+ if (si_pm4_state_changed(sctx, ps) ||
+ sctx->force_persample_interp != rs->force_persample_interp) {
+ sctx->force_persample_interp = rs->force_persample_interp;
+ si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
+ }
+
+ if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) {
+ sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
+ }
+
+ if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) {
+ sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing;
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
- if (si_pm4_state_changed(sctx, ps) ||
- sctx->force_persample_interp != rs->force_persample_interp) {
- sctx->force_persample_interp = rs->force_persample_interp;
- si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
+ if (sctx->b.chip_class == SI)
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
+ }
}
if (si_pm4_state_changed(sctx, ls) ||
@@ -1559,19 +1565,6 @@ bool si_update_shaders(struct si_context *sctx)
if (!si_update_spi_tmpring_size(sctx))
return false;
}
-
- if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) {
- sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control;
- si_mark_atom_dirty(sctx, &sctx->db_render_state);
- }
-
- if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) {
- sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing;
- si_mark_atom_dirty(sctx, &sctx->msaa_config);
-
- if (sctx->b.chip_class == SI)
- si_mark_atom_dirty(sctx, &sctx->db_render_state);
- }
return true;
}
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index e7006d2fa0d..c0fc82b2f2c 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -249,6 +249,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
index d7a3360713f..23ec4ef3cb6 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -214,10 +214,10 @@ prepare_shader_sampling(
row_stride[j] = sp_tex->stride[j];
img_stride[j] = sp_tex->img_stride[j];
}
- if (view->target == PIPE_TEXTURE_1D_ARRAY ||
- view->target == PIPE_TEXTURE_2D_ARRAY ||
- view->target == PIPE_TEXTURE_CUBE ||
- view->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ if (tex->target == PIPE_TEXTURE_1D_ARRAY ||
+ tex->target == PIPE_TEXTURE_2D_ARRAY ||
+ tex->target == PIPE_TEXTURE_CUBE ||
+ tex->target == PIPE_TEXTURE_CUBE_ARRAY) {
num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1;
for (j = first_level; j <= last_level; j++) {
mip_offsets[j] += view->u.tex.first_layer *
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 8a0935062b6..e3e28a3ef32 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1033,6 +1033,7 @@ img_filter_2d_linear_repeat_POT(const struct sp_sampler_view *sp_sview,
addr.value = 0;
addr.bits.level = args->level;
+ addr.bits.z = sp_sview->base.u.tex.first_layer;
/* Can we fetch all four at once:
*/
@@ -1081,6 +1082,7 @@ img_filter_2d_nearest_repeat_POT(const struct sp_sampler_view *sp_sview,
addr.value = 0;
addr.bits.level = args->level;
+ addr.bits.z = sp_sview->base.u.tex.first_layer;
out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1111,6 +1113,7 @@ img_filter_2d_nearest_clamp_POT(const struct sp_sampler_view *sp_sview,
addr.value = 0;
addr.bits.level = args->level;
+ addr.bits.z = sp_sview->base.u.tex.first_layer;
x0 = util_ifloor(u);
if (x0 < 0)
@@ -1154,7 +1157,8 @@ img_filter_1d_nearest(const struct sp_sampler_view *sp_sview,
sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
- out = get_texel_2d(sp_sview, sp_samp, addr, x, 0);
+ out = get_texel_1d_array(sp_sview, sp_samp, addr, x,
+ sp_sview->base.u.tex.first_layer);
for (c = 0; c < TGSI_QUAD_SIZE; c++)
rgba[TGSI_NUM_CHANNELS*c] = out[c];
@@ -1215,6 +1219,7 @@ img_filter_2d_nearest(const struct sp_sampler_view *sp_sview,
addr.value = 0;
addr.bits.level = args->level;
+ addr.bits.z = sp_sview->base.u.tex.first_layer;
sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
@@ -1396,8 +1401,10 @@ img_filter_1d_linear(const struct sp_sampler_view *sp_sview,
sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
- tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, 0);
- tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, 0);
+ tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0,
+ sp_sview->base.u.tex.first_layer);
+ tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1,
+ sp_sview->base.u.tex.first_layer);
/* interpolate R, G, B, A */
for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1523,6 +1530,7 @@ img_filter_2d_linear(const struct sp_sampler_view *sp_sview,
addr.value = 0;
addr.bits.level = args->level;
+ addr.bits.z = sp_sview->base.u.tex.first_layer;
sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
@@ -3252,10 +3260,22 @@ sp_get_texels(const struct sp_sampler_view *sp_sview,
switch (sp_sview->base.target) {
case PIPE_BUFFER:
+ for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+ const int x = CLAMP(v_i[j] + offset[0] +
+ sp_sview->base.u.buf.first_element,
+ sp_sview->base.u.buf.first_element,
+ sp_sview->base.u.buf.last_element);
+ tx = get_texel_2d_no_border(sp_sview, addr, x, 0);
+ for (c = 0; c < 4; c++) {
+ rgba[c][j] = tx[c];
+ }
+ }
+ break;
case PIPE_TEXTURE_1D:
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
- tx = get_texel_2d_no_border(sp_sview, addr, x, 0);
+ tx = get_texel_2d_no_border(sp_sview, addr, x,
+ sp_sview->base.u.tex.first_layer);
for (c = 0; c < 4; c++) {
rgba[c][j] = tx[c];
}
@@ -3277,7 +3297,8 @@ sp_get_texels(const struct sp_sampler_view *sp_sview,
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
const int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
const int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
- tx = get_texel_2d_no_border(sp_sview, addr, x, y);
+ tx = get_texel_3d_no_border(sp_sview, addr, x, y,
+ sp_sview->base.u.tex.first_layer);
for (c = 0; c < 4; c++) {
rgba[c][j] = tx[c];
}
@@ -3307,6 +3328,7 @@ sp_get_texels(const struct sp_sampler_view *sp_sview,
}
break;
case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
+ case PIPE_TEXTURE_CUBE_ARRAY:
default:
assert(!"Unknown or CUBE texture type in TXF processing\n");
break;
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index e1ea5df24ca..3347f5f1883 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -127,7 +127,8 @@ softpipe_can_create_resource(struct pipe_screen *screen,
*/
static boolean
softpipe_displaytarget_layout(struct pipe_screen *screen,
- struct softpipe_resource *spr)
+ struct softpipe_resource *spr,
+ const void *map_front_private)
{
struct sw_winsys *winsys = softpipe_screen(screen)->winsys;
@@ -139,6 +140,7 @@ softpipe_displaytarget_layout(struct pipe_screen *screen,
spr->base.width0,
spr->base.height0,
64,
+ map_front_private,
&spr->stride[0] );
return spr->dt != NULL;
@@ -149,8 +151,9 @@ softpipe_displaytarget_layout(struct pipe_screen *screen,
* Create new pipe_resource given the template information.
*/
static struct pipe_resource *
-softpipe_resource_create(struct pipe_screen *screen,
- const struct pipe_resource *templat)
+softpipe_resource_create_front(struct pipe_screen *screen,
+ const struct pipe_resource *templat,
+ const void *map_front_private)
{
struct softpipe_resource *spr = CALLOC_STRUCT(softpipe_resource);
if (!spr)
@@ -169,7 +172,7 @@ softpipe_resource_create(struct pipe_screen *screen,
if (spr->base.bind & (PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED)) {
- if (!softpipe_displaytarget_layout(screen, spr))
+ if (!softpipe_displaytarget_layout(screen, spr, map_front_private))
goto fail;
}
else {
@@ -184,6 +187,12 @@ softpipe_resource_create(struct pipe_screen *screen,
return NULL;
}
+static struct pipe_resource *
+softpipe_resource_create(struct pipe_screen *screen,
+ const struct pipe_resource *templat)
+{
+ return softpipe_resource_create_front(screen, templat, NULL);
+}
static void
softpipe_resource_destroy(struct pipe_screen *pscreen,
@@ -514,6 +523,7 @@ void
softpipe_init_screen_texture_funcs(struct pipe_screen *screen)
{
screen->resource_create = softpipe_resource_create;
+ screen->resource_create_front = softpipe_resource_create_front;
screen->resource_destroy = softpipe_resource_destroy;
screen->resource_from_handle = softpipe_resource_from_handle;
screen->resource_get_handle = softpipe_resource_get_handle;
diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
index d3cf52f08e2..0e1e332d6cb 100644
--- a/src/gallium/drivers/svga/svga_cmd.c
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -1016,6 +1016,8 @@ SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc,
*decls = declArray;
*ranges = rangeArray;
+ swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
+
return PIPE_OK;
}
diff --git a/src/gallium/drivers/svga/svga_cmd_vgpu10.c b/src/gallium/drivers/svga/svga_cmd_vgpu10.c
index 596ba953cd2..5c121089f91 100644
--- a/src/gallium/drivers/svga/svga_cmd_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_cmd_vgpu10.c
@@ -535,6 +535,7 @@ SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc,
SVGA3D_COPY_BASIC_2(vertexCount, startVertexLocation);
+ swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
swc->commit(swc);
return PIPE_OK;
}
@@ -550,6 +551,7 @@ SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc,
SVGA3D_COPY_BASIC_3(indexCount, startIndexLocation,
baseVertexLocation);
+ swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
swc->commit(swc);
return PIPE_OK;
}
@@ -566,6 +568,7 @@ SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc,
SVGA3D_COPY_BASIC_4(vertexCountPerInstance, instanceCount,
startVertexLocation, startInstanceLocation);
+ swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
swc->commit(swc);
return PIPE_OK;
}
@@ -584,6 +587,8 @@ SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc,
startIndexLocation, baseVertexLocation,
startInstanceLocation);
+
+ swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
swc->commit(swc);
return PIPE_OK;
}
@@ -593,6 +598,7 @@ SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc)
{
SVGA3D_CREATE_COMMAND(DrawAuto, DRAW_AUTO);
+ swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED;
swc->commit(swc);
return PIPE_OK;
}
diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c
index 5635411d938..caf4b17de16 100644
--- a/src/gallium/drivers/svga/svga_draw_arrays.c
+++ b/src/gallium/drivers/svga/svga_draw_arrays.c
@@ -32,6 +32,7 @@
#include "svga_draw.h"
#include "svga_draw_private.h"
#include "svga_context.h"
+#include "svga_shader.h"
#define DBG 0
@@ -206,6 +207,32 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
unsigned gen_prim, gen_size, gen_nr, gen_type;
u_generate_func gen_func;
enum pipe_error ret = PIPE_OK;
+ unsigned api_pv = hwtnl->api_pv;
+ struct svga_context *svga = hwtnl->svga;
+
+ if (svga->curr.rast->templ.flatshade &&
+ svga->state.hw_draw.fs->constant_color_output) {
+ /* The fragment color is a constant, not per-vertex so the whole
+ * primitive will be the same color (except for possible blending).
+ * We can ignore the current provoking vertex state and use whatever
+ * the hardware wants.
+ */
+ api_pv = hwtnl->hw_pv;
+
+ if (hwtnl->api_fillmode == PIPE_POLYGON_MODE_FILL) {
+ /* Do some simple primitive conversions to avoid index buffer
+ * generation below. Note that polygons and quads are not directly
+ * supported by the svga device. Also note, we can only do this
+ * for flat/constant-colored rendering because of provoking vertex.
+ */
+ if (prim == PIPE_PRIM_POLYGON) {
+ prim = PIPE_PRIM_TRIANGLE_FAN;
+ }
+ else if (prim == PIPE_PRIM_QUADS && count == 4) {
+ prim = PIPE_PRIM_TRIANGLE_FAN;
+ }
+ }
+ }
if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL &&
prim >= PIPE_PRIM_TRIANGLES) {
@@ -226,7 +253,7 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
prim,
start,
count,
- hwtnl->api_pv,
+ api_pv,
hwtnl->hw_pv,
&gen_prim, &gen_size, &gen_nr, &gen_func);
}
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index f6fafca5c0b..5aa7b0d86eb 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -382,6 +382,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
}
diff --git a/src/gallium/drivers/svga/svga_shader.h b/src/gallium/drivers/svga/svga_shader.h
index efcac408626..f49fdb46d0e 100644
--- a/src/gallium/drivers/svga/svga_shader.h
+++ b/src/gallium/drivers/svga/svga_shader.h
@@ -155,6 +155,9 @@ struct svga_shader_variant
* applied to any of the varyings.
*/
+ /** Is the color output just a constant value? (fragment shader only) */
+ boolean constant_color_output;
+
/** For FS-based polygon stipple */
unsigned pstipple_sampler_unit;
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
index 202eee276b7..4c16f4313a0 100644
--- a/src/gallium/drivers/svga/svga_tgsi.c
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -240,6 +240,13 @@ svga_tgsi_vgpu9_translate(struct svga_context *svga,
variant->pstipple_sampler_unit = emit.pstipple_sampler_unit;
+ /* If there was exactly one write to a fragment shader output register
+ * and it came from a constant buffer, we know all fragments will have
+ * the same color (except for blending).
+ */
+ variant->constant_color_output =
+ emit.constant_color_output && emit.num_output_writes == 1;
+
#if 0
if (!svga_shader_verify(variant->tokens, variant->nr_tokens) ||
SVGA_DEBUG & DEBUG_TGSI) {
diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h
index 0b82483ab2e..83f0c8bd4d0 100644
--- a/src/gallium/drivers/svga/svga_tgsi_emit.h
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@@ -84,6 +84,9 @@ struct svga_shader_emitter
int dynamic_branching_level;
+ unsigned num_output_writes;
+ boolean constant_color_output;
+
boolean in_main_func;
boolean created_common_immediate;
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 00c91a4fa61..dbb90f7654e 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -99,6 +99,7 @@ translate_dst_register( struct svga_shader_emitter *emit,
* Need to lookup a table built at decl time:
*/
dest = emit->output_map[reg->Register.Index];
+ emit->num_output_writes++;
break;
default:
@@ -2103,6 +2104,29 @@ emit_simple_instruction(struct svga_shader_emitter *emit,
/**
+ * TGSI_OPCODE_MOV is only special-cased here to detect the
+ * svga_shader_variant::constant_color_output case.
+ */
+static boolean
+emit_mov(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ const struct tgsi_full_src_register *src = &insn->Src[0];
+ const struct tgsi_full_dst_register *dst = &insn->Dst[0];
+
+ if (emit->unit == PIPE_SHADER_FRAGMENT &&
+ dst->Register.File == TGSI_FILE_OUTPUT &&
+ dst->Register.Index == 0 &&
+ src->Register.File == TGSI_FILE_CONSTANT &&
+ !src->Register.Indirect) {
+ emit->constant_color_output = TRUE;
+ }
+
+ return emit_simple_instruction(emit, SVGA3DOP_MOV, insn);
+}
+
+
+/**
* Translate/emit TGSI DDX, DDY instructions.
*/
static boolean
@@ -3045,6 +3069,9 @@ svga_emit_instruction(struct svga_shader_emitter *emit,
case TGSI_OPCODE_SSG:
return emit_ssg( emit, insn );
+ case TGSI_OPCODE_MOV:
+ return emit_mov( emit, insn );
+
default:
{
unsigned opcode = translate_opcode(insn->Instruction.Opcode);
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index d62f2bbcc96..e70ee689c59 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -202,6 +202,9 @@ struct svga_shader_emitter_v10
/* user clip plane constant slot indexes */
unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
+ unsigned num_output_writes;
+ boolean constant_color_output;
+
boolean uses_flat_interp;
/* For all shaders: const reg index for RECT coord scaling */
@@ -913,6 +916,8 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit,
*/
assert(sem_name == TGSI_SEMANTIC_COLOR);
index = emit->info.output_semantic_index[index];
+
+ emit->num_output_writes++;
}
}
}
@@ -3097,7 +3102,7 @@ emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
unsigned i;
unsigned clip_plane_enable = emit->key.clip_plane_enable;
unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
- unsigned num_written_clipdist = emit->info.num_written_clipdistance;
+ int num_written_clipdist = emit->info.num_written_clipdistance;
assert(emit->clip_dist_out_index != INVALID_INDEX);
assert(emit->clip_dist_tmp_index != INVALID_INDEX);
@@ -3109,7 +3114,7 @@ emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
*/
emit->clip_dist_tmp_index = INVALID_INDEX;
- for (i = 0; i < 2 && num_written_clipdist; i++, num_written_clipdist-=4) {
+ for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
@@ -5573,6 +5578,29 @@ emit_simple(struct svga_shader_emitter_v10 *emit,
/**
+ * We only special case the MOV instruction to try to detect constant
+ * color writes in the fragment shader.
+ */
+static boolean
+emit_mov(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ const struct tgsi_full_src_register *src = &inst->Src[0];
+ const struct tgsi_full_dst_register *dst = &inst->Dst[0];
+
+ if (emit->unit == PIPE_SHADER_FRAGMENT &&
+ dst->Register.File == TGSI_FILE_OUTPUT &&
+ dst->Register.Index == 0 &&
+ src->Register.File == TGSI_FILE_CONSTANT &&
+ !src->Register.Indirect) {
+ emit->constant_color_output = TRUE;
+ }
+
+ return emit_simple(emit, inst);
+}
+
+
+/**
* Emit a simple VGPU10 instruction which writes to multiple dest registers,
* where TGSI only uses one dest register.
*/
@@ -5652,7 +5680,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
case TGSI_OPCODE_MAD:
case TGSI_OPCODE_MAX:
case TGSI_OPCODE_MIN:
- case TGSI_OPCODE_MOV:
case TGSI_OPCODE_MUL:
case TGSI_OPCODE_NOP:
case TGSI_OPCODE_NOT:
@@ -5677,7 +5704,8 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
/* simple instructions */
return emit_simple(emit, inst);
-
+ case TGSI_OPCODE_MOV:
+ return emit_mov(emit, inst);
case TGSI_OPCODE_EMIT:
return emit_vertex(emit, inst);
case TGSI_OPCODE_ENDPRIM:
@@ -6762,6 +6790,13 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
+ /* If there was exactly one write to a fragment shader output register
+ * and it came from a constant buffer, we know all fragments will have
+ * the same color (except for blending).
+ */
+ variant->constant_color_output =
+ emit->constant_color_output && emit->num_output_writes == 1;
+
/** keep track in the variant if flat interpolation is used
* for any of the varyings.
*/
diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h
index c750603989f..3129e46ed06 100644
--- a/src/gallium/drivers/svga/svga_winsys.h
+++ b/src/gallium/drivers/svga/svga_winsys.h
@@ -85,6 +85,8 @@ struct winsys_handle;
#define SVGA_QUERY_FLAG_SET (1 << 0)
#define SVGA_QUERY_FLAG_REF (1 << 1)
+#define SVGA_HINT_FLAG_DRAW_EMITTED (1 << 0)
+
/** Opaque surface handle */
struct svga_winsys_surface;
@@ -213,6 +215,11 @@ struct svga_winsys_context
uint32 cid;
/**
+ * Flags to hint the current context state
+ */
+ uint32 hints;
+
+ /**
** BEGIN new functions for guest-backed surfaces.
**/
diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c
index 6d748010baf..476d2b5b0b1 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -22,6 +22,7 @@
*/
#include "util/u_math.h"
+#include "util/u_prim.h"
#include "util/macros.h"
#include "vc4_context.h"
@@ -163,6 +164,26 @@ dump_VC4_PACKET_LOAD_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_
}
static void
+dump_VC4_PACKET_GL_INDEXED_PRIMITIVE(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ uint8_t *b = cl + offset;
+ uint32_t *count = cl + offset + 1;
+ uint32_t *ib_offset = cl + offset + 5;
+ uint32_t *max_index = cl + offset + 9;
+
+ fprintf(stderr, "0x%08x 0x%08x: 0x%02x %s %s\n",
+ offset, hw_offset,
+ b[0], (b[0] & VC4_INDEX_BUFFER_U16) ? "16-bit" : "8-bit",
+ u_prim_name(b[0] & 0x7));
+ fprintf(stderr, "0x%08x 0x%08x: %d verts\n",
+ offset + 1, hw_offset + 1, *count);
+ fprintf(stderr, "0x%08x 0x%08x: 0x%08x IB offset\n",
+ offset + 5, hw_offset + 5, *ib_offset);
+ fprintf(stderr, "0x%08x 0x%08x: 0x%08x max index\n",
+ offset + 9, hw_offset + 9, *max_index);
+}
+
+static void
dump_VC4_PACKET_FLAT_SHADE_FLAGS(void *cl, uint32_t offset, uint32_t hw_offset)
{
uint32_t *bits = cl + offset;
@@ -262,14 +283,14 @@ dump_VC4_PACKET_TILE_RENDERING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t h
shorts[1]);
const char *format = "???";
- switch ((bytes[0] >> 2) & 3) {
- case 0:
+ switch (VC4_GET_FIELD(shorts[2], VC4_RENDER_CONFIG_FORMAT)) {
+ case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED:
format = "BGR565_DITHERED";
break;
- case 1:
+ case VC4_RENDER_CONFIG_FORMAT_RGBA8888:
format = "RGBA8888";
break;
- case 2:
+ case VC4_RENDER_CONFIG_FORMAT_BGR565:
format = "BGR565";
break;
}
@@ -277,29 +298,31 @@ dump_VC4_PACKET_TILE_RENDERING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t h
format = "64bit";
const char *tiling = "???";
- switch ((bytes[0] >> 6) & 3) {
- case 0:
+ switch (VC4_GET_FIELD(shorts[2], VC4_RENDER_CONFIG_MEMORY_FORMAT)) {
+ case VC4_TILING_FORMAT_LINEAR:
tiling = "linear";
break;
- case 1:
+ case VC4_TILING_FORMAT_T:
tiling = "T";
break;
- case 2:
+ case VC4_TILING_FORMAT_LT:
tiling = "LT";
break;
}
- fprintf(stderr, "0x%08x 0x%08x: 0x%02x %s %s %s\n",
+ fprintf(stderr, "0x%08x 0x%08x: 0x%02x %s %s %s %s\n",
offset + 8, hw_offset + 8,
bytes[0],
format, tiling,
- (bytes[0] & VC4_RENDER_CONFIG_MS_MODE_4X) ? "ms" : "ss");
+ (shorts[2] & VC4_RENDER_CONFIG_MS_MODE_4X) ? "ms" : "ss",
+ (shorts[2] & VC4_RENDER_CONFIG_DECIMATE_MODE_4X) ?
+ "ms_decimate" : "ss_decimate");
const char *earlyz = "";
- if (bytes[1] & (1 << 3)) {
+ if (shorts[2] & VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE) {
earlyz = "early_z disabled";
} else {
- if (bytes[1] & (1 << 2))
+ if (shorts[2] & VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G)
earlyz = "early_z >";
else
earlyz = "early_z <";
@@ -356,7 +379,7 @@ static const struct packet_info {
PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL),
PACKET_DUMP(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL),
- PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE),
+ PACKET_DUMP(VC4_PACKET_GL_INDEXED_PRIMITIVE),
PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE),
PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE),
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index c7698422951..86f2ce5e608 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -250,10 +250,10 @@ struct vc4_context {
bool needs_flush;
/**
- * Set when needs_flush, and the queued rendering is not just composed
- * of full-buffer clears.
+ * Number of draw calls (not counting full buffer clears) queued in
+ * the current job.
*/
- bool draw_call_queued;
+ uint32_t draw_calls_queued;
/** Maximum index buffer valid for the current shader_rec. */
uint32_t max_index;
@@ -291,7 +291,10 @@ struct vc4_context {
struct vc4_vertex_stateobj *vtx;
- struct pipe_blend_color blend_color;
+ struct {
+ struct pipe_blend_color f;
+ uint8_t ub[4];
+ } blend_color;
struct pipe_stencil_ref stencil_ref;
unsigned sample_mask;
struct pipe_framebuffer_state framebuffer;
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index a4e5e092b1a..624a236c573 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -25,6 +25,7 @@
#include "util/u_prim.h"
#include "util/u_format.h"
#include "util/u_pack_color.h"
+#include "util/u_upload_mgr.h"
#include "indices/u_primconvert.h"
#include "vc4_context.h"
@@ -100,7 +101,7 @@ vc4_start_draw(struct vc4_context *vc4)
VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
vc4->needs_flush = true;
- vc4->draw_call_queued = true;
+ vc4->draw_calls_queued++;
vc4->draw_width = width;
vc4->draw_height = height;
@@ -226,6 +227,38 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *i
vc4->max_index = max_index;
}
+/**
+ * HW-2116 workaround: Flush the batch before triggering the hardware state
+ * counter wraparound behavior.
+ *
+ * State updates are tracked by a global counter which increments at the first
+ * state update after a draw or a START_BINNING. Tiles can then have their
+ * state updated at draw time with a set of cheap checks for whether the
+ * state's copy of the global counter matches the global counter the last time
+ * that state was written to the tile.
+ *
+ * The state counters are relatively small and wrap around quickly, so you
+ * could get false negatives for needing to update a particular state in the
+ * tile. To avoid this, the hardware attempts to write all of the state in
+ * the tile at wraparound time. This apparently is broken, so we just flush
+ * everything before that behavior is triggered. A batch flush is sufficient
+ * to get our current contents drawn and reset the counters to 0.
+ *
+ * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the
+ * tiles with VC4_PACKET_RETURN_FROM_LIST.
+ */
+static void
+vc4_hw_2116_workaround(struct pipe_context *pctx)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+
+        /* Flush once the number of draw calls queued in the current job
+         * reaches the threshold.  Compare with >= rather than == so the
+         * flush still happens if the counter ever steps past the threshold
+         * between two checks.
+         */
+        if (vc4->draw_calls_queued >= 0x1ef0) {
+                perf_debug("Flushing batch due to HW-2116 workaround "
+                           "(too many draw calls per scene)\n");
+                vc4_flush(pctx);
+        }
+}
+
static void
vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
@@ -244,6 +277,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
vc4_update_shadow_textures(pctx, &vc4->verttex);
vc4_update_shadow_textures(pctx, &vc4->fragtex);
+ vc4_hw_2116_workaround(pctx);
+
vc4_get_draw_cl_space(vc4);
if (vc4->prim_mode != info->mode) {
@@ -285,7 +320,15 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
info->count, &offset);
index_size = 2;
} else {
- prsc = vc4->indexbuf.buffer;
+ if (vc4->indexbuf.user_buffer) {
+ prsc = NULL;
+ u_upload_data(vc4->uploader, 0,
+ info->count * index_size,
+ vc4->indexbuf.user_buffer,
+ &offset, &prsc);
+ } else {
+ prsc = vc4->indexbuf.buffer;
+ }
}
struct vc4_resource *rsc = vc4_resource(prsc);
@@ -300,7 +343,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset);
cl_u32(&bcl, vc4->max_index);
- if (vc4->indexbuf.index_size == 4)
+ if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer)
pipe_resource_reference(&prsc, NULL);
} else {
cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
@@ -343,8 +386,8 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers,
/* We can't flag new buffers for clearing once we've queued draws. We
* could avoid this by using the 3d engine to clear.
*/
- if (vc4->draw_call_queued) {
- perf_debug("Flushing rendering to process new clear.");
+ if (vc4->draw_calls_queued) {
+ perf_debug("Flushing rendering to process new clear.\n");
vc4_flush(pctx);
}
diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c
index 7ebd9f160eb..9ad79c2ea10 100644
--- a/src/gallium/drivers/vc4/vc4_job.c
+++ b/src/gallium/drivers/vc4/vc4_job.c
@@ -55,7 +55,7 @@ vc4_job_reset(struct vc4_context *vc4)
vc4->shader_rec_count = 0;
vc4->needs_flush = false;
- vc4->draw_call_queued = false;
+ vc4->draw_calls_queued = 0;
/* We have no hardware context saved between our draw calls, so we
* need to flag the next draw as needing all state emitted. Emitting
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index 17b524653bb..373c9e12d11 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -86,11 +86,11 @@ vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
}
static nir_ssa_def *
-vc4_blend_channel(nir_builder *b,
- nir_ssa_def **src,
- nir_ssa_def **dst,
- unsigned factor,
- int channel)
+vc4_blend_channel_f(nir_builder *b,
+ nir_ssa_def **src,
+ nir_ssa_def **dst,
+ unsigned factor,
+ int channel)
{
switch(factor) {
case PIPE_BLENDFACTOR_ONE:
@@ -146,8 +146,75 @@ vc4_blend_channel(nir_builder *b,
}
static nir_ssa_def *
-vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
- unsigned func)
+vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
+                        int chan)
+{
+        /* Returns src0 with byte lane "chan" (0..3) of the packed 32-bit
+         * value replaced by the same lane of src1.
+         *
+         * A lane index outside 0..3 selects nothing, so src0 is returned
+         * unchanged.  vc4_do_blending_i can hand 4 through
+         * vc4_blend_channel_i when it finds no alpha lane, and shifting by
+         * 32 would be undefined behavior.
+         */
+        if (chan < 0 || chan > 3)
+                return src0;
+
+        /* Unsigned literal: 0xff << 24 does not fit in a signed int. */
+        unsigned chan_mask = 0xffu << (chan * 8);
+        return nir_ior(b,
+                       nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
+                       nir_iand(b, src1, nir_imm_int(b, chan_mask)));
+}
+
+/**
+ * Returns the blend factor selected by the PIPE_BLENDFACTOR_* value "factor"
+ * as a NIR value for the packed (4x8) integer blend path.
+ *
+ * src/dst and src_a/dst_a are returned directly, or bitwise-inverted for the
+ * INV_* factors; ONE is all bits set and ZERO is 0.  The constant color
+ * factors are read from state uniforms.  a_chan is only used by
+ * SRC_ALPHA_SATURATE, where it names the byte lane forced to all ones.
+ *
+ * Unsupported factors (including the SRC1 variants) print a message to
+ * stderr and fall back to all bits set.
+ */
+static nir_ssa_def *
+vc4_blend_channel_i(nir_builder *b,
+ nir_ssa_def *src,
+ nir_ssa_def *dst,
+ nir_ssa_def *src_a,
+ nir_ssa_def *dst_a,
+ unsigned factor,
+ int a_chan)
+{
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ return nir_imm_int(b, ~0);
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return src;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return src_a;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return dst_a;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return dst;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ /* Per-lane min(src_a, ~dst_a), then lane a_chan is replaced with
+ * 0xff taken from the all-ones immediate.
+ */
+ return vc4_nir_set_packed_chan(b,
+ nir_umin_4x8(b,
+ src_a,
+ nir_inot(b, dst_a)),
+ nir_imm_int(b, ~0),
+ a_chan);
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA);
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA);
+ case PIPE_BLENDFACTOR_ZERO:
+ return nir_imm_int(b, 0);
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ return nir_inot(b, src);
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return nir_inot(b, src_a);
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return nir_inot(b, dst_a);
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return nir_inot(b, dst);
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA));
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA));
+
+ default:
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ /* Unsupported. */
+ fprintf(stderr, "Unknown blend factor %d\n", factor);
+ return nir_imm_int(b, ~0);
+ }
+}
+
+static nir_ssa_def *
+vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
+ unsigned func)
{
switch (func) {
case PIPE_BLEND_ADD:
@@ -169,9 +236,33 @@ vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
}
}
+/**
+ * Combines two packed values with the PIPE_BLEND_* function "func" using the
+ * NIR 4x8 ops: ADD uses usadd_4x8, SUBTRACT uses ussub_4x8(src, dst),
+ * REVERSE_SUBTRACT uses ussub_4x8(dst, src), MIN/MAX use umin_4x8/umax_4x8.
+ *
+ * An unrecognized func prints a message to stderr and returns src unchanged.
+ */
+static nir_ssa_def *
+vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
+ unsigned func)
+{
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ return nir_usadd_4x8(b, src, dst);
+ case PIPE_BLEND_SUBTRACT:
+ return nir_ussub_4x8(b, src, dst);
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return nir_ussub_4x8(b, dst, src);
+ case PIPE_BLEND_MIN:
+ return nir_umin_4x8(b, src, dst);
+ case PIPE_BLEND_MAX:
+ return nir_umax_4x8(b, src, dst);
+
+ default:
+ /* Unsupported. */
+ fprintf(stderr, "Unknown blend func %d\n", func);
+ return src;
+
+ }
+}
+
static void
-vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
- nir_ssa_def **src_color, nir_ssa_def **dst_color)
+vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
+ nir_ssa_def **src_color, nir_ssa_def **dst_color)
{
struct pipe_rt_blend_state *blend = &c->fs_key->blend;
@@ -192,20 +283,106 @@ vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
blend->alpha_dst_factor);
src_blend[i] = nir_fmul(b, src_color[i],
- vc4_blend_channel(b,
- src_color, dst_color,
- src_factor, i));
+ vc4_blend_channel_f(b,
+ src_color, dst_color,
+ src_factor, i));
dst_blend[i] = nir_fmul(b, dst_color[i],
- vc4_blend_channel(b,
- src_color, dst_color,
- dst_factor, i));
+ vc4_blend_channel_f(b,
+ src_color, dst_color,
+ dst_factor, i));
}
for (int i = 0; i < 4; i++) {
- result[i] = vc4_blend_func(b, src_blend[i], dst_blend[i],
- ((i != 3) ? blend->rgb_func :
- blend->alpha_func));
+ result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
+ ((i != 3) ? blend->rgb_func :
+ blend->alpha_func));
+ }
+}
+
+/**
+ * ORs src with itself shifted left by 8, then ORs that result with itself
+ * shifted left by 16.  For a src that only has bits in its low byte, this
+ * replicates that byte into all four byte lanes of the 32-bit value.
+ */
+static nir_ssa_def *
+vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
+{
+ nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
+ return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
+}
+
+/**
+ * Blends the packed src_color against the packed dst_color according to
+ * c->fs_key->blend and returns the packed result.
+ *
+ * Returns src_color untouched when blending is disabled.  src_float_a is
+ * packed with nir_pack_unorm_4x8 to form the source alpha factor.
+ */
+static nir_ssa_def *
+vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
+ nir_ssa_def *src_color, nir_ssa_def *dst_color,
+ nir_ssa_def *src_float_a)
+{
+ struct pipe_rt_blend_state *blend = &c->fs_key->blend;
+
+ if (!blend->blend_enable)
+ return src_color;
+
+ enum pipe_format color_format = c->fs_key->color_format;
+ const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+ nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
+ nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
+ nir_ssa_def *dst_a;
+ int alpha_chan;
+ /* Find the lane whose format swizzle entry is 3 (presumably alpha);
+ * alpha_chan is left at 4 if no lane matches.
+ */
+ for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
+ if (format_swiz[alpha_chan] == 3)
+ break;
+ }
+ if (alpha_chan != 4) {
+ /* Pull that byte out of dst_color and replicate it to every lane. */
+ nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
+ dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
+ shift), imm_0xff));
+ } else {
+ /* No matching lane: use all bits set as the destination alpha. */
+ dst_a = nir_imm_int(b, ~0);
+ }
+
+ /* Start with the RGB factors applied to every lane. */
+ nir_ssa_def *src_factor = vc4_blend_channel_i(b,
+ src_color, dst_color,
+ src_a, dst_a,
+ blend->rgb_src_factor,
+ alpha_chan);
+ nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
+ src_color, dst_color,
+ src_a, dst_a,
+ blend->rgb_dst_factor,
+ alpha_chan);
+
+ /* If the alpha factor differs from the RGB one, compute it separately
+ * and splice only the alpha_chan lane into the factor.
+ */
+ if (alpha_chan != 4 &&
+ blend->alpha_src_factor != blend->rgb_src_factor) {
+ nir_ssa_def *src_alpha_factor =
+ vc4_blend_channel_i(b,
+ src_color, dst_color,
+ src_a, dst_a,
+ blend->alpha_src_factor,
+ alpha_chan);
+ src_factor = vc4_nir_set_packed_chan(b, src_factor,
+ src_alpha_factor,
+ alpha_chan);
+ }
+ if (alpha_chan != 4 &&
+ blend->alpha_dst_factor != blend->rgb_dst_factor) {
+ nir_ssa_def *dst_alpha_factor =
+ vc4_blend_channel_i(b,
+ src_color, dst_color,
+ src_a, dst_a,
+ blend->alpha_dst_factor,
+ alpha_chan);
+ dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
+ dst_alpha_factor,
+ alpha_chan);
+ }
+ /* Scale each color by its factor. */
+ nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
+ nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);
+
+ /* Apply the RGB blend function to all lanes, then redo the alpha_chan
+ * lane with the alpha function if that one is different.
+ */
+ nir_ssa_def *result =
+ vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
+ if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
+ nir_ssa_def *result_a = vc4_blend_func_i(b,
+ src_blend,
+ dst_blend,
+ blend->alpha_func);
+ result = vc4_nir_set_packed_chan(b, result, result_a,
+ alpha_chan);
 }
+ return result;
 }
static nir_ssa_def *
@@ -299,12 +476,33 @@ vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
nir_builder_instr_insert(b, &discard->instr);
}
+/**
+ * Reorders the four entries of colors[] through the swizzle returned by
+ * vc4_get_format_swizzle() for c->fs_key->color_format, then packs the
+ * resulting vec4 with nir_pack_unorm_4x8 and returns the packed value.
+ */
+static nir_ssa_def *
+vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
+ nir_ssa_def **colors)
+{
+ enum pipe_format color_format = c->fs_key->color_format;
+ const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+
+ nir_ssa_def *swizzled[4];
+ for (int i = 0; i < 4; i++) {
+ swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
+ format_swiz[i]);
+ }
+
+ return nir_pack_unorm_4x8(b,
+ nir_vec4(b,
+ swizzled[0], swizzled[1],
+ swizzled[2], swizzled[3]));
+
+}
+
static void
vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
enum pipe_format color_format = c->fs_key->color_format;
const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+ bool srgb = util_format_is_srgb(color_format);
/* Pull out the float src/dst color components. */
nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b);
@@ -315,45 +513,39 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
}
- /* Unswizzle the destination color. */
- nir_ssa_def *dst_color[4];
- for (unsigned i = 0; i < 4; i++) {
- dst_color[i] = vc4_nir_get_swizzled_channel(b,
- unpacked_dst_color,
- format_swiz[i]);
- }
-
vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
- /* Turn dst color to linear. */
- if (util_format_is_srgb(color_format)) {
+ nir_ssa_def *packed_color;
+ if (srgb) {
+ /* Unswizzle the destination color. */
+ nir_ssa_def *dst_color[4];
+ for (unsigned i = 0; i < 4; i++) {
+ dst_color[i] = vc4_nir_get_swizzled_channel(b,
+ unpacked_dst_color,
+ format_swiz[i]);
+ }
+
+ /* Turn dst color to linear. */
for (int i = 0; i < 3; i++)
dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
- }
- nir_ssa_def *blend_color[4];
- vc4_do_blending(c, b, blend_color, src_color, dst_color);
+ nir_ssa_def *blend_color[4];
+ vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
- /* sRGB encode the output color */
- if (util_format_is_srgb(color_format)) {
+ /* sRGB encode the output color */
for (int i = 0; i < 3; i++)
blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
- }
- nir_ssa_def *swizzled_outputs[4];
- for (int i = 0; i < 4; i++) {
- swizzled_outputs[i] =
- vc4_nir_get_swizzled_channel(b, blend_color,
- format_swiz[i]);
- }
+ packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
+ } else {
+ nir_ssa_def *packed_src_color =
+ vc4_nir_swizzle_and_pack(c, b, src_color);
- nir_ssa_def *packed_color =
- nir_pack_unorm_4x8(b,
- nir_vec4(b,
- swizzled_outputs[0],
- swizzled_outputs[1],
- swizzled_outputs[2],
- swizzled_outputs[3]));
+ packed_color =
+ vc4_do_blending_i(c, b,
+ packed_src_color, packed_dst_color,
+ src_color[3]);
+ }
packed_color = vc4_logicop(b, c->fs_key->logicop_func,
packed_color, packed_dst_color);
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index caf706aa2a6..7ea263afb68 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -406,6 +406,7 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_uniform_indirect:
+ case nir_intrinsic_load_user_clip_plane:
vc4_nir_lower_uniform(c, b, intr);
break;
diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index 5b435832b92..f1bab810eff 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -64,6 +64,7 @@ is_constant_value(struct vc4_compile *c, struct qreg reg,
uint32_t val)
{
if (reg.file == QFILE_UNIF &&
+ !reg.pack &&
c->uniform_contents[reg.index] == QUNIFORM_CONSTANT &&
c->uniform_data[reg.index] == val) {
return true;
diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
index fd2539aed95..0eee5c34e1d 100644
--- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
+++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
@@ -41,34 +41,77 @@ qir_opt_copy_propagation(struct vc4_compile *c)
bool debug = false;
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
- for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
- int index = inst->src[i].index;
- if (inst->src[i].file == QFILE_TEMP &&
- c->defs[index] &&
- c->defs[index]->op == QOP_MOV &&
- (c->defs[index]->src[0].file == QFILE_TEMP ||
- c->defs[index]->src[0].file == QFILE_UNIF)) {
- /* If it has a pack, it shouldn't be an SSA
- * def.
+ int nsrc = qir_get_op_nsrc(inst->op);
+ for (int i = 0; i < nsrc; i++) {
+ if (inst->src[i].file != QFILE_TEMP)
+ continue;
+
+ struct qinst *mov = c->defs[inst->src[i].index];
+ if (!mov ||
+ (mov->op != QOP_MOV &&
+ mov->op != QOP_FMOV &&
+ mov->op != QOP_MMOV)) {
+ continue;
+ }
+
+ if (mov->src[0].file != QFILE_TEMP &&
+ mov->src[0].file != QFILE_UNIF) {
+ continue;
+ }
+
+ if (mov->dst.pack)
+ continue;
+
+ uint8_t unpack;
+ if (mov->src[0].pack) {
+ /* Make sure that the meaning of the unpack
+ * would be the same between the two
+ * instructions.
*/
- assert(!c->defs[index]->dst.pack);
+ if (qir_is_float_input(inst) !=
+ qir_is_float_input(mov)) {
+ continue;
+ }
- if (debug) {
- fprintf(stderr, "Copy propagate: ");
- qir_dump_inst(c, inst);
- fprintf(stderr, "\n");
+ /* There's only one unpack field, so make sure
+ * this instruction doesn't already use it.
+ */
+ bool already_has_unpack = false;
+ for (int j = 0; j < nsrc; j++) {
+ if (inst->src[j].pack)
+ already_has_unpack = true;
}
+ if (already_has_unpack)
+ continue;
- inst->src[i] = c->defs[index]->src[0];
+ /* A destination pack requires the PM bit to
+ * be set to a specific value already, which
+ * may be different from ours.
+ */
+ if (inst->dst.pack)
+ continue;
- if (debug) {
- fprintf(stderr, "to: ");
- qir_dump_inst(c, inst);
- fprintf(stderr, "\n");
- }
+ unpack = mov->src[0].pack;
+ } else {
+ unpack = inst->src[i].pack;
+ }
- progress = true;
+ if (debug) {
+ fprintf(stderr, "Copy propagate: ");
+ qir_dump_inst(c, inst);
+ fprintf(stderr, "\n");
}
+
+ inst->src[i] = mov->src[0];
+ inst->src[i].pack = unpack;
+
+ if (debug) {
+ fprintf(stderr, "to: ");
+ qir_dump_inst(c, inst);
+ fprintf(stderr, "\n");
+ }
+
+ progress = true;
}
}
return progress;
diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c
index 0e5480ea781..8b4d429074c 100644
--- a/src/gallium/drivers/vc4/vc4_opt_cse.c
+++ b/src/gallium/drivers/vc4/vc4_opt_cse.c
@@ -65,6 +65,7 @@ vc4_find_cse(struct vc4_compile *c, struct hash_table *ht,
struct qinst *inst, uint32_t sf_count)
{
if (inst->dst.file != QFILE_TEMP ||
+ !c->defs[inst->dst.index] ||
inst->op == QOP_MOV ||
qir_get_op_nsrc(inst->op) > 4) {
return NULL;
diff --git a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
index d6e98f0aebf..e61562171aa 100644
--- a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
+++ b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
@@ -56,6 +56,7 @@ qir_opt_small_immediates(struct vc4_compile *c)
struct qreg src = qir_follow_movs(c, inst->src[i]);
if (src.file != QFILE_UNIF ||
+ src.pack ||
c->uniform_contents[src.index] !=
QUNIFORM_CONSTANT) {
continue;
@@ -72,9 +73,6 @@ qir_opt_small_immediates(struct vc4_compile *c)
continue;
}
- if (qir_src_needs_a_file(inst))
- continue;
-
uint32_t imm = c->uniform_data[src.index];
uint32_t small_imm = qpu_encode_small_immediate(imm);
if (small_imm == ~0)
diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
index f2cdf8f694f..73ded766db9 100644
--- a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
+++ b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
@@ -58,7 +58,7 @@ qir_opt_vpm_writes(struct vc4_compile *c)
}
for (int i = 0; i < vpm_write_count; i++) {
- if (vpm_writes[i]->op != QOP_MOV ||
+ if (!qir_is_raw_mov(vpm_writes[i]) ||
vpm_writes[i]->src[0].file != QFILE_TEMP) {
continue;
}
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 6e9ec6530c6..a48dad804e2 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -738,6 +738,20 @@ ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr)
vec4 = nir_instr_as_alu(instr->src[0].src.ssa->parent_instr);
}
+ /* If the pack is replicating the same channel 4 times, use the 8888
+ * pack flag. This is common for blending using the alpha
+ * channel.
+ */
+ if (instr->src[0].swizzle[0] == instr->src[0].swizzle[1] &&
+ instr->src[0].swizzle[0] == instr->src[0].swizzle[2] &&
+ instr->src[0].swizzle[0] == instr->src[0].swizzle[3]) {
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
+ *dest = qir_PACK_8888_F(c,
+ ntq_get_src(c, instr->src[0].src,
+ instr->src[0].swizzle[0]));
+ return;
+ }
+
for (int i = 0; i < 4; i++) {
int swiz = instr->src[0].swizzle[i];
struct qreg src;
@@ -1040,41 +1054,37 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
*dest = ntq_emit_ubfe(c, src[0], src[1], src[2]);
break;
- default:
- fprintf(stderr, "unknown NIR ALU inst: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- abort();
- }
-}
+ case nir_op_usadd_4x8:
+ *dest = qir_V8ADDS(c, src[0], src[1]);
+ break;
-static void
-clip_distance_discard(struct vc4_compile *c)
-{
- for (int i = 0; i < PIPE_MAX_CLIP_PLANES; i++) {
- if (!(c->key->ucp_enables & (1 << i)))
- continue;
+ case nir_op_ussub_4x8:
+ *dest = qir_V8SUBS(c, src[0], src[1]);
+ break;
- struct qreg dist =
- emit_fragment_varying(c,
- VARYING_SLOT_CLIP_DIST0 + (i / 4),
- i % 4);
+ case nir_op_umin_4x8:
+ *dest = qir_V8MIN(c, src[0], src[1]);
+ break;
- qir_SF(c, dist);
+ case nir_op_umax_4x8:
+ *dest = qir_V8MAX(c, src[0], src[1]);
+ break;
- if (c->discard.file == QFILE_NULL)
- c->discard = qir_uniform_ui(c, 0);
+ case nir_op_umul_unorm_4x8:
+ *dest = qir_V8MULD(c, src[0], src[1]);
+ break;
- c->discard = qir_SEL_X_Y_NS(c, qir_uniform_ui(c, ~0),
- c->discard);
+ default:
+ fprintf(stderr, "unknown NIR ALU inst: ");
+ nir_print_instr(&instr->instr, stderr);
+ fprintf(stderr, "\n");
+ abort();
}
}
static void
emit_frag_end(struct vc4_compile *c)
{
- clip_distance_discard(c);
-
struct qreg color;
if (c->output_color_index != -1) {
color = c->outputs[c->output_color_index];
@@ -1190,45 +1200,6 @@ emit_stub_vpm_read(struct vc4_compile *c)
}
static void
-emit_ucp_clipdistance(struct vc4_compile *c)
-{
- unsigned cv;
- if (c->output_clipvertex_index != -1)
- cv = c->output_clipvertex_index;
- else if (c->output_position_index != -1)
- cv = c->output_position_index;
- else
- return;
-
- for (int plane = 0; plane < PIPE_MAX_CLIP_PLANES; plane++) {
- if (!(c->key->ucp_enables & (1 << plane)))
- continue;
-
- /* Pick the next outputs[] that hasn't been written to, since
- * there are no other program writes left to be processed at
- * this point. If something had been declared but not written
- * (like a w component), we'll just smash over the top of it.
- */
- uint32_t output_index = c->num_outputs++;
- add_output(c, output_index,
- VARYING_SLOT_CLIP_DIST0 + plane / 4,
- plane % 4);
-
-
- struct qreg dist = qir_uniform_f(c, 0.0);
- for (int i = 0; i < 4; i++) {
- struct qreg pos_chan = c->outputs[cv + i];
- struct qreg ucp =
- qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
- plane * 4 + i);
- dist = qir_FADD(c, dist, qir_FMUL(c, pos_chan, ucp));
- }
-
- c->outputs[output_index] = dist;
- }
-}
-
-static void
emit_vert_end(struct vc4_compile *c,
struct vc4_varying_slot *fs_inputs,
uint32_t num_fs_inputs)
@@ -1236,7 +1207,6 @@ emit_vert_end(struct vc4_compile *c,
struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]);
emit_stub_vpm_read(c);
- emit_ucp_clipdistance(c);
emit_scaled_viewport_write(c, rcp_w);
emit_zs_write(c, rcp_w);
@@ -1391,9 +1361,6 @@ ntq_setup_outputs(struct vc4_compile *c)
case VARYING_SLOT_POS:
c->output_position_index = loc;
break;
- case VARYING_SLOT_CLIP_VERTEX:
- c->output_clipvertex_index = loc;
- break;
case VARYING_SLOT_PSIZ:
c->output_point_size_index = loc;
break;
@@ -1486,6 +1453,11 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
break;
+ case nir_intrinsic_load_user_clip_plane:
+ *dest = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
+ instr->const_index[0]);
+ break;
+
case nir_intrinsic_load_input:
assert(instr->num_components == 1);
if (instr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
@@ -1683,10 +1655,18 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
c->s = tgsi_to_nir(tokens, &nir_options);
nir_opt_global_to_local(c->s);
nir_convert_to_ssa(c->s);
+
if (stage == QSTAGE_FRAG)
vc4_nir_lower_blend(c);
+
if (c->fs_key && c->fs_key->light_twoside)
nir_lower_two_sided_color(c->s);
+
+ if (stage == QSTAGE_FRAG)
+ nir_lower_clip_fs(c->s, c->key->ucp_enables);
+ else
+ nir_lower_clip_vs(c->s, c->key->ucp_enables);
+
vc4_nir_lower_io(c);
nir_lower_idiv(c->s);
nir_lower_load_const_to_scalar(c->s);
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index e385fbb65ae..7894b081b19 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -36,10 +36,17 @@ struct qir_op_info {
static const struct qir_op_info qir_op_info[] = {
[QOP_MOV] = { "mov", 1, 1 },
+ [QOP_FMOV] = { "fmov", 1, 1 },
+ [QOP_MMOV] = { "mmov", 1, 1 },
[QOP_FADD] = { "fadd", 1, 2 },
[QOP_FSUB] = { "fsub", 1, 2 },
[QOP_FMUL] = { "fmul", 1, 2 },
[QOP_MUL24] = { "mul24", 1, 2 },
+ [QOP_V8MULD] = {"v8muld", 1, 2 },
+ [QOP_V8MIN] = {"v8min", 1, 2 },
+ [QOP_V8MAX] = {"v8max", 1, 2 },
+ [QOP_V8ADDS] = {"v8adds", 1, 2 },
+ [QOP_V8SUBS] = {"v8subs", 1, 2 },
[QOP_FMIN] = { "fmin", 1, 2 },
[QOP_FMAX] = { "fmax", 1, 2 },
[QOP_FMINABS] = { "fminabs", 1, 2 },
@@ -71,11 +78,6 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_RSQ] = { "rsq", 1, 1, false, true },
[QOP_EXP2] = { "exp2", 1, 2, false, true },
[QOP_LOG2] = { "log2", 1, 2, false, true },
- [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1 },
- [QOP_PACK_8A_F] = { "pack_8a_f", 1, 1 },
- [QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 },
- [QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 },
- [QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 },
[QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
[QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
[QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
@@ -95,18 +97,6 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_TEX_B] = { "tex_b", 0, 2 },
[QOP_TEX_DIRECT] = { "tex_direct", 0, 2 },
[QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
- [QOP_UNPACK_8A_F] = { "unpack_8a_f", 1, 1 },
- [QOP_UNPACK_8B_F] = { "unpack_8b_f", 1, 1 },
- [QOP_UNPACK_8C_F] = { "unpack_8c_f", 1, 1 },
- [QOP_UNPACK_8D_F] = { "unpack_8d_f", 1, 1 },
- [QOP_UNPACK_16A_F] = { "unpack_16a_f", 1, 1 },
- [QOP_UNPACK_16B_F] = { "unpack_16b_f", 1, 1 },
- [QOP_UNPACK_8A_I] = { "unpack_8a_i", 1, 1 },
- [QOP_UNPACK_8B_I] = { "unpack_8b_i", 1, 1 },
- [QOP_UNPACK_8C_I] = { "unpack_8c_i", 1, 1 },
- [QOP_UNPACK_8D_I] = { "unpack_8d_i", 1, 1 },
- [QOP_UNPACK_16A_I] = { "unpack_16a_i", 1, 1 },
- [QOP_UNPACK_16B_I] = { "unpack_16b_i", 1, 1 },
};
static const char *
@@ -171,8 +161,14 @@ bool
qir_is_mul(struct qinst *inst)
{
switch (inst->op) {
+ case QOP_MMOV:
case QOP_FMUL:
case QOP_MUL24:
+ case QOP_V8MULD:
+ case QOP_V8MIN:
+ case QOP_V8MAX:
+ case QOP_V8ADDS:
+ case QOP_V8SUBS:
return true;
default:
return false;
@@ -180,6 +176,35 @@ qir_is_mul(struct qinst *inst)
}
bool
+qir_is_float_input(struct qinst *inst)
+{
+ /* Returns true for the QIR opcodes listed below and false for every
+ * other opcode.  qir_opt_copy_propagation compares this result between
+ * two instructions before moving a source unpack from one to the other.
+ */
+ switch (inst->op) {
+ case QOP_FMOV:
+ case QOP_FMUL:
+ case QOP_FADD:
+ case QOP_FSUB:
+ case QOP_FMIN:
+ case QOP_FMAX:
+ case QOP_FMINABS:
+ case QOP_FMAXABS:
+ case QOP_FTOI:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
+ * Returns true when inst is a QOP_MOV, QOP_FMOV or QOP_MMOV that has no pack
+ * set on its destination and no pack set on its first source.
+ */
+bool
+qir_is_raw_mov(struct qinst *inst)
+{
+ return ((inst->op == QOP_MOV ||
+ inst->op == QOP_FMOV ||
+ inst->op == QOP_MMOV) &&
+ !inst->dst.pack &&
+ !inst->src[0].pack);
+}
+
+bool
qir_is_tex(struct qinst *inst)
{
return inst->op >= QOP_TEX_S && inst->op <= QOP_TEX_DIRECT;
@@ -204,28 +229,6 @@ qir_depends_on_flags(struct qinst *inst)
}
bool
-qir_src_needs_a_file(struct qinst *inst)
-{
- switch (inst->op) {
- case QOP_UNPACK_8A_F:
- case QOP_UNPACK_8B_F:
- case QOP_UNPACK_8C_F:
- case QOP_UNPACK_8D_F:
- case QOP_UNPACK_16A_F:
- case QOP_UNPACK_16B_F:
- case QOP_UNPACK_8A_I:
- case QOP_UNPACK_8B_I:
- case QOP_UNPACK_8C_I:
- case QOP_UNPACK_8D_I:
- case QOP_UNPACK_16A_I:
- case QOP_UNPACK_16B_I:
- return true;
- default:
- return false;
- }
-}
-
-bool
qir_writes_r4(struct qinst *inst)
{
switch (inst->op) {
@@ -295,6 +298,7 @@ qir_dump_inst(struct vc4_compile *c, struct qinst *inst)
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
fprintf(stderr, ", ");
qir_print_reg(c, inst->src[i], false);
+ vc4_qpu_disasm_unpack(stderr, inst->src[i].pack);
}
}
@@ -385,7 +389,6 @@ qir_compile_init(void)
list_inithead(&c->instructions);
c->output_position_index = -1;
- c->output_clipvertex_index = -1;
c->output_color_index = -1;
c->output_point_size_index = -1;
@@ -411,7 +414,8 @@ qir_follow_movs(struct vc4_compile *c, struct qreg reg)
{
while (reg.file == QFILE_TEMP &&
c->defs[reg.index] &&
- c->defs[reg.index]->op == QOP_MOV) {
+ c->defs[reg.index]->op == QOP_MOV &&
+ !c->defs[reg.index]->dst.pack) {
reg = c->defs[reg.index]->src[0];
}
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index ddde96db6b4..a92ad93ee07 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -37,6 +37,7 @@
#include "util/u_math.h"
#include "vc4_screen.h"
+#include "vc4_qpu_defines.h"
#include "pipe/p_state.h"
struct nir_builder;
@@ -64,9 +65,16 @@ struct qreg {
enum qop {
QOP_UNDEF,
QOP_MOV,
+ QOP_FMOV,
+ QOP_MMOV,
QOP_FADD,
QOP_FSUB,
QOP_FMUL,
+ QOP_V8MULD,
+ QOP_V8MIN,
+ QOP_V8MAX,
+ QOP_V8ADDS,
+ QOP_V8SUBS,
QOP_MUL24,
QOP_FMIN,
QOP_FMAX,
@@ -105,11 +113,6 @@ enum qop {
QOP_LOG2,
QOP_VW_SETUP,
QOP_VR_SETUP,
- QOP_PACK_8888_F,
- QOP_PACK_8A_F,
- QOP_PACK_8B_F,
- QOP_PACK_8C_F,
- QOP_PACK_8D_F,
QOP_TLB_DISCARD_SETUP,
QOP_TLB_STENCIL_SETUP,
QOP_TLB_Z_WRITE,
@@ -123,20 +126,6 @@ enum qop {
QOP_FRAG_W,
QOP_FRAG_REV_FLAG,
- QOP_UNPACK_8A_F,
- QOP_UNPACK_8B_F,
- QOP_UNPACK_8C_F,
- QOP_UNPACK_8D_F,
- QOP_UNPACK_16A_F,
- QOP_UNPACK_16B_F,
-
- QOP_UNPACK_8A_I,
- QOP_UNPACK_8B_I,
- QOP_UNPACK_8C_I,
- QOP_UNPACK_8D_I,
- QOP_UNPACK_16A_I,
- QOP_UNPACK_16B_I,
-
/** Texture x coordinate parameter write */
QOP_TEX_S,
/** Texture y coordinate parameter write */
@@ -248,6 +237,8 @@ enum quniform_contents {
QUNIFORM_BLEND_CONST_COLOR_Y,
QUNIFORM_BLEND_CONST_COLOR_Z,
QUNIFORM_BLEND_CONST_COLOR_W,
+ QUNIFORM_BLEND_CONST_COLOR_RGBA,
+ QUNIFORM_BLEND_CONST_COLOR_AAAA,
QUNIFORM_STENCIL,
@@ -399,7 +390,6 @@ struct vc4_compile {
uint32_t num_outputs;
uint32_t num_texture_samples;
uint32_t output_position_index;
- uint32_t output_clipvertex_index;
uint32_t output_color_index;
uint32_t output_point_size_index;
@@ -457,10 +447,11 @@ bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
bool qir_is_multi_instruction(struct qinst *inst);
bool qir_is_mul(struct qinst *inst);
+bool qir_is_raw_mov(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
+bool qir_is_float_input(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
-bool qir_src_needs_a_file(struct qinst *inst);
struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);
void qir_dump(struct vc4_compile *c);
@@ -561,9 +552,16 @@ qir_##name(struct vc4_compile *c, struct qreg dest, struct qreg a) \
}
QIR_ALU1(MOV)
+QIR_ALU1(FMOV)
+QIR_ALU1(MMOV)
QIR_ALU2(FADD)
QIR_ALU2(FSUB)
QIR_ALU2(FMUL)
+QIR_ALU2(V8MULD)
+QIR_ALU2(V8MIN)
+QIR_ALU2(V8MAX)
+QIR_ALU2(V8ADDS)
+QIR_ALU2(V8SUBS)
QIR_ALU2(MUL24)
QIR_ALU1(SEL_X_0_ZS)
QIR_ALU1(SEL_X_0_ZC)
@@ -596,11 +594,6 @@ QIR_ALU1(RCP)
QIR_ALU1(RSQ)
QIR_ALU1(EXP2)
QIR_ALU1(LOG2)
-QIR_ALU1(PACK_8888_F)
-QIR_PACK(PACK_8A_F)
-QIR_PACK(PACK_8B_F)
-QIR_PACK(PACK_8C_F)
-QIR_PACK(PACK_8D_F)
QIR_ALU1(VARY_ADD_C)
QIR_NODST_2(TEX_S)
QIR_NODST_2(TEX_T)
@@ -622,41 +615,50 @@ QIR_NODST_1(TLB_STENCIL_SETUP)
static inline struct qreg
qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef));
+ struct qreg t = qir_FMOV(c, src);
+ c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i;
return t;
}
static inline struct qreg
qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i)
{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef));
+ struct qreg t = qir_MOV(c, src);
+ c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i;
return t;
}
static inline struct qreg
qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i)
{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef));
+ struct qreg t = qir_FMOV(c, src);
+ c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i;
return t;
}
static inline struct qreg
qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef));
+ struct qreg t = qir_MOV(c, src);
+ c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i;
return t;
}
-static inline struct qreg
+static inline void
qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan)
{
- qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, dest, val, c->undef));
+ assert(!dest.pack);
+ dest.pack = QPU_PACK_MUL_8A + chan;
+ qir_emit(c, qir_inst(QOP_MMOV, dest, val, c->undef));
if (dest.file == QFILE_TEMP)
c->defs[dest.index] = NULL;
+}
+
+static inline struct qreg
+qir_PACK_8888_F(struct vc4_compile *c, struct qreg val)
+{
+ struct qreg dest = qir_MMOV(c, val);
+ c->defs[dest.index]->dst.pack = QPU_PACK_MUL_8888;
return dest;
}
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h
index 0719d2828b5..866ca5c1300 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -213,6 +213,9 @@ void
vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack);
void
+vc4_qpu_disasm_unpack(FILE *out, uint32_t pack);
+
+void
vc4_qpu_validate(uint64_t *insts, uint32_t num_inst);
#endif /* VC4_QPU_H */
diff --git a/src/gallium/drivers/vc4/vc4_qpu_defines.h b/src/gallium/drivers/vc4/vc4_qpu_defines.h
index eb3dfb33827..626dc3be6be 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_defines.h
+++ b/src/gallium/drivers/vc4/vc4_qpu_defines.h
@@ -200,8 +200,8 @@ enum qpu_pack_a {
enum qpu_unpack {
QPU_UNPACK_NOP,
- QPU_UNPACK_16A_TO_F32,
- QPU_UNPACK_16B_TO_F32,
+ QPU_UNPACK_16A,
+ QPU_UNPACK_16B,
QPU_UNPACK_8D_REP,
QPU_UNPACK_8A,
QPU_UNPACK_8B,
diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
index 0879787ec03..c46fd1a0e3f 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
@@ -98,8 +98,8 @@ static const char *qpu_pack_mul[] = {
*/
static const char *qpu_unpack[] = {
[QPU_UNPACK_NOP] = "",
- [QPU_UNPACK_16A_TO_F32] = "16a",
- [QPU_UNPACK_16B_TO_F32] = "16b",
+ [QPU_UNPACK_16A] = "16a",
+ [QPU_UNPACK_16B] = "16b",
[QPU_UNPACK_8D_REP] = "8d_rep",
[QPU_UNPACK_8A] = "8a",
[QPU_UNPACK_8B] = "8b",
@@ -257,6 +257,13 @@ vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack)
fprintf(out, "%s", DESC(qpu_pack_a, pack));
}
+void
+vc4_qpu_disasm_unpack(FILE *out, uint32_t unpack)
+{ /* Write the unpack mode to `out` as a ".<name>" suffix, using the string
+   * that DESC(qpu_unpack, unpack) yields. QPU_UNPACK_NOP prints nothing,
+   * so callers do not need to test for it themselves.
+   */
+ if (unpack != QPU_UNPACK_NOP)
+ fprintf(out, ".%s", DESC(qpu_unpack, unpack));
+}
+
static void
print_alu_dst(uint64_t inst, bool is_mul)
{
@@ -315,10 +322,9 @@ print_alu_src(uint64_t inst, uint32_t mux)
fprintf(stderr, "%s", DESC(special_read_b, raddr - 32));
}
- if (unpack != QPU_UNPACK_NOP &&
- ((mux == QPU_MUX_A && !(inst & QPU_PM)) ||
+ if (((mux == QPU_MUX_A && !(inst & QPU_PM)) ||
(mux == QPU_MUX_R4 && (inst & QPU_PM)))) {
- fprintf(stderr, ".%s", DESC(qpu_unpack, unpack));
+ vc4_qpu_disasm_unpack(stderr, unpack);
}
}
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index adf3a8b3658..133e1385178 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -101,7 +101,8 @@ swap_file(struct qpu_reg *src)
static void
fixup_raddr_conflict(struct vc4_compile *c,
struct qpu_reg dst,
- struct qpu_reg *src0, struct qpu_reg *src1)
+ struct qpu_reg *src0, struct qpu_reg *src1,
+ struct qinst *inst, uint64_t *unpack)
{
uint32_t mux0 = src0->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src0->mux;
uint32_t mux1 = src1->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src1->mux;
@@ -117,7 +118,21 @@ fixup_raddr_conflict(struct vc4_compile *c,
return;
if (mux0 == QPU_MUX_A) {
- queue(c, qpu_a_MOV(qpu_rb(31), *src0));
+ /* Make sure we use the same type of MOV as the instruction,
+ * in case of unpacks.
+ */
+ if (qir_is_float_input(inst))
+ queue(c, qpu_a_FMAX(qpu_rb(31), *src0, *src0));
+ else
+ queue(c, qpu_a_MOV(qpu_rb(31), *src0));
+
+ /* If we had an unpack on this A-file source, we need to put
+ * it into this MOV, not into the later move from regfile B.
+ */
+ if (inst->src[0].pack) {
+ *last_inst(c) |= *unpack;
+ *unpack = 0;
+ }
*src0 = qpu_rb(31);
} else {
queue(c, qpu_a_MOV(qpu_ra(31), *src0));
@@ -125,6 +140,27 @@ fixup_raddr_conflict(struct vc4_compile *c,
}
}
+static void
+set_last_dst_pack(struct vc4_compile *c, struct qinst *inst)
+{ /* Fold the QIR instruction's destination pack mode (inst->dst.pack) into
+   * the QPU instruction that last_inst(c) points at. Does nothing when
+   * inst->dst.pack is zero.
+   */
+ bool had_pm = *last_inst(c) & QPU_PM;
+ bool had_ws = *last_inst(c) & QPU_WS;
+ uint32_t unpack = QPU_GET_FIELD(*last_inst(c), QPU_UNPACK);
+ /* The three values above are sampled before the instruction is modified. */
+ if (!inst->dst.pack)
+ return;
+ /* OR the pack mode into the instruction's QPU_PACK field. */
+ *last_inst(c) |= QPU_SET_FIELD(inst->dst.pack, QPU_PACK);
+ /* When qir_is_mul(inst) the pack also sets QPU_PM; otherwise PM is left
+  * alone. The asserts check that an unpack already encoded in the
+  * instruction (non-zero QPU_UNPACK field) agrees with that PM state.
+  */
+ if (qir_is_mul(inst)) {
+ assert(!unpack || had_pm);
+ *last_inst(c) |= QPU_PM;
+ } else {
+ assert(!unpack || !had_pm);
+ assert(!had_ws); /* dst must be a-file to pack. */
+ }
+}
+
void
vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
{
@@ -134,15 +170,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
uint32_t vpm_read_fifo_count = 0;
uint32_t vpm_read_offset = 0;
int last_vpm_read_index = -1;
- /* Map from the QIR ops enum order to QPU unpack bits. */
- static const uint32_t unpack_map[] = {
- QPU_UNPACK_8A,
- QPU_UNPACK_8B,
- QPU_UNPACK_8C,
- QPU_UNPACK_8D,
- QPU_UNPACK_16A_TO_F32,
- QPU_UNPACK_16B_TO_F32,
- };
list_inithead(&c->qpu_inst_list);
@@ -203,9 +230,22 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
A(NOT),
M(FMUL),
+ M(V8MULD),
+ M(V8MIN),
+ M(V8MAX),
+ M(V8ADDS),
+ M(V8SUBS),
M(MUL24),
+
+ /* If we replicate src[0] out to src[1], this works
+ * out the same as a MOV.
+ */
+ [QOP_MOV] = { QPU_A_OR },
+ [QOP_FMOV] = { QPU_A_FMAX },
+ [QOP_MMOV] = { QPU_M_V8MIN },
};
+ uint64_t unpack = 0;
struct qpu_reg src[4];
for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
int index = qinst->src[i].index;
@@ -215,6 +255,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
case QFILE_TEMP:
src[i] = temp_registers[index];
+ if (qinst->src[i].pack) {
+ assert(!unpack ||
+ unpack == qinst->src[i].pack);
+ unpack = QPU_SET_FIELD(qinst->src[i].pack,
+ QPU_UNPACK);
+ if (src[i].mux == QPU_MUX_R4)
+ unpack |= QPU_PM;
+ }
break;
case QFILE_UNIF:
src[i] = qpu_unif();
@@ -259,19 +307,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
}
switch (qinst->op) {
- case QOP_MOV:
- /* Skip emitting the MOV if it's a no-op. */
- if (dst.mux == QPU_MUX_A || dst.mux == QPU_MUX_B ||
- dst.mux != src[0].mux || dst.addr != src[0].addr) {
- queue(c, qpu_a_MOV(dst, src[0]));
- }
- break;
-
case QOP_SEL_X_0_ZS:
case QOP_SEL_X_0_ZC:
case QOP_SEL_X_0_NS:
case QOP_SEL_X_0_NC:
- queue(c, qpu_a_MOV(dst, src[0]));
+ queue(c, qpu_a_MOV(dst, src[0]) | unpack);
set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
QPU_COND_ZS);
@@ -285,10 +325,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_SEL_X_Y_NS:
case QOP_SEL_X_Y_NC:
queue(c, qpu_a_MOV(dst, src[0]));
+ if (qinst->src[0].pack)
+ *(last_inst(c)) |= unpack;
set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS +
QPU_COND_ZS);
queue(c, qpu_a_MOV(dst, src[1]));
+ if (qinst->src[1].pack)
+ *(last_inst(c)) |= unpack;
set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^
1) + QPU_COND_ZS);
@@ -301,19 +345,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
switch (qinst->op) {
case QOP_RCP:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIP),
- src[0]));
+ src[0]) | unpack);
break;
case QOP_RSQ:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIPSQRT),
- src[0]));
+ src[0]) | unpack);
break;
case QOP_EXP2:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_EXP),
- src[0]));
+ src[0]) | unpack);
break;
case QOP_LOG2:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_LOG),
- src[0]));
+ src[0]) | unpack);
break;
default:
abort();
@@ -324,25 +368,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
- case QOP_PACK_8888_F:
- queue(c, qpu_m_MOV(dst, src[0]));
- *last_inst(c) |= QPU_PM;
- *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888,
- QPU_PACK);
- break;
-
- case QOP_PACK_8A_F:
- case QOP_PACK_8B_F:
- case QOP_PACK_8C_F:
- case QOP_PACK_8D_F:
- queue(c,
- qpu_m_MOV(dst, src[0]) |
- QPU_PM |
- QPU_SET_FIELD(QPU_PACK_MUL_8A +
- qinst->op - QOP_PACK_8A_F,
- QPU_PACK));
- break;
-
case QOP_FRAG_X:
queue(c, qpu_a_ITOF(dst,
qpu_ra(QPU_R_XY_PIXEL_COORD)));
@@ -367,16 +392,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_TLB_DISCARD_SETUP:
discard = true;
- queue(c, qpu_a_MOV(src[0], src[0]));
+ queue(c, qpu_a_MOV(src[0], src[0]) | unpack);
*last_inst(c) |= QPU_SF;
break;
case QOP_TLB_STENCIL_SETUP:
- queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP), src[0]));
+ assert(!unpack);
+ queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP),
+ src[0]) | unpack);
break;
case QOP_TLB_Z_WRITE:
- queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), src[0]));
+ queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z),
+ src[0]) | unpack);
if (discard) {
set_last_cond_add(c, QPU_COND_ZS);
}
@@ -392,14 +420,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
case QOP_TLB_COLOR_WRITE:
- queue(c, qpu_a_MOV(qpu_tlbc(), src[0]));
+ queue(c, qpu_a_MOV(qpu_tlbc(), src[0]) | unpack);
if (discard) {
set_last_cond_add(c, QPU_COND_ZS);
}
break;
case QOP_VARY_ADD_C:
- queue(c, qpu_a_FADD(dst, src[0], qpu_r5()));
+ queue(c, qpu_a_FADD(dst, src[0], qpu_r5()) | unpack);
break;
case QOP_TEX_S:
@@ -408,12 +436,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_TEX_B:
queue(c, qpu_a_MOV(qpu_rb(QPU_W_TMU0_S +
(qinst->op - QOP_TEX_S)),
- src[0]));
+ src[0]) | unpack);
break;
case QOP_TEX_DIRECT:
- fixup_raddr_conflict(c, dst, &src[0], &src[1]);
- queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), src[0], src[1]));
+ fixup_raddr_conflict(c, dst, &src[0], &src[1],
+ qinst, &unpack);
+ queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S),
+ src[0], src[1]) | unpack);
break;
case QOP_TEX_RESULT:
@@ -424,67 +454,16 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_a_MOV(dst, qpu_r4()));
break;
- case QOP_UNPACK_8A_F:
- case QOP_UNPACK_8B_F:
- case QOP_UNPACK_8C_F:
- case QOP_UNPACK_8D_F:
- case QOP_UNPACK_16A_F:
- case QOP_UNPACK_16B_F: {
- if (src[0].mux == QPU_MUX_R4) {
- queue(c, qpu_a_MOV(dst, src[0]));
- *last_inst(c) |= QPU_PM;
- *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
- (qinst->op -
- QOP_UNPACK_8A_F),
- QPU_UNPACK);
- } else {
- assert(src[0].mux == QPU_MUX_A);
-
- /* Since we're setting the pack bits, if the
- * destination is in A it would get re-packed.
- */
- queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ?
- qpu_rb(31) : dst),
- src[0], src[0]));
- *last_inst(c) |=
- QPU_SET_FIELD(unpack_map[qinst->op -
- QOP_UNPACK_8A_F],
- QPU_UNPACK);
-
- if (dst.mux == QPU_MUX_A) {
- queue(c, qpu_a_MOV(dst, qpu_rb(31)));
- }
- }
- }
- break;
-
- case QOP_UNPACK_8A_I:
- case QOP_UNPACK_8B_I:
- case QOP_UNPACK_8C_I:
- case QOP_UNPACK_8D_I:
- case QOP_UNPACK_16A_I:
- case QOP_UNPACK_16B_I: {
- assert(src[0].mux == QPU_MUX_A);
-
- /* Since we're setting the pack bits, if the
- * destination is in A it would get re-packed.
- */
- queue(c, qpu_a_MOV((dst.mux == QPU_MUX_A ?
- qpu_rb(31) : dst), src[0]));
- *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op -
- QOP_UNPACK_8A_I],
- QPU_UNPACK);
-
- if (dst.mux == QPU_MUX_A) {
- queue(c, qpu_a_MOV(dst, qpu_rb(31)));
- }
- }
- break;
-
default:
assert(qinst->op < ARRAY_SIZE(translate));
assert(translate[qinst->op].op != 0); /* NOPs */
+ /* Skip emitting the MOV if it's a no-op. */
+ if (qir_is_raw_mov(qinst) &&
+ dst.mux == src[0].mux && dst.addr == src[0].addr) {
+ break;
+ }
+
/* If we have only one source, put it in the second
* argument slot as well so that we don't take up
* another raddr just to get unused data.
@@ -492,27 +471,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
if (qir_get_op_nsrc(qinst->op) == 1)
src[1] = src[0];
- fixup_raddr_conflict(c, dst, &src[0], &src[1]);
+ fixup_raddr_conflict(c, dst, &src[0], &src[1],
+ qinst, &unpack);
if (qir_is_mul(qinst)) {
queue(c, qpu_m_alu2(translate[qinst->op].op,
dst,
- src[0], src[1]));
- if (qinst->dst.pack) {
- *last_inst(c) |= QPU_PM;
- *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
- QPU_PACK);
- }
+ src[0], src[1]) | unpack);
} else {
queue(c, qpu_a_alu2(translate[qinst->op].op,
dst,
- src[0], src[1]));
- if (qinst->dst.pack) {
- assert(dst.mux == QPU_MUX_A);
- *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
- QPU_PACK);
- }
+ src[0], src[1]) | unpack);
}
+ set_last_dst_pack(c, qinst);
break;
}
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
index 3ced50f3a44..bca36c3e7f4 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -282,23 +282,23 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
class_bits[inst->dst.index] &= CLASS_BIT_A;
}
- if (qir_src_needs_a_file(inst)) {
- switch (inst->op) {
- case QOP_UNPACK_8A_F:
- case QOP_UNPACK_8B_F:
- case QOP_UNPACK_8C_F:
- case QOP_UNPACK_8D_F:
- /* Special case: these can be done as R4
- * unpacks, as well.
- */
- class_bits[inst->src[0].index] &= (CLASS_BIT_A |
- CLASS_BIT_R4);
- break;
- default:
- class_bits[inst->src[0].index] &= CLASS_BIT_A;
- break;
+ /* Apply restrictions for src unpacks. The integer unpacks
+ * can only be done from regfile A, while float unpacks can be
+ * either A or R4.
+ */
+ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ if (inst->src[i].file == QFILE_TEMP &&
+ inst->src[i].pack) {
+ if (qir_is_float_input(inst)) {
+ class_bits[inst->src[i].index] &=
+ CLASS_BIT_A | CLASS_BIT_R4;
+ } else {
+ class_bits[inst->src[i].index] &=
+ CLASS_BIT_A;
+ }
}
}
+
ip++;
}
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index 5d5166fd818..122bda0bac6 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -667,11 +667,16 @@ vc4_get_shadow_index_buffer(struct pipe_context *pctx,
shadow_offset, &shadow_rsc, &data);
uint16_t *dst = data;
- struct pipe_transfer *src_transfer;
- uint32_t *src = pipe_buffer_map_range(pctx, &orig->base.b,
- ib->offset,
- count * 4,
- PIPE_TRANSFER_READ, &src_transfer);
+ struct pipe_transfer *src_transfer = NULL;
+ uint32_t *src;
+ if (ib->user_buffer) {
+ src = ib->user_buffer;
+ } else {
+ src = pipe_buffer_map_range(pctx, &orig->base.b,
+ ib->offset,
+ count * 4,
+ PIPE_TRANSFER_READ, &src_transfer);
+ }
for (int i = 0; i < count; i++) {
uint32_t src_index = src[i];
@@ -679,7 +684,8 @@ vc4_get_shadow_index_buffer(struct pipe_context *pctx,
dst[i] = src_index;
}
- pctx->transfer_unmap(pctx, src_transfer);
+ if (src_transfer)
+ pctx->transfer_unmap(pctx, src_transfer);
return shadow_rsc;
}
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 774ec095652..bb867611804 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -94,6 +94,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_SHADOW_MAP:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_TWO_SIDED_STENCIL:
+ case PIPE_CAP_USER_INDEX_BUFFERS:
return 1;
/* lying for GL 2.0 */
@@ -152,7 +153,6 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
- case PIPE_CAP_USER_INDEX_BUFFERS:
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
@@ -183,6 +183,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
/* Stream output. */
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index 76980ca32af..10dabd09f5e 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -32,6 +32,11 @@
#include "vc4_simulator_validate.h"
#include "simpenrose/simpenrose.h"
+/* A marker placed just after each BO, then checked after rendering to make
+ * sure it's still there.
+ */
+#define BO_SENTINEL 0xfedcba98
+
#define OVERFLOW_SIZE (32 * 1024 * 1024)
static struct drm_gem_cma_object *
@@ -49,10 +54,12 @@ vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo)
obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next;
obj->paddr = simpenrose_hw_addr(obj->vaddr);
- dev->simulator_mem_next += size;
+ dev->simulator_mem_next += size + sizeof(uint32_t);
dev->simulator_mem_next = align(dev->simulator_mem_next, 4096);
assert(dev->simulator_mem_next <= screen->simulator_mem_size);
+ *(uint32_t *)(obj->vaddr + bo->size) = BO_SENTINEL;
+
return obj;
}
@@ -109,6 +116,7 @@ vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base);
struct vc4_bo *bo = drm_bo->bo;
+ assert(*(uint32_t *)(obj->vaddr + bo->size) == BO_SENTINEL);
memcpy(bo->map, obj->vaddr, bo->size);
if (drm_bo->validated_shader) {
@@ -197,6 +205,8 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec.unref_list,
unref_head) {
list_del(&bo->unref_head);
+ assert(*(uint32_t *)(bo->base.vaddr + bo->bo->size) ==
+ BO_SENTINEL);
vc4_bo_unreference(&bo->bo);
free(bo);
}
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index 8a759c2ca4c..78aa344ab1d 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -51,7 +51,9 @@ vc4_set_blend_color(struct pipe_context *pctx,
const struct pipe_blend_color *blend_color)
{
struct vc4_context *vc4 = vc4_context(pctx);
- vc4->blend_color = *blend_color;
+ vc4->blend_color.f = *blend_color;
+ for (int i = 0; i < 4; i++)
+ vc4->blend_color.ub[i] = float_to_ubyte(blend_color->color[i]);
vc4->dirty |= VC4_DIRTY_BLEND_COLOR;
}
@@ -303,10 +305,10 @@ vc4_set_index_buffer(struct pipe_context *pctx,
struct vc4_context *vc4 = vc4_context(pctx);
if (ib) {
- assert(!ib->user_buffer);
pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer);
vc4->indexbuf.index_size = ib->index_size;
vc4->indexbuf.offset = ib->offset;
+ vc4->indexbuf.user_buffer = ib->user_buffer;
} else {
pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
}
diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c
index 85d6998205e..f5ad481f186 100644
--- a/src/gallium/drivers/vc4/vc4_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_uniforms.c
@@ -262,11 +262,35 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
case QUNIFORM_BLEND_CONST_COLOR_Z:
case QUNIFORM_BLEND_CONST_COLOR_W:
cl_aligned_f(&uniforms,
- CLAMP(vc4->blend_color.color[uinfo->contents[i] -
- QUNIFORM_BLEND_CONST_COLOR_X],
+ CLAMP(vc4->blend_color.f.color[uinfo->contents[i] -
+ QUNIFORM_BLEND_CONST_COLOR_X],
0, 1));
break;
+ case QUNIFORM_BLEND_CONST_COLOR_RGBA: {
+ const uint8_t *format_swiz =
+ vc4_get_format_swizzle(vc4->framebuffer.cbufs[0]->format);
+ uint32_t color = 0;
+ for (int i = 0; i < 4; i++) {
+ if (format_swiz[i] >= 4)
+ continue;
+
+ color |= (vc4->blend_color.ub[format_swiz[i]] <<
+ (i * 8));
+ }
+ cl_aligned_u32(&uniforms, color);
+ break;
+ }
+
+ case QUNIFORM_BLEND_CONST_COLOR_AAAA: {
+ uint8_t a = vc4->blend_color.ub[3];
+ cl_aligned_u32(&uniforms, ((a) |
+ (a << 8) |
+ (a << 16) |
+ (a << 24)));
+ break;
+ }
+
case QUNIFORM_STENCIL:
cl_aligned_u32(&uniforms,
vc4->zsa->stencil_uniforms[uinfo->data[i]] |
@@ -330,6 +354,8 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
case QUNIFORM_BLEND_CONST_COLOR_Y:
case QUNIFORM_BLEND_CONST_COLOR_Z:
case QUNIFORM_BLEND_CONST_COLOR_W:
+ case QUNIFORM_BLEND_CONST_COLOR_RGBA:
+ case QUNIFORM_BLEND_CONST_COLOR_AAAA:
dirty |= VC4_DIRTY_BLEND_COLOR;
break;
diff --git a/src/gallium/drivers/virgl/Automake.inc b/src/gallium/drivers/virgl/Automake.inc
new file mode 100644
index 00000000000..b05d3e314c8
--- /dev/null
+++ b/src/gallium/drivers/virgl/Automake.inc
@@ -0,0 +1,11 @@
+if HAVE_GALLIUM_VIRGL
+
+TARGET_DRIVERS += virtio_gpu
+TARGET_CPPFLAGS += -DGALLIUM_VIRGL
+TARGET_LIB_DEPS += \
+ $(top_builddir)/src/gallium/drivers/virgl/libvirgl.la \
+ $(top_builddir)/src/gallium/winsys/virgl/drm/libvirgldrm.la \
+ $(top_builddir)/src/gallium/winsys/virgl/vtest/libvirglvtest.la \
+ $(LIBDRM_LIBS)
+
+endif
diff --git a/src/gallium/drivers/virgl/Makefile.am b/src/gallium/drivers/virgl/Makefile.am
new file mode 100644
index 00000000000..82d9756143f
--- /dev/null
+++ b/src/gallium/drivers/virgl/Makefile.am
@@ -0,0 +1,32 @@
+# Copyright © 2014, 2015 Red Hat.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CPPFLAGS = \
+ $(GALLIUM_DRIVER_CFLAGS) \
+ $(LIBDRM_CFLAGS)
+
+noinst_LTLIBRARIES = libvirgl.la
+
+libvirgl_la_SOURCES = $(C_SOURCES)
diff --git a/src/gallium/drivers/virgl/Makefile.sources b/src/gallium/drivers/virgl/Makefile.sources
new file mode 100644
index 00000000000..c27d284e248
--- /dev/null
+++ b/src/gallium/drivers/virgl/Makefile.sources
@@ -0,0 +1,18 @@
+C_SOURCES := \
+ virgl_buffer.c \
+ virgl_context.c \
+ virgl_context.h \
+ virgl_encode.c \
+ virgl_encode.h \
+ virgl_hw.h \
+ virgl_protocol.h \
+ virgl_public.h \
+ virgl_query.c \
+ virgl_resource.c \
+ virgl_resource.h \
+ virgl_screen.c \
+ virgl_screen.h \
+ virgl_streamout.c \
+ virgl_texture.c \
+ virgl_tgsi.c \
+ virgl_winsys.h
diff --git a/src/gallium/drivers/virgl/virgl_buffer.c b/src/gallium/drivers/virgl/virgl_buffer.c
new file mode 100644
index 00000000000..ce19fb949d0
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_buffer.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "virgl_context.h"
+#include "virgl_resource.h"
+#include "virgl_screen.h"
+/*
+ * Tear down a virgl buffer: destroy its valid_buffer_range, drop the winsys
+ * reference on the backing hw_res, then free the virgl_buffer itself.
+ * Used as the resource_destroy entry of virgl_buffer_vtbl.
+ */
+static void virgl_buffer_destroy(struct pipe_screen *screen,
+ struct pipe_resource *buf)
+{
+ struct virgl_screen *vs = virgl_screen(screen);
+ struct virgl_buffer *vbuf = virgl_buffer(buf);
+
+ util_range_destroy(&vbuf->valid_buffer_range);
+ vs->vws->resource_unref(vs->vws, vbuf->base.hw_res);
+ FREE(vbuf);
+}
+/*
+ * Map a virgl buffer for CPU access.
+ *
+ * Allocates a virgl_transfer from vctx->texture_transfer_pool, optionally
+ * flushes the context and reads the resource back through the winsys, waits
+ * for the resource, maps it, and returns the mapping advanced by box->x.
+ * On success *transfer points at the new transfer's pipe_transfer base.
+ *
+ * Returns NULL if the slab allocation or the winsys resource_map() fails;
+ * *transfer is not written in either case.
+ */
+static void *virgl_buffer_transfer_map(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **transfer)
+{
+ struct virgl_context *vctx = virgl_context(ctx);
+ struct virgl_screen *vs = virgl_screen(ctx->screen);
+ struct virgl_buffer *vbuf = virgl_buffer(resource);
+ struct virgl_transfer *trans;
+ void *ptr;
+ bool readback;
+ uint32_t offset;
+ bool doflushwait = false;
+ /* A read of a buffer whose on_list flag is set always forces a flush;
+  * otherwise virgl_res_needs_flush_wait() decides.
+  */
+ if ((usage & PIPE_TRANSFER_READ) && (vbuf->on_list == TRUE))
+ doflushwait = true;
+ else
+ doflushwait = virgl_res_needs_flush_wait(vctx, &vbuf->base, usage);
+
+ if (doflushwait)
+ ctx->flush(ctx, NULL, 0);
+
+ trans = util_slab_alloc(&vctx->texture_transfer_pool);
+ if (trans == NULL)
+ return NULL;
+ /* Record the request in the transfer; both strides are set to 0 here. */
+ trans->base.resource = resource;
+ trans->base.level = level;
+ trans->base.usage = usage;
+ trans->base.box = *box;
+ trans->base.stride = 0;
+ trans->base.layer_stride = 0;
+ /* The offset into the mapping is taken directly from the box's x. */
+ offset = box->x;
+
+ readback = virgl_res_needs_readback(vctx, &vbuf->base, usage);
+ if (readback)
+ vs->vws->transfer_get(vs->vws, vbuf->base.hw_res, box, trans->base.stride, trans->base.layer_stride, offset, level);
+ /* Any mapping not flagged PIPE_TRANSFER_UNSYNCHRONIZED also waits below. */
+ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED))
+ doflushwait = true;
+
+ if (doflushwait || readback)
+ vs->vws->resource_wait(vs->vws, vbuf->base.hw_res);
+ /* NOTE(review): if resource_map() fails, the function returns without
+  * handing `trans` back to texture_transfer_pool - looks like a slab
+  * leak on this path; confirm and free it before returning.
+  */
+ ptr = vs->vws->resource_map(vs->vws, vbuf->base.hw_res);
+ if (!ptr) {
+ return NULL;
+ }
+
+ trans->offset = offset;
+ *transfer = &trans->base;
+ /* NOTE(review): arithmetic on a void pointer (compiler extension, not ISO C). */
+ return ptr + trans->offset;
+}
+/*
+ * Finish a buffer transfer. If the transfer was mapped with
+ * PIPE_TRANSFER_WRITE and without PIPE_TRANSFER_FLUSH_EXPLICIT, the buffer
+ * is marked not clean, vctx->num_transfers is bumped and the mapped box is
+ * sent through the winsys transfer_put(). The transfer object is always
+ * returned to vctx->texture_transfer_pool.
+ */
+static void virgl_buffer_transfer_unmap(struct pipe_context *ctx,
+ struct pipe_transfer *transfer)
+{
+ struct virgl_context *vctx = virgl_context(ctx);
+ struct virgl_transfer *trans = virgl_transfer(transfer);
+ struct virgl_buffer *vbuf = virgl_buffer(transfer->resource);
+ /* Explicit-flush writes are skipped here; presumably they are handled
+  * through the transfer_flush_region hook instead - verify.
+  */
+ if (trans->base.usage & PIPE_TRANSFER_WRITE) {
+ if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+ struct virgl_screen *vs = virgl_screen(ctx->screen);
+ vbuf->base.clean = FALSE;
+ vctx->num_transfers++;
+ vs->vws->transfer_put(vs->vws, vbuf->base.hw_res,
+ &transfer->box, trans->base.stride, trans->base.layer_stride, trans->offset, transfer->level);
+
+ }
+ }
+
+ util_slab_free(&vctx->texture_transfer_pool, trans);
+}
+/*
+ * Note that a sub-range of a mapped buffer was written. The buffer is put
+ * on vctx->to_flush_bufs (once, guarded by on_list), the written range is
+ * merged into valid_buffer_range, and the buffer is marked not clean.
+ * `box` is relative to the transfer's own box, hence transfer->box.x + box->x.
+ */
+static void virgl_buffer_transfer_flush_region(struct pipe_context *ctx,
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box)
+{
+ struct virgl_context *vctx = virgl_context(ctx);
+ struct virgl_buffer *vbuf = virgl_buffer(transfer->resource);
+ /* NOTE(review): the reference taken into the local `res` below is not
+  * released in this function; presumably whoever drains to_flush_bufs
+  * drops it - confirm against that code.
+  */
+ if (!vbuf->on_list) {
+ struct pipe_resource *res = NULL;
+
+ list_addtail(&vbuf->flush_list, &vctx->to_flush_bufs);
+ vbuf->on_list = TRUE;
+ pipe_resource_reference(&res, &vbuf->base.u.b);
+ }
+
+ util_range_add(&vbuf->valid_buffer_range, transfer->box.x + box->x,
+ transfer->box.x + box->x + box->width);
+
+ vbuf->base.clean = FALSE;
+}
+/*
+ * Resource vtable for virgl buffers. The initializers are positional; the
+ * trailing comment on each line names the slot it is meant to fill.
+ * virgl_buffer_create() stores a pointer to this table in base.u.vtbl.
+ */
+static const struct u_resource_vtbl virgl_buffer_vtbl =
+{
+ u_default_resource_get_handle, /* get_handle */
+ virgl_buffer_destroy, /* resource_destroy */
+ virgl_buffer_transfer_map, /* transfer_map */
+ virgl_buffer_transfer_flush_region, /* transfer_flush_region */
+ virgl_buffer_transfer_unmap, /* transfer_unmap */
+ virgl_transfer_inline_write /* transfer_inline_write */
+};
+/*
+ * Create a virgl buffer resource from `template`.
+ *
+ * Allocates a zeroed virgl_buffer, copies the template into it, points it
+ * at screen `vs` and at virgl_buffer_vtbl, sets its reference count to 1,
+ * and asks the winsys for a backing hw resource using template->width0 as
+ * both the width and the size argument. Returns the embedded pipe_resource.
+ */
+struct pipe_resource *virgl_buffer_create(struct virgl_screen *vs,
+ const struct pipe_resource *template)
+{
+ struct virgl_buffer *buf;
+ uint32_t size;
+ uint32_t vbind;
+ buf = CALLOC_STRUCT(virgl_buffer); /* NOTE(review): dereferenced below without a NULL check */
+ buf->base.clean = TRUE;
+ buf->base.u.b = *template;
+ buf->base.u.b.screen = &vs->base;
+ buf->base.u.vtbl = &virgl_buffer_vtbl;
+ pipe_reference_init(&buf->base.u.b.reference, 1);
+ util_range_init(&buf->valid_buffer_range);
+ /* Translate the gallium bind flags with pipe_to_virgl_bind(). */
+ vbind = pipe_to_virgl_bind(template->bind);
+ size = template->width0;
+ /* NOTE(review): the resource_create() result is stored unchecked. */
+ buf->base.hw_res = vs->vws->resource_create(vs->vws, template->target, template->format, vbind, template->width0, 1, 1, 1, 0, 0, size);
+ /* NOTE(review): possibly redundant right after util_range_init() - confirm. */
+ util_range_set_empty(&buf->valid_buffer_range);
+ return &buf->base.u.b;
+}
diff --git a/src/gallium/drivers/virgl/virgl_context.c b/src/gallium/drivers/virgl/virgl_context.c
new file mode 100644
index 00000000000..e4f02ba1096
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_context.c
@@ -0,0 +1,963 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_shader_tokens.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_transfer.h"
+#include "util/u_helpers.h"
+#include "util/u_slab.h"
+#include "util/u_upload_mgr.h"
+#include "util/u_blitter.h"
+#include "tgsi/tgsi_text.h"
+#include "indices/u_primconvert.h"
+
+#include "pipebuffer/pb_buffer.h"
+#include "state_tracker/graw.h"
+
+#include "virgl_encode.h"
+#include "virgl_context.h"
+#include "virgl_protocol.h"
+#include "virgl_resource.h"
+#include "virgl_screen.h"
+
+/* Last object handle handed out; starts at zero. */
+static uint32_t next_handle;
+
+/*
+ * Return a fresh object handle. The counter is bumped before it is
+ * returned, so the first handle ever issued is 1.
+ */
+uint32_t virgl_object_assign_handle(void)
+{
+   next_handle += 1;
+   return next_handle;
+}
+
+/*
+ * Push the accumulated valid range of "vbuf" through the winsys
+ * transfer_put hook, count the transfer on the context and reset the
+ * range to empty. The buffer must currently be on the flush list.
+ */
+static void virgl_buffer_flush(struct virgl_context *vctx,
+                               struct virgl_buffer *vbuf)
+{
+   struct virgl_screen *screen = virgl_screen(vctx->base.screen);
+   struct pipe_box region;
+
+   assert(vbuf->on_list);
+
+   /* one-dimensional box covering the dirty span, clamped to the buffer size */
+   region.x = vbuf->valid_buffer_range.start;
+   region.y = 0;
+   region.z = 0;
+   region.width = MIN2(vbuf->valid_buffer_range.end - vbuf->valid_buffer_range.start,
+                       vbuf->base.u.b.width0);
+   region.height = 1;
+   region.depth = 1;
+
+   vctx->num_transfers++;
+   screen->vws->transfer_put(screen->vws, vbuf->base.hw_res,
+                             &region, 0, 0, region.x, 0);
+
+   util_range_set_empty(&vbuf->valid_buffer_range);
+}
+
+/*
+ * Emit the hw resources behind the current framebuffer attachments
+ * (depth/stencil first, then each colour buffer) into the command buffer.
+ */
+static void virgl_attach_res_framebuffer(struct virgl_context *vctx)
+{
+   struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+   struct pipe_surface *zs = vctx->framebuffer.zsbuf;
+   unsigned idx;
+
+   if (zs) {
+      struct virgl_resource *zs_res = virgl_resource(zs->texture);
+
+      if (zs_res)
+         vws->emit_res(vws, vctx->cbuf, zs_res->hw_res, FALSE);
+   }
+
+   for (idx = 0; idx < vctx->framebuffer.nr_cbufs; idx++) {
+      struct pipe_surface *cb = vctx->framebuffer.cbufs[idx];
+      struct virgl_resource *cb_res;
+
+      if (!cb)
+         continue;
+
+      cb_res = virgl_resource(cb->texture);
+      if (cb_res)
+         vws->emit_res(vws, vctx->cbuf, cb_res->hw_res, FALSE);
+   }
+}
+
+/*
+ * Emit the hw resource of every enabled sampler view of the given shader
+ * stage into the command buffer.
+ */
+static void virgl_attach_res_sampler_views(struct virgl_context *vctx,
+                                           unsigned shader_type)
+{
+   struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+   struct virgl_textures_info *stage = &vctx->samplers[shader_type];
+   uint32_t pending;
+
+   for (pending = stage->enabled_mask; pending; ) {
+      unsigned slot = u_bit_scan(&pending);
+      struct virgl_resource *tex;
+
+      assert(stage->views[slot]);
+
+      tex = virgl_resource(stage->views[slot]->base.texture);
+      if (tex)
+         vws->emit_res(vws, vctx->cbuf, tex->hw_res, FALSE);
+   }
+}
+
+/* Emit the hw resource of each bound vertex buffer into the command buffer. */
+static void virgl_attach_res_vertex_buffers(struct virgl_context *vctx)
+{
+   struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+   unsigned slot = 0;
+
+   while (slot < vctx->num_vertex_buffers) {
+      struct virgl_resource *vb = virgl_resource(vctx->vertex_buffer[slot].buffer);
+
+      if (vb)
+         vws->emit_res(vws, vctx->cbuf, vb->hw_res, FALSE);
+      slot++;
+   }
+}
+
+/* Emit the hw resource of the saved index buffer, if one is set. */
+static void virgl_attach_res_index_buffer(struct virgl_context *vctx)
+{
+   struct virgl_resource *ib_res = virgl_resource(vctx->index_buffer.buffer);
+
+   if (ib_res) {
+      struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+
+      vws->emit_res(vws, vctx->cbuf, ib_res->hw_res, FALSE);
+   }
+}
+
+/* Emit the hw resource of each stream-output target buffer. */
+static void virgl_attach_res_so_targets(struct virgl_context *vctx)
+{
+   struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+   unsigned slot = 0;
+
+   while (slot < vctx->num_so_targets) {
+      struct virgl_resource *so = virgl_resource(vctx->so_targets[slot].base.buffer);
+
+      if (so)
+         vws->emit_res(vws, vctx->cbuf, so->hw_res, FALSE);
+      slot++;
+   }
+}
+
+/*
+ * Emit the hw resource of every resource-backed constant buffer slot of
+ * the given shader stage; empty slots are skipped.
+ */
+static void virgl_attach_res_uniform_buffers(struct virgl_context *vctx,
+                                             unsigned shader_type)
+{
+   struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+   unsigned slot;
+
+   for (slot = 0; slot < PIPE_MAX_CONSTANT_BUFFERS; slot++) {
+      struct virgl_resource *ubo = virgl_resource(vctx->ubos[shader_type][slot]);
+
+      if (!ubo)
+         continue;
+      vws->emit_res(vws, vctx->cbuf, ubo->hw_res, FALSE);
+   }
+}
+
+/*
+ * After a flush the hw context still has a number of resources bound, so
+ * they have to be attached to the command buffer again: framebuffer,
+ * per-stage sampler views and uniform buffers, then the index, vertex and
+ * stream-output buffers.
+ */
+static void virgl_reemit_res(struct virgl_context *vctx)
+{
+   unsigned stage = 0;
+
+   virgl_attach_res_framebuffer(vctx);
+
+   while (stage < PIPE_SHADER_TYPES) {
+      virgl_attach_res_sampler_views(vctx, stage);
+      virgl_attach_res_uniform_buffers(vctx, stage);
+      stage++;
+   }
+
+   virgl_attach_res_index_buffer(vctx);
+   virgl_attach_res_vertex_buffers(vctx);
+   virgl_attach_res_so_targets(vctx);
+}
+
+/*
+ * create_surface hook.
+ *
+ * Allocates a virgl_surface, marks the underlying resource as not clean,
+ * fills in the pipe_surface from "templ" (texture or buffer variant
+ * depending on the resource target) and encodes the surface object under
+ * a newly assigned handle. Returns NULL if the allocation fails.
+ */
+static struct pipe_surface *virgl_create_surface(struct pipe_context *ctx,
+                                                struct pipe_resource *resource,
+                                                const struct pipe_surface *templ)
+{
+   struct virgl_resource *vres = virgl_resource(resource);
+   struct virgl_surface *vsurf = CALLOC_STRUCT(virgl_surface);
+   struct pipe_surface *psurf;
+   uint32_t handle;
+
+   if (!vsurf)
+      return NULL;
+   psurf = &vsurf->base;
+
+   vres->clean = FALSE;
+   handle = virgl_object_assign_handle();
+
+   pipe_reference_init(&psurf->reference, 1);
+   pipe_resource_reference(&psurf->texture, resource);
+   psurf->context = ctx;
+   psurf->format = templ->format;
+
+   if (resource->target == PIPE_BUFFER) {
+      psurf->width = templ->u.buf.last_element - templ->u.buf.first_element + 1;
+      psurf->height = resource->height0;
+      psurf->u.buf.first_element = templ->u.buf.first_element;
+      psurf->u.buf.last_element = templ->u.buf.last_element;
+   } else {
+      psurf->width = u_minify(resource->width0, templ->u.tex.level);
+      psurf->height = u_minify(resource->height0, templ->u.tex.level);
+      psurf->u.tex.level = templ->u.tex.level;
+      psurf->u.tex.first_layer = templ->u.tex.first_layer;
+      psurf->u.tex.last_layer = templ->u.tex.last_layer;
+   }
+
+   virgl_encoder_create_surface(virgl_context(ctx), handle, vres, psurf);
+   vsurf->handle = handle;
+   return psurf;
+}
+
+/*
+ * surface_destroy hook: drop the texture reference, encode deletion of
+ * the surface object and free the wrapper.
+ */
+static void virgl_surface_destroy(struct pipe_context *ctx,
+                                  struct pipe_surface *psurf)
+{
+   struct virgl_surface *vsurf = virgl_surface(psurf);
+
+   pipe_resource_reference(&vsurf->base.texture, NULL);
+   virgl_encode_delete_object(virgl_context(ctx), vsurf->handle,
+                              VIRGL_OBJECT_SURFACE);
+   FREE(vsurf);
+}
+
+/*
+ * Encode a blend state object under a new handle. The handle itself,
+ * packed into a pointer, is the CSO returned to the caller.
+ */
+static void *virgl_create_blend_state(struct pipe_context *ctx,
+                                      const struct pipe_blend_state *blend_state)
+{
+   const uint32_t handle = virgl_object_assign_handle();
+
+   virgl_encode_blend_state(virgl_context(ctx), handle, blend_state);
+   return (void *)(unsigned long)handle;
+}
+
+/* Bind the blend object whose handle is packed in "blend_state". */
+static void virgl_bind_blend_state(struct pipe_context *ctx,
+                                   void *blend_state)
+{
+   const uint32_t handle = (unsigned long)blend_state;
+
+   virgl_encode_bind_object(virgl_context(ctx), handle, VIRGL_OBJECT_BLEND);
+}
+
+/* Encode deletion of the blend object whose handle is packed in "blend_state". */
+static void virgl_delete_blend_state(struct pipe_context *ctx,
+                                     void *blend_state)
+{
+   const uint32_t handle = (unsigned long)blend_state;
+
+   virgl_encode_delete_object(virgl_context(ctx), handle, VIRGL_OBJECT_BLEND);
+}
+
+/*
+ * Encode a depth/stencil/alpha state object under a new handle and return
+ * the handle packed into a pointer. (The parameter is named blend_state
+ * but carries the DSA state.)
+ */
+static void *virgl_create_depth_stencil_alpha_state(struct pipe_context *ctx,
+                                                    const struct pipe_depth_stencil_alpha_state *blend_state)
+{
+   const uint32_t handle = virgl_object_assign_handle();
+
+   virgl_encode_dsa_state(virgl_context(ctx), handle, blend_state);
+   return (void *)(unsigned long)handle;
+}
+
+/* Bind the DSA object whose handle is packed in "blend_state". */
+static void virgl_bind_depth_stencil_alpha_state(struct pipe_context *ctx,
+                                                 void *blend_state)
+{
+   const uint32_t handle = (unsigned long)blend_state;
+
+   virgl_encode_bind_object(virgl_context(ctx), handle, VIRGL_OBJECT_DSA);
+}
+
+/* Encode deletion of the DSA object whose handle is packed in "dsa_state". */
+static void virgl_delete_depth_stencil_alpha_state(struct pipe_context *ctx,
+                                                   void *dsa_state)
+{
+   const uint32_t handle = (unsigned long)dsa_state;
+
+   virgl_encode_delete_object(virgl_context(ctx), handle, VIRGL_OBJECT_DSA);
+}
+
+/*
+ * Encode a rasterizer state object under a new handle and return the
+ * handle packed into a pointer.
+ */
+static void *virgl_create_rasterizer_state(struct pipe_context *ctx,
+                                           const struct pipe_rasterizer_state *rs_state)
+{
+   const uint32_t handle = virgl_object_assign_handle();
+
+   virgl_encode_rasterizer_state(virgl_context(ctx), handle, rs_state);
+   return (void *)(unsigned long)handle;
+}
+
+/* Bind the rasterizer object whose handle is packed in "rs_state". */
+static void virgl_bind_rasterizer_state(struct pipe_context *ctx,
+                                        void *rs_state)
+{
+   const uint32_t handle = (unsigned long)rs_state;
+
+   virgl_encode_bind_object(virgl_context(ctx), handle, VIRGL_OBJECT_RASTERIZER);
+}
+
+/* Encode deletion of the rasterizer object whose handle is packed in "rs_state". */
+static void virgl_delete_rasterizer_state(struct pipe_context *ctx,
+                                          void *rs_state)
+{
+   const uint32_t handle = (unsigned long)rs_state;
+
+   virgl_encode_delete_object(virgl_context(ctx), handle, VIRGL_OBJECT_RASTERIZER);
+}
+
+/*
+ * set_framebuffer_state hook: keep a plain copy of the state on the
+ * context, encode it, then attach the resources of its surfaces.
+ */
+static void virgl_set_framebuffer_state(struct pipe_context *ctx,
+                                        const struct pipe_framebuffer_state *state)
+{
+   struct virgl_context *virgl = virgl_context(ctx);
+
+   virgl->framebuffer = *state;
+   virgl_encoder_set_framebuffer_state(virgl, state);
+   virgl_attach_res_framebuffer(virgl);
+}
+
+/* set_viewport_states hook: forwards the viewport array to the encoder. */
+static void virgl_set_viewport_states(struct pipe_context *ctx,
+                                      unsigned start_slot,
+                                      unsigned num_viewports,
+                                      const struct pipe_viewport_state *state)
+{
+   virgl_encoder_set_viewport_states(virgl_context(ctx), start_slot,
+                                     num_viewports, state);
+}
+
+/*
+ * Encode a vertex-elements object under a new handle and return the
+ * handle packed into a pointer.
+ */
+static void *virgl_create_vertex_elements_state(struct pipe_context *ctx,
+                                                unsigned num_elements,
+                                                const struct pipe_vertex_element *elements)
+{
+   const uint32_t handle = virgl_object_assign_handle();
+
+   virgl_encoder_create_vertex_elements(virgl_context(ctx), handle,
+                                        num_elements, elements);
+   return (void *)(unsigned long)handle;
+}
+
+/* Encode deletion of the vertex-elements object whose handle is packed in "ve". */
+static void virgl_delete_vertex_elements_state(struct pipe_context *ctx,
+                                               void *ve)
+{
+   const uint32_t handle = (unsigned long)ve;
+
+   virgl_encode_delete_object(virgl_context(ctx), handle,
+                              VIRGL_OBJECT_VERTEX_ELEMENTS);
+}
+
+/* Bind the vertex-elements object whose handle is packed in "ve". */
+static void virgl_bind_vertex_elements_state(struct pipe_context *ctx,
+                                             void *ve)
+{
+   const uint32_t handle = (unsigned long)ve;
+
+   virgl_encode_bind_object(virgl_context(ctx), handle,
+                            VIRGL_OBJECT_VERTEX_ELEMENTS);
+}
+
+/*
+ * set_vertex_buffers hook: update the context's saved vertex buffer
+ * array and flag it dirty; nothing is encoded here.
+ */
+static void virgl_set_vertex_buffers(struct pipe_context *ctx,
+                                     unsigned start_slot,
+                                     unsigned num_buffers,
+                                     const struct pipe_vertex_buffer *buffers)
+{
+   struct virgl_context *virgl = virgl_context(ctx);
+
+   util_set_vertex_buffers_count(virgl->vertex_buffer,
+                                 &virgl->num_vertex_buffers,
+                                 buffers, start_slot, num_buffers);
+   virgl->vertex_array_dirty = TRUE;
+}
+
+/*
+ * Encode the saved vertex buffer bindings if they changed since the last
+ * time they were encoded, and attach their resources to the command
+ * buffer.
+ *
+ * The dirty flag is cleared once the bindings have been emitted so that
+ * later draws do not re-encode an unchanged array. Resources are attached
+ * again after a flush by virgl_reemit_res().
+ */
+static void virgl_hw_set_vertex_buffers(struct pipe_context *ctx)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+
+   if (!vctx->vertex_array_dirty)
+      return;
+
+   virgl_encoder_set_vertex_buffers(vctx, vctx->num_vertex_buffers,
+                                    vctx->vertex_buffer);
+   virgl_attach_res_vertex_buffers(vctx);
+   vctx->vertex_array_dirty = FALSE;
+}
+
+/* set_stencil_ref hook: forwards the reference values to the encoder. */
+static void virgl_set_stencil_ref(struct pipe_context *ctx,
+                                  const struct pipe_stencil_ref *ref)
+{
+   virgl_encoder_set_stencil_ref(virgl_context(ctx), ref);
+}
+
+/* set_blend_color hook: forwards the blend colour to the encoder. */
+static void virgl_set_blend_color(struct pipe_context *ctx,
+                                  const struct pipe_blend_color *color)
+{
+   virgl_encoder_set_blend_color(virgl_context(ctx), color);
+}
+
+/*
+ * set_index_buffer hook: save the index buffer description on the
+ * context, holding a reference on its resource. Passing NULL only drops
+ * the saved resource reference; the other saved fields are left as they
+ * were.
+ */
+static void virgl_set_index_buffer(struct pipe_context *ctx,
+                                   const struct pipe_index_buffer *ib)
+{
+   struct pipe_index_buffer *saved = &virgl_context(ctx)->index_buffer;
+
+   if (!ib) {
+      pipe_resource_reference(&saved->buffer, NULL);
+      return;
+   }
+
+   /* take the reference first, then copy the remaining fields wholesale */
+   pipe_resource_reference(&saved->buffer, ib->buffer);
+   memcpy(saved, ib, sizeof(*ib));
+}
+
+/*
+ * Encode index buffer "ib" and attach the resource of the context's saved
+ * index buffer to the command buffer.
+ */
+static void virgl_hw_set_index_buffer(struct pipe_context *ctx,
+                                      struct pipe_index_buffer *ib)
+{
+   struct virgl_context *virgl = virgl_context(ctx);
+
+   virgl_encoder_set_index_buffer(virgl, ib);
+   virgl_attach_res_index_buffer(virgl);
+}
+
+/*
+ * set_constant_buffer hook.
+ *
+ *  - buf == NULL:       write an empty constant buffer and drop the slot's
+ *                       resource reference.
+ *  - user buffer:       drop the slot's resource reference and write the
+ *                       user data inline (size passed as buffer_size / 4).
+ *  - resource buffer:   encode a uniform buffer binding and keep a
+ *                       reference to the resource in the slot.
+ */
+static void virgl_set_constant_buffer(struct pipe_context *ctx,
+                                      uint shader, uint index,
+                                      struct pipe_constant_buffer *buf)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct pipe_resource **slot = &vctx->ubos[shader][index];
+
+   if (!buf) {
+      virgl_encoder_write_constant_buffer(vctx, shader, index, 0, NULL);
+      pipe_resource_reference(slot, NULL);
+      return;
+   }
+
+   if (buf->user_buffer) {
+      pipe_resource_reference(slot, NULL);
+      virgl_encoder_write_constant_buffer(vctx, shader, index,
+                                          buf->buffer_size / 4,
+                                          buf->user_buffer);
+      return;
+   }
+
+   virgl_encoder_set_uniform_buffer(vctx, shader, index, buf->buffer_offset,
+                                    buf->buffer_size,
+                                    virgl_resource(buf->buffer));
+   pipe_resource_reference(slot, buf->buffer);
+}
+
+/*
+ * transfer_inline_write hook shared by the resource vtables.
+ *
+ * Marks the resource as not clean, flushes the context and waits on the
+ * hw resource when virgl_res_needs_flush_wait() says so, then encodes the
+ * data as an inline write.
+ */
+void virgl_transfer_inline_write(struct pipe_context *ctx,
+                                 struct pipe_resource *res,
+                                 unsigned level,
+                                 unsigned usage,
+                                 const struct pipe_box *box,
+                                 const void *data,
+                                 unsigned stride,
+                                 unsigned layer_stride)
+{
+   struct virgl_resource *vres = virgl_resource(res);
+   struct virgl_buffer *vbuf = virgl_buffer(res);
+   struct virgl_context *vctx = virgl_context(ctx);
+
+   vres->clean = FALSE;
+
+   if (virgl_res_needs_flush_wait(vctx, &vbuf->base, usage)) {
+      struct virgl_screen *screen = virgl_screen(ctx->screen);
+
+      ctx->flush(ctx, NULL, 0);
+      screen->vws->resource_wait(screen->vws, vbuf->base.hw_res);
+   }
+
+   virgl_encoder_inline_write(vctx, vres, level, usage,
+                              box, data, stride, layer_stride);
+}
+
+/*
+ * Common helper behind the create_{vs,gs,fs}_state hooks.
+ *
+ * Runs the shader tokens through virgl_tgsi_transform(), encodes the
+ * result as a shader object of the given type under a new handle and
+ * returns the handle packed into a pointer.
+ *
+ * Returns NULL if the transform fails or the encoder reports an error.
+ * The transformed token copy is released on every path; it used to leak
+ * when encoding failed.
+ */
+static void *virgl_shader_encoder(struct pipe_context *ctx,
+                                  const struct pipe_shader_state *shader,
+                                  unsigned type)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   uint32_t handle;
+   struct tgsi_token *new_tokens;
+   int ret;
+
+   new_tokens = virgl_tgsi_transform(shader->tokens);
+   if (!new_tokens)
+      return NULL;
+
+   handle = virgl_object_assign_handle();
+   /* encode shader state */
+   ret = virgl_encode_shader_state(vctx, handle, type,
+                                   &shader->stream_output,
+                                   new_tokens);
+
+   /* the transformed tokens are only needed for the encode call */
+   FREE(new_tokens);
+
+   if (ret)
+      return NULL;
+
+   return (void *)(unsigned long)handle;
+}
+/* create_vs_state hook: encode "shader" as a vertex shader object. */
+static void *virgl_create_vs_state(struct pipe_context *ctx,
+                                   const struct pipe_shader_state *shader)
+{
+   void *cso = virgl_shader_encoder(ctx, shader, PIPE_SHADER_VERTEX);
+
+   return cso;
+}
+
+/* create_gs_state hook: encode "shader" as a geometry shader object. */
+static void *virgl_create_gs_state(struct pipe_context *ctx,
+                                   const struct pipe_shader_state *shader)
+{
+   void *cso = virgl_shader_encoder(ctx, shader, PIPE_SHADER_GEOMETRY);
+
+   return cso;
+}
+
+/* create_fs_state hook: encode "shader" as a fragment shader object. */
+static void *virgl_create_fs_state(struct pipe_context *ctx,
+                                   const struct pipe_shader_state *shader)
+{
+   void *cso = virgl_shader_encoder(ctx, shader, PIPE_SHADER_FRAGMENT);
+
+   return cso;
+}
+
+/* Encode deletion of the fragment shader object whose handle is packed in "fs". */
+static void
+virgl_delete_fs_state(struct pipe_context *ctx,
+                      void *fs)
+{
+   const uint32_t shader_handle = (unsigned long)fs;
+
+   virgl_encode_delete_object(virgl_context(ctx), shader_handle,
+                              VIRGL_OBJECT_SHADER);
+}
+
+/* Encode deletion of the geometry shader object whose handle is packed in "gs". */
+static void
+virgl_delete_gs_state(struct pipe_context *ctx,
+                      void *gs)
+{
+   const uint32_t shader_handle = (unsigned long)gs;
+
+   virgl_encode_delete_object(virgl_context(ctx), shader_handle,
+                              VIRGL_OBJECT_SHADER);
+}
+
+/* Encode deletion of the vertex shader object whose handle is packed in "vs". */
+static void
+virgl_delete_vs_state(struct pipe_context *ctx,
+                      void *vs)
+{
+   const uint32_t shader_handle = (unsigned long)vs;
+
+   virgl_encode_delete_object(virgl_context(ctx), shader_handle,
+                              VIRGL_OBJECT_SHADER);
+}
+
+/* Bind the shader whose handle is packed in "vss" to the vertex stage. */
+static void virgl_bind_vs_state(struct pipe_context *ctx,
+                                void *vss)
+{
+   const uint32_t shader_handle = (unsigned long)vss;
+
+   virgl_encode_bind_shader(virgl_context(ctx), shader_handle,
+                            PIPE_SHADER_VERTEX);
+}
+
+/* Bind the shader whose handle is packed in "vss" to the geometry stage. */
+static void virgl_bind_gs_state(struct pipe_context *ctx,
+                                void *vss)
+{
+   const uint32_t shader_handle = (unsigned long)vss;
+
+   virgl_encode_bind_shader(virgl_context(ctx), shader_handle,
+                            PIPE_SHADER_GEOMETRY);
+}
+
+
+/* Bind the shader whose handle is packed in "vss" to the fragment stage. */
+static void virgl_bind_fs_state(struct pipe_context *ctx,
+                                void *vss)
+{
+   const uint32_t shader_handle = (unsigned long)vss;
+
+   virgl_encode_bind_shader(virgl_context(ctx), shader_handle,
+                            PIPE_SHADER_FRAGMENT);
+}
+
+/* clear hook: forwards the clear request unchanged to the encoder. */
+static void virgl_clear(struct pipe_context *ctx,
+                        unsigned buffers,
+                        const union pipe_color_union *color,
+                        double depth, unsigned stencil)
+{
+   virgl_encode_clear(virgl_context(ctx), buffers, color, depth, stencil);
+}
+
+/*
+ * draw_vbo hook.
+ *
+ * Primitive modes missing from the screen's prim_mask are handed to the
+ * primconvert helper. Otherwise, for indexed draws a local index buffer
+ * description is built from the saved one (user index data is pushed
+ * through the uploader), pending vertex/index buffer state is emitted and
+ * the draw itself is encoded.
+ */
+static void virgl_draw_vbo(struct pipe_context *ctx,
+                           const struct pipe_draw_info *dinfo)
+{
+   struct virgl_screen *screen = virgl_screen(ctx->screen);
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct pipe_draw_info info = *dinfo;
+   struct pipe_index_buffer ib = {0};
+   const boolean mode_supported =
+      (screen->caps.caps.v1.prim_mask & (1 << dinfo->mode)) != 0;
+
+   if (!mode_supported) {
+      util_primconvert_save_index_buffer(vctx->primconvert, &vctx->index_buffer);
+      util_primconvert_draw_vbo(vctx->primconvert, dinfo);
+      return;
+   }
+
+   if (info.indexed) {
+      const struct pipe_index_buffer *saved = &vctx->index_buffer;
+
+      pipe_resource_reference(&ib.buffer, saved->buffer);
+      ib.user_buffer = saved->user_buffer;
+      ib.index_size = saved->index_size;
+      ib.offset = saved->offset + info.start * ib.index_size;
+
+      if (ib.user_buffer) {
+         /* the uploader fills in the real buffer and offset */
+         u_upload_data(vctx->uploader, 0, info.count * ib.index_size,
+                       ib.user_buffer, &ib.offset, &ib.buffer);
+         ib.user_buffer = NULL;
+      }
+   }
+
+   u_upload_unmap(vctx->uploader);
+
+   vctx->num_draws++;
+   virgl_hw_set_vertex_buffers(ctx);
+   if (info.indexed)
+      virgl_hw_set_index_buffer(ctx, &ib);
+
+   virgl_encoder_draw_vbo(vctx, &info);
+
+   /* release the local reference taken for the draw */
+   pipe_resource_reference(&ib.buffer, NULL);
+}
+
+/*
+ * Submit the command buffer through the winsys, reset the per-submit
+ * counters, re-select this context's sub-context and re-attach all
+ * currently bound resources. "closure" is not used.
+ */
+static void virgl_flush_eq(struct virgl_context *ctx, void *closure)
+{
+   struct virgl_screen *screen = virgl_screen(ctx->base.screen);
+
+   ctx->num_draws = 0;
+   ctx->num_transfers = 0;
+
+   /* send the buffer to the remote side for decoding */
+   screen->vws->submit_cmd(screen->vws, ctx->cbuf);
+
+   virgl_encoder_set_sub_ctx(ctx, ctx->hw_sub_ctx_id);
+
+   /* the new command buffer needs the bound resources referenced again */
+   virgl_reemit_res(ctx);
+}
+
+/*
+ * pipe_context::flush hook.
+ *
+ * Optionally creates a fence, flushes every buffer queued on
+ * to_flush_bufs (removing it from the list and dropping one resource
+ * reference per entry), then submits the command buffer. "flags" is not
+ * consulted.
+ */
+static void virgl_flush_from_st(struct pipe_context *ctx,
+                                struct pipe_fence_handle **fence,
+                                enum pipe_flush_flags flags)
+{
+   struct virgl_screen *screen = virgl_screen(ctx->screen);
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct virgl_buffer *cur, *next;
+
+   if (fence)
+      *fence = screen->vws->cs_create_fence(screen->vws);
+
+   LIST_FOR_EACH_ENTRY_SAFE(cur, next, &vctx->to_flush_bufs, flush_list) {
+      struct pipe_resource *held = &cur->base.u.b;
+
+      virgl_buffer_flush(vctx, cur);
+      list_del(&cur->flush_list);
+      cur->on_list = FALSE;
+      pipe_resource_reference(&held, NULL);
+   }
+
+   virgl_flush_eq(vctx, vctx);
+}
+
+/*
+ * create_sampler_view hook.
+ *
+ * Encodes a sampler view object for "texture" under a new handle and
+ * returns a wrapper initialised from "state" that holds a reference on
+ * the texture. Returns NULL when "state" is NULL or allocation fails.
+ */
+static struct pipe_sampler_view *virgl_create_sampler_view(struct pipe_context *ctx,
+                                                          struct pipe_resource *texture,
+                                                          const struct pipe_sampler_view *state)
+{
+   struct virgl_sampler_view *view;
+   uint32_t handle;
+
+   if (!state)
+      return NULL;
+
+   view = CALLOC_STRUCT(virgl_sampler_view);
+   if (view == NULL)
+      return NULL;
+
+   handle = virgl_object_assign_handle();
+   virgl_encode_sampler_view(virgl_context(ctx), handle,
+                             virgl_resource(texture), state);
+
+   view->base = *state;
+   view->base.reference.count = 1;
+   view->base.context = ctx;
+
+   /* clear the copied pointer before taking our own reference */
+   view->base.texture = NULL;
+   pipe_resource_reference(&view->base.texture, texture);
+
+   view->handle = handle;
+   return &view->base;
+}
+
+/*
+ * set_sampler_views hook.
+ *
+ * Updates the per-stage table of bound sampler views and its enabled
+ * mask, encodes the resulting view list and attaches the resources of all
+ * enabled views.
+ *
+ * NOTE(review): tinfo->views[] is indexed with i, not start_slot + i;
+ * start_slot is only forwarded to the encoder. This looks like it assumes
+ * start_slot == 0 - confirm against callers.
+ * NOTE(review): views[] in struct virgl_textures_info has 16 entries and
+ * num_views is not range-checked here.
+ */
+static void virgl_set_sampler_views(struct pipe_context *ctx,
+ unsigned shader_type,
+ unsigned start_slot,
+ unsigned num_views,
+ struct pipe_sampler_view **views)
+{
+ struct virgl_context *vctx = virgl_context(ctx);
+ int i;
+ /* bits for every slot at or above num_views */
+ uint32_t disable_mask = ~((1ull << num_views) - 1);
+ struct virgl_textures_info *tinfo = &vctx->samplers[shader_type];
+ uint32_t new_mask = 0;
+ uint32_t remaining_mask;
+
+ /* previously enabled slots that fall outside the new range */
+ remaining_mask = tinfo->enabled_mask & disable_mask;
+
+ /* drop the references held for those slots */
+ while (remaining_mask) {
+ i = u_bit_scan(&remaining_mask);
+ assert(tinfo->views[i]);
+
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], NULL);
+ }
+
+ for (i = 0; i < num_views; i++) {
+ struct virgl_sampler_view *grview = virgl_sampler_view(views[i]);
+
+ /* unchanged slot: keep the existing reference and mask bit */
+ if (views[i] == (struct pipe_sampler_view *)tinfo->views[i])
+ continue;
+
+ if (grview) {
+ new_mask |= 1 << i;
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], views[i]);
+ } else {
+ /* NULL view: release the slot and mark it disabled */
+ pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], NULL);
+ disable_mask |= 1 << i;
+ }
+ }
+
+ /* clear disabled slots first, then add the newly bound ones */
+ tinfo->enabled_mask &= ~disable_mask;
+ tinfo->enabled_mask |= new_mask;
+ virgl_encode_set_sampler_views(vctx, shader_type, start_slot, num_views, tinfo->views);
+ virgl_attach_res_sampler_views(vctx, shader_type);
+}
+
+/*
+ * sampler_view_destroy hook: encode deletion of the view object, drop the
+ * texture reference and free the wrapper.
+ */
+static void virgl_destroy_sampler_view(struct pipe_context *ctx,
+                                       struct pipe_sampler_view *view)
+{
+   struct virgl_sampler_view *vview = virgl_sampler_view(view);
+
+   virgl_encode_delete_object(virgl_context(ctx), vview->handle,
+                              VIRGL_OBJECT_SAMPLER_VIEW);
+   pipe_resource_reference(&view->texture, NULL);
+   FREE(view);
+}
+
+/*
+ * Encode a sampler state object under a new handle and return the handle
+ * packed into a pointer.
+ */
+static void *virgl_create_sampler_state(struct pipe_context *ctx,
+                                        const struct pipe_sampler_state *state)
+{
+   const uint32_t handle = virgl_object_assign_handle();
+
+   virgl_encode_sampler_state(virgl_context(ctx), handle, state);
+   return (void *)(unsigned long)handle;
+}
+
+/* Encode deletion of the sampler state object whose handle is packed in "ss". */
+static void virgl_delete_sampler_state(struct pipe_context *ctx,
+                                       void *ss)
+{
+   const uint32_t handle = (unsigned long)ss;
+
+   virgl_encode_delete_object(virgl_context(ctx), handle,
+                              VIRGL_OBJECT_SAMPLER_STATE);
+}
+
+/*
+ * bind_sampler_states hook.
+ *
+ * Unpacks the handle stored in each sampler CSO pointer into a local
+ * array and encodes the binding for the given shader stage.
+ *
+ * The local array holds 32 handles; a larger num_samplers trips the
+ * assert in debug builds and is clamped otherwise, instead of writing
+ * past the end of the array.
+ */
+static void virgl_bind_sampler_states(struct pipe_context *ctx,
+                                      unsigned shader, unsigned start_slot,
+                                      unsigned num_samplers,
+                                      void **samplers)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   uint32_t handles[32];
+   const unsigned max_handles = sizeof(handles) / sizeof(handles[0]);
+   unsigned i;
+
+   assert(num_samplers <= max_handles);
+   if (num_samplers > max_handles)
+      num_samplers = max_handles;
+
+   for (i = 0; i < num_samplers; i++)
+      handles[i] = (unsigned long)(samplers[i]);
+
+   virgl_encode_bind_sampler_states(vctx, shader, start_slot, num_samplers, handles);
+}
+
+/* set_polygon_stipple hook: forwards the stipple pattern to the encoder. */
+static void virgl_set_polygon_stipple(struct pipe_context *ctx,
+                                      const struct pipe_poly_stipple *ps)
+{
+   virgl_encoder_set_polygon_stipple(virgl_context(ctx), ps);
+}
+
+/* set_scissor_states hook: forwards the scissor array to the encoder. */
+static void virgl_set_scissor_states(struct pipe_context *ctx,
+                                     unsigned start_slot,
+                                     unsigned num_scissor,
+                                     const struct pipe_scissor_state *ss)
+{
+   virgl_encoder_set_scissor_state(virgl_context(ctx), start_slot,
+                                   num_scissor, ss);
+}
+
+/* set_sample_mask hook: forwards the sample mask to the encoder. */
+static void virgl_set_sample_mask(struct pipe_context *ctx,
+                                  unsigned sample_mask)
+{
+   virgl_encoder_set_sample_mask(virgl_context(ctx), sample_mask);
+}
+
+/* set_clip_state hook: forwards the clip state to the encoder. */
+static void virgl_set_clip_state(struct pipe_context *ctx,
+                                 const struct pipe_clip_state *clip)
+{
+   virgl_encoder_set_clip_state(virgl_context(ctx), clip);
+}
+
+/*
+ * resource_copy_region hook: mark the destination as not clean and encode
+ * the copy.
+ */
+static void virgl_resource_copy_region(struct pipe_context *ctx,
+                                       struct pipe_resource *dst,
+                                       unsigned dst_level,
+                                       unsigned dstx, unsigned dsty, unsigned dstz,
+                                       struct pipe_resource *src,
+                                       unsigned src_level,
+                                       const struct pipe_box *src_box)
+{
+   struct virgl_resource *dst_res = virgl_resource(dst);
+   struct virgl_resource *src_res = virgl_resource(src);
+
+   dst_res->clean = FALSE;
+   virgl_encode_resource_copy_region(virgl_context(ctx), dst_res,
+                                     dst_level, dstx, dsty, dstz,
+                                     src_res, src_level,
+                                     src_box);
+}
+
+/* flush_resource hook: intentionally a no-op in this driver. */
+static void
+virgl_flush_resource(struct pipe_context *pipe,
+                     struct pipe_resource *resource)
+{
+   (void)pipe;
+   (void)resource;
+}
+
+/* blit hook: mark the destination as not clean and encode the blit. */
+static void virgl_blit(struct pipe_context *ctx,
+                       const struct pipe_blit_info *blit)
+{
+   struct virgl_resource *dst_res = virgl_resource(blit->dst.resource);
+   struct virgl_resource *src_res = virgl_resource(blit->src.resource);
+
+   dst_res->clean = FALSE;
+   virgl_encode_blit(virgl_context(ctx), dst_res, src_res, blit);
+}
+
+/*
+ * pipe_context::destroy hook.
+ *
+ * Clears the saved framebuffer attachments, encodes destruction of the
+ * sub-context and submits it, then tears down the command buffer,
+ * uploader, primconvert helper and transfer pool before freeing the
+ * context.
+ */
+static void
+virgl_context_destroy( struct pipe_context *ctx )
+{
+   struct virgl_screen *screen = virgl_screen(ctx->screen);
+   struct virgl_context *vctx = virgl_context(ctx);
+
+   /* nothing from the framebuffer is re-attached by the final flush */
+   vctx->framebuffer.nr_cbufs = 0;
+   vctx->framebuffer.zsbuf = NULL;
+
+   virgl_encoder_destroy_sub_ctx(vctx, vctx->hw_sub_ctx_id);
+   virgl_flush_eq(vctx, vctx);
+
+   screen->vws->cmd_buf_destroy(vctx->cbuf);
+
+   if (vctx->uploader)
+      u_upload_destroy(vctx->uploader);
+
+   util_primconvert_destroy(vctx->primconvert);
+   util_slab_destroy(&vctx->texture_transfer_pool);
+
+   FREE(vctx);
+}
+
+/*
+ * Create a virgl pipe_context on screen "pscreen".
+ *
+ * Allocates the context and its command buffer, installs all
+ * pipe_context hooks, creates the transfer pool, primconvert helper and
+ * index uploader, and finally creates and selects a hardware sub-context.
+ * "priv" and "flags" are not used.
+ *
+ * Returns the new context, or NULL on any allocation failure. Everything
+ * acquired before the failure is released again; previously the context
+ * allocation was dereferenced unchecked and the late failure path leaked
+ * the context, its command buffer, the slab pool and the primconvert
+ * helper.
+ */
+struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
+                                          void *priv,
+                                          unsigned flags)
+{
+   struct virgl_context *vctx;
+   struct virgl_screen *rs = virgl_screen(pscreen);
+
+   vctx = CALLOC_STRUCT(virgl_context);
+   if (!vctx)
+      return NULL;
+
+   vctx->cbuf = rs->vws->cmd_buf_create(rs->vws);
+   if (!vctx->cbuf) {
+      FREE(vctx);
+      return NULL;
+   }
+
+   vctx->base.destroy = virgl_context_destroy;
+   vctx->base.create_surface = virgl_create_surface;
+   vctx->base.surface_destroy = virgl_surface_destroy;
+   vctx->base.set_framebuffer_state = virgl_set_framebuffer_state;
+   vctx->base.create_blend_state = virgl_create_blend_state;
+   vctx->base.bind_blend_state = virgl_bind_blend_state;
+   vctx->base.delete_blend_state = virgl_delete_blend_state;
+   vctx->base.create_depth_stencil_alpha_state = virgl_create_depth_stencil_alpha_state;
+   vctx->base.bind_depth_stencil_alpha_state = virgl_bind_depth_stencil_alpha_state;
+   vctx->base.delete_depth_stencil_alpha_state = virgl_delete_depth_stencil_alpha_state;
+   vctx->base.create_rasterizer_state = virgl_create_rasterizer_state;
+   vctx->base.bind_rasterizer_state = virgl_bind_rasterizer_state;
+   vctx->base.delete_rasterizer_state = virgl_delete_rasterizer_state;
+
+   vctx->base.set_viewport_states = virgl_set_viewport_states;
+   vctx->base.create_vertex_elements_state = virgl_create_vertex_elements_state;
+   vctx->base.bind_vertex_elements_state = virgl_bind_vertex_elements_state;
+   vctx->base.delete_vertex_elements_state = virgl_delete_vertex_elements_state;
+   vctx->base.set_vertex_buffers = virgl_set_vertex_buffers;
+   vctx->base.set_index_buffer = virgl_set_index_buffer;
+   vctx->base.set_constant_buffer = virgl_set_constant_buffer;
+   vctx->base.transfer_inline_write = virgl_transfer_inline_write;
+
+   vctx->base.create_vs_state = virgl_create_vs_state;
+   vctx->base.create_gs_state = virgl_create_gs_state;
+   vctx->base.create_fs_state = virgl_create_fs_state;
+
+   vctx->base.bind_vs_state = virgl_bind_vs_state;
+   vctx->base.bind_gs_state = virgl_bind_gs_state;
+   vctx->base.bind_fs_state = virgl_bind_fs_state;
+
+   vctx->base.delete_vs_state = virgl_delete_vs_state;
+   vctx->base.delete_gs_state = virgl_delete_gs_state;
+   vctx->base.delete_fs_state = virgl_delete_fs_state;
+
+   vctx->base.clear = virgl_clear;
+   vctx->base.draw_vbo = virgl_draw_vbo;
+   vctx->base.flush = virgl_flush_from_st;
+   vctx->base.screen = pscreen;
+   vctx->base.create_sampler_view = virgl_create_sampler_view;
+   vctx->base.sampler_view_destroy = virgl_destroy_sampler_view;
+   vctx->base.set_sampler_views = virgl_set_sampler_views;
+
+   vctx->base.create_sampler_state = virgl_create_sampler_state;
+   vctx->base.delete_sampler_state = virgl_delete_sampler_state;
+   vctx->base.bind_sampler_states = virgl_bind_sampler_states;
+
+   vctx->base.set_polygon_stipple = virgl_set_polygon_stipple;
+   vctx->base.set_scissor_states = virgl_set_scissor_states;
+   vctx->base.set_sample_mask = virgl_set_sample_mask;
+   vctx->base.set_stencil_ref = virgl_set_stencil_ref;
+   vctx->base.set_clip_state = virgl_set_clip_state;
+
+   vctx->base.set_blend_color = virgl_set_blend_color;
+
+   vctx->base.resource_copy_region = virgl_resource_copy_region;
+   vctx->base.flush_resource = virgl_flush_resource;
+   vctx->base.blit = virgl_blit;
+
+   virgl_init_context_resource_functions(&vctx->base);
+   virgl_init_query_functions(vctx);
+   virgl_init_so_functions(vctx);
+
+   list_inithead(&vctx->to_flush_bufs);
+   util_slab_create(&vctx->texture_transfer_pool, sizeof(struct virgl_transfer),
+                    16, UTIL_SLAB_SINGLETHREADED);
+
+   vctx->primconvert = util_primconvert_create(&vctx->base, rs->caps.caps.v1.prim_mask);
+   if (!vctx->primconvert)
+      goto fail;
+
+   vctx->uploader = u_upload_create(&vctx->base, 1024 * 1024, 256,
+                                    PIPE_BIND_INDEX_BUFFER);
+   if (!vctx->uploader)
+      goto fail;
+
+   vctx->hw_sub_ctx_id = rs->sub_ctx_id++;
+   virgl_encoder_create_sub_ctx(vctx, vctx->hw_sub_ctx_id);
+
+   virgl_encoder_set_sub_ctx(vctx, vctx->hw_sub_ctx_id);
+   return &vctx->base;
+
+fail:
+   /*
+    * No sub-context exists yet, so nothing has to be encoded or
+    * submitted; only host-side objects need to be released.
+    */
+   if (vctx->uploader)
+      u_upload_destroy(vctx->uploader);
+   if (vctx->primconvert)
+      util_primconvert_destroy(vctx->primconvert);
+   util_slab_destroy(&vctx->texture_transfer_pool);
+   rs->vws->cmd_buf_destroy(vctx->cbuf);
+   FREE(vctx);
+   return NULL;
+}
diff --git a/src/gallium/drivers/virgl/virgl_context.h b/src/gallium/drivers/virgl/virgl_context.h
new file mode 100644
index 00000000000..adb8adef33c
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_context.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_CONTEXT_H
+#define VIRGL_CONTEXT_H
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "util/u_slab.h"
+#include "util/list.h"
+
+struct pipe_screen;
+struct tgsi_token;
+struct u_upload_mgr;
+struct virgl_cmd_buf;
+
+/*
+ * Driver sampler view: the gallium view followed by the handle of the
+ * encoded sampler-view object. "base" is the first member, so pointers
+ * are cast between the two types.
+ */
+struct virgl_sampler_view {
+ struct pipe_sampler_view base;
+ uint32_t handle;
+};
+
+/*
+ * Driver stream-output target: the gallium target followed by an object
+ * handle. "base" is the first member, so pointers are cast between the
+ * two types.
+ */
+struct virgl_so_target {
+ struct pipe_stream_output_target base;
+ uint32_t handle;
+};
+
+/*
+ * Sampler views bound to one shader stage. Bit i of enabled_mask is set
+ * while views[i] holds a bound view.
+ */
+struct virgl_textures_info {
+ struct virgl_sampler_view *views[16];
+ uint32_t enabled_mask;
+};
+
+/*
+ * Per-context driver state. "base" is the first member, so a
+ * pipe_context pointer is cast directly to this type.
+ */
+struct virgl_context {
+ struct pipe_context base;
+ /* command buffer obtained from the winsys; all encoding goes here */
+ struct virgl_cmd_buf *cbuf;
+
+ /* bound sampler views, one table per shader stage */
+ struct virgl_textures_info samplers[PIPE_SHADER_TYPES];
+
+ /* copy of the most recently set framebuffer state */
+ struct pipe_framebuffer_state framebuffer;
+
+ /* slab pool sized for struct virgl_transfer */
+ struct util_slab_mempool texture_transfer_pool;
+
+ /* saved index buffer and the uploader used for user index data */
+ struct pipe_index_buffer index_buffer;
+ struct u_upload_mgr *uploader;
+
+ /* saved vertex buffers; the dirty flag is set whenever they change */
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ unsigned num_vertex_buffers;
+ boolean vertex_array_dirty;
+
+ struct virgl_so_target so_targets[PIPE_MAX_SO_BUFFERS];
+ unsigned num_so_targets;
+
+ /* resource-backed constant buffers, referenced per stage and slot */
+ struct pipe_resource *ubos[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
+ /* counters reset to zero each time the command buffer is submitted */
+ int num_transfers;
+ int num_draws;
+ /* buffers with a pending flushed range, drained on flush */
+ struct list_head to_flush_bufs;
+
+ /* helper used for primitive modes missing from the screen's prim_mask */
+ struct primconvert_context *primconvert;
+ /* id of the sub-context created for this context */
+ uint32_t hw_sub_ctx_id;
+};
+
+/*
+ * Downcast a gallium sampler view to the driver type. Valid because
+ * struct virgl_sampler_view starts with its pipe_sampler_view base; a
+ * NULL input yields NULL.
+ */
+static inline struct virgl_sampler_view *
+virgl_sampler_view(struct pipe_sampler_view *view)
+{
+   return (struct virgl_sampler_view *)view;
+}
+
+/*
+ * Downcast a gallium stream-output target to the driver type; the base
+ * struct is the first member of struct virgl_so_target.
+ */
+static inline struct virgl_so_target *
+virgl_so_target(struct pipe_stream_output_target *target)
+{
+   struct virgl_so_target *vtarget = (struct virgl_so_target *)target;
+
+   return vtarget;
+}
+
+/*
+ * Downcast a gallium context to the driver type; the base struct is the
+ * first member of struct virgl_context.
+ */
+static inline struct virgl_context *virgl_context(struct pipe_context *ctx)
+{
+   struct virgl_context *vctx = (struct virgl_context *)ctx;
+
+   return vctx;
+}
+
+/* Create a virgl context on "pscreen"; returns NULL on failure. */
+struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
+ void *priv, unsigned flags);
+
+/* Hook installers implemented in other files of the driver. */
+void virgl_init_blit_functions(struct virgl_context *vctx);
+void virgl_init_query_functions(struct virgl_context *vctx);
+void virgl_init_so_functions(struct virgl_context *vctx);
+
+/*
+ * Inline-write hook shared between the context and the resource vtables:
+ * flushes and waits when the resource requires it, then encodes the data
+ * as an inline write.
+ */
+void virgl_transfer_inline_write(struct pipe_context *ctx,
+ struct pipe_resource *res,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned layer_stride);
+
+/*
+ * Produce a transformed copy of a TGSI token stream. The shader creation
+ * path frees the returned tokens with FREE() and treats NULL as failure.
+ */
+struct tgsi_token *virgl_tgsi_transform(const struct tgsi_token *tokens_in);
+
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_encode.c b/src/gallium/drivers/virgl/virgl_encode.c
new file mode 100644
index 00000000000..22fb5292819
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_encode.c
@@ -0,0 +1,867 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <stdint.h>
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "virgl_context.h"
+#include "virgl_encode.h"
+#include "virgl_protocol.h"
+#include "virgl_resource.h"
+#include "virgl_screen.h"
+
+/*
+ * Write a command header dword. The payload length is read from the upper
+ * 16 bits of the header; if header plus payload would exceed
+ * VIRGL_MAX_CMDBUF_DWORDS the context is flushed first.
+ * Always returns 0.
+ */
+static int virgl_encoder_write_cmd_dword(struct virgl_context *ctx,
+                                         uint32_t dword)
+{
+   const int payload_dwords = (dword >> 16);
+   const bool needs_flush =
+      (ctx->cbuf->cdw + payload_dwords + 1) > VIRGL_MAX_CMDBUF_DWORDS;
+
+   if (needs_flush)
+      ctx->base.flush(&ctx->base, NULL, 0);
+
+   virgl_encoder_write_dword(ctx->cbuf, dword);
+   return 0;
+}
+
+/*
+ * Emit a resource reference into the command buffer through the winsys.
+ * A NULL resource, or one without a hardware resource, is written as a
+ * zero dword instead.
+ */
+static void virgl_encoder_write_res(struct virgl_context *ctx,
+                                    struct virgl_resource *res)
+{
+   struct virgl_winsys *vws = virgl_screen(ctx->base.screen)->vws;
+   const bool have_hw_res = res && res->hw_res;
+
+   if (!have_hw_res) {
+      virgl_encoder_write_dword(ctx->cbuf, 0);
+      return;
+   }
+
+   vws->emit_res(vws, ctx->cbuf, res->hw_res, TRUE);
+}
+
+/*
+ * Encode a BIND_OBJECT command for the given object type.
+ * Payload: the object handle. Always returns 0.
+ */
+int virgl_encode_bind_object(struct virgl_context *ctx,
+                             uint32_t handle, uint32_t object)
+{
+   const uint32_t header = VIRGL_CMD0(VIRGL_CCMD_BIND_OBJECT, object, 1);
+
+   virgl_encoder_write_cmd_dword(ctx, header);
+   virgl_encoder_write_dword(ctx->cbuf, handle);
+   return 0;
+}
+
+/*
+ * Encode a DESTROY_OBJECT command for the given object type.
+ * Payload: the object handle. Always returns 0.
+ */
+int virgl_encode_delete_object(struct virgl_context *ctx,
+                               uint32_t handle, uint32_t object)
+{
+   const uint32_t header = VIRGL_CMD0(VIRGL_CCMD_DESTROY_OBJECT, object, 1);
+
+   virgl_encoder_write_cmd_dword(ctx, header);
+   virgl_encoder_write_dword(ctx->cbuf, handle);
+   return 0;
+}
+
+/*
+ * Encode a CREATE_OBJECT command for a blend state.
+ * Payload: handle, S0 (global enables), S1 (logic op function), then one
+ * S2 dword per render target for VIRGL_MAX_COLOR_BUFS targets.
+ * Always returns 0.
+ */
+int virgl_encode_blend_state(struct virgl_context *ctx,
+                             uint32_t handle,
+                             const struct pipe_blend_state *blend_state)
+{
+   struct virgl_cmd_buf *cbuf;
+   uint32_t s0, s1;
+   int rt_idx;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT,
+                                                 VIRGL_OBJECT_BLEND,
+                                                 VIRGL_OBJ_BLEND_SIZE));
+   cbuf = ctx->cbuf;
+   virgl_encoder_write_dword(cbuf, handle);
+
+   s0  = VIRGL_OBJ_BLEND_S0_INDEPENDENT_BLEND_ENABLE(blend_state->independent_blend_enable);
+   s0 |= VIRGL_OBJ_BLEND_S0_LOGICOP_ENABLE(blend_state->logicop_enable);
+   s0 |= VIRGL_OBJ_BLEND_S0_DITHER(blend_state->dither);
+   s0 |= VIRGL_OBJ_BLEND_S0_ALPHA_TO_COVERAGE(blend_state->alpha_to_coverage);
+   s0 |= VIRGL_OBJ_BLEND_S0_ALPHA_TO_ONE(blend_state->alpha_to_one);
+   virgl_encoder_write_dword(cbuf, s0);
+
+   s1 = VIRGL_OBJ_BLEND_S1_LOGICOP_FUNC(blend_state->logicop_func);
+   virgl_encoder_write_dword(cbuf, s1);
+
+   for (rt_idx = 0; rt_idx < VIRGL_MAX_COLOR_BUFS; rt_idx++) {
+      const struct pipe_rt_blend_state *rt = &blend_state->rt[rt_idx];
+      uint32_t s2;
+
+      s2  = VIRGL_OBJ_BLEND_S2_RT_BLEND_ENABLE(rt->blend_enable);
+      s2 |= VIRGL_OBJ_BLEND_S2_RT_RGB_FUNC(rt->rgb_func);
+      s2 |= VIRGL_OBJ_BLEND_S2_RT_RGB_SRC_FACTOR(rt->rgb_src_factor);
+      s2 |= VIRGL_OBJ_BLEND_S2_RT_RGB_DST_FACTOR(rt->rgb_dst_factor);
+      s2 |= VIRGL_OBJ_BLEND_S2_RT_ALPHA_FUNC(rt->alpha_func);
+      s2 |= VIRGL_OBJ_BLEND_S2_RT_ALPHA_SRC_FACTOR(rt->alpha_src_factor);
+      s2 |= VIRGL_OBJ_BLEND_S2_RT_ALPHA_DST_FACTOR(rt->alpha_dst_factor);
+      s2 |= VIRGL_OBJ_BLEND_S2_RT_COLORMASK(rt->colormask);
+      virgl_encoder_write_dword(cbuf, s2);
+   }
+   return 0;
+}
+
+/*
+ * Encode a CREATE_OBJECT command for a depth/stencil/alpha state.
+ * Payload: handle, S0 (depth and alpha test), one S1 dword for each of
+ * the two stencil states, then the alpha reference value as float bits.
+ * Always returns 0.
+ */
+int virgl_encode_dsa_state(struct virgl_context *ctx,
+                           uint32_t handle,
+                           const struct pipe_depth_stencil_alpha_state *dsa_state)
+{
+   struct virgl_cmd_buf *cbuf;
+   uint32_t s0;
+   int face;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT,
+                                                 VIRGL_OBJECT_DSA,
+                                                 VIRGL_OBJ_DSA_SIZE));
+   cbuf = ctx->cbuf;
+   virgl_encoder_write_dword(cbuf, handle);
+
+   s0  = VIRGL_OBJ_DSA_S0_DEPTH_ENABLE(dsa_state->depth.enabled);
+   s0 |= VIRGL_OBJ_DSA_S0_DEPTH_WRITEMASK(dsa_state->depth.writemask);
+   s0 |= VIRGL_OBJ_DSA_S0_DEPTH_FUNC(dsa_state->depth.func);
+   s0 |= VIRGL_OBJ_DSA_S0_ALPHA_ENABLED(dsa_state->alpha.enabled);
+   s0 |= VIRGL_OBJ_DSA_S0_ALPHA_FUNC(dsa_state->alpha.func);
+   virgl_encoder_write_dword(cbuf, s0);
+
+   for (face = 0; face < 2; face++) {
+      const struct pipe_stencil_state *st = &dsa_state->stencil[face];
+      uint32_t s1;
+
+      s1  = VIRGL_OBJ_DSA_S1_STENCIL_ENABLED(st->enabled);
+      s1 |= VIRGL_OBJ_DSA_S1_STENCIL_FUNC(st->func);
+      s1 |= VIRGL_OBJ_DSA_S1_STENCIL_FAIL_OP(st->fail_op);
+      s1 |= VIRGL_OBJ_DSA_S1_STENCIL_ZPASS_OP(st->zpass_op);
+      s1 |= VIRGL_OBJ_DSA_S1_STENCIL_ZFAIL_OP(st->zfail_op);
+      s1 |= VIRGL_OBJ_DSA_S1_STENCIL_VALUEMASK(st->valuemask);
+      s1 |= VIRGL_OBJ_DSA_S1_STENCIL_WRITEMASK(st->writemask);
+      virgl_encoder_write_dword(cbuf, s1);
+   }
+
+   virgl_encoder_write_dword(cbuf, fui(dsa_state->alpha.ref_value));
+   return 0;
+}
+/*
+ * Encode a CREATE_OBJECT command for a rasterizer state.
+ * Payload: handle followed by eight dwords S0..S7 (packed switches, point
+ * size, sprite coord enable, line stipple / clip plane bits, line width and
+ * the three polygon offset values). Floats are sent as their bit patterns.
+ * Always returns 0.
+ */
+int virgl_encode_rasterizer_state(struct virgl_context *ctx,
+                                  uint32_t handle,
+                                  const struct pipe_rasterizer_state *state)
+{
+   struct virgl_cmd_buf *cbuf;
+   uint32_t s0, s3;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT,
+                                                 VIRGL_OBJECT_RASTERIZER,
+                                                 VIRGL_OBJ_RS_SIZE));
+   cbuf = ctx->cbuf;
+   virgl_encoder_write_dword(cbuf, handle);
+
+   s0  = VIRGL_OBJ_RS_S0_FLATSHADE(state->flatshade);
+   s0 |= VIRGL_OBJ_RS_S0_DEPTH_CLIP(state->depth_clip);
+   s0 |= VIRGL_OBJ_RS_S0_CLIP_HALFZ(state->clip_halfz);
+   s0 |= VIRGL_OBJ_RS_S0_RASTERIZER_DISCARD(state->rasterizer_discard);
+   s0 |= VIRGL_OBJ_RS_S0_FLATSHADE_FIRST(state->flatshade_first);
+   s0 |= VIRGL_OBJ_RS_S0_LIGHT_TWOSIZE(state->light_twoside);
+   s0 |= VIRGL_OBJ_RS_S0_SPRITE_COORD_MODE(state->sprite_coord_mode);
+   s0 |= VIRGL_OBJ_RS_S0_POINT_QUAD_RASTERIZATION(state->point_quad_rasterization);
+   s0 |= VIRGL_OBJ_RS_S0_CULL_FACE(state->cull_face);
+   s0 |= VIRGL_OBJ_RS_S0_FILL_FRONT(state->fill_front);
+   s0 |= VIRGL_OBJ_RS_S0_FILL_BACK(state->fill_back);
+   s0 |= VIRGL_OBJ_RS_S0_SCISSOR(state->scissor);
+   s0 |= VIRGL_OBJ_RS_S0_FRONT_CCW(state->front_ccw);
+   s0 |= VIRGL_OBJ_RS_S0_CLAMP_VERTEX_COLOR(state->clamp_vertex_color);
+   s0 |= VIRGL_OBJ_RS_S0_CLAMP_FRAGMENT_COLOR(state->clamp_fragment_color);
+   s0 |= VIRGL_OBJ_RS_S0_OFFSET_LINE(state->offset_line);
+   s0 |= VIRGL_OBJ_RS_S0_OFFSET_POINT(state->offset_point);
+   s0 |= VIRGL_OBJ_RS_S0_OFFSET_TRI(state->offset_tri);
+   s0 |= VIRGL_OBJ_RS_S0_POLY_SMOOTH(state->poly_smooth);
+   s0 |= VIRGL_OBJ_RS_S0_POLY_STIPPLE_ENABLE(state->poly_stipple_enable);
+   s0 |= VIRGL_OBJ_RS_S0_POINT_SMOOTH(state->point_smooth);
+   s0 |= VIRGL_OBJ_RS_S0_POINT_SIZE_PER_VERTEX(state->point_size_per_vertex);
+   s0 |= VIRGL_OBJ_RS_S0_MULTISAMPLE(state->multisample);
+   s0 |= VIRGL_OBJ_RS_S0_LINE_SMOOTH(state->line_smooth);
+   s0 |= VIRGL_OBJ_RS_S0_LINE_STIPPLE_ENABLE(state->line_stipple_enable);
+   s0 |= VIRGL_OBJ_RS_S0_LINE_LAST_PIXEL(state->line_last_pixel);
+   s0 |= VIRGL_OBJ_RS_S0_HALF_PIXEL_CENTER(state->half_pixel_center);
+   s0 |= VIRGL_OBJ_RS_S0_BOTTOM_EDGE_RULE(state->bottom_edge_rule);
+
+   s3  = VIRGL_OBJ_RS_S3_LINE_STIPPLE_PATTERN(state->line_stipple_pattern);
+   s3 |= VIRGL_OBJ_RS_S3_LINE_STIPPLE_FACTOR(state->line_stipple_factor);
+   s3 |= VIRGL_OBJ_RS_S3_CLIP_PLANE_ENABLE(state->clip_plane_enable);
+
+   virgl_encoder_write_dword(cbuf, s0);                         /* S0 */
+   virgl_encoder_write_dword(cbuf, fui(state->point_size));     /* S1 */
+   virgl_encoder_write_dword(cbuf, state->sprite_coord_enable); /* S2 */
+   virgl_encoder_write_dword(cbuf, s3);                         /* S3 */
+   virgl_encoder_write_dword(cbuf, fui(state->line_width));     /* S4 */
+   virgl_encoder_write_dword(cbuf, fui(state->offset_units));   /* S5 */
+   virgl_encoder_write_dword(cbuf, fui(state->offset_scale));   /* S6 */
+   virgl_encoder_write_dword(cbuf, fui(state->offset_clamp));   /* S7 */
+   return 0;
+}
+
+/*
+ * Write the fixed part of a shader CREATE_OBJECT command: the command
+ * header (with payload length len) followed by handle, shader type, the
+ * offset/length word and the token count.
+ */
+static void virgl_emit_shader_header(struct virgl_context *ctx,
+                                     uint32_t handle, uint32_t len,
+                                     uint32_t type, uint32_t offlen,
+                                     uint32_t num_tokens)
+{
+   struct virgl_cmd_buf *cbuf;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT,
+                                                 VIRGL_OBJECT_SHADER, len));
+   cbuf = ctx->cbuf;
+   virgl_encoder_write_dword(cbuf, handle);
+   virgl_encoder_write_dword(cbuf, type);
+   virgl_encoder_write_dword(cbuf, offlen);
+   virgl_encoder_write_dword(cbuf, num_tokens);
+}
+
+/*
+ * Write the stream output section of a shader command: the output count,
+ * and when it is non-zero, four buffer strides followed by two dwords per
+ * output (packed description, then a zero dword). A NULL so_info is
+ * encoded as zero outputs.
+ */
+static void virgl_emit_shader_streamout(struct virgl_context *ctx,
+                                        const struct pipe_stream_output_info *so_info)
+{
+   const int num_outputs = so_info ? so_info->num_outputs : 0;
+   int i;
+
+   virgl_encoder_write_dword(ctx->cbuf, num_outputs);
+   if (!num_outputs)
+      return;
+
+   for (i = 0; i < 4; i++)
+      virgl_encoder_write_dword(ctx->cbuf, so_info->stride[i]);
+
+   for (i = 0; i < so_info->num_outputs; i++) {
+      uint32_t packed;
+
+      packed  = VIRGL_OBJ_SHADER_SO_OUTPUT_REGISTER_INDEX(so_info->output[i].register_index);
+      packed |= VIRGL_OBJ_SHADER_SO_OUTPUT_START_COMPONENT(so_info->output[i].start_component);
+      packed |= VIRGL_OBJ_SHADER_SO_OUTPUT_NUM_COMPONENTS(so_info->output[i].num_components);
+      packed |= VIRGL_OBJ_SHADER_SO_OUTPUT_BUFFER(so_info->output[i].output_buffer);
+      packed |= VIRGL_OBJ_SHADER_SO_OUTPUT_DST_OFFSET(so_info->output[i].dst_offset);
+      virgl_encoder_write_dword(ctx->cbuf, packed);
+      virgl_encoder_write_dword(ctx->cbuf, 0);
+   }
+}
+
+/*
+ * Encode a shader as TGSI text, splitting it over several CREATE_OBJECT
+ * commands when it does not fit in the space left in the command buffer.
+ *
+ * The first command carries the total text length and the stream output
+ * description; follow-up commands carry the byte offset of their chunk
+ * with VIRGL_OBJ_SHADER_OFFSET_CONT set.
+ *
+ * so_info may be NULL, which is encoded as "no stream outputs".
+ *
+ * Returns 0 on success, -1 if the text buffer cannot be allocated or the
+ * shader still does not fit after the buffer has been grown repeatedly.
+ */
+int virgl_encode_shader_state(struct virgl_context *ctx,
+                              uint32_t handle,
+                              uint32_t type,
+                              const struct pipe_stream_output_info *so_info,
+                              const struct tgsi_token *tokens)
+{
+   char *str, *sptr;
+   uint32_t shader_len, len;
+   bool bret;
+   int num_tokens = tgsi_num_tokens(tokens);
+   int str_total_size = 65536;
+   int retry_size = 1;
+   uint32_t left_bytes, base_hdr_size, strm_hdr_size, thispass;
+   bool first_pass;
+
+   str = CALLOC(1, str_total_size);
+   if (!str)
+      return -1;
+
+   do {
+      int old_size;
+
+      bret = tgsi_dump_str(tokens, TGSI_DUMP_FLOAT_AS_HEX, str, str_total_size);
+      if (bret == false) {
+         fprintf(stderr, "Failed to translate shader in available space - trying again\n");
+         old_size = str_total_size;
+         str_total_size = 65536 * ++retry_size;
+         /* NOTE(review): if REALLOC behaves like realloc() the old buffer
+          * leaks when it fails; the macro's failure semantics are not
+          * visible here, so this path is left as it was. */
+         str = REALLOC(str, old_size, str_total_size);
+         if (!str)
+            return -1;
+      }
+   } while (bret == false && retry_size < 10);
+
+   if (bret == false) {
+      /* Giving up: release the text buffer instead of leaking it. */
+      FREE(str);
+      return -1;
+   }
+
+   shader_len = strlen(str) + 1;
+
+   left_bytes = shader_len;
+
+   /* handle, type, offset/length, token count, stream output count */
+   base_hdr_size = 5;
+   /* four strides plus two dwords per output, only when outputs exist */
+   strm_hdr_size = (so_info && so_info->num_outputs) ?
+      so_info->num_outputs * 2 + 4 : 0;
+   first_pass = true;
+   sptr = str;
+   while (left_bytes) {
+      uint32_t length, offlen;
+      int hdr_len = base_hdr_size + (first_pass ? strm_hdr_size : 0);
+
+      /* Flush also when the header would exactly fill the buffer;
+       * otherwise this pass would emit a command with no shader text. */
+      if (ctx->cbuf->cdw + hdr_len + 1 >= VIRGL_MAX_CMDBUF_DWORDS)
+         ctx->base.flush(&ctx->base, NULL, 0);
+
+      thispass = (VIRGL_MAX_CMDBUF_DWORDS - ctx->cbuf->cdw - hdr_len - 1) * 4;
+
+      length = MIN2(thispass, left_bytes);
+      len = ((length + 3) / 4) + hdr_len;
+
+      if (first_pass)
+         offlen = VIRGL_OBJ_SHADER_OFFSET_VAL(shader_len);
+      else
+         offlen = VIRGL_OBJ_SHADER_OFFSET_VAL((uintptr_t)sptr - (uintptr_t)str) | VIRGL_OBJ_SHADER_OFFSET_CONT;
+
+      virgl_emit_shader_header(ctx, handle, len, type, offlen, num_tokens);
+
+      virgl_emit_shader_streamout(ctx, first_pass ? so_info : NULL);
+
+      virgl_encoder_write_block(ctx->cbuf, (uint8_t *)sptr, length);
+
+      sptr += length;
+      first_pass = false;
+      left_bytes -= length;
+   }
+
+   FREE(str);
+   return 0;
+}
+
+
+/*
+ * Encode a CLEAR command.
+ * Payload: buffer mask, four colour dwords, the depth value as the 64-bit
+ * pattern of the double, then the stencil value. Always returns 0.
+ */
+int virgl_encode_clear(struct virgl_context *ctx,
+                       unsigned buffers,
+                       const union pipe_color_union *color,
+                       double depth, unsigned stencil)
+{
+   /* Read the double's bits through a union; dereferencing a casted
+    * uint64_t pointer would break the strict aliasing rules. */
+   union {
+      double d;
+      uint64_t u;
+   } depth_bits;
+   int i;
+
+   depth_bits.d = depth;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CLEAR, 0, VIRGL_OBJ_CLEAR_SIZE));
+   virgl_encoder_write_dword(ctx->cbuf, buffers);
+   for (i = 0; i < 4; i++)
+      virgl_encoder_write_dword(ctx->cbuf, color->ui[i]);
+   virgl_encoder_write_qword(ctx->cbuf, depth_bits.u);
+   virgl_encoder_write_dword(ctx->cbuf, stencil);
+   return 0;
+}
+
+/*
+ * Encode a SET_FRAMEBUFFER_STATE command.
+ * Payload: colour buffer count, depth/stencil surface handle, then one
+ * surface handle per colour buffer. Missing surfaces are sent as handle 0.
+ * Always returns 0.
+ */
+int virgl_encoder_set_framebuffer_state(struct virgl_context *ctx,
+                                        const struct pipe_framebuffer_state *state)
+{
+   struct virgl_surface *zsurf = virgl_surface(state->zsbuf);
+   const uint32_t zs_handle = zsurf ? zsurf->handle : 0;
+   struct virgl_cmd_buf *cbuf;
+   int i;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_FRAMEBUFFER_STATE, 0,
+                                                 VIRGL_SET_FRAMEBUFFER_STATE_SIZE(state->nr_cbufs)));
+   cbuf = ctx->cbuf;
+   virgl_encoder_write_dword(cbuf, state->nr_cbufs);
+   virgl_encoder_write_dword(cbuf, zs_handle);
+
+   for (i = 0; i < state->nr_cbufs; i++) {
+      struct virgl_surface *surf = virgl_surface(state->cbufs[i]);
+      uint32_t cb_handle = 0;
+
+      if (surf)
+         cb_handle = surf->handle;
+      virgl_encoder_write_dword(cbuf, cb_handle);
+   }
+
+   return 0;
+}
+
+/*
+ * Encode a SET_VIEWPORT_STATE command.
+ * Payload: start slot, then for each viewport three scale floats followed
+ * by three translate floats, sent as bit patterns. Always returns 0.
+ */
+int virgl_encoder_set_viewport_states(struct virgl_context *ctx,
+                                      int start_slot,
+                                      int num_viewports,
+                                      const struct pipe_viewport_state *states)
+{
+   struct virgl_cmd_buf *cbuf;
+   int vp_idx, comp;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_VIEWPORT_STATE, 0,
+                                                 VIRGL_SET_VIEWPORT_STATE_SIZE(num_viewports)));
+   cbuf = ctx->cbuf;
+   virgl_encoder_write_dword(cbuf, start_slot);
+
+   for (vp_idx = 0; vp_idx < num_viewports; vp_idx++) {
+      const struct pipe_viewport_state *vp = &states[vp_idx];
+
+      for (comp = 0; comp < 3; comp++)
+         virgl_encoder_write_dword(cbuf, fui(vp->scale[comp]));
+      for (comp = 0; comp < 3; comp++)
+         virgl_encoder_write_dword(cbuf, fui(vp->translate[comp]));
+   }
+   return 0;
+}
+
+/*
+ * Encode a CREATE_OBJECT command for a vertex elements object.
+ * Payload: handle, then four dwords per element (source offset, instance
+ * divisor, vertex buffer index, source format). Always returns 0.
+ */
+int virgl_encoder_create_vertex_elements(struct virgl_context *ctx,
+                                         uint32_t handle,
+                                         unsigned num_elements,
+                                         const struct pipe_vertex_element *element)
+{
+   struct virgl_cmd_buf *cbuf;
+   int i;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT,
+                                                 VIRGL_OBJECT_VERTEX_ELEMENTS,
+                                                 VIRGL_OBJ_VERTEX_ELEMENTS_SIZE(num_elements)));
+   cbuf = ctx->cbuf;
+   virgl_encoder_write_dword(cbuf, handle);
+
+   for (i = 0; i < num_elements; i++) {
+      const struct pipe_vertex_element *ve = &element[i];
+
+      virgl_encoder_write_dword(cbuf, ve->src_offset);
+      virgl_encoder_write_dword(cbuf, ve->instance_divisor);
+      virgl_encoder_write_dword(cbuf, ve->vertex_buffer_index);
+      virgl_encoder_write_dword(cbuf, ve->src_format);
+   }
+   return 0;
+}
+
+/*
+ * Encode a SET_VERTEX_BUFFERS command.
+ * Payload: for each buffer its stride, its offset and a resource
+ * reference. Always returns 0.
+ */
+int virgl_encoder_set_vertex_buffers(struct virgl_context *ctx,
+                                     unsigned num_buffers,
+                                     const struct pipe_vertex_buffer *buffers)
+{
+   int i;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_VERTEX_BUFFERS, 0,
+                                                 VIRGL_SET_VERTEX_BUFFERS_SIZE(num_buffers)));
+
+   for (i = 0; i < num_buffers; i++) {
+      const struct pipe_vertex_buffer *vb = &buffers[i];
+      struct virgl_resource *res = virgl_resource(vb->buffer);
+
+      virgl_encoder_write_dword(ctx->cbuf, vb->stride);
+      virgl_encoder_write_dword(ctx->cbuf, vb->buffer_offset);
+      virgl_encoder_write_res(ctx, res);
+   }
+   return 0;
+}
+
+/*
+ * Encode a SET_INDEX_BUFFER command.
+ * Payload: a resource reference, followed by index size and offset when
+ * an index buffer is given. A NULL ib writes a zero resource reference
+ * only. Always returns 0.
+ */
+int virgl_encoder_set_index_buffer(struct virgl_context *ctx,
+                                   const struct pipe_index_buffer *ib)
+{
+   int length = VIRGL_SET_INDEX_BUFFER_SIZE(ib);
+   struct virgl_resource *res = ib ? virgl_resource(ib->buffer) : NULL;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_INDEX_BUFFER, 0, length));
+   virgl_encoder_write_res(ctx, res);
+
+   if (!ib)
+      return 0;
+
+   virgl_encoder_write_dword(ctx->cbuf, ib->index_size);
+   virgl_encoder_write_dword(ctx->cbuf, ib->offset);
+   return 0;
+}
+
+/*
+ * Encode a DRAW_VBO command from a pipe_draw_info.
+ * The last payload dword is the buffer size of the stream output target
+ * the count is taken from, or 0 when there is none. Always returns 0.
+ */
+int virgl_encoder_draw_vbo(struct virgl_context *ctx,
+                           const struct pipe_draw_info *info)
+{
+   struct virgl_cmd_buf *cbuf;
+   uint32_t so_buffer_size = 0;
+
+   if (info->count_from_stream_output)
+      so_buffer_size = info->count_from_stream_output->buffer_size;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_DRAW_VBO, 0, VIRGL_DRAW_VBO_SIZE));
+   cbuf = ctx->cbuf;
+
+   virgl_encoder_write_dword(cbuf, info->start);
+   virgl_encoder_write_dword(cbuf, info->count);
+   virgl_encoder_write_dword(cbuf, info->mode);
+   virgl_encoder_write_dword(cbuf, info->indexed);
+   virgl_encoder_write_dword(cbuf, info->instance_count);
+   virgl_encoder_write_dword(cbuf, info->index_bias);
+   virgl_encoder_write_dword(cbuf, info->start_instance);
+   virgl_encoder_write_dword(cbuf, info->primitive_restart);
+   virgl_encoder_write_dword(cbuf, info->restart_index);
+   virgl_encoder_write_dword(cbuf, info->min_index);
+   virgl_encoder_write_dword(cbuf, info->max_index);
+   virgl_encoder_write_dword(cbuf, so_buffer_size);
+   return 0;
+}
+
+/*
+ * Encode a CREATE_OBJECT command for a surface.
+ * Payload: handle, resource reference, format, then two dwords that
+ * depend on the texture target: first/last element for buffers, or the
+ * level and the packed first/last layer for everything else.
+ * Always returns 0.
+ */
+int virgl_encoder_create_surface(struct virgl_context *ctx,
+                                 uint32_t handle,
+                                 struct virgl_resource *res,
+                                 const struct pipe_surface *templat)
+{
+   const bool is_buffer = templat->texture->target == PIPE_BUFFER;
+   uint32_t word0, word1;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT,
+                                                 VIRGL_OBJECT_SURFACE,
+                                                 VIRGL_OBJ_SURFACE_SIZE));
+   virgl_encoder_write_dword(ctx->cbuf, handle);
+   virgl_encoder_write_res(ctx, res);
+   virgl_encoder_write_dword(ctx->cbuf, templat->format);
+
+   if (is_buffer) {
+      word0 = templat->u.buf.first_element;
+      word1 = templat->u.buf.last_element;
+   } else {
+      word0 = templat->u.tex.level;
+      word1 = templat->u.tex.first_layer | (templat->u.tex.last_layer << 16);
+   }
+   virgl_encoder_write_dword(ctx->cbuf, word0);
+   virgl_encoder_write_dword(ctx->cbuf, word1);
+   return 0;
+}
+
+/*
+ * Encode a CREATE_OBJECT command for a stream output target.
+ * Payload: handle, resource reference, buffer offset, buffer size.
+ * Always returns 0.
+ */
+int virgl_encoder_create_so_target(struct virgl_context *ctx,
+                                   uint32_t handle,
+                                   struct virgl_resource *res,
+                                   unsigned buffer_offset,
+                                   unsigned buffer_size)
+{
+   const uint32_t header = VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT,
+                                      VIRGL_OBJECT_STREAMOUT_TARGET,
+                                      VIRGL_OBJ_STREAMOUT_SIZE);
+
+   virgl_encoder_write_cmd_dword(ctx, header);
+   virgl_encoder_write_dword(ctx->cbuf, handle);
+   virgl_encoder_write_res(ctx, res);
+   virgl_encoder_write_dword(ctx->cbuf, buffer_offset);
+   virgl_encoder_write_dword(ctx->cbuf, buffer_size);
+   return 0;
+}
+
+/*
+ * Write the 11 header dwords of an inline write command: resource
+ * reference, level, usage, stride, layer stride and the six box fields
+ * (x, y, z, width, height, depth).
+ */
+static void virgl_encoder_iw_emit_header_1d(struct virgl_context *ctx,
+                                            struct virgl_resource *res,
+                                            unsigned level, unsigned usage,
+                                            const struct pipe_box *box,
+                                            unsigned stride, unsigned layer_stride)
+{
+   struct virgl_cmd_buf *cbuf;
+
+   virgl_encoder_write_res(ctx, res);
+   cbuf = ctx->cbuf;
+
+   /* transfer parameters */
+   virgl_encoder_write_dword(cbuf, level);
+   virgl_encoder_write_dword(cbuf, usage);
+   virgl_encoder_write_dword(cbuf, stride);
+   virgl_encoder_write_dword(cbuf, layer_stride);
+
+   /* destination box: origin, then extent */
+   virgl_encoder_write_dword(cbuf, box->x);
+   virgl_encoder_write_dword(cbuf, box->y);
+   virgl_encoder_write_dword(cbuf, box->z);
+   virgl_encoder_write_dword(cbuf, box->width);
+   virgl_encoder_write_dword(cbuf, box->height);
+   virgl_encoder_write_dword(cbuf, box->depth);
+}
+
+/*
+ * Encode RESOURCE_INLINE_WRITE commands that carry the data in the command
+ * stream itself.
+ *
+ * The data is split into as many commands as needed; each chunk advances
+ * box.x and sets box.width to the chunk's byte count, which is why a
+ * multi-row or multi-layer box that does not fit in one command trips the
+ * assertion below.
+ *
+ * NOTE(review): the total size is computed from stride/width and height
+ * only; box->depth is not taken into account -- confirm that callers only
+ * pass depth 1.
+ *
+ * Always returns 0.
+ */
+int virgl_encoder_inline_write(struct virgl_context *ctx,
+                               struct virgl_resource *res,
+                               unsigned level, unsigned usage,
+                               const struct pipe_box *box,
+                               const void *data, unsigned stride,
+                               unsigned layer_stride)
+{
+   uint32_t size = (stride ? stride : box->width) * box->height;
+   uint32_t length, thispass, left_bytes;
+   struct pipe_box mybox = *box;
+   /* Byte-typed cursor: arithmetic on a void pointer is a GNU extension. */
+   const uint8_t *bytes = data;
+
+   length = 11 + (size + 3) / 4;
+   if ((ctx->cbuf->cdw + length + 1) > VIRGL_MAX_CMDBUF_DWORDS) {
+      if (box->height > 1 || box->depth > 1) {
+         debug_printf("inline transfer failed due to multi dimensions and too large\n");
+         assert(0);
+      }
+   }
+
+   left_bytes = size;
+   while (left_bytes) {
+      /* Flush also when only the 12 header dwords would fit; otherwise
+       * this pass would emit a write with zero bytes of data. */
+      if (ctx->cbuf->cdw + 12 >= VIRGL_MAX_CMDBUF_DWORDS)
+         ctx->base.flush(&ctx->base, NULL, 0);
+
+      thispass = (VIRGL_MAX_CMDBUF_DWORDS - ctx->cbuf->cdw - 12) * 4;
+
+      length = MIN2(thispass, left_bytes);
+
+      mybox.width = length;
+      virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_RESOURCE_INLINE_WRITE, 0, ((length + 3) / 4) + 11));
+      virgl_encoder_iw_emit_header_1d(ctx, res, level, usage, &mybox, stride, layer_stride);
+      virgl_encoder_write_block(ctx->cbuf, bytes, length);
+      left_bytes -= length;
+      mybox.x += length;
+      bytes += length;
+   }
+   return 0;
+}
+
+/*
+ * Placeholder: writes nothing to the command buffer and returns 0.
+ * Neither ctx nor res is used.
+ */
+int virgl_encoder_flush_frontbuffer(struct virgl_context *ctx,
+ struct virgl_resource *res)
+{
+ /* The front buffer flush command encoding is disabled. */
+ return 0;
+}
+
+/*
+ * Encode a CREATE_OBJECT command for a sampler state.
+ * Payload: handle, S0 (wrap modes, filters, compare mode/func), LOD bias,
+ * min LOD and max LOD as float bits, then the four border colour dwords.
+ * Always returns 0.
+ */
+int virgl_encode_sampler_state(struct virgl_context *ctx,
+                               uint32_t handle,
+                               const struct pipe_sampler_state *state)
+{
+   struct virgl_cmd_buf *cbuf;
+   uint32_t s0;
+   int chan;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT,
+                                                 VIRGL_OBJECT_SAMPLER_STATE,
+                                                 VIRGL_OBJ_SAMPLER_STATE_SIZE));
+   cbuf = ctx->cbuf;
+   virgl_encoder_write_dword(cbuf, handle);
+
+   s0  = VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_S(state->wrap_s);
+   s0 |= VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_T(state->wrap_t);
+   s0 |= VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_R(state->wrap_r);
+   s0 |= VIRGL_OBJ_SAMPLE_STATE_S0_MIN_IMG_FILTER(state->min_img_filter);
+   s0 |= VIRGL_OBJ_SAMPLE_STATE_S0_MIN_MIP_FILTER(state->min_mip_filter);
+   s0 |= VIRGL_OBJ_SAMPLE_STATE_S0_MAG_IMG_FILTER(state->mag_img_filter);
+   s0 |= VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_MODE(state->compare_mode);
+   s0 |= VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_FUNC(state->compare_func);
+
+   virgl_encoder_write_dword(cbuf, s0);
+   virgl_encoder_write_dword(cbuf, fui(state->lod_bias));
+   virgl_encoder_write_dword(cbuf, fui(state->min_lod));
+   virgl_encoder_write_dword(cbuf, fui(state->max_lod));
+   for (chan = 0; chan < 4; chan++)
+      virgl_encoder_write_dword(cbuf, state->border_color.ui[chan]);
+   return 0;
+}
+
+
+/*
+ * Encode a CREATE_OBJECT command for a sampler view.
+ * Payload: handle, resource reference, format, two dwords that depend on
+ * the resource target (first/last element for buffers; packed layers and
+ * packed levels otherwise) and the packed RGBA swizzle.
+ * res is dereferenced to read its target, so it must not be NULL.
+ * Always returns 0.
+ */
+int virgl_encode_sampler_view(struct virgl_context *ctx,
+                              uint32_t handle,
+                              struct virgl_resource *res,
+                              const struct pipe_sampler_view *state)
+{
+   uint32_t word0, word1, swizzle;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT,
+                                                 VIRGL_OBJECT_SAMPLER_VIEW,
+                                                 VIRGL_OBJ_SAMPLER_VIEW_SIZE));
+   virgl_encoder_write_dword(ctx->cbuf, handle);
+   virgl_encoder_write_res(ctx, res);
+   virgl_encoder_write_dword(ctx->cbuf, state->format);
+
+   if (res->u.b.target == PIPE_BUFFER) {
+      word0 = state->u.buf.first_element;
+      word1 = state->u.buf.last_element;
+   } else {
+      word0 = state->u.tex.first_layer | (state->u.tex.last_layer << 16);
+      word1 = state->u.tex.first_level | (state->u.tex.last_level << 8);
+   }
+   virgl_encoder_write_dword(ctx->cbuf, word0);
+   virgl_encoder_write_dword(ctx->cbuf, word1);
+
+   swizzle  = VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_R(state->swizzle_r);
+   swizzle |= VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_G(state->swizzle_g);
+   swizzle |= VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_B(state->swizzle_b);
+   swizzle |= VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_A(state->swizzle_a);
+   virgl_encoder_write_dword(ctx->cbuf, swizzle);
+   return 0;
+}
+
+/*
+ * Encode a SET_SAMPLER_VIEWS command.
+ * Payload: shader type, start slot, then one view handle per slot; a NULL
+ * view is sent as handle 0. Always returns 0.
+ */
+int virgl_encode_set_sampler_views(struct virgl_context *ctx,
+                                   uint32_t shader_type,
+                                   uint32_t start_slot,
+                                   uint32_t num_views,
+                                   struct virgl_sampler_view **views)
+{
+   struct virgl_cmd_buf *cbuf;
+   int slot;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_SAMPLER_VIEWS, 0,
+                                                 VIRGL_SET_SAMPLER_VIEWS_SIZE(num_views)));
+   cbuf = ctx->cbuf;
+   virgl_encoder_write_dword(cbuf, shader_type);
+   virgl_encoder_write_dword(cbuf, start_slot);
+
+   for (slot = 0; slot < num_views; slot++) {
+      uint32_t handle = 0;
+
+      if (views[slot])
+         handle = views[slot]->handle;
+      virgl_encoder_write_dword(cbuf, handle);
+   }
+   return 0;
+}
+
+/*
+ * Encode a BIND_SAMPLER_STATES command.
+ * Payload: shader type, start slot, then the sampler state handles.
+ * Always returns 0.
+ */
+int virgl_encode_bind_sampler_states(struct virgl_context *ctx,
+                                     uint32_t shader_type,
+                                     uint32_t start_slot,
+                                     uint32_t num_handles,
+                                     uint32_t *handles)
+{
+   struct virgl_cmd_buf *cbuf;
+   int slot;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BIND_SAMPLER_STATES, 0,
+                                                 VIRGL_BIND_SAMPLER_STATES(num_handles)));
+   cbuf = ctx->cbuf;
+   virgl_encoder_write_dword(cbuf, shader_type);
+   virgl_encoder_write_dword(cbuf, start_slot);
+
+   for (slot = 0; slot < num_handles; slot++) {
+      virgl_encoder_write_dword(cbuf, handles[slot]);
+   }
+   return 0;
+}
+
+/*
+ * Encode a SET_CONSTANT_BUFFER command with inline constants.
+ * size is counted in dwords. Payload: shader type, buffer index, then
+ * size dwords copied from data.
+ *
+ * NOTE(review): when data is NULL the header still announces size + 2
+ * dwords but only two are written -- confirm callers pass size 0 then.
+ *
+ * Always returns 0.
+ */
+int virgl_encoder_write_constant_buffer(struct virgl_context *ctx,
+                                        uint32_t shader,
+                                        uint32_t index,
+                                        uint32_t size,
+                                        const void *data)
+{
+   const uint32_t header = VIRGL_CMD0(VIRGL_CCMD_SET_CONSTANT_BUFFER, 0, size + 2);
+
+   virgl_encoder_write_cmd_dword(ctx, header);
+   virgl_encoder_write_dword(ctx->cbuf, shader);
+   virgl_encoder_write_dword(ctx->cbuf, index);
+
+   if (!data)
+      return 0;
+
+   virgl_encoder_write_block(ctx->cbuf, data, size * 4);
+   return 0;
+}
+
+/*
+ * Encode a SET_UNIFORM_BUFFER command.
+ * Payload: shader type, buffer index, offset, length, resource reference.
+ * Always returns 0.
+ */
+int virgl_encoder_set_uniform_buffer(struct virgl_context *ctx,
+                                     uint32_t shader,
+                                     uint32_t index,
+                                     uint32_t offset,
+                                     uint32_t length,
+                                     struct virgl_resource *res)
+{
+   struct virgl_cmd_buf *cbuf;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_UNIFORM_BUFFER, 0,
+                                                 VIRGL_SET_UNIFORM_BUFFER_SIZE));
+   cbuf = ctx->cbuf;
+   virgl_encoder_write_dword(cbuf, shader);
+   virgl_encoder_write_dword(cbuf, index);
+   virgl_encoder_write_dword(cbuf, offset);
+   virgl_encoder_write_dword(cbuf, length);
+   virgl_encoder_write_res(ctx, res);
+   return 0;
+}
+
+
+/*
+ * Encode a SET_STENCIL_REF command.
+ * Payload: one dword packing both stencil reference values.
+ * Always returns 0.
+ */
+int virgl_encoder_set_stencil_ref(struct virgl_context *ctx,
+                                  const struct pipe_stencil_ref *ref)
+{
+   const uint32_t packed_refs =
+      VIRGL_STENCIL_REF_VAL(ref->ref_value[0] , (ref->ref_value[1]));
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_STENCIL_REF, 0,
+                                                 VIRGL_SET_STENCIL_REF_SIZE));
+   virgl_encoder_write_dword(ctx->cbuf, packed_refs);
+   return 0;
+}
+
+/*
+ * Encode a SET_BLEND_COLOR command.
+ * Payload: the four colour components as float bit patterns.
+ * Always returns 0.
+ */
+int virgl_encoder_set_blend_color(struct virgl_context *ctx,
+                                  const struct pipe_blend_color *color)
+{
+   struct virgl_cmd_buf *cbuf;
+   int chan;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_BLEND_COLOR, 0,
+                                                 VIRGL_SET_BLEND_COLOR_SIZE));
+   cbuf = ctx->cbuf;
+   for (chan = 0; chan < 4; chan++) {
+      virgl_encoder_write_dword(cbuf, fui(color->color[chan]));
+   }
+   return 0;
+}
+
+/*
+ * Encode a SET_SCISSOR_STATE command.
+ * Payload: start slot, then two dwords per scissor: min x/y and max x/y,
+ * each with x in the low 16 bits and y shifted up by 16.
+ * Always returns 0.
+ */
+int virgl_encoder_set_scissor_state(struct virgl_context *ctx,
+                                    unsigned start_slot,
+                                    int num_scissors,
+                                    const struct pipe_scissor_state *ss)
+{
+   struct virgl_cmd_buf *cbuf;
+   int idx;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_SCISSOR_STATE, 0,
+                                                 VIRGL_SET_SCISSOR_STATE_SIZE(num_scissors)));
+   cbuf = ctx->cbuf;
+   virgl_encoder_write_dword(cbuf, start_slot);
+
+   for (idx = 0; idx < num_scissors; idx++) {
+      const struct pipe_scissor_state *rect = &ss[idx];
+
+      virgl_encoder_write_dword(cbuf, (rect->minx | (rect->miny << 16)));
+      virgl_encoder_write_dword(cbuf, (rect->maxx | (rect->maxy << 16)));
+   }
+   return 0;
+}
+
+/*
+ * Encode a SET_POLYGON_STIPPLE command.
+ * Payload: VIRGL_POLYGON_STIPPLE_SIZE dwords of stipple pattern.
+ */
+void virgl_encoder_set_polygon_stipple(struct virgl_context *ctx,
+                                       const struct pipe_poly_stipple *ps)
+{
+   struct virgl_cmd_buf *cbuf;
+   int row;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_POLYGON_STIPPLE, 0,
+                                                 VIRGL_POLYGON_STIPPLE_SIZE));
+   cbuf = ctx->cbuf;
+   for (row = 0; row < VIRGL_POLYGON_STIPPLE_SIZE; row++)
+      virgl_encoder_write_dword(cbuf, ps->stipple[row]);
+}
+
+/*
+ * Encode a SET_SAMPLE_MASK command.
+ * Payload: the sample mask.
+ */
+void virgl_encoder_set_sample_mask(struct virgl_context *ctx,
+                                   unsigned sample_mask)
+{
+   const uint32_t header = VIRGL_CMD0(VIRGL_CCMD_SET_SAMPLE_MASK, 0,
+                                      VIRGL_SET_SAMPLE_MASK_SIZE);
+
+   virgl_encoder_write_cmd_dword(ctx, header);
+   virgl_encoder_write_dword(ctx->cbuf, sample_mask);
+}
+
+/*
+ * Encode a SET_CLIP_STATE command.
+ * Payload: four float bit patterns for each of the VIRGL_MAX_CLIP_PLANES
+ * user clip planes.
+ */
+void virgl_encoder_set_clip_state(struct virgl_context *ctx,
+                                  const struct pipe_clip_state *clip)
+{
+   struct virgl_cmd_buf *cbuf;
+   int plane, coef;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_CLIP_STATE, 0,
+                                                 VIRGL_SET_CLIP_STATE_SIZE));
+   cbuf = ctx->cbuf;
+   for (plane = 0; plane < VIRGL_MAX_CLIP_PLANES; plane++)
+      for (coef = 0; coef < 4; coef++)
+         virgl_encoder_write_dword(cbuf, fui(clip->ucp[plane][coef]));
+}
+
+/*
+ * Encode a RESOURCE_COPY_REGION command.
+ * Payload: destination resource, level and x/y/z, then source resource,
+ * level and the six fields of the source box. Always returns 0.
+ */
+int virgl_encode_resource_copy_region(struct virgl_context *ctx,
+                                      struct virgl_resource *dst_res,
+                                      unsigned dst_level,
+                                      unsigned dstx, unsigned dsty, unsigned dstz,
+                                      struct virgl_resource *src_res,
+                                      unsigned src_level,
+                                      const struct pipe_box *src_box)
+{
+   const uint32_t header = VIRGL_CMD0(VIRGL_CCMD_RESOURCE_COPY_REGION, 0,
+                                      VIRGL_CMD_RESOURCE_COPY_REGION_SIZE);
+
+   virgl_encoder_write_cmd_dword(ctx, header);
+
+   /* destination */
+   virgl_encoder_write_res(ctx, dst_res);
+   virgl_encoder_write_dword(ctx->cbuf, dst_level);
+   virgl_encoder_write_dword(ctx->cbuf, dstx);
+   virgl_encoder_write_dword(ctx->cbuf, dsty);
+   virgl_encoder_write_dword(ctx->cbuf, dstz);
+
+   /* source */
+   virgl_encoder_write_res(ctx, src_res);
+   virgl_encoder_write_dword(ctx->cbuf, src_level);
+   virgl_encoder_write_dword(ctx->cbuf, src_box->x);
+   virgl_encoder_write_dword(ctx->cbuf, src_box->y);
+   virgl_encoder_write_dword(ctx->cbuf, src_box->z);
+   virgl_encoder_write_dword(ctx->cbuf, src_box->width);
+   virgl_encoder_write_dword(ctx->cbuf, src_box->height);
+   virgl_encoder_write_dword(ctx->cbuf, src_box->depth);
+   return 0;
+}
+
+/*
+ * Encode a BLIT command.
+ * Payload: S0 (mask, filter, scissor enable), packed scissor min and max,
+ * then for destination and source in turn: resource reference, level,
+ * format and the six box fields. Always returns 0.
+ */
+int virgl_encode_blit(struct virgl_context *ctx,
+                      struct virgl_resource *dst_res,
+                      struct virgl_resource *src_res,
+                      const struct pipe_blit_info *blit)
+{
+   const struct pipe_box *dst_box = &blit->dst.box;
+   const struct pipe_box *src_box = &blit->src.box;
+   uint32_t s0;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BLIT, 0, VIRGL_CMD_BLIT_SIZE));
+
+   s0  = VIRGL_CMD_BLIT_S0_MASK(blit->mask);
+   s0 |= VIRGL_CMD_BLIT_S0_FILTER(blit->filter);
+   s0 |= VIRGL_CMD_BLIT_S0_SCISSOR_ENABLE(blit->scissor_enable);
+   virgl_encoder_write_dword(ctx->cbuf, s0);
+   virgl_encoder_write_dword(ctx->cbuf, (blit->scissor.minx | (blit->scissor.miny << 16)));
+   virgl_encoder_write_dword(ctx->cbuf, (blit->scissor.maxx | (blit->scissor.maxy << 16)));
+
+   /* destination */
+   virgl_encoder_write_res(ctx, dst_res);
+   virgl_encoder_write_dword(ctx->cbuf, blit->dst.level);
+   virgl_encoder_write_dword(ctx->cbuf, blit->dst.format);
+   virgl_encoder_write_dword(ctx->cbuf, dst_box->x);
+   virgl_encoder_write_dword(ctx->cbuf, dst_box->y);
+   virgl_encoder_write_dword(ctx->cbuf, dst_box->z);
+   virgl_encoder_write_dword(ctx->cbuf, dst_box->width);
+   virgl_encoder_write_dword(ctx->cbuf, dst_box->height);
+   virgl_encoder_write_dword(ctx->cbuf, dst_box->depth);
+
+   /* source */
+   virgl_encoder_write_res(ctx, src_res);
+   virgl_encoder_write_dword(ctx->cbuf, blit->src.level);
+   virgl_encoder_write_dword(ctx->cbuf, blit->src.format);
+   virgl_encoder_write_dword(ctx->cbuf, src_box->x);
+   virgl_encoder_write_dword(ctx->cbuf, src_box->y);
+   virgl_encoder_write_dword(ctx->cbuf, src_box->z);
+   virgl_encoder_write_dword(ctx->cbuf, src_box->width);
+   virgl_encoder_write_dword(ctx->cbuf, src_box->height);
+   virgl_encoder_write_dword(ctx->cbuf, src_box->depth);
+   return 0;
+}
+
+/*
+ * Encode a CREATE_OBJECT command for a query.
+ * Payload: handle, a dword with the query type in the low 16 bits and the
+ * query index shifted up by 16, the offset, and a resource reference.
+ * Always returns 0.
+ */
+int virgl_encoder_create_query(struct virgl_context *ctx,
+                               uint32_t handle,
+                               uint query_type,
+                               uint query_index,
+                               struct virgl_resource *res,
+                               uint32_t offset)
+{
+   const uint32_t type_and_index = ((query_type & 0xffff) | (query_index << 16));
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT,
+                                                 VIRGL_OBJECT_QUERY,
+                                                 VIRGL_OBJ_QUERY_SIZE));
+   virgl_encoder_write_dword(ctx->cbuf, handle);
+   virgl_encoder_write_dword(ctx->cbuf, type_and_index);
+   virgl_encoder_write_dword(ctx->cbuf, offset);
+   virgl_encoder_write_res(ctx, res);
+   return 0;
+}
+
+/*
+ * Encode a BEGIN_QUERY command. Payload: the query handle.
+ * Always returns 0.
+ */
+int virgl_encoder_begin_query(struct virgl_context *ctx,
+                              uint32_t handle)
+{
+   const uint32_t header = VIRGL_CMD0(VIRGL_CCMD_BEGIN_QUERY, 0, 1);
+
+   virgl_encoder_write_cmd_dword(ctx, header);
+   virgl_encoder_write_dword(ctx->cbuf, handle);
+   return 0;
+}
+
+/*
+ * Encode an END_QUERY command. Payload: the query handle.
+ * Always returns 0.
+ */
+int virgl_encoder_end_query(struct virgl_context *ctx,
+                            uint32_t handle)
+{
+   const uint32_t header = VIRGL_CMD0(VIRGL_CCMD_END_QUERY, 0, 1);
+
+   virgl_encoder_write_cmd_dword(ctx, header);
+   virgl_encoder_write_dword(ctx->cbuf, handle);
+   return 0;
+}
+
+/*
+ * Encode a GET_QUERY_RESULT command.
+ * Payload: the query handle and the wait flag normalised to 0 or 1.
+ * Always returns 0.
+ */
+int virgl_encoder_get_query_result(struct virgl_context *ctx,
+                                   uint32_t handle, boolean wait)
+{
+   uint32_t wait_flag = 0;
+
+   if (wait)
+      wait_flag = 1;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_GET_QUERY_RESULT, 0, 2));
+   virgl_encoder_write_dword(ctx->cbuf, handle);
+   virgl_encoder_write_dword(ctx->cbuf, wait_flag);
+   return 0;
+}
+
+/*
+ * Encode a SET_RENDER_CONDITION command.
+ * Payload: query handle, condition, mode. Always returns 0.
+ */
+int virgl_encoder_render_condition(struct virgl_context *ctx,
+                                   uint32_t handle, boolean condition,
+                                   uint mode)
+{
+   struct virgl_cmd_buf *cbuf;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_RENDER_CONDITION, 0,
+                                                 VIRGL_RENDER_CONDITION_SIZE));
+   cbuf = ctx->cbuf;
+   virgl_encoder_write_dword(cbuf, handle);
+   virgl_encoder_write_dword(cbuf, condition);
+   virgl_encoder_write_dword(cbuf, mode);
+   return 0;
+}
+
+/*
+ * Encode a SET_STREAMOUT_TARGETS command.
+ * Payload: the append bitmask followed by one target handle per slot.
+ * A NULL entry in targets is sent as handle 0, matching how the sampler
+ * view and framebuffer encoders in this file treat empty slots.
+ * Always returns 0.
+ */
+int virgl_encoder_set_so_targets(struct virgl_context *ctx,
+                                 unsigned num_targets,
+                                 struct pipe_stream_output_target **targets,
+                                 unsigned append_bitmask)
+{
+   int i;
+
+   virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_STREAMOUT_TARGETS, 0, num_targets + 1));
+   virgl_encoder_write_dword(ctx->cbuf, append_bitmask);
+   for (i = 0; i < num_targets; i++) {
+      struct virgl_so_target *tg = virgl_so_target(targets[i]);
+
+      /* An unbound slot has no target object to read a handle from. */
+      virgl_encoder_write_dword(ctx->cbuf, tg ? tg->handle : 0);
+   }
+   return 0;
+}
+
+
+/*
+ * Encode a SET_SUB_CTX command. Payload: the sub-context id.
+ * Always returns 0.
+ */
+int virgl_encoder_set_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id)
+{
+   const uint32_t header = VIRGL_CMD0(VIRGL_CCMD_SET_SUB_CTX, 0, 1);
+
+   virgl_encoder_write_cmd_dword(ctx, header);
+   virgl_encoder_write_dword(ctx->cbuf, sub_ctx_id);
+   return 0;
+}
+
+/*
+ * Encode a CREATE_SUB_CTX command. Payload: the sub-context id.
+ * Always returns 0.
+ */
+int virgl_encoder_create_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id)
+{
+   const uint32_t header = VIRGL_CMD0(VIRGL_CCMD_CREATE_SUB_CTX, 0, 1);
+
+   virgl_encoder_write_cmd_dword(ctx, header);
+   virgl_encoder_write_dword(ctx->cbuf, sub_ctx_id);
+   return 0;
+}
+
+/*
+ * Encode a DESTROY_SUB_CTX command. Payload: the sub-context id.
+ * Always returns 0.
+ */
+int virgl_encoder_destroy_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id)
+{
+   const uint32_t header = VIRGL_CMD0(VIRGL_CCMD_DESTROY_SUB_CTX, 0, 1);
+
+   virgl_encoder_write_cmd_dword(ctx, header);
+   virgl_encoder_write_dword(ctx->cbuf, sub_ctx_id);
+   return 0;
+}
+
+/*
+ * Encode a BIND_SHADER command.
+ * Payload: the shader handle and the shader type. Always returns 0.
+ */
+int virgl_encode_bind_shader(struct virgl_context *ctx,
+                             uint32_t handle, uint32_t type)
+{
+   const uint32_t header = VIRGL_CMD0(VIRGL_CCMD_BIND_SHADER, 0, 2);
+
+   virgl_encoder_write_cmd_dword(ctx, header);
+   virgl_encoder_write_dword(ctx->cbuf, handle);
+   virgl_encoder_write_dword(ctx->cbuf, type);
+   return 0;
+}
diff --git a/src/gallium/drivers/virgl/virgl_encode.h b/src/gallium/drivers/virgl/virgl_encode.h
new file mode 100644
index 00000000000..030bcd6d16e
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_encode.h
@@ -0,0 +1,247 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_ENCODE_H
+#define VIRGL_ENCODE_H
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "virgl_winsys.h"
+
+struct tgsi_token;
+
+struct virgl_context;
+struct virgl_resource;
+struct virgl_sampler_view;
+
+/* A pipe_surface extended with a 32-bit handle. */
+struct virgl_surface {
+/* first member: virgl_surface() casts a pipe_surface pointer to this struct */
+ struct pipe_surface base;
+/* NOTE(review): presumably the handle used for this surface in encoded commands - confirm */
+ uint32_t handle;
+};
+
+/* Reinterpret a pipe_surface pointer as the virgl_surface that embeds it. */
+static inline struct virgl_surface *virgl_surface(struct pipe_surface *surf)
+{
+   struct virgl_surface *vsurf = (struct virgl_surface *)surf;
+
+   return vsurf;
+}
+
+/* Store one dword at the current write position (cdw) and advance it by one. */
+static inline void virgl_encoder_write_dword(struct virgl_cmd_buf *state,
+                                             uint32_t dword)
+{
+   state->buf[state->cdw] = dword;
+   state->cdw += 1;
+}
+
+/*
+ * Copy a 64-bit value into the buffer at the current write position, in host
+ * byte order, and advance the position by two entries.
+ */
+static inline void virgl_encoder_write_qword(struct virgl_cmd_buf *state,
+                                             uint64_t qword)
+{
+   /* memcpy rather than a pointer cast: no alignment or aliasing assumptions */
+   memcpy(&state->buf[state->cdw], &qword, sizeof(qword));
+   state->cdw += 2;
+}
+
+/*
+ * Append len bytes from ptr at the current write position, zero-fill the
+ * rest of the last dword when len is not a multiple of 4, and advance the
+ * position by len rounded up to whole dwords.
+ *
+ * No bounds check is performed here.
+ * NOTE(review): assumes state->buf elements are 4 bytes wide - confirm
+ * against struct virgl_cmd_buf.
+ */
+static inline void virgl_encoder_write_block(struct virgl_cmd_buf *state,
+                                             const uint8_t *ptr, uint32_t len)
+{
+   uint8_t *dst = (uint8_t *)(state->buf + state->cdw);
+   const uint32_t rem = len % 4;
+
+   memcpy(dst, ptr, len);
+   if (rem) {
+      /* Pad with 4 - rem bytes, not rem: zeroing rem bytes left stale data
+       * in the dword for rem == 1 and wrote past it for rem == 3. */
+      memset(dst + len, 0, 4 - rem);
+   }
+   state->cdw += (len + 3) / 4;
+}
+
+extern int virgl_encode_blend_state(struct virgl_context *ctx,
+ uint32_t handle,
+ const struct pipe_blend_state *blend_state);
+extern int virgl_encode_rasterizer_state(struct virgl_context *ctx,
+ uint32_t handle,
+ const struct pipe_rasterizer_state *state);
+
+extern int virgl_encode_shader_state(struct virgl_context *ctx,
+ uint32_t handle,
+ uint32_t type,
+ const struct pipe_stream_output_info *so_info,
+ const struct tgsi_token *tokens);
+
+int virgl_encode_stream_output_info(struct virgl_context *ctx,
+ uint32_t handle,
+ uint32_t type,
+ const struct pipe_shader_state *shader);
+
+/*
+ * Encode a SET_STREAMOUT_TARGETS command: append_bitmask followed by the
+ * handle of each of the num_targets entries in targets. Always returns 0.
+ */
+int virgl_encoder_set_so_targets(struct virgl_context *ctx,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ unsigned append_bitmask);
+
+int virgl_encoder_create_so_target(struct virgl_context *ctx,
+ uint32_t handle,
+ struct virgl_resource *res,
+ unsigned buffer_offset,
+ unsigned buffer_size);
+
+int virgl_encode_clear(struct virgl_context *ctx,
+ unsigned buffers,
+ const union pipe_color_union *color,
+ double depth, unsigned stencil);
+
+int virgl_encode_bind_object(struct virgl_context *ctx,
+ uint32_t handle, uint32_t object);
+int virgl_encode_delete_object(struct virgl_context *ctx,
+ uint32_t handle, uint32_t object);
+
+int virgl_encoder_set_framebuffer_state(struct virgl_context *ctx,
+ const struct pipe_framebuffer_state *state);
+int virgl_encoder_set_viewport_states(struct virgl_context *ctx,
+ int start_slot,
+ int num_viewports,
+ const struct pipe_viewport_state *states);
+
+int virgl_encoder_draw_vbo(struct virgl_context *ctx,
+ const struct pipe_draw_info *info);
+
+
+int virgl_encoder_create_surface(struct virgl_context *ctx,
+ uint32_t handle,
+ struct virgl_resource *res,
+ const struct pipe_surface *templat);
+
+int virgl_encoder_flush_frontbuffer(struct virgl_context *ctx,
+ struct virgl_resource *res);
+
+int virgl_encoder_create_vertex_elements(struct virgl_context *ctx,
+ uint32_t handle,
+ unsigned num_elements,
+ const struct pipe_vertex_element *element);
+
+int virgl_encoder_set_vertex_buffers(struct virgl_context *ctx,
+ unsigned num_buffers,
+ const struct pipe_vertex_buffer *buffers);
+
+
+int virgl_encoder_inline_write(struct virgl_context *ctx,
+ struct virgl_resource *res,
+ unsigned level, unsigned usage,
+ const struct pipe_box *box,
+ const void *data, unsigned stride,
+ unsigned layer_stride);
+int virgl_encode_sampler_state(struct virgl_context *ctx,
+ uint32_t handle,
+ const struct pipe_sampler_state *state);
+int virgl_encode_sampler_view(struct virgl_context *ctx,
+ uint32_t handle,
+ struct virgl_resource *res,
+ const struct pipe_sampler_view *state);
+
+int virgl_encode_set_sampler_views(struct virgl_context *ctx,
+ uint32_t shader_type,
+ uint32_t start_slot,
+ uint32_t num_views,
+ struct virgl_sampler_view **views);
+
+int virgl_encode_bind_sampler_states(struct virgl_context *ctx,
+ uint32_t shader_type,
+ uint32_t start_slot,
+ uint32_t num_handles,
+ uint32_t *handles);
+
+int virgl_encoder_set_index_buffer(struct virgl_context *ctx,
+ const struct pipe_index_buffer *ib);
+
+uint32_t virgl_object_assign_handle(void);
+
+int virgl_encoder_write_constant_buffer(struct virgl_context *ctx,
+ uint32_t shader,
+ uint32_t index,
+ uint32_t size,
+ const void *data);
+
+int virgl_encoder_set_uniform_buffer(struct virgl_context *ctx,
+ uint32_t shader,
+ uint32_t index,
+ uint32_t offset,
+ uint32_t length,
+ struct virgl_resource *res);
+int virgl_encode_dsa_state(struct virgl_context *ctx,
+ uint32_t handle,
+ const struct pipe_depth_stencil_alpha_state *dsa_state);
+
+int virgl_encoder_set_stencil_ref(struct virgl_context *ctx,
+ const struct pipe_stencil_ref *ref);
+
+int virgl_encoder_set_blend_color(struct virgl_context *ctx,
+ const struct pipe_blend_color *color);
+
+int virgl_encoder_set_scissor_state(struct virgl_context *ctx,
+ unsigned start_slot,
+ int num_scissors,
+ const struct pipe_scissor_state *ss);
+
+void virgl_encoder_set_polygon_stipple(struct virgl_context *ctx,
+ const struct pipe_poly_stipple *ps);
+
+void virgl_encoder_set_sample_mask(struct virgl_context *ctx,
+ unsigned sample_mask);
+
+void virgl_encoder_set_clip_state(struct virgl_context *ctx,
+ const struct pipe_clip_state *clip);
+
+int virgl_encode_resource_copy_region(struct virgl_context *ctx,
+ struct virgl_resource *dst_res,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct virgl_resource *src_res,
+ unsigned src_level,
+ const struct pipe_box *src_box);
+
+int virgl_encode_blit(struct virgl_context *ctx,
+ struct virgl_resource *dst_res,
+ struct virgl_resource *src_res,
+ const struct pipe_blit_info *blit);
+
+int virgl_encoder_create_query(struct virgl_context *ctx,
+ uint32_t handle,
+ uint query_type,
+ uint query_index,
+ struct virgl_resource *res,
+ uint32_t offset);
+
+int virgl_encoder_begin_query(struct virgl_context *ctx,
+ uint32_t handle);
+int virgl_encoder_end_query(struct virgl_context *ctx,
+ uint32_t handle);
+int virgl_encoder_get_query_result(struct virgl_context *ctx,
+ uint32_t handle, boolean wait);
+
+int virgl_encoder_render_condition(struct virgl_context *ctx,
+ uint32_t handle, boolean condition,
+ uint mode);
+
+/*
+ * Sub-context commands. Each encodes the matching VIRGL_CCMD_*_SUB_CTX
+ * command with sub_ctx_id as its only payload dword and always returns 0.
+ */
+int virgl_encoder_set_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id);
+int virgl_encoder_create_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id);
+int virgl_encoder_destroy_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id);
+
+/* Encode a BIND_SHADER command carrying handle then type. Always returns 0. */
+int virgl_encode_bind_shader(struct virgl_context *ctx,
+ uint32_t handle, uint32_t type);
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_hw.h b/src/gallium/drivers/virgl/virgl_hw.h
new file mode 100644
index 00000000000..e3c56db2ac6
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_hw.h
@@ -0,0 +1,286 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_HW_H
+#define VIRGL_HW_H
+
+/*
+ * Six unsigned 32-bit fields.
+ * NOTE(review): presumably an origin (x, y, z) plus extent (w, h, d) - confirm
+ * against the users of this struct.
+ */
+struct virgl_box {
+ uint32_t x, y, z;
+ uint32_t w, h, d;
+};
+
+/* formats known by the HW device - a subset of the gallium formats; values are assigned explicitly and are not contiguous */
+enum virgl_formats {
+ VIRGL_FORMAT_B8G8R8A8_UNORM = 1,
+ VIRGL_FORMAT_B8G8R8X8_UNORM = 2,
+ VIRGL_FORMAT_A8R8G8B8_UNORM = 3,
+ VIRGL_FORMAT_X8R8G8B8_UNORM = 4,
+ VIRGL_FORMAT_B5G5R5A1_UNORM = 5,
+ VIRGL_FORMAT_B4G4R4A4_UNORM = 6,
+ VIRGL_FORMAT_B5G6R5_UNORM = 7,
+ VIRGL_FORMAT_L8_UNORM = 9, /**< ubyte luminance */
+ VIRGL_FORMAT_A8_UNORM = 10, /**< ubyte alpha */
+ VIRGL_FORMAT_L8A8_UNORM = 12, /**< ubyte alpha, luminance */
+ VIRGL_FORMAT_L16_UNORM = 13, /**< ushort luminance */
+
+ VIRGL_FORMAT_Z16_UNORM = 16,
+ VIRGL_FORMAT_Z32_UNORM = 17,
+ VIRGL_FORMAT_Z32_FLOAT = 18,
+ VIRGL_FORMAT_Z24_UNORM_S8_UINT = 19,
+ VIRGL_FORMAT_S8_UINT_Z24_UNORM = 20,
+ VIRGL_FORMAT_Z24X8_UNORM = 21,
+ VIRGL_FORMAT_S8_UINT = 23, /**< ubyte stencil */
+
+ VIRGL_FORMAT_R32_FLOAT = 28,
+ VIRGL_FORMAT_R32G32_FLOAT = 29,
+ VIRGL_FORMAT_R32G32B32_FLOAT = 30,
+ VIRGL_FORMAT_R32G32B32A32_FLOAT = 31,
+
+ VIRGL_FORMAT_R16_UNORM = 48,
+ VIRGL_FORMAT_R16G16_UNORM = 49,
+
+ VIRGL_FORMAT_R16G16B16A16_UNORM = 51,
+
+ VIRGL_FORMAT_R16_SNORM = 56,
+ VIRGL_FORMAT_R16G16_SNORM = 57,
+ VIRGL_FORMAT_R16G16B16A16_SNORM = 59,
+
+ VIRGL_FORMAT_R8_UNORM = 64,
+ VIRGL_FORMAT_R8G8_UNORM = 65,
+
+ VIRGL_FORMAT_R8G8B8A8_UNORM = 67,
+
+ VIRGL_FORMAT_R8_SNORM = 74,
+ VIRGL_FORMAT_R8G8_SNORM = 75,
+ VIRGL_FORMAT_R8G8B8_SNORM = 76,
+ VIRGL_FORMAT_R8G8B8A8_SNORM = 77,
+
+ VIRGL_FORMAT_R16_FLOAT = 91,
+ VIRGL_FORMAT_R16G16_FLOAT = 92,
+ VIRGL_FORMAT_R16G16B16_FLOAT = 93,
+ VIRGL_FORMAT_R16G16B16A16_FLOAT = 94,
+
+ VIRGL_FORMAT_L8_SRGB = 95,
+ VIRGL_FORMAT_L8A8_SRGB = 96,
+ VIRGL_FORMAT_B8G8R8A8_SRGB = 100,
+ VIRGL_FORMAT_B8G8R8X8_SRGB = 101,
+
+ /* compressed formats */
+ VIRGL_FORMAT_DXT1_RGB = 105,
+ VIRGL_FORMAT_DXT1_RGBA = 106,
+ VIRGL_FORMAT_DXT3_RGBA = 107,
+ VIRGL_FORMAT_DXT5_RGBA = 108,
+
+ /* sRGB, compressed */
+ VIRGL_FORMAT_DXT1_SRGB = 109,
+ VIRGL_FORMAT_DXT1_SRGBA = 110,
+ VIRGL_FORMAT_DXT3_SRGBA = 111,
+ VIRGL_FORMAT_DXT5_SRGBA = 112,
+
+ /* rgtc compressed */
+ VIRGL_FORMAT_RGTC1_UNORM = 113,
+ VIRGL_FORMAT_RGTC1_SNORM = 114,
+ VIRGL_FORMAT_RGTC2_UNORM = 115,
+ VIRGL_FORMAT_RGTC2_SNORM = 116,
+
+ VIRGL_FORMAT_A8B8G8R8_UNORM = 121,
+ VIRGL_FORMAT_B5G5R5X1_UNORM = 122,
+ VIRGL_FORMAT_R11G11B10_FLOAT = 124,
+ VIRGL_FORMAT_R9G9B9E5_FLOAT = 125,
+ VIRGL_FORMAT_Z32_FLOAT_S8X24_UINT = 126,
+
+ VIRGL_FORMAT_B10G10R10A2_UNORM = 131,
+ VIRGL_FORMAT_R8G8B8X8_UNORM = 134,
+ VIRGL_FORMAT_B4G4R4X4_UNORM = 135,
+ VIRGL_FORMAT_B2G3R3_UNORM = 139,
+
+ VIRGL_FORMAT_L16A16_UNORM = 140,
+ VIRGL_FORMAT_A16_UNORM = 141,
+
+ VIRGL_FORMAT_A8_SNORM = 147,
+ VIRGL_FORMAT_L8_SNORM = 148,
+ VIRGL_FORMAT_L8A8_SNORM = 149,
+
+ VIRGL_FORMAT_A16_SNORM = 151,
+ VIRGL_FORMAT_L16_SNORM = 152,
+ VIRGL_FORMAT_L16A16_SNORM = 153,
+
+ VIRGL_FORMAT_A16_FLOAT = 155,
+ VIRGL_FORMAT_L16_FLOAT = 156,
+ VIRGL_FORMAT_L16A16_FLOAT = 157,
+
+ VIRGL_FORMAT_A32_FLOAT = 159,
+ VIRGL_FORMAT_L32_FLOAT = 160,
+ VIRGL_FORMAT_L32A32_FLOAT = 161,
+
+ VIRGL_FORMAT_R8_UINT = 177,
+ VIRGL_FORMAT_R8G8_UINT = 178,
+ VIRGL_FORMAT_R8G8B8_UINT = 179,
+ VIRGL_FORMAT_R8G8B8A8_UINT = 180,
+
+ VIRGL_FORMAT_R8_SINT = 181,
+ VIRGL_FORMAT_R8G8_SINT = 182,
+ VIRGL_FORMAT_R8G8B8_SINT = 183,
+ VIRGL_FORMAT_R8G8B8A8_SINT = 184,
+
+ VIRGL_FORMAT_R16_UINT = 185,
+ VIRGL_FORMAT_R16G16_UINT = 186,
+ VIRGL_FORMAT_R16G16B16_UINT = 187,
+ VIRGL_FORMAT_R16G16B16A16_UINT = 188,
+
+ VIRGL_FORMAT_R16_SINT = 189,
+ VIRGL_FORMAT_R16G16_SINT = 190,
+ VIRGL_FORMAT_R16G16B16_SINT = 191,
+ VIRGL_FORMAT_R16G16B16A16_SINT = 192,
+ VIRGL_FORMAT_R32_UINT = 193,
+ VIRGL_FORMAT_R32G32_UINT = 194,
+ VIRGL_FORMAT_R32G32B32_UINT = 195,
+ VIRGL_FORMAT_R32G32B32A32_UINT = 196,
+
+ VIRGL_FORMAT_R32_SINT = 197,
+ VIRGL_FORMAT_R32G32_SINT = 198,
+ VIRGL_FORMAT_R32G32B32_SINT = 199,
+ VIRGL_FORMAT_R32G32B32A32_SINT = 200,
+
+ VIRGL_FORMAT_A8_UINT = 201,
+ VIRGL_FORMAT_L8_UINT = 203,
+ VIRGL_FORMAT_L8A8_UINT = 204,
+
+ VIRGL_FORMAT_A8_SINT = 205,
+ VIRGL_FORMAT_L8_SINT = 207,
+ VIRGL_FORMAT_L8A8_SINT = 208,
+
+ VIRGL_FORMAT_A16_UINT = 209,
+ VIRGL_FORMAT_L16_UINT = 211,
+ VIRGL_FORMAT_L16A16_UINT = 212,
+
+ VIRGL_FORMAT_A16_SINT = 213,
+ VIRGL_FORMAT_L16_SINT = 215,
+ VIRGL_FORMAT_L16A16_SINT = 216,
+
+ VIRGL_FORMAT_A32_UINT = 217,
+ VIRGL_FORMAT_L32_UINT = 219,
+ VIRGL_FORMAT_L32A32_UINT = 220,
+
+ VIRGL_FORMAT_A32_SINT = 221,
+ VIRGL_FORMAT_L32_SINT = 223,
+ VIRGL_FORMAT_L32A32_SINT = 224,
+
+ VIRGL_FORMAT_B10G10R10A2_UINT = 225,
+ VIRGL_FORMAT_R8G8B8X8_SNORM = 229,
+
+ VIRGL_FORMAT_R8G8B8X8_SRGB = 230,
+
+ VIRGL_FORMAT_B10G10R10X2_UNORM = 233,
+ VIRGL_FORMAT_R16G16B16X16_UNORM = 234,
+ VIRGL_FORMAT_R16G16B16X16_SNORM = 235,
+ VIRGL_FORMAT_MAX,
+};
+
+#define VIRGL_BIND_DEPTH_STENCIL (1 << 0)
+#define VIRGL_BIND_RENDER_TARGET (1 << 1)
+#define VIRGL_BIND_SAMPLER_VIEW (1 << 3)
+#define VIRGL_BIND_VERTEX_BUFFER (1 << 4)
+#define VIRGL_BIND_INDEX_BUFFER (1 << 5)
+#define VIRGL_BIND_CONSTANT_BUFFER (1 << 6)
+#define VIRGL_BIND_DISPLAY_TARGET (1 << 7)
+#define VIRGL_BIND_STREAM_OUTPUT (1 << 11)
+#define VIRGL_BIND_CURSOR (1 << 16)
+#define VIRGL_BIND_CUSTOM (1 << 17)
+#define VIRGL_BIND_SCANOUT (1 << 18)
+
+/*
+ * Boolean capability set 1: 23 single-bit flags stored as unsigned bitfields.
+ * NOTE(review): C leaves bitfield layout implementation-defined, so anything
+ * exchanging this struct in binary form presumably relies on a shared ABI -
+ * confirm.
+ */
+struct virgl_caps_bool_set1 {
+ unsigned indep_blend_enable:1;
+ unsigned indep_blend_func:1;
+ unsigned cube_map_array:1;
+ unsigned shader_stencil_export:1;
+ unsigned conditional_render:1;
+ unsigned start_instance:1;
+ unsigned primitive_restart:1;
+ unsigned blend_eq_sep:1;
+ unsigned instanceid:1;
+ unsigned vertex_element_instance_divisor:1;
+ unsigned seamless_cube_map:1;
+ unsigned occlusion_query:1;
+ unsigned timer_query:1;
+ unsigned streamout_pause_resume:1;
+ unsigned texture_multisample:1;
+ unsigned fragment_coord_conventions:1;
+ unsigned depth_clip_disable:1;
+ unsigned seamless_cube_map_per_texture:1;
+ unsigned ubo:1;
+ unsigned color_clamping:1; /* not in GL 3.1 core profile */
+ unsigned poly_stipple:1; /* not in GL 3.1 core profile */
+ unsigned mirror_clamp:1;
+ unsigned texture_query_lod:1;
+};
+
+/* endless expansion capabilities - current gallium has 252 formats */
+/* 16 x 32 = 512 bits. NOTE(review): presumably one bit per virgl_formats value - confirm. */
+struct virgl_supported_format_mask {
+ uint32_t bitmask[16];
+};
+/* capabilities set 2 - version 1 - 32-bit and float values */
+/*
+ * Version 1 capability block: a version word, four format masks, the boolean
+ * capability set, then scalar 32-bit limits.
+ */
+struct virgl_caps_v1 {
+/* leading word; union virgl_caps exposes a max_version at the same offset */
+ uint32_t max_version;
+ struct virgl_supported_format_mask sampler;
+ struct virgl_supported_format_mask render;
+ struct virgl_supported_format_mask depthstencil;
+ struct virgl_supported_format_mask vertexbuffer;
+ struct virgl_caps_bool_set1 bset;
+ uint32_t glsl_level;
+ uint32_t max_texture_array_layers;
+ uint32_t max_streamout_buffers;
+ uint32_t max_dual_source_render_targets;
+ uint32_t max_render_targets;
+ uint32_t max_samples;
+ uint32_t prim_mask;
+ uint32_t max_tbo_size;
+ uint32_t max_uniform_blocks;
+ uint32_t max_viewports;
+ uint32_t max_texture_gather_components;
+};
+
+/*
+ * Capability container. max_version overlays v1.max_version (both are the
+ * leading uint32_t), presumably so the version can be read before choosing a
+ * layout - confirm.
+ */
+union virgl_caps {
+ uint32_t max_version;
+ struct virgl_caps_v1 v1;
+};
+
+/* Error codes; VIRGL_ERROR_NONE is 0 and the others follow sequentially. */
+enum virgl_errors {
+ VIRGL_ERROR_NONE,
+ VIRGL_ERROR_UNKNOWN,
+ VIRGL_ERROR_UNKNOWN_RESOURCE_FORMAT,
+};
+
+/* Context error codes; VIRGL_ERROR_CTX_NONE is 0 and the others follow sequentially. */
+enum virgl_ctx_errors {
+ VIRGL_ERROR_CTX_NONE,
+ VIRGL_ERROR_CTX_UNKNOWN,
+ VIRGL_ERROR_CTX_ILLEGAL_SHADER,
+ VIRGL_ERROR_CTX_ILLEGAL_HANDLE,
+ VIRGL_ERROR_CTX_ILLEGAL_RESOURCE,
+ VIRGL_ERROR_CTX_ILLEGAL_SURFACE,
+ VIRGL_ERROR_CTX_ILLEGAL_VERTEX_FORMAT,
+ VIRGL_ERROR_CTX_ILLEGAL_CMD_BUFFER,
+};
+
+
+#define VIRGL_RESOURCE_Y_0_TOP (1 << 0)
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_protocol.h b/src/gallium/drivers/virgl/virgl_protocol.h
new file mode 100644
index 00000000000..ca3142f5f72
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_protocol.h
@@ -0,0 +1,468 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_PROTOCOL_H
+#define VIRGL_PROTOCOL_H
+
+#define VIRGL_QUERY_STATE_NEW 0
+#define VIRGL_QUERY_STATE_DONE 1
+#define VIRGL_QUERY_STATE_WAIT_HOST 2
+
+/*
+ * Query result record: two 32-bit words followed by a 64-bit result.
+ * NOTE(review): query_state presumably holds one of VIRGL_QUERY_STATE_* -
+ * confirm against the code that fills this in.
+ */
+struct virgl_host_query_state {
+ uint32_t query_state;
+ uint32_t result_size;
+ uint64_t result;
+};
+
+/*
+ * Object types. Values are implicit: VIRGL_OBJECT_NULL is 0 and each entry is
+ * one more than the previous, so VIRGL_MAX_OBJECTS equals the number of
+ * entries before it. Reordering or inserting entries changes the values.
+ */
+enum virgl_object_type {
+ VIRGL_OBJECT_NULL,
+ VIRGL_OBJECT_BLEND,
+ VIRGL_OBJECT_RASTERIZER,
+ VIRGL_OBJECT_DSA,
+ VIRGL_OBJECT_SHADER,
+ VIRGL_OBJECT_VERTEX_ELEMENTS,
+ VIRGL_OBJECT_SAMPLER_VIEW,
+ VIRGL_OBJECT_SAMPLER_STATE,
+ VIRGL_OBJECT_SURFACE,
+ VIRGL_OBJECT_QUERY,
+ VIRGL_OBJECT_STREAMOUT_TARGET,
+ VIRGL_MAX_OBJECTS,
+};
+
+/* context cmds to be encoded in the command stream */
+/*
+ * Only NOP and CREATE_OBJECT carry explicit values; every later entry is one
+ * more than its predecessor. These values go into the low bits of the header
+ * built by VIRGL_CMD0, so inserting or reordering entries changes the encoded
+ * command numbers - append new commands at the end.
+ */
+enum virgl_context_cmd {
+ VIRGL_CCMD_NOP = 0,
+ VIRGL_CCMD_CREATE_OBJECT = 1,
+ VIRGL_CCMD_BIND_OBJECT,
+ VIRGL_CCMD_DESTROY_OBJECT,
+ VIRGL_CCMD_SET_VIEWPORT_STATE,
+ VIRGL_CCMD_SET_FRAMEBUFFER_STATE,
+ VIRGL_CCMD_SET_VERTEX_BUFFERS,
+ VIRGL_CCMD_CLEAR,
+ VIRGL_CCMD_DRAW_VBO,
+ VIRGL_CCMD_RESOURCE_INLINE_WRITE,
+ VIRGL_CCMD_SET_SAMPLER_VIEWS,
+ VIRGL_CCMD_SET_INDEX_BUFFER,
+ VIRGL_CCMD_SET_CONSTANT_BUFFER,
+ VIRGL_CCMD_SET_STENCIL_REF,
+ VIRGL_CCMD_SET_BLEND_COLOR,
+ VIRGL_CCMD_SET_SCISSOR_STATE,
+ VIRGL_CCMD_BLIT,
+ VIRGL_CCMD_RESOURCE_COPY_REGION,
+ VIRGL_CCMD_BIND_SAMPLER_STATES,
+ VIRGL_CCMD_BEGIN_QUERY,
+ VIRGL_CCMD_END_QUERY,
+ VIRGL_CCMD_GET_QUERY_RESULT,
+ VIRGL_CCMD_SET_POLYGON_STIPPLE,
+ VIRGL_CCMD_SET_CLIP_STATE,
+ VIRGL_CCMD_SET_SAMPLE_MASK,
+ VIRGL_CCMD_SET_STREAMOUT_TARGETS,
+ VIRGL_CCMD_SET_RENDER_CONDITION,
+ VIRGL_CCMD_SET_UNIFORM_BUFFER,
+
+ VIRGL_CCMD_SET_SUB_CTX,
+ VIRGL_CCMD_CREATE_SUB_CTX,
+ VIRGL_CCMD_DESTROY_SUB_CTX,
+ VIRGL_CCMD_BIND_SHADER,
+};
+
+/*
+ 8-bit cmd headers
+ 8-bit object type
+ 16-bit length
+*/
+
+#define VIRGL_CMD0(cmd, obj, len) ((cmd) | ((obj) << 8) | ((len) << 16))
+
+/* hw specification */
+#define VIRGL_MAX_COLOR_BUFS 8
+#define VIRGL_MAX_CLIP_PLANES 8
+
+#define VIRGL_OBJ_CREATE_HEADER 0
+#define VIRGL_OBJ_CREATE_HANDLE 1
+
+#define VIRGL_OBJ_BIND_HEADER 0
+#define VIRGL_OBJ_BIND_HANDLE 1
+
+#define VIRGL_OBJ_DESTROY_HANDLE 1
+
+/* some of these defines are a specification - not used in the code */
+/* bit offsets for blend state object */
+#define VIRGL_OBJ_BLEND_SIZE (VIRGL_MAX_COLOR_BUFS + 3)
+#define VIRGL_OBJ_BLEND_HANDLE 1
+#define VIRGL_OBJ_BLEND_S0 2
+/* mask first, then shift - parenthesized like the other S0 fields */
+#define VIRGL_OBJ_BLEND_S0_INDEPENDENT_BLEND_ENABLE(x) (((x) & 0x1) << 0)
+#define VIRGL_OBJ_BLEND_S0_LOGICOP_ENABLE(x) (((x) & 0x1) << 1)
+#define VIRGL_OBJ_BLEND_S0_DITHER(x) (((x) & 0x1) << 2)
+#define VIRGL_OBJ_BLEND_S0_ALPHA_TO_COVERAGE(x) (((x) & 0x1) << 3)
+#define VIRGL_OBJ_BLEND_S0_ALPHA_TO_ONE(x) (((x) & 0x1) << 4)
+#define VIRGL_OBJ_BLEND_S1 3
+#define VIRGL_OBJ_BLEND_S1_LOGICOP_FUNC(x) (((x) & 0xf) << 0)
+/* repeated once per number of cbufs */
+
+#define VIRGL_OBJ_BLEND_S2(cbuf) (4 + (cbuf))
+#define VIRGL_OBJ_BLEND_S2_RT_BLEND_ENABLE(x) (((x) & 0x1) << 0)
+#define VIRGL_OBJ_BLEND_S2_RT_RGB_FUNC(x) (((x) & 0x7) << 1)
+#define VIRGL_OBJ_BLEND_S2_RT_RGB_SRC_FACTOR(x) (((x) & 0x1f) << 4)
+#define VIRGL_OBJ_BLEND_S2_RT_RGB_DST_FACTOR(x) (((x) & 0x1f) << 9)
+#define VIRGL_OBJ_BLEND_S2_RT_ALPHA_FUNC(x) (((x) & 0x7) << 14)
+#define VIRGL_OBJ_BLEND_S2_RT_ALPHA_SRC_FACTOR(x) (((x) & 0x1f) << 17)
+#define VIRGL_OBJ_BLEND_S2_RT_ALPHA_DST_FACTOR(x) (((x) & 0x1f) << 22)
+#define VIRGL_OBJ_BLEND_S2_RT_COLORMASK(x) (((x) & 0xf) << 27)
+
+/* bit offsets for DSA state */
+#define VIRGL_OBJ_DSA_SIZE 5
+#define VIRGL_OBJ_DSA_HANDLE 1
+#define VIRGL_OBJ_DSA_S0 2
+#define VIRGL_OBJ_DSA_S0_DEPTH_ENABLE(x) (((x) & 0x1) << 0)
+#define VIRGL_OBJ_DSA_S0_DEPTH_WRITEMASK(x) (((x) & 0x1) << 1)
+#define VIRGL_OBJ_DSA_S0_DEPTH_FUNC(x) (((x) & 0x7) << 2)
+#define VIRGL_OBJ_DSA_S0_ALPHA_ENABLED(x) (((x) & 0x1) << 8)
+#define VIRGL_OBJ_DSA_S0_ALPHA_FUNC(x) (((x) & 0x7) << 9)
+#define VIRGL_OBJ_DSA_S1 3
+#define VIRGL_OBJ_DSA_S2 4
+#define VIRGL_OBJ_DSA_S1_STENCIL_ENABLED(x) (((x) & 0x1) << 0)
+#define VIRGL_OBJ_DSA_S1_STENCIL_FUNC(x) (((x) & 0x7) << 1)
+#define VIRGL_OBJ_DSA_S1_STENCIL_FAIL_OP(x) (((x) & 0x7) << 4)
+#define VIRGL_OBJ_DSA_S1_STENCIL_ZPASS_OP(x) (((x) & 0x7) << 7)
+#define VIRGL_OBJ_DSA_S1_STENCIL_ZFAIL_OP(x) (((x) & 0x7) << 10)
+#define VIRGL_OBJ_DSA_S1_STENCIL_VALUEMASK(x) (((x) & 0xff) << 13)
+#define VIRGL_OBJ_DSA_S1_STENCIL_WRITEMASK(x) (((x) & 0xff) << 21)
+#define VIRGL_OBJ_DSA_ALPHA_REF 5
+
+/* offsets for rasterizer state */
+#define VIRGL_OBJ_RS_SIZE 9
+#define VIRGL_OBJ_RS_HANDLE 1
+#define VIRGL_OBJ_RS_S0 2
+#define VIRGL_OBJ_RS_S0_FLATSHADE(x) (((x) & 0x1) << 0)
+#define VIRGL_OBJ_RS_S0_DEPTH_CLIP(x) (((x) & 0x1) << 1)
+#define VIRGL_OBJ_RS_S0_CLIP_HALFZ(x) (((x) & 0x1) << 2)
+#define VIRGL_OBJ_RS_S0_RASTERIZER_DISCARD(x) (((x) & 0x1) << 3)
+#define VIRGL_OBJ_RS_S0_FLATSHADE_FIRST(x) (((x) & 0x1) << 4)
+#define VIRGL_OBJ_RS_S0_LIGHT_TWOSIZE(x) (((x) & 0x1) << 5)
+#define VIRGL_OBJ_RS_S0_SPRITE_COORD_MODE(x) (((x) & 0x1) << 6)
+#define VIRGL_OBJ_RS_S0_POINT_QUAD_RASTERIZATION(x) (((x) & 0x1) << 7)
+#define VIRGL_OBJ_RS_S0_CULL_FACE(x) (((x) & 0x3) << 8)
+#define VIRGL_OBJ_RS_S0_FILL_FRONT(x) (((x) & 0x3) << 10)
+#define VIRGL_OBJ_RS_S0_FILL_BACK(x) (((x) & 0x3) << 12)
+#define VIRGL_OBJ_RS_S0_SCISSOR(x) (((x) & 0x1) << 14)
+#define VIRGL_OBJ_RS_S0_FRONT_CCW(x) (((x) & 0x1) << 15)
+#define VIRGL_OBJ_RS_S0_CLAMP_VERTEX_COLOR(x) (((x) & 0x1) << 16)
+#define VIRGL_OBJ_RS_S0_CLAMP_FRAGMENT_COLOR(x) (((x) & 0x1) << 17)
+#define VIRGL_OBJ_RS_S0_OFFSET_LINE(x) (((x) & 0x1) << 18)
+#define VIRGL_OBJ_RS_S0_OFFSET_POINT(x) (((x) & 0x1) << 19)
+#define VIRGL_OBJ_RS_S0_OFFSET_TRI(x) (((x) & 0x1) << 20)
+#define VIRGL_OBJ_RS_S0_POLY_SMOOTH(x) (((x) & 0x1) << 21)
+#define VIRGL_OBJ_RS_S0_POLY_STIPPLE_ENABLE(x) (((x) & 0x1) << 22)
+#define VIRGL_OBJ_RS_S0_POINT_SMOOTH(x) (((x) & 0x1) << 23)
+#define VIRGL_OBJ_RS_S0_POINT_SIZE_PER_VERTEX(x) (((x) & 0x1) << 24)
+#define VIRGL_OBJ_RS_S0_MULTISAMPLE(x) (((x) & 0x1) << 25)
+#define VIRGL_OBJ_RS_S0_LINE_SMOOTH(x) (((x) & 0x1) << 26)
+#define VIRGL_OBJ_RS_S0_LINE_STIPPLE_ENABLE(x) (((x) & 0x1) << 27)
+#define VIRGL_OBJ_RS_S0_LINE_LAST_PIXEL(x) (((x) & 0x1) << 28)
+#define VIRGL_OBJ_RS_S0_HALF_PIXEL_CENTER(x) (((x) & 0x1) << 29)
+#define VIRGL_OBJ_RS_S0_BOTTOM_EDGE_RULE(x) (((x) & 0x1) << 30)
+
+#define VIRGL_OBJ_RS_POINT_SIZE 3
+#define VIRGL_OBJ_RS_SPRITE_COORD_ENABLE 4
+#define VIRGL_OBJ_RS_S3 5
+
+#define VIRGL_OBJ_RS_S3_LINE_STIPPLE_PATTERN(x) (((x) & 0xffff) << 0)
+#define VIRGL_OBJ_RS_S3_LINE_STIPPLE_FACTOR(x) (((x) & 0xff) << 16)
+/* unsigned mask: bit 7 of x lands in bit 31, which a signed int cannot hold */
+#define VIRGL_OBJ_RS_S3_CLIP_PLANE_ENABLE(x) (((x) & 0xffu) << 24)
+#define VIRGL_OBJ_RS_LINE_WIDTH 6
+#define VIRGL_OBJ_RS_OFFSET_UNITS 7
+#define VIRGL_OBJ_RS_OFFSET_SCALE 8
+#define VIRGL_OBJ_RS_OFFSET_CLAMP 9
+
+#define VIRGL_OBJ_CLEAR_SIZE 8
+#define VIRGL_OBJ_CLEAR_BUFFERS 1
+#define VIRGL_OBJ_CLEAR_COLOR_0 2 /* color is 4 * u32/f32/i32 */
+#define VIRGL_OBJ_CLEAR_COLOR_1 3
+#define VIRGL_OBJ_CLEAR_COLOR_2 4
+#define VIRGL_OBJ_CLEAR_COLOR_3 5
+#define VIRGL_OBJ_CLEAR_DEPTH_0 6 /* depth is a double precision float */
+#define VIRGL_OBJ_CLEAR_DEPTH_1 7
+#define VIRGL_OBJ_CLEAR_STENCIL 8
+
+/* shader object */
+/* 5 dwords, plus 4 + 2 per output when nso is non-zero; nso is parenthesized so expression arguments expand correctly */
+#define VIRGL_OBJ_SHADER_HDR_SIZE(nso) (5 + ((nso) ? (2 * (nso)) + 4 : 0))
+#define VIRGL_OBJ_SHADER_HANDLE 1
+#define VIRGL_OBJ_SHADER_TYPE 2
+#define VIRGL_OBJ_SHADER_OFFSET 3
+#define VIRGL_OBJ_SHADER_OFFSET_VAL(x) (((x) & 0x7fffffff) << 0)
+/* start contains full length in VAL - also implies continuations */
+/* continuation contains offset in VAL */
+#define VIRGL_OBJ_SHADER_OFFSET_CONT (0x1 << 31)
+#define VIRGL_OBJ_SHADER_NUM_TOKENS 4
+#define VIRGL_OBJ_SHADER_SO_NUM_OUTPUTS 5
+#define VIRGL_OBJ_SHADER_SO_STRIDE(x) (6 + (x))
+/*
+ * Stream output entries: two dwords per output x, starting at dword 10.
+ * Index arguments are parenthesized so expression arguments expand correctly;
+ * DST_OFFSET uses an unsigned mask because its top bit is shifted to bit 31.
+ */
+#define VIRGL_OBJ_SHADER_SO_OUTPUT0(x) (10 + ((x) * 2))
+#define VIRGL_OBJ_SHADER_SO_OUTPUT_REGISTER_INDEX(x) (((x) & 0xff) << 0)
+#define VIRGL_OBJ_SHADER_SO_OUTPUT_START_COMPONENT(x) (((x) & 0x3) << 8)
+#define VIRGL_OBJ_SHADER_SO_OUTPUT_NUM_COMPONENTS(x) (((x) & 0x7) << 10)
+#define VIRGL_OBJ_SHADER_SO_OUTPUT_BUFFER(x) (((x) & 0x7) << 13)
+#define VIRGL_OBJ_SHADER_SO_OUTPUT_DST_OFFSET(x) (((x) & 0xffffu) << 16)
+#define VIRGL_OBJ_SHADER_SO_OUTPUT0_SO(x) (11 + ((x) * 2))
+#define VIRGL_OBJ_SHADER_SO_OUTPUT_STREAM(x) (((x) & 0x03) << 0)
+
+/* viewport state */
+/*
+ * Six dwords per viewport (three scale, three translate) after the start
+ * slot. Arguments are parenthesized so expression arguments expand correctly.
+ */
+#define VIRGL_SET_VIEWPORT_STATE_SIZE(num_viewports) ((6 * (num_viewports)) + 1)
+#define VIRGL_SET_VIEWPORT_START_SLOT 1
+#define VIRGL_SET_VIEWPORT_STATE_SCALE_0(x) (2 + ((x) * 6))
+#define VIRGL_SET_VIEWPORT_STATE_SCALE_1(x) (3 + ((x) * 6))
+#define VIRGL_SET_VIEWPORT_STATE_SCALE_2(x) (4 + ((x) * 6))
+#define VIRGL_SET_VIEWPORT_STATE_TRANSLATE_0(x) (5 + ((x) * 6))
+#define VIRGL_SET_VIEWPORT_STATE_TRANSLATE_1(x) (6 + ((x) * 6))
+#define VIRGL_SET_VIEWPORT_STATE_TRANSLATE_2(x) (7 + ((x) * 6))
+
+/* framebuffer state */
+/* nr_cbufs handles plus two more dwords; argument parenthesized so expression arguments expand correctly */
+#define VIRGL_SET_FRAMEBUFFER_STATE_SIZE(nr_cbufs) ((nr_cbufs) + 2)
+#define VIRGL_SET_FRAMEBUFFER_STATE_NR_CBUFS 1
+#define VIRGL_SET_FRAMEBUFFER_STATE_NR_ZSURF_HANDLE 2
+#define VIRGL_SET_FRAMEBUFFER_STATE_CBUF_HANDLE(x) ((x) + 3)
+
+/* vertex elements object */
+#define VIRGL_OBJ_VERTEX_ELEMENTS_SIZE(num_elements) (((num_elements) * 4) + 1)
+#define VIRGL_OBJ_VERTEX_ELEMENTS_HANDLE 1
+#define VIRGL_OBJ_VERTEX_ELEMENTS_V0_SRC_OFFSET(x) (((x) * 4) + 2) /* repeated per VE */
+#define VIRGL_OBJ_VERTEX_ELEMENTS_V0_INSTANCE_DIVISOR(x) (((x) * 4) + 3)
+#define VIRGL_OBJ_VERTEX_ELEMENTS_V0_VERTEX_BUFFER_INDEX(x) (((x) * 4) + 4)
+#define VIRGL_OBJ_VERTEX_ELEMENTS_V0_SRC_FORMAT(x) (((x) * 4) + 5)
+
+/* vertex buffers */
+#define VIRGL_SET_VERTEX_BUFFERS_SIZE(num_buffers) ((num_buffers) * 3)
+#define VIRGL_SET_VERTEX_BUFFER_STRIDE(x) (((x) * 3) + 1)
+#define VIRGL_SET_VERTEX_BUFFER_OFFSET(x) (((x) * 3) + 2)
+#define VIRGL_SET_VERTEX_BUFFER_HANDLE(x) (((x) * 3) + 3)
+
+/* index buffer */
+#define VIRGL_SET_INDEX_BUFFER_SIZE(ib) (((ib) ? 2 : 0) + 1)
+#define VIRGL_SET_INDEX_BUFFER_HANDLE 1
+#define VIRGL_SET_INDEX_BUFFER_INDEX_SIZE 2 /* only if sending an IB handle */
+#define VIRGL_SET_INDEX_BUFFER_OFFSET 3 /* only if sending an IB handle */
+
+/* constant buffer */
+#define VIRGL_SET_CONSTANT_BUFFER_SHADER_TYPE 1
+#define VIRGL_SET_CONSTANT_BUFFER_INDEX 2
+#define VIRGL_SET_CONSTANT_BUFFER_DATA_START 3
+
+#define VIRGL_SET_UNIFORM_BUFFER_SIZE 5
+#define VIRGL_SET_UNIFORM_BUFFER_SHADER_TYPE 1
+#define VIRGL_SET_UNIFORM_BUFFER_INDEX 2
+#define VIRGL_SET_UNIFORM_BUFFER_OFFSET 3
+#define VIRGL_SET_UNIFORM_BUFFER_LENGTH 4
+#define VIRGL_SET_UNIFORM_BUFFER_RES_HANDLE 5
+
+/* draw VBO */
+#define VIRGL_DRAW_VBO_SIZE 12
+#define VIRGL_DRAW_VBO_START 1
+#define VIRGL_DRAW_VBO_COUNT 2
+#define VIRGL_DRAW_VBO_MODE 3
+#define VIRGL_DRAW_VBO_INDEXED 4
+#define VIRGL_DRAW_VBO_INSTANCE_COUNT 5
+#define VIRGL_DRAW_VBO_INDEX_BIAS 6
+#define VIRGL_DRAW_VBO_START_INSTANCE 7
+#define VIRGL_DRAW_VBO_PRIMITIVE_RESTART 8
+#define VIRGL_DRAW_VBO_RESTART_INDEX 9
+#define VIRGL_DRAW_VBO_MIN_INDEX 10
+#define VIRGL_DRAW_VBO_MAX_INDEX 11
+#define VIRGL_DRAW_VBO_COUNT_FROM_SO 12
+
+/* create surface */
+#define VIRGL_OBJ_SURFACE_SIZE 5
+#define VIRGL_OBJ_SURFACE_HANDLE 1
+#define VIRGL_OBJ_SURFACE_RES_HANDLE 2
+#define VIRGL_OBJ_SURFACE_FORMAT 3
+#define VIRGL_OBJ_SURFACE_BUFFER_FIRST_ELEMENT 4
+#define VIRGL_OBJ_SURFACE_BUFFER_LAST_ELEMENT 5
+#define VIRGL_OBJ_SURFACE_TEXTURE_LEVEL 4
+#define VIRGL_OBJ_SURFACE_TEXTURE_LAYERS 5
+
+/* create streamout target */
+#define VIRGL_OBJ_STREAMOUT_SIZE 4
+#define VIRGL_OBJ_STREAMOUT_HANDLE 1
+#define VIRGL_OBJ_STREAMOUT_RES_HANDLE 2
+#define VIRGL_OBJ_STREAMOUT_BUFFER_OFFSET 3
+#define VIRGL_OBJ_STREAMOUT_BUFFER_SIZE 4
+
+/* sampler state */
+#define VIRGL_OBJ_SAMPLER_STATE_SIZE 9
+#define VIRGL_OBJ_SAMPLER_STATE_HANDLE 1
+#define VIRGL_OBJ_SAMPLER_STATE_S0 2
+#define VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_S(x) (((x) & 0x7) << 0)
+#define VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_T(x) (((x) & 0x7) << 3)
+#define VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_R(x) (((x) & 0x7) << 6)
+#define VIRGL_OBJ_SAMPLE_STATE_S0_MIN_IMG_FILTER(x) (((x) & 0x3) << 9)
+#define VIRGL_OBJ_SAMPLE_STATE_S0_MIN_MIP_FILTER(x) (((x) & 0x3) << 11)
+#define VIRGL_OBJ_SAMPLE_STATE_S0_MAG_IMG_FILTER(x) (((x) & 0x3) << 13)
+#define VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_MODE(x) (((x) & 0x1) << 15)
+#define VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_FUNC(x) (((x) & 0x7) << 16)
+
+#define VIRGL_OBJ_SAMPLER_STATE_LOD_BIAS 3
+#define VIRGL_OBJ_SAMPLER_STATE_MIN_LOD 4
+#define VIRGL_OBJ_SAMPLER_STATE_MAX_LOD 5
+#define VIRGL_OBJ_SAMPLER_STATE_BORDER_COLOR(x) ((x) + 6) /* 6 - 9 */
+
+
+/* sampler view */
+#define VIRGL_OBJ_SAMPLER_VIEW_SIZE 6
+#define VIRGL_OBJ_SAMPLER_VIEW_HANDLE 1
+#define VIRGL_OBJ_SAMPLER_VIEW_RES_HANDLE 2
+#define VIRGL_OBJ_SAMPLER_VIEW_FORMAT 3
+#define VIRGL_OBJ_SAMPLER_VIEW_BUFFER_FIRST_ELEMENT 4
+#define VIRGL_OBJ_SAMPLER_VIEW_BUFFER_LAST_ELEMENT 5
+#define VIRGL_OBJ_SAMPLER_VIEW_TEXTURE_LAYER 4
+#define VIRGL_OBJ_SAMPLER_VIEW_TEXTURE_LEVEL 5
+#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE 6
+#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_R(x) (((x) & 0x7) << 0)
+#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_G(x) (((x) & 0x7) << 3)
+#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_B(x) (((x) & 0x7) << 6)
+#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_A(x) (((x) & 0x7) << 9)
+
+/* set sampler views */
+#define VIRGL_SET_SAMPLER_VIEWS_SIZE(num_views) ((num_views) + 2)
+#define VIRGL_SET_SAMPLER_VIEWS_SHADER_TYPE 1
+#define VIRGL_SET_SAMPLER_VIEWS_START_SLOT 2
+#define VIRGL_SET_SAMPLER_VIEWS_V0_HANDLE 3
+
+/* bind sampler states */
+#define VIRGL_BIND_SAMPLER_STATES(num_states) ((num_states) + 2)
+#define VIRGL_BIND_SAMPLER_STATES_SHADER_TYPE 1
+#define VIRGL_BIND_SAMPLER_STATES_START_SLOT 2
+#define VIRGL_BIND_SAMPLER_STATES_S0_HANDLE 3
+
+/* set stencil reference */
+#define VIRGL_SET_STENCIL_REF_SIZE 1
+#define VIRGL_SET_STENCIL_REF 1
+/* f in bits 0-7, s in bits 8-15; arguments parenthesized so expression arguments are masked as a whole */
+#define VIRGL_STENCIL_REF_VAL(f, s) (((f) & 0xff) | (((s) & 0xff) << 8))
+
+/* set blend color */
+#define VIRGL_SET_BLEND_COLOR_SIZE 4
+#define VIRGL_SET_BLEND_COLOR(x) ((x) + 1)
+
+/* set scissor state */
+/*
+ * Two dwords per scissor (min x/y, max x/y) after the start slot. Arguments
+ * are parenthesized so expression arguments expand correctly.
+ */
+#define VIRGL_SET_SCISSOR_STATE_SIZE(x) (1 + 2 * (x))
+#define VIRGL_SET_SCISSOR_START_SLOT 1
+#define VIRGL_SET_SCISSOR_MINX_MINY(x) (2 + ((x) * 2))
+#define VIRGL_SET_SCISSOR_MAXX_MAXY(x) (3 + ((x) * 2))
+
+/* resource copy region */
+#define VIRGL_CMD_RESOURCE_COPY_REGION_SIZE 13
+#define VIRGL_CMD_RCR_DST_RES_HANDLE 1
+#define VIRGL_CMD_RCR_DST_LEVEL 2
+#define VIRGL_CMD_RCR_DST_X 3
+#define VIRGL_CMD_RCR_DST_Y 4
+#define VIRGL_CMD_RCR_DST_Z 5
+#define VIRGL_CMD_RCR_SRC_RES_HANDLE 6
+#define VIRGL_CMD_RCR_SRC_LEVEL 7
+#define VIRGL_CMD_RCR_SRC_X 8
+#define VIRGL_CMD_RCR_SRC_Y 9
+#define VIRGL_CMD_RCR_SRC_Z 10
+#define VIRGL_CMD_RCR_SRC_W 11
+#define VIRGL_CMD_RCR_SRC_H 12
+#define VIRGL_CMD_RCR_SRC_D 13
+
+/* blit */
+#define VIRGL_CMD_BLIT_SIZE 21
+#define VIRGL_CMD_BLIT_S0 1
+#define VIRGL_CMD_BLIT_S0_MASK(x) (((x) & 0xff) << 0)
+#define VIRGL_CMD_BLIT_S0_FILTER(x) (((x) & 0x3) << 8)
+#define VIRGL_CMD_BLIT_S0_SCISSOR_ENABLE(x) (((x) & 0x1) << 10)
+#define VIRGL_CMD_BLIT_SCISSOR_MINX_MINY 2
+#define VIRGL_CMD_BLIT_SCISSOR_MAXX_MAXY 3
+#define VIRGL_CMD_BLIT_DST_RES_HANDLE 4
+#define VIRGL_CMD_BLIT_DST_LEVEL 5
+#define VIRGL_CMD_BLIT_DST_FORMAT 6
+#define VIRGL_CMD_BLIT_DST_X 7
+#define VIRGL_CMD_BLIT_DST_Y 8
+#define VIRGL_CMD_BLIT_DST_Z 9
+#define VIRGL_CMD_BLIT_DST_W 10
+#define VIRGL_CMD_BLIT_DST_H 11
+#define VIRGL_CMD_BLIT_DST_D 12
+#define VIRGL_CMD_BLIT_SRC_RES_HANDLE 13
+#define VIRGL_CMD_BLIT_SRC_LEVEL 14
+#define VIRGL_CMD_BLIT_SRC_FORMAT 15
+#define VIRGL_CMD_BLIT_SRC_X 16
+#define VIRGL_CMD_BLIT_SRC_Y 17
+#define VIRGL_CMD_BLIT_SRC_Z 18
+#define VIRGL_CMD_BLIT_SRC_W 19
+#define VIRGL_CMD_BLIT_SRC_H 20
+#define VIRGL_CMD_BLIT_SRC_D 21
+
+/* query object */
+#define VIRGL_OBJ_QUERY_SIZE 4
+#define VIRGL_OBJ_QUERY_HANDLE 1
+#define VIRGL_OBJ_QUERY_TYPE_INDEX 2
+/*
+ * Query type in bits 0-15, query index in bits 16-31 of the same dword.
+ * Arguments are parenthesized so expression arguments are masked as a whole;
+ * INDEX uses an unsigned mask because its top bit is shifted to bit 31.
+ */
+#define VIRGL_OBJ_QUERY_TYPE(x) ((x) & 0xffff)
+#define VIRGL_OBJ_QUERY_INDEX(x) (((x) & 0xffffu) << 16)
+#define VIRGL_OBJ_QUERY_OFFSET 3
+#define VIRGL_OBJ_QUERY_RES_HANDLE 4
+
+#define VIRGL_QUERY_BEGIN_HANDLE 1
+
+#define VIRGL_QUERY_END_HANDLE 1
+
+#define VIRGL_QUERY_RESULT_HANDLE 1
+#define VIRGL_QUERY_RESULT_WAIT 2
+
+/* render condition */
+#define VIRGL_RENDER_CONDITION_SIZE 3
+#define VIRGL_RENDER_CONDITION_HANDLE 1
+#define VIRGL_RENDER_CONDITION_CONDITION 2
+#define VIRGL_RENDER_CONDITION_MODE 3
+
+/* resource inline write */
+#define VIRGL_RESOURCE_IW_RES_HANDLE 1
+#define VIRGL_RESOURCE_IW_LEVEL 2
+#define VIRGL_RESOURCE_IW_USAGE 3
+#define VIRGL_RESOURCE_IW_STRIDE 4
+#define VIRGL_RESOURCE_IW_LAYER_STRIDE 5
+#define VIRGL_RESOURCE_IW_X 6
+#define VIRGL_RESOURCE_IW_Y 7
+#define VIRGL_RESOURCE_IW_Z 8
+#define VIRGL_RESOURCE_IW_W 9
+#define VIRGL_RESOURCE_IW_H 10
+#define VIRGL_RESOURCE_IW_D 11
+#define VIRGL_RESOURCE_IW_DATA_START 12
+
+/* set streamout targets */
+#define VIRGL_SET_STREAMOUT_TARGETS_APPEND_BITMASK 1
+#define VIRGL_SET_STREAMOUT_TARGETS_H0 2
+
+/* set sample mask */
+#define VIRGL_SET_SAMPLE_MASK_SIZE 1
+#define VIRGL_SET_SAMPLE_MASK_MASK 1
+
+/* set clip state */
+#define VIRGL_SET_CLIP_STATE_SIZE 32
+#define VIRGL_SET_CLIP_STATE_C0 1
+
+/* polygon stipple */
+#define VIRGL_POLYGON_STIPPLE_SIZE 32
+#define VIRGL_POLYGON_STIPPLE_P0 1
+
+#define VIRGL_BIND_SHADER_SIZE 2
+#define VIRGL_BIND_SHADER_HANDLE 1
+#define VIRGL_BIND_SHADER_TYPE 2
+
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_public.h b/src/gallium/drivers/virgl/virgl_public.h
new file mode 100644
index 00000000000..a3ea560df7b
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_public.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_PUBLIC_H
+#define VIRGL_PUBLIC_H
+
+struct pipe_screen;
+struct virgl_winsys;
+
+/* Create a virgl pipe_screen on top of the winsys 'vws'. */
+struct pipe_screen *
+virgl_create_screen(struct virgl_winsys *vws);
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_query.c b/src/gallium/drivers/virgl/virgl_query.c
new file mode 100644
index 00000000000..b0200556342
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_query.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "virgl_context.h"
+#include "virgl_encode.h"
+#include "virgl_protocol.h"
+#include "virgl_resource.h"
+
+/* Driver-side state for one query object. */
+struct virgl_query {
+ uint32_t handle; /* object handle passed to the encoder for this query */
+ struct virgl_resource *buf; /* buffer the host query state/result is read back from */
+
+ unsigned index; /* index given at creation time */
+ unsigned type; /* query type given at creation time */
+ unsigned result_size; /* NOTE(review): not referenced in this file - confirm whether still needed */
+ unsigned result_gotten_sent; /* set once a get-result command has been encoded, cleared after the result is read */
+};
+
+/* Downcast an opaque pipe_query pointer to the driver's query struct. */
+static inline struct virgl_query *virgl_query(struct pipe_query *q)
+{
+   struct virgl_query *vq = (struct virgl_query *)q;
+
+   return vq;
+}
+
+/*
+ * Encode a render-condition command.  A NULL query is sent to the encoder
+ * as handle 0.
+ */
+static void virgl_render_condition(struct pipe_context *ctx,
+                                   struct pipe_query *q,
+                                   boolean condition,
+                                   uint mode)
+{
+   uint32_t query_handle = q ? virgl_query(q)->handle : 0;
+
+   virgl_encoder_render_condition(virgl_context(ctx), query_handle,
+                                  condition, mode);
+}
+
+/*
+ * Allocate a query object together with a staging buffer sized for one
+ * struct virgl_host_query_state, then encode the create-query command.
+ * Returns NULL if either allocation fails.
+ */
+static struct pipe_query *virgl_create_query(struct pipe_context *ctx,
+                                             unsigned query_type, unsigned index)
+{
+   struct virgl_query *vq = CALLOC_STRUCT(virgl_query);
+   struct pipe_resource *result_buf;
+
+   if (!vq)
+      return NULL;
+
+   result_buf = pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM,
+                                   PIPE_USAGE_STAGING,
+                                   sizeof(struct virgl_host_query_state));
+   if (!result_buf) {
+      FREE(vq);
+      return NULL;
+   }
+
+   vq->buf = (struct virgl_resource *)result_buf;
+   vq->buf->clean = FALSE;
+   vq->type = query_type;
+   vq->index = index;
+   vq->handle = virgl_object_assign_handle();
+
+   virgl_encoder_create_query(virgl_context(ctx), vq->handle, query_type,
+                              index, vq->buf, 0);
+
+   return (struct pipe_query *)vq;
+}
+
+/* Encode deletion of the query object and release the driver-side state. */
+static void virgl_destroy_query(struct pipe_context *ctx,
+                                struct pipe_query *q)
+{
+   struct virgl_query *vq = virgl_query(q);
+   struct pipe_resource **buf_ref = (struct pipe_resource **)&vq->buf;
+
+   virgl_encode_delete_object(virgl_context(ctx), vq->handle,
+                              VIRGL_OBJECT_QUERY);
+
+   /* Drop this query's reference on the result buffer before freeing it. */
+   pipe_resource_reference(buf_ref, NULL);
+   FREE(vq);
+}
+
+/* Mark the result buffer as not clean and encode a begin-query command. */
+static boolean virgl_begin_query(struct pipe_context *ctx,
+                                 struct pipe_query *q)
+{
+   struct virgl_query *vq = virgl_query(q);
+
+   vq->buf->clean = FALSE;
+   virgl_encoder_begin_query(virgl_context(ctx), vq->handle);
+
+   return true;
+}
+
+/*
+ * Write VIRGL_QUERY_STATE_WAIT_HOST into the first dword of the result
+ * buffer, then encode an end-query command.
+ */
+static void virgl_end_query(struct pipe_context *ctx,
+                            struct pipe_query *q)
+{
+   struct virgl_query *vq = virgl_query(q);
+   uint32_t wait_state = VIRGL_QUERY_STATE_WAIT_HOST;
+   struct pipe_box box;
+
+   /* One dword starting at offset 0. */
+   u_box_1d(0, sizeof(wait_state), &box);
+   virgl_transfer_inline_write(ctx, &vq->buf->u.b, 0, PIPE_TRANSFER_WRITE,
+                               &box, &wait_state, 0, 0);
+
+   virgl_encoder_end_query(virgl_context(ctx), vq->handle);
+}
+
+/*
+ * Fetch the result of a query.
+ *
+ * If no get-result request is outstanding, one is encoded and the context
+ * is flushed.  The result buffer is then mapped and its query_state
+ * inspected: while it is not VIRGL_QUERY_STATE_DONE the buffer is unmapped
+ * and, when 'wait' is set, mapped again in a loop.
+ *
+ * \param ctx     context the query belongs to
+ * \param q       query created by virgl_create_query()
+ * \param wait    keep re-mapping until the result is done
+ * \param result  receives the value in result->u64
+ * \return TRUE when result->u64 was written; FALSE when the result is not
+ *         done yet (and 'wait' is not set) or the buffer could not be mapped
+ */
+static boolean virgl_get_query_result(struct pipe_context *ctx,
+                                      struct pipe_query *q,
+                                      boolean wait,
+                                      union pipe_query_result *result)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct virgl_query *query = virgl_query(q);
+   struct pipe_transfer *transfer;
+   struct virgl_host_query_state *host_state;
+
+   /* ask host for query result */
+   if (!query->result_gotten_sent) {
+      query->result_gotten_sent = 1;
+      virgl_encoder_get_query_result(vctx, query->handle, 0);
+      ctx->flush(ctx, NULL, 0);
+   }
+
+   for (;;) {
+      host_state = pipe_buffer_map(ctx, &query->buf->u.b,
+                                   PIPE_TRANSFER_READ, &transfer);
+
+      /* A failed mapping must not be dereferenced; report "no result". */
+      if (!host_state)
+         return FALSE;
+
+      if (host_state->query_state == VIRGL_QUERY_STATE_DONE)
+         break;
+
+      pipe_buffer_unmap(ctx, transfer);
+      if (!wait)
+         return FALSE;
+   }
+
+   /* Time queries keep all 64 bits; other types are truncated to 32 bits. */
+   if (query->type == PIPE_QUERY_TIMESTAMP ||
+       query->type == PIPE_QUERY_TIME_ELAPSED)
+      result->u64 = host_state->result;
+   else
+      result->u64 = (uint32_t)host_state->result;
+
+   pipe_buffer_unmap(ctx, transfer);
+
+   /* The next call has to issue a fresh get-result request. */
+   query->result_gotten_sent = 0;
+   return TRUE;
+}
+
+/* Install the query-related entry points into the context's pipe_context. */
+void virgl_init_query_functions(struct virgl_context *vctx)
+{
+   struct pipe_context *pctx = &vctx->base;
+
+   pctx->create_query = virgl_create_query;
+   pctx->destroy_query = virgl_destroy_query;
+   pctx->begin_query = virgl_begin_query;
+   pctx->end_query = virgl_end_query;
+   pctx->get_query_result = virgl_get_query_result;
+   pctx->render_condition = virgl_render_condition;
+}
diff --git a/src/gallium/drivers/virgl/virgl_resource.c b/src/gallium/drivers/virgl/virgl_resource.c
new file mode 100644
index 00000000000..0b2fc4ec497
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_resource.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "util/u_inlines.h"
+#include "virgl_context.h"
+#include "virgl_resource.h"
+#include "virgl_screen.h"
+
+/*
+ * Returns true when the transfer is synchronized (no
+ * PIPE_TRANSFER_UNSYNCHRONIZED in 'usage') and the winsys reports that the
+ * context's command buffer references the resource.
+ */
+bool virgl_res_needs_flush_wait(struct virgl_context *vctx,
+                                struct virgl_resource *res,
+                                unsigned usage)
+{
+   struct virgl_winsys *vws;
+
+   if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
+      return false;
+
+   vws = virgl_screen(vctx->base.screen)->vws;
+   return vws->res_is_referenced(vws, vctx->cbuf, res->hw_res) ? true : false;
+}
+
+/*
+ * Decide whether a transfer has to read the resource contents back first.
+ * No readback is reported for a clean resource, for a discard-range
+ * transfer, or for a write transfer with explicit flushing.
+ */
+bool virgl_res_needs_readback(struct virgl_context *vctx,
+                              struct virgl_resource *res,
+                              unsigned usage)
+{
+   const unsigned explicit_write =
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_FLUSH_EXPLICIT;
+
+   if (res->clean)
+      return false;
+
+   if (usage & PIPE_TRANSFER_DISCARD_RANGE)
+      return false;
+
+   if ((usage & explicit_write) == explicit_write)
+      return false;
+
+   return true;
+}
+
+/* Dispatch resource creation to the buffer or the texture implementation. */
+static struct pipe_resource *virgl_resource_create(struct pipe_screen *screen,
+                                                   const struct pipe_resource *templ)
+{
+   struct virgl_screen *vs = virgl_screen(screen);
+
+   return templ->target == PIPE_BUFFER ? virgl_buffer_create(vs, templ)
+                                       : virgl_texture_create(vs, templ);
+}
+
+/*
+ * Import a resource from a winsys handle.  Buffers are rejected (NULL);
+ * every other target is handed to the texture import path.
+ */
+static struct pipe_resource *virgl_resource_from_handle(struct pipe_screen *screen,
+                                                        const struct pipe_resource *templ,
+                                                        struct winsys_handle *whandle)
+{
+   if (templ->target == PIPE_BUFFER)
+      return NULL;
+
+   return virgl_texture_from_handle(virgl_screen(screen), templ, whandle);
+}
+
+/* Install the screen-level resource entry points. */
+void virgl_init_screen_resource_functions(struct pipe_screen *screen)
+{
+   /* Creation and import are implemented in this file. */
+   screen->resource_from_handle = virgl_resource_from_handle;
+   screen->resource_create = virgl_resource_create;
+
+   /* Handle export and destruction go through the u_resource vtbl helpers. */
+   screen->resource_destroy = u_resource_destroy_vtbl;
+   screen->resource_get_handle = u_resource_get_handle_vtbl;
+}
+
+/* Route all context transfer entry points through the u_transfer vtbl helpers. */
+void virgl_init_context_resource_functions(struct pipe_context *ctx)
+{
+   ctx->transfer_inline_write = u_transfer_inline_write_vtbl;
+   ctx->transfer_unmap = u_transfer_unmap_vtbl;
+   ctx->transfer_flush_region = u_transfer_flush_region_vtbl;
+   ctx->transfer_map = u_transfer_map_vtbl;
+}
diff --git a/src/gallium/drivers/virgl/virgl_resource.h b/src/gallium/drivers/virgl/virgl_resource.h
new file mode 100644
index 00000000000..bab9bcb9b4e
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_resource.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef VIRGL_RESOURCE_H
+#define VIRGL_RESOURCE_H
+
+#include "util/u_resource.h"
+#include "util/u_range.h"
+#include "util/list.h"
+#include "util/u_transfer.h"
+
+#include "virgl_hw.h"
+#define VR_MAX_TEXTURE_2D_LEVELS 15
+
+struct winsys_handle;
+struct virgl_screen;
+struct virgl_context;
+
+/* Common base embedded in virgl_buffer and virgl_texture. */
+struct virgl_resource {
+ struct u_resource u;
+ struct virgl_hw_res *hw_res; /* winsys-level resource handed to virgl_winsys calls */
+ boolean clean; /* when set, virgl_res_needs_readback() reports that no readback is needed */
+};
+
+/* Buffer resource. */
+struct virgl_buffer {
+ struct virgl_resource base;
+
+ /* NOTE(review): presumably links the buffer into a pending-flush list,
+ * with on_list tracking membership - confirm against the buffer code.
+ */
+ struct list_head flush_list;
+ boolean on_list;
+
+ /* The buffer range which is initialized (with a write transfer,
+ * streamout, DMA, or as a random access target). The rest of
+ * the buffer is considered invalid and can be mapped unsynchronized.
+ *
+ * This allows unsynchronized mapping of a buffer range which hasn't
+ * been used yet. It's for applications which forget to use
+ * the unsynchronized map flag and expect the driver to figure it out.
+ */
+ struct util_range valid_buffer_range;
+};
+
+/* Texture resource.  Both arrays hold VR_MAX_TEXTURE_2D_LEVELS entries. */
+struct virgl_texture {
+ struct virgl_resource base;
+
+ /* NOTE(review): presumably indexed by mip level - confirm against the texture code. */
+ unsigned long level_offset[VR_MAX_TEXTURE_2D_LEVELS];
+ unsigned stride[VR_MAX_TEXTURE_2D_LEVELS];
+};
+
+/* Driver transfer object wrapping a pipe_transfer. */
+struct virgl_transfer {
+ struct pipe_transfer base;
+ uint32_t offset; /* NOTE(review): presumably the offset of the mapped region within the resource - confirm */
+ struct virgl_resource *resolve_tmp; /* NOTE(review): looks like a temporary used for resolves - confirm */
+};
+
+void virgl_resource_destroy(struct pipe_screen *screen,
+ struct pipe_resource *resource);
+
+void virgl_init_screen_resource_functions(struct pipe_screen *screen);
+
+void virgl_init_context_resource_functions(struct pipe_context *ctx);
+
+struct pipe_resource *virgl_texture_create(struct virgl_screen *vs,
+ const struct pipe_resource *templ);
+
+struct pipe_resource *virgl_texture_from_handle(struct virgl_screen *vs,
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle);
+
+/* Downcast a pipe_resource pointer to the driver's resource struct. */
+static inline struct virgl_resource *virgl_resource(struct pipe_resource *r)
+{
+   struct virgl_resource *vres = (struct virgl_resource *)r;
+
+   return vres;
+}
+
+/* Downcast a pipe_resource pointer to the driver's buffer struct. */
+static inline struct virgl_buffer *virgl_buffer(struct pipe_resource *r)
+{
+   struct virgl_buffer *vbuf = (struct virgl_buffer *)r;
+
+   return vbuf;
+}
+
+/* Downcast a pipe_resource pointer to the driver's texture struct. */
+static inline struct virgl_texture *virgl_texture(struct pipe_resource *r)
+{
+   struct virgl_texture *vtex = (struct virgl_texture *)r;
+
+   return vtex;
+}
+
+/* Downcast a pipe_transfer pointer to the driver's transfer struct. */
+static inline struct virgl_transfer *virgl_transfer(struct pipe_transfer *trans)
+{
+   struct virgl_transfer *vtrans = (struct virgl_transfer *)trans;
+
+   return vtrans;
+}
+
+struct pipe_resource *virgl_buffer_create(struct virgl_screen *vs,
+ const struct pipe_resource *templ);
+
+/*
+ * Translate a mask of PIPE_BIND_* flags into the matching VIRGL_BIND_* mask.
+ * Flags without an entry in the table are dropped.
+ */
+static inline unsigned pipe_to_virgl_bind(unsigned pbind)
+{
+   static const struct {
+      unsigned pipe_flag;
+      unsigned virgl_flag;
+   } bind_map[] = {
+      { PIPE_BIND_DEPTH_STENCIL,   VIRGL_BIND_DEPTH_STENCIL },
+      { PIPE_BIND_RENDER_TARGET,   VIRGL_BIND_RENDER_TARGET },
+      { PIPE_BIND_SAMPLER_VIEW,    VIRGL_BIND_SAMPLER_VIEW },
+      { PIPE_BIND_VERTEX_BUFFER,   VIRGL_BIND_VERTEX_BUFFER },
+      { PIPE_BIND_INDEX_BUFFER,    VIRGL_BIND_INDEX_BUFFER },
+      { PIPE_BIND_CONSTANT_BUFFER, VIRGL_BIND_CONSTANT_BUFFER },
+      { PIPE_BIND_DISPLAY_TARGET,  VIRGL_BIND_DISPLAY_TARGET },
+      { PIPE_BIND_STREAM_OUTPUT,   VIRGL_BIND_STREAM_OUTPUT },
+      { PIPE_BIND_CURSOR,          VIRGL_BIND_CURSOR },
+      { PIPE_BIND_CUSTOM,          VIRGL_BIND_CUSTOM },
+      { PIPE_BIND_SCANOUT,         VIRGL_BIND_SCANOUT },
+   };
+   unsigned outbind = 0;
+   unsigned i;
+
+   for (i = 0; i < sizeof(bind_map) / sizeof(bind_map[0]); i++) {
+      if (pbind & bind_map[i].pipe_flag)
+         outbind |= bind_map[i].virgl_flag;
+   }
+
+   return outbind;
+}
+
+bool virgl_res_needs_flush_wait(struct virgl_context *vctx,
+ struct virgl_resource *res,
+ unsigned usage);
+bool virgl_res_needs_readback(struct virgl_context *vctx,
+ struct virgl_resource *res,
+ unsigned usage);
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c
new file mode 100644
index 00000000000..cca379d47ab
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_screen.c
@@ -0,0 +1,553 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "util/u_video.h"
+#include "os/os_time.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "draw/draw_context.h"
+
+#include "tgsi/tgsi_exec.h"
+
+#include "virgl_screen.h"
+#include "virgl_resource.h"
+#include "virgl_public.h"
+#include "virgl_context.h"
+
+#define SP_MAX_TEXTURE_2D_LEVELS 15 /* 16K x 16K */
+#define SP_MAX_TEXTURE_3D_LEVELS 9 /* 512 x 512 x 512 */
+#define SP_MAX_TEXTURE_CUBE_LEVELS 13 /* 4K x 4K */
+
+/* Vendor string reported for this driver. */
+static const char *
+virgl_get_vendor(struct pipe_screen *screen)
+{
+   static const char vendor[] = "Red Hat";
+
+   return vendor;
+}
+
+
+/* Driver name reported for this screen. */
+static const char *
+virgl_get_name(struct pipe_screen *screen)
+{
+   static const char name[] = "virgl";
+
+   return name;
+}
+
+/*
+ * Answer an integer capability query (PIPE_CAP_*).
+ *
+ * Some values are fixed by the driver; the others are read from the v1
+ * capability set stored in the screen, which virgl_create_screen() fills in
+ * through the winsys get_caps() hook.  A cap with no case in the switch
+ * reaches the debug message at the end and is reported as 0.
+ */
+static int
+virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
+{
+ struct virgl_screen *vscreen = virgl_screen(screen);
+ switch (param) {
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_SM3:
+ return 1;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return vscreen->caps.caps.v1.max_render_targets;
+ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ return vscreen->caps.caps.v1.max_dual_source_render_targets;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return vscreen->caps.caps.v1.bset.occlusion_query;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ return vscreen->caps.caps.v1.bset.mirror_clamp;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return SP_MAX_TEXTURE_2D_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return SP_MAX_TEXTURE_3D_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return SP_MAX_TEXTURE_CUBE_LEVELS;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ return 1;
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ return vscreen->caps.caps.v1.bset.indep_blend_enable;
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ return vscreen->caps.caps.v1.bset.indep_blend_func;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return vscreen->caps.caps.v1.bset.fragment_coord_conventions;
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ return vscreen->caps.caps.v1.bset.depth_clip_disable;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ return vscreen->caps.caps.v1.max_streamout_buffers;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+ case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ return 16*4;
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ return vscreen->caps.caps.v1.bset.primitive_restart;
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return vscreen->caps.caps.v1.bset.shader_stencil_export;
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ return 1;
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ return vscreen->caps.caps.v1.bset.seamless_cube_map;
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ return vscreen->caps.caps.v1.bset.seamless_cube_map_per_texture;
+ case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+ return vscreen->caps.caps.v1.max_texture_array_layers;
+ case PIPE_CAP_MIN_TEXEL_OFFSET:
+ case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+ return -8;
+ case PIPE_CAP_MAX_TEXEL_OFFSET:
+ case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+ return 7;
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ return vscreen->caps.caps.v1.bset.conditional_render;
+ case PIPE_CAP_TEXTURE_BARRIER:
+ return 0;
+ case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+ return 1;
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+ return vscreen->caps.caps.v1.bset.color_clamping;
+ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+ return 1;
+ case PIPE_CAP_GLSL_FEATURE_LEVEL:
+ return vscreen->caps.caps.v1.glsl_level;
+ case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ return 0;
+ case PIPE_CAP_COMPUTE:
+ return 0;
+ case PIPE_CAP_USER_VERTEX_BUFFERS:
+ return 0;
+ case PIPE_CAP_USER_INDEX_BUFFERS:
+ case PIPE_CAP_USER_CONSTANT_BUFFERS:
+ return 1;
+ case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+ return 16;
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ return vscreen->caps.caps.v1.bset.streamout_pause_resume;
+ case PIPE_CAP_START_INSTANCE:
+ return vscreen->caps.caps.v1.bset.start_instance;
+ case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+ case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ return 0;
+ case PIPE_CAP_QUERY_TIMESTAMP:
+ return 1;
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+ return 0;
+ case PIPE_CAP_TGSI_TEXCOORD:
+ return 0;
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return VIRGL_MAP_BUFFER_ALIGNMENT;
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ return vscreen->caps.caps.v1.max_tbo_size > 0;
+ case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ return 0;
+ case PIPE_CAP_CUBE_MAP_ARRAY:
+ return vscreen->caps.caps.v1.bset.cube_map_array;
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ return vscreen->caps.caps.v1.bset.texture_multisample;
+ case PIPE_CAP_MAX_VIEWPORTS:
+ return vscreen->caps.caps.v1.max_viewports;
+ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+ return vscreen->caps.caps.v1.max_tbo_size;
+ case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+ case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+ case PIPE_CAP_ENDIANNESS:
+ return 0;
+ case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+ return 1;
+ case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+ return 0;
+ case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+ return 1024;
+ case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+ return 16384;
+ case PIPE_CAP_TEXTURE_QUERY_LOD:
+ return vscreen->caps.caps.v1.bset.texture_query_lod;
+ case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+ return vscreen->caps.caps.v1.max_texture_gather_components;
+ /* Everything in the following group is reported as 0. */
+ case PIPE_CAP_TEXTURE_GATHER_SM5:
+ case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+ case PIPE_CAP_SAMPLE_SHADING:
+ case PIPE_CAP_FAKE_SW_MSAA:
+ case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+ case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+ case PIPE_CAP_MAX_VERTEX_STREAMS:
+ case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
+ case PIPE_CAP_CLIP_HALFZ:
+ case PIPE_CAP_VERTEXID_NOBASE:
+ case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+ case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+ case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
+ return 0;
+ case PIPE_CAP_VENDOR_ID:
+ return 0x1af4;
+ case PIPE_CAP_DEVICE_ID:
+ return 0x1010;
+ case PIPE_CAP_ACCELERATED:
+ return 1;
+ case PIPE_CAP_UMA:
+ case PIPE_CAP_VIDEO_MEMORY:
+ return 0;
+ }
+ /* should only get here on unhandled cases */
+ debug_printf("Unexpected PIPE_CAP %d query\n", param);
+ return 0;
+}
+
+/*
+ * Answer a per-shader-stage capability query.
+ *
+ * Only the fragment, vertex and geometry stages report anything; every
+ * other stage, and every cap without a case below, yields 0.
+ */
+static int
+virgl_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
+{
+   struct virgl_screen *vscreen = virgl_screen(screen);
+
+   if (shader != PIPE_SHADER_FRAGMENT &&
+       shader != PIPE_SHADER_VERTEX &&
+       shader != PIPE_SHADER_GEOMETRY)
+      return 0;
+
+   switch (param) {
+   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+      return INT_MAX;
+
+   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+   case PIPE_SHADER_CAP_SUBROUTINES:
+      return 1;
+
+   case PIPE_SHADER_CAP_MAX_INPUTS:
+      /* 32 inputs only for non-vertex stages at GLSL level 150 or above. */
+      if (shader != PIPE_SHADER_VERTEX &&
+          !(vscreen->caps.caps.v1.glsl_level < 150))
+         return 32;
+      return 16;
+
+   case PIPE_SHADER_CAP_MAX_OUTPUTS:
+      return 128;
+
+   case PIPE_SHADER_CAP_MAX_TEMPS:
+      return 256;
+
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+      return vscreen->caps.caps.v1.max_uniform_blocks;
+
+   case PIPE_SHADER_CAP_MAX_PREDS:
+      return 0;
+
+   case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+      return 16;
+
+   case PIPE_SHADER_CAP_INTEGERS:
+      return vscreen->caps.caps.v1.glsl_level >= 130;
+
+   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+      return 32;
+
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+      return 4096 * sizeof(float[4]);
+
+   default:
+      return 0;
+   }
+}
+
+/*
+ * Answer a floating-point capability query (PIPE_CAPF_*).  Caps without a
+ * case below are logged and reported as 0.
+ */
+static float
+virgl_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
+{
+   switch (param) {
+   case PIPE_CAPF_MAX_LINE_WIDTH:
+   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+   case PIPE_CAPF_MAX_POINT_WIDTH:
+   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+      return 255.0; /* arbitrary */
+
+   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+      return 16.0;
+
+   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+      return 16.0; /* arbitrary */
+
+   case PIPE_CAPF_GUARD_BAND_LEFT:
+   case PIPE_CAPF_GUARD_BAND_TOP:
+   case PIPE_CAPF_GUARD_BAND_RIGHT:
+   case PIPE_CAPF_GUARD_BAND_BOTTOM:
+      return 0.0;
+   }
+
+   /* should only get here on unhandled cases */
+   debug_printf("Unexpected PIPE_CAPF %d query\n", param);
+   return 0.0;
+}
+
+/*
+ * Report whether 'format' can be used as a vertex buffer format.
+ *
+ * PIPE_FORMAT_R11G11B10_FLOAT is accepted only if its bit is set in the
+ * vertexbuffer capability bitmask.  Any other format is accepted when it
+ * has a plain layout and its first non-VOID channel is not fixed-point.
+ *
+ * \return TRUE if supported, FALSE otherwise (including unknown formats)
+ */
+static boolean
+virgl_is_vertex_format_supported(struct pipe_screen *screen,
+                                 enum pipe_format format)
+{
+   struct virgl_screen *vscreen = virgl_screen(screen);
+   const struct util_format_description *format_desc;
+   int i;
+
+   format_desc = util_format_description(format);
+   if (!format_desc)
+      return FALSE;
+
+   if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
+      int vformat = VIRGL_FORMAT_R11G11B10_FLOAT;
+      int big = vformat / 32;
+      int small = vformat % 32;
+
+      /* Unsigned constant: shifting a signed 1 into bit 31 is undefined. */
+      if (!(vscreen->caps.caps.v1.vertexbuffer.bitmask[big] & (1u << small)))
+         return FALSE;
+      return TRUE;
+   }
+
+   /* Find the first non-VOID channel. */
+   for (i = 0; i < 4; i++) {
+      if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+         break;
+      }
+   }
+
+   if (i == 4)
+      return FALSE;
+
+   if (format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+      return FALSE;
+
+   if (format_desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED)
+      return FALSE;
+   return TRUE;
+}
+
+/**
+ * Query format support for creating a texture, drawing surface, etc.
+ * \param screen        the screen whose capability bitmasks are consulted
+ * \param format        the format to test
+ * \param target        resource target the format would be used with
+ * \param sample_count  sample count; values above 1 require multisample
+ *                      support and must not exceed the max_samples cap
+ * \param bind          PIPE_BIND_* usage flags
+ * \return TRUE if the format is supported for this use, FALSE otherwise
+ */
+static boolean
+virgl_is_format_supported( struct pipe_screen *screen,
+                           enum pipe_format format,
+                           enum pipe_texture_target target,
+                           unsigned sample_count,
+                           unsigned bind)
+{
+   struct virgl_screen *vscreen = virgl_screen(screen);
+   const struct util_format_description *format_desc;
+   int i;
+
+   assert(target == PIPE_BUFFER ||
+          target == PIPE_TEXTURE_1D ||
+          target == PIPE_TEXTURE_1D_ARRAY ||
+          target == PIPE_TEXTURE_2D ||
+          target == PIPE_TEXTURE_2D_ARRAY ||
+          target == PIPE_TEXTURE_RECT ||
+          target == PIPE_TEXTURE_3D ||
+          target == PIPE_TEXTURE_CUBE ||
+          target == PIPE_TEXTURE_CUBE_ARRAY);
+
+   format_desc = util_format_description(format);
+   if (!format_desc)
+      return FALSE;
+
+   if (util_format_is_intensity(format))
+      return FALSE;
+
+   if (sample_count > 1) {
+      if (!vscreen->caps.caps.v1.bset.texture_multisample)
+         return FALSE;
+      if (sample_count > vscreen->caps.caps.v1.max_samples)
+         return FALSE;
+   }
+
+   /* Vertex buffer usage is decided entirely by the vertex format check. */
+   if (bind & PIPE_BIND_VERTEX_BUFFER) {
+      return virgl_is_vertex_format_supported(screen, format);
+   }
+
+   if (bind & PIPE_BIND_RENDER_TARGET) {
+      if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
+         return FALSE;
+
+      /*
+       * Although possible, it is unnatural to render into compressed or YUV
+       * surfaces. So disable these here to avoid going into weird paths
+       * inside the state trackers.
+       */
+      if (format_desc->block.width != 1 ||
+          format_desc->block.height != 1)
+         return FALSE;
+
+      {
+         int big = format / 32;
+         int small = format % 32;
+
+         /* Unsigned constant: shifting a signed 1 into bit 31 is undefined. */
+         if (!(vscreen->caps.caps.v1.render.bitmask[big] & (1u << small)))
+            return FALSE;
+      }
+   }
+
+   if (bind & PIPE_BIND_DEPTH_STENCIL) {
+      if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+         return FALSE;
+   }
+
+   /*
+    * All other operations (sampling, transfer, etc).
+    */
+
+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+      if (util_format_s3tc_enabled)
+         goto out_lookup;
+      return FALSE;
+   }
+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+      goto out_lookup;
+   }
+
+   /* These two formats skip the per-channel checks below. */
+   if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
+      goto out_lookup;
+   } else if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
+      goto out_lookup;
+   }
+
+   /* Find the first non-VOID channel. */
+   for (i = 0; i < 4; i++) {
+      if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+         break;
+      }
+   }
+
+   if (i == 4)
+      return FALSE;
+
+   /* no L4A4 */
+   if (format_desc->nr_channels < 4 && format_desc->channel[i].size == 4)
+      return FALSE;
+
+ out_lookup:
+   {
+      int big = format / 32;
+      int small = format % 32;
+
+      /* Final gate: the format's bit in the sampler capability bitmask. */
+      if (!(vscreen->caps.caps.v1.sampler.bitmask[big] & (1u << small)))
+         return FALSE;
+   }
+   /*
+    * Everything else should be supported by u_format.
+    */
+   return TRUE;
+}
+
+/*
+ * Forward a frontbuffer flush to the winsys.  Does nothing when the winsys
+ * provides no flush_frontbuffer hook.
+ */
+static void virgl_flush_frontbuffer(struct pipe_screen *screen,
+                                    struct pipe_resource *res,
+                                    unsigned level, unsigned layer,
+                                    void *winsys_drawable_handle, struct pipe_box *sub_box)
+{
+   struct virgl_winsys *vws = virgl_screen(screen)->vws;
+
+   if (!vws->flush_frontbuffer)
+      return;
+
+   vws->flush_frontbuffer(vws, virgl_resource(res)->hw_res, level, layer,
+                          winsys_drawable_handle, sub_box);
+}
+
+/* Fence reference handling is delegated to the winsys. */
+static void virgl_fence_reference(struct pipe_screen *screen,
+                                  struct pipe_fence_handle **ptr,
+                                  struct pipe_fence_handle *fence)
+{
+   struct virgl_winsys *vws = virgl_screen(screen)->vws;
+
+   vws->fence_reference(vws, ptr, fence);
+}
+
+/* Wait on a fence through the winsys and return what fence_wait reports. */
+static boolean virgl_fence_finish(struct pipe_screen *screen,
+                                  struct pipe_fence_handle *fence,
+                                  uint64_t timeout)
+{
+   struct virgl_winsys *vws = virgl_screen(screen)->vws;
+
+   return vws->fence_wait(vws, fence, timeout);
+}
+
+/* Report the current time as returned by os_time_get_nano(). */
+static uint64_t
+virgl_get_timestamp(struct pipe_screen *_screen)
+{
+   uint64_t now = os_time_get_nano();
+
+   return now;
+}
+
+/* Destroy the winsys (if one is attached) and free the screen. */
+static void
+virgl_destroy_screen(struct pipe_screen *screen)
+{
+   struct virgl_screen *vscreen = virgl_screen(screen);
+
+   if (vscreen->vws)
+      vscreen->vws->destroy(vscreen->vws);
+
+   FREE(vscreen);
+}
+
+/*
+ * Create a virgl screen on top of the given winsys.
+ *
+ * Installs the screen entry points, queries the capability set through
+ * vws->get_caps() and initializes S3TC format support.
+ *
+ * \param vws  winsys backend; it is destroyed by virgl_destroy_screen()
+ * \return the new screen, or NULL if 'vws' is NULL or allocation fails
+ */
+struct pipe_screen *
+virgl_create_screen(struct virgl_winsys *vws)
+{
+   struct virgl_screen *screen;
+
+   /* get_caps() below is called through vws, so it must be valid. */
+   if (!vws)
+      return NULL;
+
+   screen = CALLOC_STRUCT(virgl_screen);
+   if (!screen)
+      return NULL;
+
+   screen->vws = vws;
+   screen->base.get_name = virgl_get_name;
+   screen->base.get_vendor = virgl_get_vendor;
+   screen->base.get_param = virgl_get_param;
+   screen->base.get_shader_param = virgl_get_shader_param;
+   screen->base.get_paramf = virgl_get_paramf;
+   screen->base.is_format_supported = virgl_is_format_supported;
+   screen->base.destroy = virgl_destroy_screen;
+   screen->base.context_create = virgl_context_create;
+   screen->base.flush_frontbuffer = virgl_flush_frontbuffer;
+   screen->base.get_timestamp = virgl_get_timestamp;
+   screen->base.fence_reference = virgl_fence_reference;
+   screen->base.fence_finish = virgl_fence_finish;
+
+   virgl_init_screen_resource_functions(&screen->base);
+
+   /* The capability set read by the get_param-style queries. */
+   vws->get_caps(vws, &screen->caps);
+
+   util_format_s3tc_init();
+   return &screen->base;
+}
diff --git a/src/gallium/drivers/virgl/virgl_screen.h b/src/gallium/drivers/virgl/virgl_screen.h
new file mode 100644
index 00000000000..52e72ca4958
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_screen.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_H
+#define VIRGL_H
+
+#include "pipe/p_screen.h"
+#include "virgl_winsys.h"
+
+/* Driver-private screen object wrapping a pipe_screen. */
+struct virgl_screen {
+ /* Kept as the first member: virgl_screen() casts a pipe_screen pointer
+  * straight to this struct. */
+ struct pipe_screen base;
+ /* Winsys backing this screen. */
+ struct virgl_winsys *vws;
+
+ /* Capability block; handed to the winsys get_caps() callback. */
+ struct virgl_drm_caps caps;
+
+ /* NOTE(review): not referenced by the code visible here; presumably an id
+  * source for sub-contexts -- confirm against the context code. */
+ uint32_t sub_ctx_id;
+};
+
+
+/** Downcast a pipe_screen pointer to the virgl_screen it is cast-compatible with. */
+static inline struct virgl_screen *
+virgl_screen(struct pipe_screen *pipe)
+{
+   struct virgl_screen *vscreen = (struct virgl_screen *)pipe;
+
+   return vscreen;
+}
+
+#define VIRGL_MAP_BUFFER_ALIGNMENT 64
+
+#endif
diff --git a/src/gallium/drivers/virgl/virgl_streamout.c b/src/gallium/drivers/virgl/virgl_streamout.c
new file mode 100644
index 00000000000..b6a65fff29e
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_streamout.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "virgl_context.h"
+#include "virgl_encode.h"
+#include "virgl_protocol.h"
+#include "virgl_resource.h"
+
+/**
+ * Create a stream-output target covering \p buffer_size bytes of \p buffer
+ * starting at \p buffer_offset.
+ *
+ * The target takes a reference on the buffer, gets a freshly assigned
+ * object handle, marks the underlying resource as no longer clean and
+ * encodes the creation command into the context.
+ *
+ * \return the new target, or NULL when the allocation failed.
+ */
+static struct pipe_stream_output_target *virgl_create_so_target(
+   struct pipe_context *ctx,
+   struct pipe_resource *buffer,
+   unsigned buffer_offset,
+   unsigned buffer_size)
+{
+   struct virgl_so_target *target = CALLOC_STRUCT(virgl_so_target);
+   struct virgl_resource *vres;
+   uint32_t handle;
+
+   if (target == NULL)
+      return NULL;
+
+   handle = virgl_object_assign_handle();
+
+   target->handle = handle;
+   target->base.reference.count = 1;
+   target->base.context = ctx;
+   target->base.buffer_offset = buffer_offset;
+   target->base.buffer_size = buffer_size;
+   pipe_resource_reference(&target->base.buffer, buffer);
+
+   vres = virgl_resource(buffer);
+   vres->clean = FALSE;
+
+   virgl_encoder_create_so_target(virgl_context(ctx), handle, vres,
+                                  buffer_offset, buffer_size);
+
+   return &target->base;
+}
+
+/**
+ * Destroy a stream-output target: drop its buffer reference, encode the
+ * deletion of its object handle and free the structure.
+ */
+static void virgl_destroy_so_target(struct pipe_context *ctx,
+                                    struct pipe_stream_output_target *target)
+{
+   struct virgl_so_target *so = virgl_so_target(target);
+
+   pipe_resource_reference(&so->base.buffer, NULL);
+   virgl_encode_delete_object(virgl_context(ctx), so->handle,
+                              VIRGL_OBJECT_STREAMOUT_TARGET);
+   FREE(so);
+}
+
+/**
+ * Bind \p num_targets stream-output targets.
+ *
+ * The context keeps a reference on the buffer of every bound target; slots
+ * that were bound before but lie beyond \p num_targets are released.  A NULL
+ * entry in \p targets simply clears the corresponding slot instead of being
+ * dereferenced.
+ *
+ * \p offset is not consumed here: the encoder is always given an append
+ * bitmask of 0.
+ *
+ * NOTE(review): \p targets is forwarded unchanged to
+ * virgl_encoder_set_so_targets(); confirm that it copes with NULL entries
+ * as well.
+ */
+static void virgl_set_so_targets(struct pipe_context *ctx,
+                                 unsigned num_targets,
+                                 struct pipe_stream_output_target **targets,
+                                 const unsigned *offset)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   /* Unsigned so the comparisons against the unsigned counts are exact. */
+   unsigned i;
+
+   for (i = 0; i < num_targets; i++) {
+      struct pipe_resource *buf = targets[i] ? targets[i]->buffer : NULL;
+
+      pipe_resource_reference(&vctx->so_targets[i].base.buffer, buf);
+   }
+
+   /* Release whatever was bound past the new count. */
+   for (i = num_targets; i < vctx->num_so_targets; i++)
+      pipe_resource_reference(&vctx->so_targets[i].base.buffer, NULL);
+
+   vctx->num_so_targets = num_targets;
+   virgl_encoder_set_so_targets(vctx, num_targets, targets, 0);
+}
+
+/** Install the stream-output entry points into the context's pipe_context. */
+void virgl_init_so_functions(struct virgl_context *vctx)
+{
+   struct pipe_context *pipe = &vctx->base;
+
+   pipe->set_stream_output_targets = virgl_set_so_targets;
+   pipe->create_stream_output_target = virgl_create_so_target;
+   pipe->stream_output_target_destroy = virgl_destroy_so_target;
+}
diff --git a/src/gallium/drivers/virgl/virgl_texture.c b/src/gallium/drivers/virgl/virgl_texture.c
new file mode 100644
index 00000000000..31189626144
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_texture.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+#include "virgl_context.h"
+#include "virgl_resource.h"
+#include "virgl_screen.h"
+
+/**
+ * Copy \p src_box of \p src (mip \p src_level) to (\p dstx, \p dsty, \p dstz)
+ * of \p dst (mip \p dst_level) using the context's blit entry point.
+ *
+ * The blit uses nearest filtering, and its mask is the set of channels the
+ * two formats have in common; when that set is empty nothing is issued.
+ */
+static void virgl_copy_region_with_blit(struct pipe_context *pipe,
+                                        struct pipe_resource *dst,
+                                        unsigned dst_level,
+                                        unsigned dstx, unsigned dsty, unsigned dstz,
+                                        struct pipe_resource *src,
+                                        unsigned src_level,
+                                        const struct pipe_box *src_box)
+{
+   struct pipe_blit_info info;
+
+   memset(&info, 0, sizeof(info));
+
+   /* Destination: same extent as the source box, placed at dstx/y/z. */
+   info.dst.resource = dst;
+   info.dst.format = dst->format;
+   info.dst.level = dst_level;
+   info.dst.box.x = dstx;
+   info.dst.box.y = dsty;
+   info.dst.box.z = dstz;
+   info.dst.box.width = src_box->width;
+   info.dst.box.height = src_box->height;
+   info.dst.box.depth = src_box->depth;
+
+   /* Source: the box exactly as supplied by the caller. */
+   info.src.resource = src;
+   info.src.format = src->format;
+   info.src.level = src_level;
+   info.src.box = *src_box;
+
+   info.filter = PIPE_TEX_FILTER_NEAREST;
+   info.mask = util_format_get_mask(src->format) &
+               util_format_get_mask(dst->format);
+
+   if (!info.mask)
+      return;
+
+   pipe->blit(pipe, &info);
+}
+/**
+ * Fill \p res with a template for a staging resource large enough to hold
+ * \p box of \p orig.
+ *
+ * The template is single-level and copies the format of \p orig.  It keeps
+ * the target of \p orig only when the box spans more than one slice and
+ * that mip level of \p orig has more than one layer; otherwise it is a
+ * plain 2D texture.
+ */
+static void virgl_init_temp_resource_from_box(struct pipe_resource *res,
+                                              struct pipe_resource *orig,
+                                              const struct pipe_box *box,
+                                              unsigned level, unsigned flags)
+{
+   memset(res, 0, sizeof(*res));
+
+   res->format = orig->format;
+   res->usage = PIPE_USAGE_STAGING;
+   res->flags = flags;
+   res->width0 = box->width;
+   res->height0 = box->height;
+   res->depth0 = 1;
+   res->array_size = 1;
+
+   /* A 3D box needs the original texture target and matching dimensions. */
+   res->target = (box->depth > 1 && util_max_layer(orig, level) > 0)
+                 ? orig->target : PIPE_TEXTURE_2D;
+
+   if (res->target == PIPE_TEXTURE_3D) {
+      res->depth0 = box->depth;
+   } else if (res->target == PIPE_TEXTURE_1D_ARRAY ||
+              res->target == PIPE_TEXTURE_2D_ARRAY ||
+              res->target == PIPE_TEXTURE_CUBE_ARRAY) {
+      res->array_size = box->depth;
+   }
+}
+
+/**
+ * Byte offset of slice \p layer of mip \p level inside the texture's
+ * backing store.
+ *
+ * Cube, cube-array, 3D and 2D-array textures advance by a whole image
+ * (rows of blocks times stride) per layer, 1D arrays by one stride per
+ * layer.  Every other target must be addressed with layer 0.
+ */
+static unsigned
+vrend_get_tex_image_offset(const struct virgl_texture *res,
+                           unsigned level, unsigned layer)
+{
+   const struct pipe_resource *templ = &res->base.u.b;
+   unsigned result = res->level_offset[level];
+
+   switch (templ->target) {
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+   case PIPE_TEXTURE_3D:
+   case PIPE_TEXTURE_2D_ARRAY: {
+      const unsigned level_height = u_minify(templ->height0, level);
+      const unsigned rows = util_format_get_nblocksy(templ->format,
+                                                     level_height);
+
+      result += layer * rows * res->stride[level];
+      break;
+   }
+   case PIPE_TEXTURE_1D_ARRAY:
+      result += layer * res->stride[level];
+      break;
+   default:
+      assert(layer == 0);
+      break;
+   }
+
+   return result;
+}
+
+/**
+ * Map \p box of mip \p level of a texture for CPU access.
+ *
+ * The context is flushed first when virgl_res_needs_flush_wait() asks for
+ * it, and the contents are fetched through the winsys transfer_get callback
+ * when virgl_res_needs_readback() asks for it.  Multisampled depth textures
+ * are first blitted into a temporary resource built from the box, and that
+ * temporary is what gets mapped.
+ *
+ * On success *transfer receives the new transfer object and the returned
+ * pointer is the winsys mapping advanced by the transfer's byte offset.
+ * On failure NULL is returned, *transfer is left untouched and everything
+ * allocated here (the transfer and the resolve temporary) is released.
+ */
+static void *virgl_texture_transfer_map(struct pipe_context *ctx,
+                                        struct pipe_resource *resource,
+                                        unsigned level,
+                                        unsigned usage,
+                                        const struct pipe_box *box,
+                                        struct pipe_transfer **transfer)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct virgl_screen *vs = virgl_screen(ctx->screen);
+   struct virgl_texture *vtex = virgl_texture(resource);
+   enum pipe_format format = resource->format;
+   struct virgl_transfer *trans;
+   void *ptr;
+   boolean readback;
+   uint32_t offset;
+   struct virgl_hw_res *hw_res;
+   const unsigned h = u_minify(vtex->base.u.b.height0, level);
+   const unsigned nblocksy = util_format_get_nblocksy(format, h);
+   bool is_depth = util_format_has_depth(util_format_description(resource->format));
+   uint32_t l_stride;
+   bool doflushwait;
+
+   doflushwait = virgl_res_needs_flush_wait(vctx, &vtex->base, usage);
+   if (doflushwait)
+      ctx->flush(ctx, NULL, 0);
+
+   trans = util_slab_alloc(&vctx->texture_transfer_pool);
+   if (trans == NULL)
+      return NULL;
+
+   trans->base.resource = resource;
+   trans->base.level = level;
+   trans->base.usage = usage;
+   trans->base.box = *box;
+   trans->base.stride = vtex->stride[level];
+   trans->base.layer_stride = trans->base.stride * nblocksy;
+   trans->resolve_tmp = NULL;
+
+   /* Only layered targets carry a meaningful layer stride. */
+   if (resource->target != PIPE_TEXTURE_3D &&
+       resource->target != PIPE_TEXTURE_CUBE &&
+       resource->target != PIPE_TEXTURE_1D_ARRAY &&
+       resource->target != PIPE_TEXTURE_2D_ARRAY &&
+       resource->target != PIPE_TEXTURE_CUBE_ARRAY)
+      l_stride = 0;
+   else
+      l_stride = trans->base.layer_stride;
+
+   if (is_depth && resource->nr_samples > 1) {
+      struct pipe_resource tmp_resource;
+      virgl_init_temp_resource_from_box(&tmp_resource, resource, box,
+                                        level, 0);
+
+      trans->resolve_tmp = (struct virgl_resource *)ctx->screen->resource_create(ctx->screen, &tmp_resource);
+      if (!trans->resolve_tmp) {
+         /* Without the temporary there is nothing to resolve into. */
+         util_slab_free(&vctx->texture_transfer_pool, trans);
+         return NULL;
+      }
+
+      /* we want to do a resolve blit into the temporary */
+      virgl_copy_region_with_blit(ctx, &trans->resolve_tmp->u.b, 0, 0, 0, 0, resource, level, box);
+      ctx->flush(ctx, NULL, 0);
+      hw_res = trans->resolve_tmp->hw_res;
+      offset = 0;
+   } else {
+      offset = vrend_get_tex_image_offset(vtex, level, box->z);
+
+      offset += box->y / util_format_get_blockheight(format) * trans->base.stride +
+                box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
+      hw_res = vtex->base.hw_res;
+   }
+
+   readback = virgl_res_needs_readback(vctx, &vtex->base, usage);
+   if (readback)
+      vs->vws->transfer_get(vs->vws, hw_res, box, trans->base.stride, l_stride, offset, level);
+
+   /* NOTE(review): this waits on the texture's own hw_res even when the
+    * transfer above targeted the resolve temporary -- confirm that is
+    * intended. */
+   if (doflushwait || readback)
+      vs->vws->resource_wait(vs->vws, vtex->base.hw_res);
+
+   ptr = vs->vws->resource_map(vs->vws, hw_res);
+   if (!ptr) {
+      /* Do not leak the temporary or the slab entry on a failed map. */
+      if (trans->resolve_tmp)
+         pipe_resource_reference((struct pipe_resource **)&trans->resolve_tmp, NULL);
+      util_slab_free(&vctx->texture_transfer_pool, trans);
+      return NULL;
+   }
+
+   trans->offset = offset;
+   *transfer = &trans->base;
+
+   /* Byte-wise arithmetic; arithmetic on void * is a compiler extension. */
+   return (uint8_t *)ptr + trans->offset;
+}
+
+/**
+ * Finish a texture transfer.
+ *
+ * A transfer that was mapped for writing without FLUSH_EXPLICIT marks the
+ * texture as not clean, bumps the context's transfer counter and pushes the
+ * box through the winsys transfer_put callback.  The resolve temporary, if
+ * one was created at map time, is released and the transfer object goes
+ * back to the slab pool.
+ */
+static void virgl_texture_transfer_unmap(struct pipe_context *ctx,
+                                         struct pipe_transfer *transfer)
+{
+   struct virgl_context *vctx = virgl_context(ctx);
+   struct virgl_transfer *vtrans = virgl_transfer(transfer);
+   struct virgl_texture *tex = virgl_texture(transfer->resource);
+   uint32_t layer_stride;
+
+   /* Only layered targets carry a meaningful layer stride. */
+   switch (transfer->resource->target) {
+   case PIPE_TEXTURE_3D:
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_1D_ARRAY:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      layer_stride = vtrans->base.layer_stride;
+      break;
+   default:
+      layer_stride = 0;
+      break;
+   }
+
+   if ((vtrans->base.usage & PIPE_TRANSFER_WRITE) &&
+       !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+      struct virgl_winsys *winsys = virgl_screen(ctx->screen)->vws;
+
+      tex->base.clean = FALSE;
+      vctx->num_transfers++;
+      winsys->transfer_put(winsys, tex->base.hw_res, &transfer->box,
+                           vtrans->base.stride, layer_stride,
+                           vtrans->offset, transfer->level);
+   }
+
+   if (vtrans->resolve_tmp)
+      pipe_resource_reference((struct pipe_resource **)&vtrans->resolve_tmp, NULL);
+
+   util_slab_free(&vctx->texture_transfer_pool, vtrans);
+}
+
+
+/**
+ * Compute the per-level stride and byte offset of a texture and report the
+ * size of its backing store.
+ *
+ * Levels are laid out one after another; each level holds one image per
+ * slice (6 for cube maps, the minified depth for 3D textures, array_size
+ * otherwise).  \p total_size receives the accumulated size, or 0 for
+ * multisampled textures, which get no guest backing store.
+ *
+ * \return always TRUE.
+ */
+static boolean
+vrend_resource_layout(struct virgl_texture *res,
+                      uint32_t *total_size)
+{
+   struct pipe_resource *templ = &res->base.u.b;
+   unsigned w = templ->width0;
+   unsigned h = templ->height0;
+   unsigned d = templ->depth0;
+   unsigned bytes = 0;
+   unsigned lvl;
+
+   for (lvl = 0; lvl <= templ->last_level; lvl++) {
+      unsigned layers;
+
+      switch (templ->target) {
+      case PIPE_TEXTURE_CUBE:
+         layers = 6;
+         break;
+      case PIPE_TEXTURE_3D:
+         layers = d;
+         break;
+      default:
+         layers = templ->array_size;
+         break;
+      }
+
+      res->stride[lvl] = util_format_get_stride(templ->format, w);
+      res->level_offset[lvl] = bytes;
+
+      bytes += (util_format_get_nblocksy(templ->format, h) *
+                layers * res->stride[lvl]);
+
+      /* Step down to the next mip level. */
+      w = u_minify(w, 1);
+      h = u_minify(h, 1);
+      d = u_minify(d, 1);
+   }
+
+   /* don't create guest backing store for MSAA */
+   *total_size = (templ->nr_samples <= 1) ? bytes : 0;
+
+   return TRUE;
+}
+
+/**
+ * Export a texture through the winsys resource_get_handle callback, passing
+ * the stride of mip level 0 along with the hardware resource.
+ */
+static boolean virgl_texture_get_handle(struct pipe_screen *screen,
+                                        struct pipe_resource *ptex,
+                                        struct winsys_handle *whandle)
+{
+   struct virgl_winsys *winsys = virgl_screen(screen)->vws;
+   struct virgl_texture *tex = virgl_texture(ptex);
+
+   return winsys->resource_get_handle(winsys, tex->base.hw_res,
+                                      tex->stride[0], whandle);
+}
+
+/**
+ * Destroy a texture: drop the winsys reference on its hardware resource and
+ * free the texture structure.
+ */
+static void virgl_texture_destroy(struct pipe_screen *screen,
+                                  struct pipe_resource *res)
+{
+   struct virgl_winsys *winsys = virgl_screen(screen)->vws;
+   struct virgl_texture *tex = virgl_texture(res);
+
+   winsys->resource_unref(winsys, tex->base.hw_res);
+   FREE(tex);
+}
+
+/*
+ * Resource vtable installed on every virgl texture (tex->base.u.vtbl).
+ * The initializer is positional, so the entries must stay in the order of
+ * the members of struct u_resource_vtbl; NULL entries are hooks textures do
+ * not provide.
+ */
+static const struct u_resource_vtbl virgl_texture_vtbl =
+{
+ virgl_texture_get_handle, /* get_handle */
+ virgl_texture_destroy, /* resource_destroy */
+ virgl_texture_transfer_map, /* transfer_map */
+ NULL, /* transfer_flush_region */
+ virgl_texture_transfer_unmap, /* transfer_unmap */
+ NULL /* transfer_inline_write */
+};
+
+/**
+ * Wrap an existing winsys handle in a virgl texture.
+ *
+ * The texture copies \p template, gets the texture vtable and has its
+ * per-level layout computed before the hardware resource is looked up
+ * through the winsys resource_create_from_handle callback.
+ *
+ * \return the new resource, or NULL when the allocation fails or the winsys
+ *         cannot produce a hardware resource for \p whandle.
+ */
+struct pipe_resource *
+virgl_texture_from_handle(struct virgl_screen *vs,
+                          const struct pipe_resource *template,
+                          struct winsys_handle *whandle)
+{
+   struct virgl_texture *tex;
+   uint32_t size;
+
+   tex = CALLOC_STRUCT(virgl_texture);
+   if (!tex)
+      return NULL;
+
+   tex->base.u.b = *template;
+   tex->base.u.b.screen = &vs->base;
+   pipe_reference_init(&tex->base.u.b.reference, 1);
+   tex->base.u.vtbl = &virgl_texture_vtbl;
+   /* Only the stride/offset tables are needed here; the size is unused. */
+   vrend_resource_layout(tex, &size);
+
+   tex->base.hw_res = vs->vws->resource_create_from_handle(vs->vws, whandle);
+   if (!tex->base.hw_res) {
+      /* Same failure handling as virgl_texture_create(). */
+      FREE(tex);
+      return NULL;
+   }
+   return &tex->base.u.b;
+}
+
+/**
+ * Create a new texture described by \p template.
+ *
+ * The texture starts out clean, gets the texture vtable and has its
+ * per-level layout computed; the resulting backing-store size is handed to
+ * the winsys resource_create callback together with the template's
+ * parameters and the translated bind flags.
+ *
+ * \return the new resource, or NULL when the allocation or the winsys
+ *         resource creation fails.
+ */
+struct pipe_resource *virgl_texture_create(struct virgl_screen *vs,
+                                           const struct pipe_resource *template)
+{
+   struct virgl_texture *tex;
+   uint32_t size;
+   unsigned vbind;
+
+   tex = CALLOC_STRUCT(virgl_texture);
+   if (!tex)
+      return NULL;
+
+   tex->base.clean = TRUE;
+   tex->base.u.b = *template;
+   tex->base.u.b.screen = &vs->base;
+   pipe_reference_init(&tex->base.u.b.reference, 1);
+   tex->base.u.vtbl = &virgl_texture_vtbl;
+   vrend_resource_layout(tex, &size);
+
+   vbind = pipe_to_virgl_bind(template->bind);
+   tex->base.hw_res = vs->vws->resource_create(vs->vws, template->target,
+                                               template->format, vbind,
+                                               template->width0,
+                                               template->height0,
+                                               template->depth0,
+                                               template->array_size,
+                                               template->last_level,
+                                               template->nr_samples, size);
+   if (!tex->base.hw_res) {
+      FREE(tex);
+      return NULL;
+   }
+   return &tex->base.u.b;
+}
diff --git a/src/gallium/drivers/virgl/virgl_tgsi.c b/src/gallium/drivers/virgl/virgl_tgsi.c
new file mode 100644
index 00000000000..641b0b3e3b5
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_tgsi.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* The TGSI understood by the virgl "hardware" and the TGSI that current
+   gallium emits will diverge over time, so add a transform stage that
+   removes anything we do not want to send unless the receiver supports it.
+*/
+#include "tgsi/tgsi_transform.h"
+#include "virgl_context.h"
+/* Transform state for virgl_tgsi_transform(); currently it carries nothing
+   beyond the generic TGSI transform context. */
+struct virgl_transform_context {
+ struct tgsi_transform_context base;
+};
+
+/* For now this only strips the new properties that the remote side does
+   not understand yet; every other property is emitted unchanged. */
+static void
+virgl_tgsi_transform_property(struct tgsi_transform_context *ctx,
+                              struct tgsi_full_property *prop)
+{
+   const unsigned name = prop->Property.PropertyName;
+
+   if (name == TGSI_PROPERTY_NUM_CLIPDIST_ENABLED ||
+       name == TGSI_PROPERTY_NUM_CULLDIST_ENABLED)
+      return;
+
+   ctx->emit_property(ctx, prop);
+}
+
+/* Run the virgl property filter over a token stream.
+
+   The output buffer is allocated with as many tokens as the input holds.
+   Returns the newly allocated stream, or NULL when the allocation fails.
+   The value returned by tgsi_transform_shader() is not inspected. */
+struct tgsi_token *virgl_tgsi_transform(const struct tgsi_token *tokens_in)
+{
+   struct virgl_transform_context xform;
+   const uint num_tokens = tgsi_num_tokens(tokens_in);
+   struct tgsi_token *tokens_out = tgsi_alloc_tokens(num_tokens);
+
+   if (tokens_out == NULL)
+      return NULL;
+
+   memset(&xform, 0, sizeof(xform));
+   xform.base.transform_property = virgl_tgsi_transform_property;
+
+   tgsi_transform_shader(tokens_in, tokens_out, num_tokens, &xform.base);
+
+   return tokens_out;
+}
diff --git a/src/gallium/drivers/virgl/virgl_winsys.h b/src/gallium/drivers/virgl/virgl_winsys.h
new file mode 100644
index 00000000000..ea21f2b6712
--- /dev/null
+++ b/src/gallium/drivers/virgl/virgl_winsys.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2014, 2015 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VIRGL_WINSYS_H
+#define VIRGL_WINSYS_H
+
+#include "pipe/p_defines.h"
+#include "virgl_hw.h"
+
+struct pipe_box;
+struct pipe_fence_handle;
+struct winsys_handle;
+struct virgl_hw_res;
+
+#define VIRGL_MAX_CMDBUF_DWORDS (16*1024)
+
+/* Capability block exchanged with the winsys get_caps() callback; it wraps
+   the virgl_caps union (declared outside this header, presumably in
+   virgl_hw.h -- confirm). */
+struct virgl_drm_caps {
+ union virgl_caps caps;
+};
+
+/* Command buffer handed out by the winsys cmd_buf_create() callback. */
+struct virgl_cmd_buf {
+ /* NOTE(review): presumably the number of dwords currently written to
+    buf -- confirm against the encoder. */
+ unsigned cdw;
+ /* Storage for the command stream, addressed as 32-bit words. */
+ uint32_t *buf;
+};
+
+/*
+ * Table of callbacks through which the virgl driver talks to its winsys.
+ * Apart from cmd_buf_destroy, every callback receives the winsys itself as
+ * its first argument.
+ */
+struct virgl_winsys {
+ unsigned pci_id;
+
+ /* Tear the winsys down. */
+ void (*destroy)(struct virgl_winsys *vws);
+
+ /* Transfer a box of a resource; the texture unmap path calls this after
+    a write mapping.  NOTE(review): the meaning of the int return value is
+    not visible here. */
+ int (*transfer_put)(struct virgl_winsys *vws,
+ struct virgl_hw_res *res,
+ const struct pipe_box *box,
+ uint32_t stride, uint32_t layer_stride,
+ uint32_t buf_offset, uint32_t level);
+
+ /* Counterpart of transfer_put; the texture map path calls this when a
+    read-back is needed. */
+ int (*transfer_get)(struct virgl_winsys *vws,
+ struct virgl_hw_res *res,
+ const struct pipe_box *box,
+ uint32_t stride, uint32_t layer_stride,
+ uint32_t buf_offset, uint32_t level);
+
+ /* Create a hardware resource.  The texture code checks the result
+    against NULL, and passes a size of 0 for multisampled textures. */
+ struct virgl_hw_res *(*resource_create)(struct virgl_winsys *vws,
+ enum pipe_texture_target target,
+ uint32_t format, uint32_t bind,
+ uint32_t width, uint32_t height,
+ uint32_t depth, uint32_t array_size,
+ uint32_t last_level, uint32_t nr_samples,
+ uint32_t size);
+
+ /* Drop a reference on a hardware resource. */
+ void (*resource_unref)(struct virgl_winsys *vws, struct virgl_hw_res *res);
+
+ /* Map a hardware resource for CPU access; the texture code checks the
+    result against NULL. */
+ void *(*resource_map)(struct virgl_winsys *vws, struct virgl_hw_res *res);
+ /* NOTE(review): presumably blocks until the resource is idle -- confirm. */
+ void (*resource_wait)(struct virgl_winsys *vws, struct virgl_hw_res *res);
+
+ /* Import / export of hardware resources through a winsys_handle. */
+ struct virgl_hw_res *(*resource_create_from_handle)(struct virgl_winsys *vws,
+ struct winsys_handle *whandle);
+ boolean (*resource_get_handle)(struct virgl_winsys *vws,
+ struct virgl_hw_res *res,
+ uint32_t stride,
+ struct winsys_handle *whandle);
+
+ /* Command buffer lifetime. */
+ struct virgl_cmd_buf *(*cmd_buf_create)(struct virgl_winsys *ws);
+ void (*cmd_buf_destroy)(struct virgl_cmd_buf *buf);
+
+ /* NOTE(review): presumably emit_res records that buf references res and
+    submit_cmd sends buf to the host -- confirm. */
+ void (*emit_res)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf, struct virgl_hw_res *res, boolean write_buffer);
+ int (*submit_cmd)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf);
+
+ boolean (*res_is_referenced)(struct virgl_winsys *vws,
+ struct virgl_cmd_buf *buf,
+ struct virgl_hw_res *res);
+
+ /* Fill *caps; called once while the screen is created. */
+ int (*get_caps)(struct virgl_winsys *vws, struct virgl_drm_caps *caps);
+
+ /* fence */
+ struct pipe_fence_handle *(*cs_create_fence)(struct virgl_winsys *vws);
+ /* Backs the screen's fence_finish hook. */
+ bool (*fence_wait)(struct virgl_winsys *vws,
+ struct pipe_fence_handle *fence,
+ uint64_t timeout);
+
+ /* Backs the screen's fence_reference hook. */
+ void (*fence_reference)(struct virgl_winsys *vws,
+ struct pipe_fence_handle **dst,
+ struct pipe_fence_handle *src);
+
+ /* for sw paths */
+ void (*flush_frontbuffer)(struct virgl_winsys *vws,
+ struct virgl_hw_res *res,
+ unsigned level, unsigned layer,
+ void *winsys_drawable_handle,
+ struct pipe_box *sub_box);
+};
+
+
+#endif