diff options
author | Jason Ekstrand <[email protected]> | 2015-11-03 15:45:04 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2015-11-03 15:45:04 -0800 |
commit | b00e3f221b3f6dd0e87697c53331fd033b6e8676 (patch) | |
tree | a59dfeca8fd404c65da59a663e0abda301e893a2 /src/gallium/drivers | |
parent | a1e7b8701a4687f29b013364a852aa773c80f960 (diff) | |
parent | 5d4b019d2a6d4deb4db11780618515cf1fa8a4fc (diff) |
Merge remote-tracking branch 'mesa-public/master' into vulkan
Diffstat (limited to 'src/gallium/drivers')
164 files changed, 8171 insertions, 1808 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index a9498835011..3906c9b996e 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -81,7 +81,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ info->restart_index : 0xffffffff); - if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex && + if (ctx->rasterizer->point_size_per_vertex && (info->mode == PIPE_PRIM_POINTS)) primtype = DI_PT_POINTLIST_PSIZE; @@ -137,7 +137,7 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .key = { /* do binning pass first: */ .binning_pass = true, - .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false, + .color_two_side = ctx->rasterizer->light_twoside, // TODO set .half_precision based on render target format, // ie. float16 and smaller use half, float32 use full.. .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), @@ -149,9 +149,9 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .fsaturate_t = fd3_ctx->fsaturate_t, .fsaturate_r = fd3_ctx->fsaturate_r, }, - .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, - .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : 0, - .sprite_coord_mode = ctx->rasterizer ? 
ctx->rasterizer->sprite_coord_mode : false, + .rasterflat = ctx->rasterizer->flatshade, + .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, + .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, }; unsigned dirty; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 411f5b76329..8f9c8b0623c 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -627,7 +627,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, ctx->prog.dirty = 0; } - if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) { + if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) { struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend); uint32_t i; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index 025753c037e..7bd5163529a 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -118,12 +118,12 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .key = { /* do binning pass first: */ .binning_pass = true, - .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false, - .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, + .color_two_side = ctx->rasterizer->light_twoside, + .rasterflat = ctx->rasterizer->flatshade, // TODO set .half_precision based on render target format, // ie. float16 and smaller use half, float32 use full.. .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), - .ucp_enables = ctx->rasterizer ? 
ctx->rasterizer->clip_plane_enable : 0, + .ucp_enables = ctx->rasterizer->clip_plane_enable, .has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate), .vsaturate_s = fd4_ctx->vsaturate_s, .vsaturate_t = fd4_ctx->vsaturate_t, @@ -132,9 +132,9 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .fsaturate_t = fd4_ctx->fsaturate_t, .fsaturate_r = fd4_ctx->fsaturate_r, }, - .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, - .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : false, - .sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false, + .rasterflat = ctx->rasterizer->flatshade, + .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, + .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, }; unsigned dirty; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index c7ed1d2e379..cf5dd7b0f17 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -594,7 +594,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, ctx->prog.dirty = 0; } - if ((dirty & FD_DIRTY_BLEND) && ctx->blend) { + if ((dirty & FD_DIRTY_BLEND)) { struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend); uint32_t i; diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 50d140fe903..9f8c33263fb 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -238,6 +238,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; case PIPE_CAP_MAX_VIEWPORTS: diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 
5812af626cb..2d2fd375656 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -252,6 +252,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index 5efe9da2d22..2e9470e66e9 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -202,14 +202,16 @@ static inline void gen6_3DSTATE_WM(struct ilo_builder *builder, const struct ilo_state_raster *rs, const struct ilo_state_ps *ps, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 9; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 6, 6); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); dw[1] = kernel_offset; @@ -221,6 +223,11 @@ gen6_3DSTATE_WM(struct ilo_builder *builder, dw[6] = rs->wm[2] | ps->ps[4]; dw[7] = 0; /* kernel 1 */ dw[8] = 0; /* kernel 2 */ + + if (ilo_state_ps_get_scratch_size(ps)) { + ilo_builder_batch_reloc(builder, pos + 2, scratch_bo, + ps->ps[0], 0); + } } static inline void @@ -329,14 +336,16 @@ gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder) static inline void gen7_3DSTATE_PS(struct ilo_builder *builder, const struct ilo_state_ps *ps, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 8; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 
3DSTATE_PS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -347,19 +356,26 @@ gen7_3DSTATE_PS(struct ilo_builder *builder, dw[5] = ps->ps[5]; dw[6] = 0; /* kernel 1 */ dw[7] = 0; /* kernel 2 */ + + if (ilo_state_ps_get_scratch_size(ps)) { + ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + ps->ps[3], 0); + } } static inline void gen8_3DSTATE_PS(struct ilo_builder *builder, const struct ilo_state_ps *ps, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 12; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -374,6 +390,11 @@ gen8_3DSTATE_PS(struct ilo_builder *builder, dw[9] = 0; dw[10] = 0; /* kernel 2 */ dw[11] = 0; + + if (ilo_state_ps_get_scratch_size(ps)) { + ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo, + ps->ps[1], 0); + } } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index 6e94fb25f1f..3a448719c15 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -477,14 +477,16 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, static inline void gen6_3DSTATE_VS(struct ilo_builder *builder, const struct ilo_state_vs *vs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 6; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 6, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -493,19 +495,26 @@ gen6_3DSTATE_VS(struct ilo_builder *builder, dw[3] = vs->vs[1]; dw[4] = vs->vs[2]; dw[5] = vs->vs[3]; + + if 
(ilo_state_vs_get_scratch_size(vs)) { + ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + vs->vs[1], 0); + } } static inline void gen8_3DSTATE_VS(struct ilo_builder *builder, const struct ilo_state_vs *vs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 9; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -517,19 +526,26 @@ gen8_3DSTATE_VS(struct ilo_builder *builder, dw[6] = vs->vs[2]; dw[7] = vs->vs[3]; dw[8] = vs->vs[4]; + + if (ilo_state_vs_get_scratch_size(vs)) { + ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo, + vs->vs[1], 0); + } } static inline void gen7_3DSTATE_HS(struct ilo_builder *builder, const struct ilo_state_hs *hs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 7; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2); /* see hs_set_gen7_3DSTATE_HS() */ @@ -539,19 +555,26 @@ gen7_3DSTATE_HS(struct ilo_builder *builder, dw[4] = hs->hs[2]; dw[5] = hs->hs[3]; dw[6] = 0; + + if (ilo_state_hs_get_scratch_size(hs)) { + ilo_builder_batch_reloc(builder, pos + 4, scratch_bo, + hs->hs[2], 0); + } } static inline void gen8_3DSTATE_HS(struct ilo_builder *builder, const struct ilo_state_hs *hs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 9; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2); /* see 
hs_set_gen7_3DSTATE_HS() */ @@ -563,6 +586,11 @@ gen8_3DSTATE_HS(struct ilo_builder *builder, dw[6] = 0; dw[7] = hs->hs[3]; dw[8] = 0; + + if (ilo_state_hs_get_scratch_size(hs)) { + ilo_builder_batch_reloc64(builder, pos + 5, scratch_bo, + hs->hs[2], 0); + } } static inline void @@ -586,14 +614,16 @@ gen7_3DSTATE_TE(struct ilo_builder *builder, static inline void gen7_3DSTATE_DS(struct ilo_builder *builder, const struct ilo_state_ds *ds, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 6; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2); /* see ds_set_gen7_3DSTATE_DS() */ @@ -602,19 +632,26 @@ gen7_3DSTATE_DS(struct ilo_builder *builder, dw[3] = ds->ds[1]; dw[4] = ds->ds[2]; dw[5] = ds->ds[3]; + + if (ilo_state_ds_get_scratch_size(ds)) { + ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + ds->ds[1], 0); + } } static inline void gen8_3DSTATE_DS(struct ilo_builder *builder, const struct ilo_state_ds *ds, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 9; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2); /* see ds_set_gen7_3DSTATE_DS() */ @@ -626,19 +663,26 @@ gen8_3DSTATE_DS(struct ilo_builder *builder, dw[6] = ds->ds[2]; dw[7] = ds->ds[3]; dw[8] = ds->ds[4]; + + if (ilo_state_ds_get_scratch_size(ds)) { + ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo, + ds->ds[1], 0); + } } static inline void gen6_3DSTATE_GS(struct ilo_builder *builder, const struct ilo_state_gs *gs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len 
= 7; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 6, 6); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -648,6 +692,11 @@ gen6_3DSTATE_GS(struct ilo_builder *builder, dw[4] = gs->gs[2]; dw[5] = gs->gs[3]; dw[6] = gs->gs[4]; + + if (ilo_state_gs_get_scratch_size(gs)) { + ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + gs->gs[1], 0); + } } static inline void @@ -677,14 +726,16 @@ gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder, static inline void gen7_3DSTATE_GS(struct ilo_builder *builder, const struct ilo_state_gs *gs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 7; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -694,19 +745,26 @@ gen7_3DSTATE_GS(struct ilo_builder *builder, dw[4] = gs->gs[2]; dw[5] = gs->gs[3]; dw[6] = 0; + + if (ilo_state_gs_get_scratch_size(gs)) { + ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + gs->gs[1], 0); + } } static inline void gen8_3DSTATE_GS(struct ilo_builder *builder, const struct ilo_state_gs *gs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 10; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -719,6 +777,11 @@ gen8_3DSTATE_GS(struct ilo_builder *builder, dw[7] = gs->gs[3]; dw[8] = 0; dw[9] = gs->gs[4]; + + if (ilo_state_gs_get_scratch_size(gs)) { + ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo, + 
gs->gs[1], 0); + } } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.c b/src/gallium/drivers/ilo/core/ilo_state_compute.c index a5fe5e1a6b0..ba3ff9001e1 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_compute.c +++ b/src/gallium/drivers/ilo/core/ilo_state_compute.c @@ -158,7 +158,8 @@ compute_interface_get_gen6_read_end(const struct ilo_dev *dev, */ assert(per_thread_read <= 63); - /* From the Haswell PRM, volume 2d, page 199: + /* + * From the Haswell PRM, volume 2d, page 199: * * "(Cross-Thread Constant Data Read Length) [0,127]" */ @@ -210,38 +211,68 @@ compute_validate_gen6(const struct ilo_dev *dev, return true; } -static uint8_t -compute_get_gen6_scratch_space(const struct ilo_dev *dev, - const struct ilo_state_compute_info *info) +static uint32_t +compute_get_gen6_per_thread_scratch_size(const struct ilo_dev *dev, + const struct ilo_state_compute_info *info, + uint8_t *per_thread_space) { - uint32_t scratch_size = 0; - uint8_t i; + ILO_DEV_ASSERT(dev, 6, 7); - ILO_DEV_ASSERT(dev, 6, 8); + /* + * From the Sandy Bridge PRM, volume 2 part 2, page 30: + * + * "(Per Thread Scratch Space) + * Range = [0,11] indicating [1k bytes, 12k bytes] [DevSNB]" + */ + assert(info->per_thread_scratch_size <= 12 * 1024); - for (i = 0; i < info->interface_count; i++) { - if (scratch_size < info->interfaces[i].scratch_size) - scratch_size = info->interfaces[i].scratch_size; + if (!info->per_thread_scratch_size) { + *per_thread_space = 0; + return 0; } - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - assert(scratch_size <= 2 * 1024 * 1024); + *per_thread_space = (info->per_thread_scratch_size > 1024) ? + (info->per_thread_scratch_size - 1) / 1024 : 0; + + return 1024 * (1 + *per_thread_space); +} - /* next power of two, starting from 1KB */ - return (scratch_size > 1024) ? 
- (util_last_bit(scratch_size - 1) - 10): 0; - } else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { - assert(scratch_size <= 2 * 1024 * 1024); +static uint32_t +compute_get_gen75_per_thread_scratch_size(const struct ilo_dev *dev, + const struct ilo_state_compute_info *info, + uint8_t *per_thread_space) +{ + ILO_DEV_ASSERT(dev, 7.5, 8); - /* next power of two, starting from 2KB */ - return (scratch_size > 2048) ? - (util_last_bit(scratch_size - 1) - 11): 0; - } else { - assert(scratch_size <= 12 * 1024); + /* + * From the Haswell PRM, volume 2b, page 407: + * + * "(Per Thread Scratch Space) + * [0,10] Indicating [2k bytes, 2 Mbytes]" + * + * "Note: The scratch space should be declared as 2x the desired + * scratch space. The stack will start at the half-way point instead + * of the end. The upper half of scratch space will not be accessed + * and so does not have to be allocated in memory." + * + * From the Broadwell PRM, volume 2a, page 450: + * + * "(Per Thread Scratch Space) + * [0,11] indicating [1k bytes, 2 Mbytes]" + */ + assert(info->per_thread_scratch_size <= + ((ilo_dev_gen(dev) >= ILO_GEN(8)) ? 2 : 1) * 1024 * 1024); - return (scratch_size > 1024) ? - (scratch_size - 1) / 1024 : 0; + if (!info->per_thread_scratch_size) { + *per_thread_space = 0; + return 0; } + + /* next power of two, starting from 1KB */ + *per_thread_space = (info->per_thread_scratch_size > 1024) ? 
+ (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0; + + return 1 << (10 + *per_thread_space); } static bool @@ -250,7 +281,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute, const struct ilo_state_compute_info *info) { struct compute_urb_configuration urb; - uint8_t scratch_space; + uint32_t per_thread_size; + uint8_t per_thread_space; uint32_t dw1, dw2, dw4; @@ -260,9 +292,16 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute, !compute_validate_gen6(dev, info, &urb)) return false; - scratch_space = compute_get_gen6_scratch_space(dev, info); + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { + per_thread_size = compute_get_gen75_per_thread_scratch_size(dev, + info, &per_thread_space); + } else { + per_thread_size = compute_get_gen6_per_thread_scratch_size(dev, + info, &per_thread_space); + } + + dw1 = per_thread_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT; - dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT; dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT | urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT | GEN6_VFE_DW2_RESET_GATEWAY_TIMER | @@ -281,6 +320,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute, compute->vfe[1] = dw2; compute->vfe[2] = dw4; + compute->scratch_size = per_thread_size * dev->thread_count; + return true; } diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.h b/src/gallium/drivers/ilo/core/ilo_state_compute.h index 346f7b617f4..bd56bba4369 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_compute.h +++ b/src/gallium/drivers/ilo/core/ilo_state_compute.h @@ -45,8 +45,6 @@ struct ilo_state_compute_interface_info { /* usually 0 unless there are multiple interfaces */ uint32_t kernel_offset; - uint32_t scratch_size; - uint8_t sampler_count; uint8_t surface_count; @@ -65,6 +63,8 @@ struct ilo_state_compute_info { const struct ilo_state_compute_interface_info *interfaces; uint8_t interface_count; + uint32_t 
per_thread_scratch_size; + uint32_t cv_urb_alloc_size; uint32_t curbe_alloc_size; }; @@ -74,6 +74,8 @@ struct ilo_state_compute { uint32_t (*idrt)[6]; uint8_t idrt_count; + + uint32_t scratch_size; }; static inline size_t @@ -89,4 +91,10 @@ ilo_state_compute_init(struct ilo_state_compute *compute, const struct ilo_dev *dev, const struct ilo_state_compute_info *info); +static inline uint32_t +ilo_state_compute_get_scratch_size(const struct ilo_state_compute *compute) +{ + return compute->scratch_size; +} + #endif /* ILO_STATE_COMPUTE_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.c b/src/gallium/drivers/ilo/core/ilo_state_shader.c index f67326c7f10..aec4fd6d8a6 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_shader.c +++ b/src/gallium/drivers/ilo/core/ilo_state_shader.c @@ -37,7 +37,9 @@ enum vertex_stage { struct vertex_ff { uint8_t grf_start; - uint8_t scratch_space; + + uint8_t per_thread_scratch_space; + uint32_t per_thread_scratch_size; uint8_t sampler_count; uint8_t surface_count; @@ -59,13 +61,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev, * others. */ const uint8_t max_grf_start = (stage == STAGE_GS) ? 
16 : 32; - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 134: - * - * "(Per-Thread Scratch Space) - * Range [0,11] indicating [1K Bytes, 2M Bytes]" - */ - const uint32_t max_scratch_size = 2 * 1024 * 1024; ILO_DEV_ASSERT(dev, 6, 8); @@ -73,7 +68,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev, assert(!kernel->offset); assert(kernel->grf_start < max_grf_start); - assert(kernel->scratch_size <= max_scratch_size); return true; } @@ -112,18 +106,33 @@ vertex_get_gen6_ff(const struct ilo_dev *dev, const struct ilo_state_shader_kernel_info *kernel, const struct ilo_state_shader_resource_info *resource, const struct ilo_state_shader_urb_info *urb, + uint32_t per_thread_scratch_size, struct vertex_ff *ff) { ILO_DEV_ASSERT(dev, 6, 8); + memset(ff, 0, sizeof(*ff)); + if (!vertex_validate_gen6_kernel(dev, stage, kernel) || !vertex_validate_gen6_urb(dev, stage, urb)) return false; ff->grf_start = kernel->grf_start; - /* next power of two, starting from 1KB */ - ff->scratch_space = (kernel->scratch_size > 1024) ? - (util_last_bit(kernel->scratch_size - 1) - 10): 0; + + if (per_thread_scratch_size) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 134: + * + * "(Per-Thread Scratch Space) + * Range [0,11] indicating [1K Bytes, 2M Bytes]" + */ + assert(per_thread_scratch_size <= 2 * 1024 * 1024); + + /* next power of two, starting from 1KB */ + ff->per_thread_scratch_space = (per_thread_scratch_size > 1024) ? + (util_last_bit(per_thread_scratch_size - 1) - 10) : 0; + ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space); + } ff->sampler_count = (resource->sampler_count <= 12) ? 
(resource->sampler_count + 3) / 4 : 4; @@ -192,8 +201,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs, ILO_DEV_ASSERT(dev, 6, 8); - if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, - &info->resource, &info->urb, &ff)) + if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, &info->resource, + &info->urb, info->per_thread_scratch_size, &ff)) return false; thread_count = vs_get_gen6_thread_count(dev, info); @@ -207,7 +216,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs, if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav) dw2 |= GEN75_THREADDISP_ACCESS_UAV; - dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT | ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT | @@ -234,6 +244,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs, if (ilo_dev_gen(dev) >= ILO_GEN(8)) vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT; + vs->scratch_size = ff.per_thread_scratch_size * thread_count; + return true; } @@ -273,8 +285,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs, ILO_DEV_ASSERT(dev, 7, 8); - if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, - &info->resource, &info->urb, &ff)) + if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, &info->resource, + &info->urb, info->per_thread_scratch_size, &ff)) return false; thread_count = hs_get_gen7_thread_count(dev, info); @@ -282,19 +294,22 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs, dw1 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; - if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) + dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) + dw2 |= thread_count << GEN8_HS_DW2_MAX_THREADS__SHIFT; + else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) dw1 |= thread_count << GEN75_HS_DW1_DISPATCH_MAX_THREADS__SHIFT; else dw1 |= thread_count << 
GEN7_HS_DW1_DISPATCH_MAX_THREADS__SHIFT; - dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT; - if (info->dispatch_enable) dw2 |= GEN7_HS_DW2_HS_ENABLE; if (info->stats_enable) dw2 |= GEN7_HS_DW2_STATISTICS; - dw4 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw4 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES | ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT | @@ -310,6 +325,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs, hs->hs[2] = dw4; hs->hs[3] = dw5; + hs->scratch_size = ff.per_thread_scratch_size * thread_count; + return true; } @@ -373,8 +390,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds, ILO_DEV_ASSERT(dev, 7, 8); - if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, - &info->resource, &info->urb, &ff)) + if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, &info->resource, + &info->urb, info->per_thread_scratch_size, &ff)) return false; thread_count = ds_get_gen7_thread_count(dev, info); @@ -385,7 +402,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds, if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav) dw2 |= GEN75_THREADDISP_ACCESS_UAV; - dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT | ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT | @@ -412,6 +430,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds, if (ilo_dev_gen(dev) >= ILO_GEN(8)) ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT; + ds->scratch_size = ff.per_thread_scratch_size * thread_count; + return true; } @@ -425,8 +445,8 @@ gs_get_gen6_ff(const struct ilo_dev *dev, ILO_DEV_ASSERT(dev, 6, 8); - if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, - &info->resource, &info->urb, ff)) + if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, &info->resource, + &info->urb, info->per_thread_scratch_size, ff)) 
return false; /* @@ -510,7 +530,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs, ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; - dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT | ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT | @@ -550,6 +571,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs, gs->gs[3] = dw5; gs->gs[4] = dw6; + gs->scratch_size = ff.per_thread_scratch_size * thread_count; + return true; } @@ -588,7 +611,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs, if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav) dw2 |= GEN75_THREADDISP_ACCESS_UAV; - dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT | 0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT | @@ -618,6 +642,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs, if (ilo_dev_gen(dev) >= ILO_GEN(8)) gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT; + gs->scratch_size = ff.per_thread_scratch_size * thread_count; + return true; } diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.h b/src/gallium/drivers/ilo/core/ilo_state_shader.h index 44690c5b0bb..35651090d66 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_shader.h +++ b/src/gallium/drivers/ilo/core/ilo_state_shader.h @@ -42,8 +42,6 @@ struct ilo_state_shader_kernel_info { uint8_t grf_start; uint8_t pcb_attr_count; - - uint32_t scratch_size; }; /** @@ -77,6 +75,7 @@ struct ilo_state_vs_info { struct ilo_state_shader_resource_info resource; struct ilo_state_shader_urb_info urb; + uint32_t per_thread_scratch_size; bool dispatch_enable; bool stats_enable; }; @@ -86,6 +85,7 @@ struct ilo_state_hs_info { struct 
ilo_state_shader_resource_info resource; struct ilo_state_shader_urb_info urb; + uint32_t per_thread_scratch_size; bool dispatch_enable; bool stats_enable; }; @@ -95,6 +95,7 @@ struct ilo_state_ds_info { struct ilo_state_shader_resource_info resource; struct ilo_state_shader_urb_info urb; + uint32_t per_thread_scratch_size; bool dispatch_enable; bool stats_enable; }; @@ -119,6 +120,7 @@ struct ilo_state_gs_info { struct ilo_state_gs_sol_info sol; + uint32_t per_thread_scratch_size; bool dispatch_enable; bool stats_enable; }; @@ -158,6 +160,8 @@ struct ilo_state_ps_info { struct ilo_state_ps_io_info io; struct ilo_state_ps_params_info params; + uint32_t per_thread_scratch_size; + /* bitmask of GEN6_PS_DISPATCH_x */ uint8_t valid_kernels; bool per_sample_dispatch; @@ -173,23 +177,28 @@ struct ilo_state_ps_info { struct ilo_state_vs { uint32_t vs[5]; + uint32_t scratch_size; }; struct ilo_state_hs { uint32_t hs[4]; + uint32_t scratch_size; }; struct ilo_state_ds { uint32_t te[3]; uint32_t ds[5]; + uint32_t scratch_size; }; struct ilo_state_gs { uint32_t gs[5]; + uint32_t scratch_size; }; struct ilo_state_ps { uint32_t ps[8]; + uint32_t scratch_size; struct ilo_state_ps_dispatch_conds { bool ps_valid; @@ -211,6 +220,12 @@ bool ilo_state_vs_init_disabled(struct ilo_state_vs *vs, const struct ilo_dev *dev); +static inline uint32_t +ilo_state_vs_get_scratch_size(const struct ilo_state_vs *vs) +{ + return vs->scratch_size; +} + bool ilo_state_hs_init(struct ilo_state_hs *hs, const struct ilo_dev *dev, @@ -221,6 +236,12 @@ ilo_state_hs_init_disabled(struct ilo_state_hs *hs, const struct ilo_dev *dev); +static inline uint32_t +ilo_state_hs_get_scratch_size(const struct ilo_state_hs *hs) +{ + return hs->scratch_size; +} + bool ilo_state_ds_init(struct ilo_state_ds *ds, const struct ilo_dev *dev, @@ -230,6 +251,12 @@ bool ilo_state_ds_init_disabled(struct ilo_state_ds *ds, const struct ilo_dev *dev); +static inline uint32_t +ilo_state_ds_get_scratch_size(const struct 
ilo_state_ds *ds) +{ + return ds->scratch_size; +} + bool ilo_state_gs_init(struct ilo_state_gs *gs, const struct ilo_dev *dev, @@ -239,6 +266,12 @@ bool ilo_state_gs_init_disabled(struct ilo_state_gs *gs, const struct ilo_dev *dev); +static inline uint32_t +ilo_state_gs_get_scratch_size(const struct ilo_state_gs *gs) +{ + return gs->scratch_size; +} + bool ilo_state_ps_init(struct ilo_state_ps *ps, const struct ilo_dev *dev, @@ -253,4 +286,10 @@ ilo_state_ps_set_params(struct ilo_state_ps *ps, const struct ilo_dev *dev, const struct ilo_state_ps_params_info *params); +static inline uint32_t +ilo_state_ps_get_scratch_size(const struct ilo_state_ps *ps) +{ + return ps->scratch_size; +} + #endif /* ILO_STATE_SHADER_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c index ceeb68a460e..5c3ca1ebe37 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c +++ b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c @@ -34,7 +34,8 @@ struct pixel_ff { uint32_t kernel_offsets[3]; uint8_t grf_starts[3]; bool pcb_enable; - uint8_t scratch_space; + uint8_t per_thread_scratch_space; + uint32_t per_thread_scratch_size; uint8_t sampler_count; uint8_t surface_count; @@ -56,13 +57,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev, { /* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */ const uint8_t max_grf_start = 128; - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 271: - * - * "(Per-Thread Scratch Space) - * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two" - */ - const uint32_t max_scratch_size = 2 * 1024 * 1024; ILO_DEV_ASSERT(dev, 6, 8); @@ -70,7 +64,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev, assert(kernel->offset % 64 == 0); assert(kernel->grf_start < max_grf_start); - assert(kernel->scratch_size <= max_scratch_size); return true; } @@ -325,7 +318,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev, const struct ilo_state_shader_kernel_info 
*kernel_8 = &info->kernel_8; const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16; const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32; - uint32_t scratch_size; ILO_DEV_ASSERT(dev, 6, 8); @@ -363,21 +355,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev, ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) && kernel_32->pcb_attr_count)); - scratch_size = 0; - if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) && - scratch_size < kernel_8->scratch_size) - scratch_size = kernel_8->scratch_size; - if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) && - scratch_size < kernel_16->scratch_size) - scratch_size = kernel_16->scratch_size; - if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) && - scratch_size < kernel_32->scratch_size) - scratch_size = kernel_32->scratch_size; - - /* next power of two, starting from 1KB */ - ff->scratch_space = (scratch_size > 1024) ? - (util_last_bit(scratch_size - 1) - 10): 0; - /* GPU hangs on Haswell if none of the dispatch mode bits is set */ if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes) ff->dispatch_modes |= GEN6_PS_DISPATCH_8; @@ -401,6 +378,21 @@ ps_get_gen6_ff(const struct ilo_dev *dev, if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff)) return false; + if (info->per_thread_scratch_size) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 271: + * + * "(Per-Thread Scratch Space) + * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two" + */ + assert(info->per_thread_scratch_size <= 2 * 1024 * 1024); + + /* next power of two, starting from 1KB */ + ff->per_thread_scratch_space = (info->per_thread_scratch_size > 1024) ? + (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0; + ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space); + } + ff->sampler_count = (resource->sampler_count <= 12) ? 
(resource->sampler_count + 3) / 4 : 4; ff->surface_count = resource->surface_count; @@ -441,7 +433,8 @@ ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps, if (false) dw2 |= GEN6_THREADDISP_FP_MODE_ALT; - dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff->per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT | ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT | @@ -539,7 +532,8 @@ ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps, if (false) dw2 |= GEN6_THREADDISP_FP_MODE_ALT; - dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff->per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT | ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; @@ -603,7 +597,8 @@ ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps, if (false) dw3 |= GEN6_THREADDISP_FP_MODE_ALT; - dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw4 = ff->per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT | io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT | @@ -705,6 +700,7 @@ ilo_state_ps_init(struct ilo_state_ps *ps, ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff); } + ps->scratch_size = ff.per_thread_scratch_size * ff.thread_count; /* save conditions */ ps->conds = ff.conds; diff --git a/src/gallium/drivers/ilo/ilo_blit.h b/src/gallium/drivers/ilo/ilo_blit.h index da0bfe9c4c9..bad4dab8404 100644 --- a/src/gallium/drivers/ilo/ilo_blit.h +++ b/src/gallium/drivers/ilo/ilo_blit.h @@ -58,10 +58,12 @@ ilo_blit_resolve_slices(struct ilo_context *ilo, * As it is only used to resolve HiZ right now, return early when there is * no HiZ. 
*/ - if (!ilo_image_can_enable_aux(&tex->image, level)) + if (tex->image.aux.type != ILO_IMAGE_AUX_HIZ || + !ilo_image_can_enable_aux(&tex->image, level)) return; - if (ilo_image_can_enable_aux(&tex->image, level)) { + if (tex->image.aux.type == ILO_IMAGE_AUX_HIZ && + ilo_image_can_enable_aux(&tex->image, level)) { ilo_blit_resolve_slices_for_hiz(ilo, res, level, first_slice, num_slices, resolve_flags); } diff --git a/src/gallium/drivers/ilo/ilo_draw.c b/src/gallium/drivers/ilo/ilo_draw.c index 433348d9326..69f36ae5df6 100644 --- a/src/gallium/drivers/ilo/ilo_draw.c +++ b/src/gallium/drivers/ilo/ilo_draw.c @@ -547,6 +547,7 @@ static void ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct ilo_context *ilo = ilo_context(pipe); + int vs_scratch_size, gs_scratch_size, fs_scratch_size; if (ilo_debug & ILO_DEBUG_DRAW) { if (info->indexed) { @@ -574,8 +575,15 @@ ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) ilo_finalize_3d_states(ilo, info); + /* upload kernels */ ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder); + /* prepare scratch spaces */ + ilo_shader_cache_get_max_scratch_sizes(ilo->shader_cache, + &vs_scratch_size, &gs_scratch_size, &fs_scratch_size); + ilo_render_prepare_scratch_spaces(ilo->render, + vs_scratch_size, gs_scratch_size, fs_scratch_size); + ilo_blit_resolve_framebuffer(ilo); /* If draw_vbo ever fails, return immediately. 
*/ diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c index 21f75de11a0..8bc04df4fab 100644 --- a/src/gallium/drivers/ilo/ilo_render.c +++ b/src/gallium/drivers/ilo/ilo_render.c @@ -67,10 +67,49 @@ ilo_render_create(struct ilo_builder *builder) void ilo_render_destroy(struct ilo_render *render) { + intel_bo_unref(render->vs_scratch.bo); + intel_bo_unref(render->gs_scratch.bo); + intel_bo_unref(render->fs_scratch.bo); + intel_bo_unref(render->workaround_bo); FREE(render); } +static bool +resize_scratch_space(struct ilo_render *render, + struct ilo_render_scratch_space *scratch, + const char *name, int new_size) +{ + struct intel_bo *bo; + + if (scratch->size >= new_size) + return true; + + bo = intel_winsys_alloc_bo(render->builder->winsys, name, new_size, false); + if (!bo) + return false; + + intel_bo_unref(scratch->bo); + scratch->bo = bo; + scratch->size = new_size; + + return true; +} + +bool +ilo_render_prepare_scratch_spaces(struct ilo_render *render, + int vs_scratch_size, + int gs_scratch_size, + int fs_scratch_size) +{ + return (resize_scratch_space(render, &render->vs_scratch, + "vs scratch", vs_scratch_size) && + resize_scratch_space(render, &render->gs_scratch, + "gs scratch", gs_scratch_size) && + resize_scratch_space(render, &render->fs_scratch, + "fs scratch", fs_scratch_size)); +} + void ilo_render_get_sample_position(const struct ilo_render *render, unsigned sample_count, diff --git a/src/gallium/drivers/ilo/ilo_render.h b/src/gallium/drivers/ilo/ilo_render.h index 098af73ec9b..31fd1e6f859 100644 --- a/src/gallium/drivers/ilo/ilo_render.h +++ b/src/gallium/drivers/ilo/ilo_render.h @@ -43,6 +43,12 @@ ilo_render_create(struct ilo_builder *builder); void ilo_render_destroy(struct ilo_render *render); +bool +ilo_render_prepare_scratch_spaces(struct ilo_render *render, + int vs_scratch_size, + int gs_scratch_size, + int fs_scratch_size); + void ilo_render_get_sample_position(const struct ilo_render *render, unsigned 
sample_count, diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h index 6b133750043..f227d6bf4da 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen.h +++ b/src/gallium/drivers/ilo/ilo_render_gen.h @@ -51,6 +51,11 @@ struct ilo_render { struct intel_bo *workaround_bo; + struct ilo_render_scratch_space { + struct intel_bo *bo; + int size; + } vs_scratch, gs_scratch, fs_scratch; + struct ilo_state_sample_pattern sample_pattern; bool hw_ctx_changed; diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index c1f759f3043..910e6c0fb7a 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -475,10 +475,13 @@ gen6_draw_vs(struct ilo_render *r, gen6_wa_pre_3dstate_vs_toggle(r); if (ilo_dev_gen(r->dev) == ILO_GEN(6) && - ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) - gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset); - else - gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset); + ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) { + gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, + kernel_offset, r->vs_scratch.bo); + } else { + gen6_3DSTATE_VS(r->builder, &cso->vs, + kernel_offset, r->vs_scratch.bo); + } } } @@ -501,7 +504,8 @@ gen6_draw_gs(struct ilo_render *r, cso = ilo_shader_get_kernel_cso(vec->gs); kernel_offset = ilo_shader_get_kernel_offset(vec->gs); - gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset); + gen6_3DSTATE_GS(r->builder, &cso->gs, + kernel_offset, r->gs_scratch.bo); } else if (ilo_dev_gen(r->dev) == ILO_GEN(6) && ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) { const int verts_per_prim = @@ -524,9 +528,10 @@ gen6_draw_gs(struct ilo_render *r, kernel_offset = ilo_shader_get_kernel_offset(vec->vs) + ilo_shader_get_kernel_param(vec->vs, param); - gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset); + gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, + 
kernel_offset, r->gs_scratch.bo); } else { - gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0); + gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0, NULL); } } } @@ -672,7 +677,7 @@ gen6_draw_wm(struct ilo_render *r, gen6_wa_pre_3dstate_wm_max_threads(r); gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs, - &cso->ps, kernel_offset); + &cso->ps, kernel_offset, r->fs_scratch.bo); } } @@ -817,10 +822,10 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r, gen6_wa_post_3dstate_constant_vs(r); gen6_wa_pre_3dstate_vs_toggle(r); - gen6_3DSTATE_VS(r->builder, &blitter->vs, 0); + gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL); gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0); - gen6_3DSTATE_GS(r->builder, &blitter->gs, 0); + gen6_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL); gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs); gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe); @@ -833,7 +838,7 @@ gen6_rectlist_wm(struct ilo_render *r, gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen6_wa_pre_3dstate_wm_max_threads(r); - gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0); + gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0, NULL); } static void diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 6623a8bcb43..330ba6c88d6 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -318,10 +318,13 @@ gen7_draw_vs(struct ilo_render *r, const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->vs); const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs); - if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) - gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset); - else - gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset); + if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) { + gen8_3DSTATE_VS(r->builder, &cso->vs, + kernel_offset, r->vs_scratch.bo); + } else { + gen6_3DSTATE_VS(r->builder, &cso->vs, + kernel_offset, r->vs_scratch.bo); + } } } @@ 
-338,9 +341,9 @@ gen7_draw_hs(struct ilo_render *r, gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0); if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) - gen8_3DSTATE_HS(r->builder, hs, kernel_offset); + gen8_3DSTATE_HS(r->builder, hs, kernel_offset, NULL); else - gen7_3DSTATE_HS(r->builder, hs, kernel_offset); + gen7_3DSTATE_HS(r->builder, hs, kernel_offset, NULL); } /* 3DSTATE_BINDING_TABLE_POINTERS_HS */ @@ -373,9 +376,9 @@ gen7_draw_ds(struct ilo_render *r, gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0); if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) - gen8_3DSTATE_DS(r->builder, ds, kernel_offset); + gen8_3DSTATE_DS(r->builder, ds, kernel_offset, NULL); else - gen7_3DSTATE_DS(r->builder, ds, kernel_offset); + gen7_3DSTATE_DS(r->builder, ds, kernel_offset, NULL); } /* 3DSTATE_BINDING_TABLE_POINTERS_DS */ @@ -397,9 +400,9 @@ gen7_draw_gs(struct ilo_render *r, gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0); if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) - gen8_3DSTATE_GS(r->builder, gs, kernel_offset); + gen8_3DSTATE_GS(r->builder, gs, kernel_offset, NULL); else - gen7_3DSTATE_GS(r->builder, gs, kernel_offset); + gen7_3DSTATE_GS(r->builder, gs, kernel_offset, NULL); } /* 3DSTATE_BINDING_TABLE_POINTERS_GS */ @@ -534,7 +537,7 @@ gen7_draw_wm(struct ilo_render *r, if (r->hw_ctx_changed) gen7_wa_pre_3dstate_ps_max_threads(r); - gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset); + gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, r->fs_scratch.bo); } /* 3DSTATE_SCISSOR_STATE_POINTERS */ @@ -678,18 +681,18 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r, const struct ilo_blitter *blitter) { gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0); - gen6_3DSTATE_VS(r->builder, &blitter->vs, 0); + gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL); gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0); - gen7_3DSTATE_HS(r->builder, &blitter->hs, 0); + gen7_3DSTATE_HS(r->builder, &blitter->hs, 0, NULL); gen7_3DSTATE_TE(r->builder, &blitter->ds); gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0); - 
gen7_3DSTATE_DS(r->builder, &blitter->ds, 0); + gen7_3DSTATE_DS(r->builder, &blitter->ds, 0, NULL); gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0); - gen7_3DSTATE_GS(r->builder, &blitter->gs, 0); + gen7_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL); gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol); @@ -711,7 +714,7 @@ gen7_rectlist_wm(struct ilo_render *r, gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen7_wa_pre_3dstate_ps_max_threads(r); - gen7_3DSTATE_PS(r->builder, &blitter->ps, 0); + gen7_3DSTATE_PS(r->builder, &blitter->ps, 0, NULL); } static void diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index 65494b4058a..efe0e0d501b 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -125,7 +125,7 @@ gen8_draw_wm(struct ilo_render *r, /* 3DSTATE_PS */ if (DIRTY(FS) || r->instruction_bo_changed) - gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset); + gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, r->fs_scratch.bo); /* 3DSTATE_PS_EXTRA */ if (DIRTY(FS)) diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index e1a7dc56685..888f7aa6782 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -474,6 +474,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index 73b625e9de4..c61716dc791 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -37,6 +37,10 @@ struct ilo_shader_cache { struct list_head shaders; struct list_head changed; + + int max_vs_scratch_size; + int max_gs_scratch_size; + int max_fs_scratch_size; }; /** @@ -121,6 +125,8 @@ 
ilo_shader_cache_upload(struct ilo_shader_cache *shc, struct ilo_shader *sh; LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) { + int scratch_size, *cur_max; + if (sh->uploaded) continue; @@ -128,6 +134,29 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc, sh->kernel_size, sh->kernel); sh->uploaded = true; + + switch (shader->info.type) { + case PIPE_SHADER_VERTEX: + scratch_size = ilo_state_vs_get_scratch_size(&sh->cso.vs); + cur_max = &shc->max_vs_scratch_size; + break; + case PIPE_SHADER_GEOMETRY: + scratch_size = ilo_state_gs_get_scratch_size(&sh->cso.gs); + cur_max = &shc->max_gs_scratch_size; + break; + case PIPE_SHADER_FRAGMENT: + scratch_size = ilo_state_ps_get_scratch_size(&sh->cso.ps); + cur_max = &shc->max_fs_scratch_size; + break; + default: + assert(!"unknown shader type"); + scratch_size = 0; + cur_max = &shc->max_vs_scratch_size; + break; + } + + if (*cur_max < scratch_size) + *cur_max = scratch_size; } list_del(&shader->list); @@ -155,6 +184,21 @@ ilo_shader_cache_invalidate(struct ilo_shader_cache *shc) LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) sh->uploaded = false; } + + shc->max_vs_scratch_size = 0; + shc->max_gs_scratch_size = 0; + shc->max_fs_scratch_size = 0; +} + +void +ilo_shader_cache_get_max_scratch_sizes(const struct ilo_shader_cache *shc, + int *vs_scratch_size, + int *gs_scratch_size, + int *fs_scratch_size) +{ + *vs_scratch_size = shc->max_vs_scratch_size; + *gs_scratch_size = shc->max_gs_scratch_size; + *fs_scratch_size = shc->max_fs_scratch_size; } /** @@ -578,7 +622,6 @@ init_shader_kernel(const struct ilo_shader *kernel, kern->grf_start = kernel->in.start_grf; kern->pcb_attr_count = (kernel->pcb.cbuf0_size + kernel->pcb.clip_state_size + 15) / 16; - kern->scratch_size = 0; } static void @@ -602,6 +645,7 @@ init_vs(struct ilo_shader *kernel, init_shader_urb(kernel, state, &info.urb); init_shader_kernel(kernel, state, &info.kernel); init_shader_resource(kernel, state, &info.resource); + info.per_thread_scratch_size = 
kernel->per_thread_scratch_size; info.dispatch_enable = true; info.stats_enable = true; @@ -640,6 +684,7 @@ init_gs(struct ilo_shader *kernel, init_shader_urb(kernel, state, &info.urb); init_shader_kernel(kernel, state, &info.kernel); init_shader_resource(kernel, state, &info.resource); + info.per_thread_scratch_size = kernel->per_thread_scratch_size; info.dispatch_enable = true; info.stats_enable = true; @@ -664,6 +709,7 @@ init_ps(struct ilo_shader *kernel, init_shader_kernel(kernel, state, &info.kernel_8); init_shader_resource(kernel, state, &info.resource); + info.per_thread_scratch_size = kernel->per_thread_scratch_size; info.io.has_rt_write = true; info.io.posoffset = GEN6_POSOFFSET_NONE; info.io.attr_count = kernel->in.count; diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h index 01de54146b1..10dcf739430 100644 --- a/src/gallium/drivers/ilo/ilo_shader.h +++ b/src/gallium/drivers/ilo/ilo_shader.h @@ -120,6 +120,12 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc, void ilo_shader_cache_invalidate(struct ilo_shader_cache *shc); +void +ilo_shader_cache_get_max_scratch_sizes(const struct ilo_shader_cache *shc, + int *vs_scratch_size, + int *gs_scratch_size, + int *fs_scratch_size); + struct ilo_shader_state * ilo_shader_create_vs(const struct ilo_dev *dev, const struct pipe_shader_state *state, diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h index 01c86675202..1f0cda174e8 100644 --- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h +++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h @@ -139,6 +139,7 @@ struct ilo_shader { void *kernel; int kernel_size; + int per_thread_scratch_size; struct ilo_kernel_routing routing; struct ilo_state_ps_params_info ps_params; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index e2ed267da78..d1c50aefc84 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -299,6 +299,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 4c8167a9e7d..1778b13f9dd 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -854,10 +854,10 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, jit_tex->img_stride[j] = lp_tex->img_stride[j]; } - if (view->target == PIPE_TEXTURE_1D_ARRAY || - view->target == PIPE_TEXTURE_2D_ARRAY || - view->target == PIPE_TEXTURE_CUBE || - view->target == PIPE_TEXTURE_CUBE_ARRAY) { + if (res->target == PIPE_TEXTURE_1D_ARRAY || + res->target == PIPE_TEXTURE_2D_ARRAY || + res->target == PIPE_TEXTURE_CUBE || + res->target == PIPE_TEXTURE_CUBE_ARRAY) { /* * For array textures, we don't have first_layer, instead * adjust last_layer (stored as depth) plus the mip level offsets diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index b205f02fdba..1e055878f7c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -275,10 +275,10 @@ prepare_shader_sampling( row_stride[j] = lp_tex->row_stride[j]; img_stride[j] = lp_tex->img_stride[j]; } - if (view->target == PIPE_TEXTURE_1D_ARRAY || - view->target == PIPE_TEXTURE_2D_ARRAY || - view->target == PIPE_TEXTURE_CUBE || - view->target == PIPE_TEXTURE_CUBE_ARRAY) { + if (tex->target == PIPE_TEXTURE_1D_ARRAY || + tex->target == PIPE_TEXTURE_2D_ARRAY || + tex->target == PIPE_TEXTURE_CUBE || + tex->target == PIPE_TEXTURE_CUBE_ARRAY) { num_layers = view->u.tex.last_layer - 
view->u.tex.first_layer + 1; for (j = first_level; j <= last_level; j++) { mip_offsets[j] += view->u.tex.first_layer * diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index af46342fdf2..7862ac8f217 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -200,7 +200,8 @@ llvmpipe_can_create_resource(struct pipe_screen *screen, static boolean llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, - struct llvmpipe_resource *lpr) + struct llvmpipe_resource *lpr, + const void *map_front_private) { struct sw_winsys *winsys = screen->winsys; @@ -215,12 +216,13 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, lpr->base.format, width, height, 64, + map_front_private, &lpr->row_stride[0] ); if (lpr->dt == NULL) return FALSE; - { + if (!map_front_private) { void *map = winsys->displaytarget_map(winsys, lpr->dt, PIPE_TRANSFER_WRITE); @@ -235,8 +237,9 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, static struct pipe_resource * -llvmpipe_resource_create(struct pipe_screen *_screen, - const struct pipe_resource *templat) +llvmpipe_resource_create_front(struct pipe_screen *_screen, + const struct pipe_resource *templat, + const void *map_front_private) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource); @@ -254,7 +257,7 @@ llvmpipe_resource_create(struct pipe_screen *_screen, PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) { /* displayable surface */ - if (!llvmpipe_displaytarget_layout(screen, lpr)) + if (!llvmpipe_displaytarget_layout(screen, lpr, map_front_private)) goto fail; } else { @@ -300,7 +303,12 @@ llvmpipe_resource_create(struct pipe_screen *_screen, FREE(lpr); return NULL; } - +static struct pipe_resource * +llvmpipe_resource_create(struct pipe_screen *_screen, + const struct pipe_resource *templat) +{ + return llvmpipe_resource_create_front(_screen, templat, 
NULL); +} static void llvmpipe_resource_destroy(struct pipe_screen *pscreen, @@ -797,6 +805,7 @@ llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen) #endif screen->resource_create = llvmpipe_resource_create; + screen->resource_create_front = llvmpipe_resource_create_front; screen->resource_destroy = llvmpipe_resource_destroy; screen->resource_from_handle = llvmpipe_resource_from_handle; screen->resource_get_handle = llvmpipe_resource_get_handle; diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index c18e9f5b435..83f81135590 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -73,6 +73,9 @@ NV50_C_SOURCES := \ nv50/nv50_program.h \ nv50/nv50_push.c \ nv50/nv50_query.c \ + nv50/nv50_query.h \ + nv50/nv50_query_hw.c \ + nv50/nv50_query_hw.h \ nv50/nv50_resource.c \ nv50/nv50_resource.h \ nv50/nv50_screen.c \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index cce60550ae5..6ad9dd31681 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1128,7 +1128,6 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) info->prop.gp.instanceCount = 1; info->prop.gp.maxVertices = 1; } - info->io.clipDistance = 0xff; info->io.pointSize = 0xff; info->io.instanceId = 0xff; info->io.vertexId = 0xff; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h index a610c773f55..0d544581697 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h @@ -73,8 +73,8 @@ public: Instruction *mkCvt(operation, DataType, Value *, DataType, Value *); CmpInstruction *mkCmp(operation, CondCode, DataType, - Value *, - DataType, Value *, Value *, Value * = NULL); + Value *, + DataType, Value *, Value *, 
Value * = NULL); TexInstruction *mkTex(operation, TexTarget, uint16_t tic, uint16_t tsc, const std::vector<Value *> &def, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 2b9edcf9172..c0cab3299b5 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -99,6 +99,7 @@ struct nv50_ir_prog_info uint8_t sourceRep; /* NV50_PROGRAM_IR */ const void *source; void *relocData; + void *interpData; struct nv50_ir_prog_symbol *syms; uint16_t numSyms; } bin; @@ -143,6 +144,7 @@ struct nv50_ir_prog_info bool earlyFragTests; bool separateFragData; bool usesDiscard; + bool sampleInterp; /* perform sample interp on all fp inputs */ } fp; struct { uint32_t inputOffset; /* base address for user args */ @@ -154,9 +156,8 @@ struct nv50_ir_prog_info uint8_t numBarriers; struct { - uint8_t clipDistance; /* index of first clip distance output */ - uint8_t clipDistanceMask; /* mask of clip distances defined */ - uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */ + uint8_t clipDistances; /* number of clip distance outputs */ + uint8_t cullDistances; /* number of cull distance outputs */ int8_t genUserClip; /* request user clip planes for ClipVertex */ uint16_t ucpBase; /* base address for UCPs */ uint8_t ucpCBSlot; /* constant buffer index of UCP data */ @@ -168,7 +169,6 @@ struct nv50_ir_prog_info int8_t viewportId; /* output index of ViewportIndex */ uint8_t fragDepth; /* output index of FragDepth */ uint8_t sampleMask; /* output index of SampleMask */ - bool sampleInterp; /* perform sample interp on all fp inputs */ uint8_t backFaceColor[2]; /* input/output indices of back face colour */ uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */ bool fp64; /* program uses fp64 math */ @@ -198,6 +198,10 @@ extern void nv50_ir_relocate_code(void *relocData, uint32_t *code, uint32_t libPos, uint32_t dataPos); +extern void 
+nv50_ir_change_interp(void *interpData, uint32_t *code, + bool force_per_sample, bool flatshade); + /* obtain code that will be shared among programs */ extern void nv50_ir_get_target_library(uint32_t chipset, const uint32_t **code, uint32_t *size); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 8f1542959c9..d712c9c300a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1437,6 +1437,30 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i) code[1] |= (i->ipa & 0xc) << (19 - 2); } +static void +interpApply(const InterpEntry *entry, uint32_t *code, + bool force_persample_interp, bool flatshade) +{ + int ipa = entry->ipa; + int reg = entry->reg; + int loc = entry->loc; + + if (flatshade && + (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) { + ipa = NV50_IR_INTERP_FLAT; + reg = 0xff; + } else if (force_persample_interp && + (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && + (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { + ipa |= NV50_IR_INTERP_CENTROID; + } + code[loc + 1] &= ~(0xf << 19); + code[loc + 1] |= (ipa & 0x3) << 21; + code[loc + 1] |= (ipa & 0xc) << (19 - 2); + code[loc + 0] &= ~(0xff << 23); + code[loc + 0] |= reg << 23; +} + void CodeEmitterGK110::emitINTERP(const Instruction *i) { @@ -1448,10 +1472,13 @@ CodeEmitterGK110::emitINTERP(const Instruction *i) if (i->saturate) code[1] |= 1 << 18; - if (i->op == OP_PINTERP) + if (i->op == OP_PINTERP) { srcId(i->src(1), 23); - else + addInterp(i->ipa, SDATA(i->src(1)).id, interpApply); + } else { code[0] |= 0xff << 23; + addInterp(i->ipa, 0xff, interpApply); + } srcId(i->src(0).getIndirect(0), 10); emitInterpMode(i); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 6e22788341f..a327d572470 100644 --- 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -2217,6 +2217,30 @@ CodeEmitterGM107::emitAL2P() emitGPR (0x00, insn->def(0)); } +static void +interpApply(const InterpEntry *entry, uint32_t *code, + bool force_persample_interp, bool flatshade) +{ + int ipa = entry->ipa; + int reg = entry->reg; + int loc = entry->loc; + + if (flatshade && + (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) { + ipa = NV50_IR_INTERP_FLAT; + reg = 0xff; + } else if (force_persample_interp && + (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && + (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { + ipa |= NV50_IR_INTERP_CENTROID; + } + code[loc + 1] &= ~(0xf << 0x14); + code[loc + 1] |= (ipa & 0x3) << 0x16; + code[loc + 1] |= (ipa & 0xc) << (0x14 - 2); + code[loc + 0] &= ~(0xff << 0x14); + code[loc + 0] |= reg << 0x14; +} + void CodeEmitterGM107::emitIPA() { @@ -2255,10 +2279,12 @@ CodeEmitterGM107::emitIPA() emitGPR(0x14, insn->src(1)); if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) emitGPR(0x27, insn->src(2)); + addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply); } else { if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) emitGPR(0x27, insn->src(1)); emitGPR(0x14); + addInterp(insn->ipa, 0xff, interpApply); } if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index 90147668c91..9f1e4b803d5 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -372,7 +372,7 @@ CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc) mode |= 3 << (s * 2); break; default: - ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile()); + ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile()); assert(0); break; } @@ -876,6 +876,30 @@ 
CodeEmitterNV50::emitPFETCH(const Instruction *i) emitFlagsRd(i); } +static void +interpApply(const InterpEntry *entry, uint32_t *code, + bool force_persample_interp, bool flatshade) +{ + int ipa = entry->ipa; + int encSize = entry->reg; + int loc = entry->loc; + + if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && + (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { + if (force_persample_interp) { + if (encSize == 8) + code[loc + 1] |= 1 << 16; + else + code[loc + 0] |= 1 << 24; + } else { + if (encSize == 8) + code[loc + 1] &= ~(1 << 16); + else + code[loc + 0] &= ~(1 << 24); + } + } +} + void CodeEmitterNV50::emitINTERP(const Instruction *i) { @@ -904,6 +928,8 @@ CodeEmitterNV50::emitINTERP(const Instruction *i) code[0] |= 1; emitFlagsRd(i); } + + addInterp(i->ipa, i->encSize, interpApply); } void diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 6bf5219d346..fd103146c72 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -1618,6 +1618,29 @@ CodeEmitterNVC0::emitInterpMode(const Instruction *i) } } +static void +interpApply(const InterpEntry *entry, uint32_t *code, + bool force_persample_interp, bool flatshade) +{ + int ipa = entry->ipa; + int reg = entry->reg; + int loc = entry->loc; + + if (flatshade && + (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) { + ipa = NV50_IR_INTERP_FLAT; + reg = 0x3f; + } else if (force_persample_interp && + (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && + (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { + ipa |= NV50_IR_INTERP_CENTROID; + } + code[loc + 0] &= ~(0xf << 6); + code[loc + 0] |= ipa << 6; + code[loc + 0] &= ~(0x3f << 26); + code[loc + 0] |= reg << 26; +} + void CodeEmitterNVC0::emitINTERP(const Instruction *i) { @@ -1630,10 +1653,13 @@ CodeEmitterNVC0::emitINTERP(const Instruction *i) if 
(i->saturate) code[0] |= 1 << 5; - if (i->op == OP_PINTERP) + if (i->op == OP_PINTERP) { srcId(i->src(1), 26); - else + addInterp(i->ipa, SDATA(i->src(1)).id, interpApply); + } else { code[0] |= 0x3f << 26; + addInterp(i->ipa, 0x3f, interpApply); + } srcId(i->src(0).getIndirect(0), 20); } else { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index c8efaf5947a..6a7cb4224f4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -910,7 +910,7 @@ bool Source::scanSource() info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16; if (info->io.genUserClip > 0) { - info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1; + info->io.clipDistances = info->io.genUserClip; const unsigned int nOut = (info->io.genUserClip + 3) / 4; @@ -919,7 +919,7 @@ bool Source::scanSource() info->out[i].id = i; info->out[i].sn = TGSI_SEMANTIC_CLIPDIST; info->out[i].si = n; - info->out[i].mask = info->io.clipDistanceMask >> (n * 4); + info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4); } } @@ -969,6 +969,12 @@ void Source::scanProperty(const struct tgsi_full_property *prop) else info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */ break; + case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: + info->io.clipDistances = prop->u[0].Data; + break; + case TGSI_PROPERTY_NUM_CULLDIST_ENABLED: + info->io.cullDistances = prop->u[0].Data; + break; default: INFO("unhandled TGSI property %d\n", prop->Property.PropertyName); break; @@ -1054,7 +1060,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) default: break; } - if (decl->Interp.Location || info->io.sampleInterp) + if (decl->Interp.Location) info->in[i].centroid = 1; } @@ -1086,8 +1092,6 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) clipVertexOutput = i; break; case TGSI_SEMANTIC_CLIPDIST: - 
info->io.clipDistanceMask |= - decl->Declaration.UsageMask << (si * 4); info->io.genUserClip = -1; break; case TGSI_SEMANTIC_SAMPLEMASK: @@ -1119,6 +1123,10 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) case TGSI_SEMANTIC_VERTEXID: info->io.vertexId = first; break; + case TGSI_SEMANTIC_SAMPLEID: + case TGSI_SEMANTIC_SAMPLEPOS: + info->prop.fp.sampleInterp = 1; + break; default: break; } @@ -1338,6 +1346,8 @@ private: void handleINTERP(Value *dst0[4]); + uint8_t translateInterpMode(const struct nv50_ir_varying *var, + operation& op); Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr); void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); @@ -1451,8 +1461,8 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) return sym; } -static inline uint8_t -translateInterpMode(const struct nv50_ir_varying *var, operation& op) +uint8_t +Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op) { uint8_t mode = NV50_IR_INTERP_PERSPECTIVE; @@ -1468,7 +1478,7 @@ translateInterpMode(const struct nv50_ir_varying *var, operation& op) op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC) ? OP_PINTERP : OP_LINTERP; - if (var->centroid) + if (var->centroid || info->prop.fp.sampleInterp) mode |= NV50_IR_INTERP_CENTROID; return mode; @@ -1628,7 +1638,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) // don't load masked inputs, won't be assigned a slot if (!ptr && !(info->in[idx].mask & (1 << swz))) return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 
1.0f : 0.0f); - if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE) + if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE) return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0)); return interpolate(src, c, shiftAddress(ptr)); } else diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index fe530c76b62..afc8ff1374f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -166,7 +166,7 @@ void Target::destroy(Target *targ) delete targ; } -CodeEmitter::CodeEmitter(const Target *target) : targ(target) +CodeEmitter::CodeEmitter(const Target *target) : targ(target), interpInfo(NULL) { } @@ -388,6 +388,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info) } } info->bin.relocData = emit->getRelocInfo(); + info->bin.interpData = emit->getInterpInfo(); emitSymbolTable(info); @@ -428,6 +429,29 @@ CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m, return true; } +bool +CodeEmitter::addInterp(int ipa, int reg, InterpApply apply) +{ + unsigned int n = interpInfo ? interpInfo->count : 0; + + if (!(n % RELOC_ALLOC_INCREMENT)) { + size_t size = sizeof(InterpInfo) + n * sizeof(InterpEntry); + interpInfo = reinterpret_cast<InterpInfo *>( + REALLOC(interpInfo, n ? 
size : 0, + size + RELOC_ALLOC_INCREMENT * sizeof(InterpEntry))); + if (!interpInfo) + return false; + if (n == 0) + memset(interpInfo, 0, sizeof(InterpInfo)); + } + ++interpInfo->count; + + interpInfo->entry[n] = InterpEntry(ipa, reg, codeSize >> 2); + interpInfo->apply = apply; + + return true; +} + void RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const { @@ -472,6 +496,19 @@ nv50_ir_relocate_code(void *relocData, uint32_t *code, } void +nv50_ir_change_interp(void *interpData, uint32_t *code, + bool force_persample_interp, bool flatshade) +{ + nv50_ir::InterpInfo *info = reinterpret_cast<nv50_ir::InterpInfo *>( + interpData); + + // force_persample_interp: all non-flat -> per-sample + // flatshade: all color -> flat + for (unsigned i = 0; i < info->count; ++i) + info->apply(&info->entry[i], code, force_persample_interp, flatshade); +} + +void nv50_ir_get_target_library(uint32_t chipset, const uint32_t **code, uint32_t *size) { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h index 591916eb412..4e33997e1c1 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h @@ -58,6 +58,23 @@ struct RelocInfo RelocEntry entry[0]; }; +struct InterpEntry +{ + InterpEntry(int ipa, int reg, int loc) : ipa(ipa), reg(reg), loc(loc) {} + uint32_t ipa:4; // SC mode used to identify colors + uint32_t reg:8; // The reg used for perspective division + uint32_t loc:20; // Let's hope we don't have more than 1M-sized shaders +}; + +typedef void (*InterpApply)(const InterpEntry*, uint32_t*, bool, bool); + +struct InterpInfo +{ + uint32_t count; + InterpApply apply; + InterpEntry entry[0]; +}; + class CodeEmitter { public: @@ -78,6 +95,9 @@ public: inline void *getRelocInfo() const { return relocInfo; } + bool addInterp(int ipa, int reg, InterpApply apply); + inline void *getInterpInfo() const { return interpInfo; } + virtual void 
prepareEmission(Program *); virtual void prepareEmission(Function *); virtual void prepareEmission(BasicBlock *); @@ -92,6 +112,7 @@ protected: uint32_t codeSizeLimit; RelocInfo *relocInfo; + InterpInfo *interpInfo; }; diff --git a/src/gallium/drivers/nouveau/nouveau_heap.c b/src/gallium/drivers/nouveau/nouveau_heap.c index f4aa5081dfe..3d415a5f30e 100644 --- a/src/gallium/drivers/nouveau/nouveau_heap.c +++ b/src/gallium/drivers/nouveau/nouveau_heap.c @@ -29,95 +29,95 @@ int nouveau_heap_init(struct nouveau_heap **heap, unsigned start, unsigned size) { - struct nouveau_heap *r; + struct nouveau_heap *r; - r = calloc(1, sizeof(struct nouveau_heap)); - if (!r) - return 1; + r = calloc(1, sizeof(struct nouveau_heap)); + if (!r) + return 1; - r->start = start; - r->size = size; - *heap = r; - return 0; + r->start = start; + r->size = size; + *heap = r; + return 0; } void nouveau_heap_destroy(struct nouveau_heap **heap) { - if (!*heap) - return; - free(*heap); - *heap = NULL; + if (!*heap) + return; + free(*heap); + *heap = NULL; } int nouveau_heap_alloc(struct nouveau_heap *heap, unsigned size, void *priv, struct nouveau_heap **res) { - struct nouveau_heap *r; + struct nouveau_heap *r; - if (!heap || !size || !res || *res) - return 1; + if (!heap || !size || !res || *res) + return 1; - while (heap) { - if (!heap->in_use && heap->size >= size) { - r = calloc(1, sizeof(struct nouveau_heap)); - if (!r) - return 1; + while (heap) { + if (!heap->in_use && heap->size >= size) { + r = calloc(1, sizeof(struct nouveau_heap)); + if (!r) + return 1; - r->start = (heap->start + heap->size) - size; - r->size = size; - r->in_use = 1; - r->priv = priv; + r->start = (heap->start + heap->size) - size; + r->size = size; + r->in_use = 1; + r->priv = priv; - heap->size -= size; + heap->size -= size; - r->next = heap->next; - if (heap->next) - heap->next->prev = r; - r->prev = heap; - heap->next = r; + r->next = heap->next; + if (heap->next) + heap->next->prev = r; + r->prev = heap; + 
heap->next = r; - *res = r; - return 0; - } + *res = r; + return 0; + } - heap = heap->next; - } + heap = heap->next; + } - return 1; + return 1; } void nouveau_heap_free(struct nouveau_heap **res) { - struct nouveau_heap *r; - - if (!res || !*res) - return; - r = *res; - *res = NULL; - - r->in_use = 0; - - if (r->next && !r->next->in_use) { - struct nouveau_heap *new = r->next; - - new->prev = r->prev; - if (r->prev) - r->prev->next = new; - new->size += r->size; - new->start = r->start; - - free(r); - r = new; - } - - if (r->prev && !r->prev->in_use) { - r->prev->next = r->next; - if (r->next) - r->next->prev = r->prev; - r->prev->size += r->size; - free(r); - } + struct nouveau_heap *r; + + if (!res || !*res) + return; + r = *res; + *res = NULL; + + r->in_use = 0; + + if (r->next && !r->next->in_use) { + struct nouveau_heap *new = r->next; + + new->prev = r->prev; + if (r->prev) + r->prev->next = new; + new->size += r->size; + new->start = r->start; + + free(r); + r = new; + } + + if (r->prev && !r->prev->in_use) { + r->prev->next = r->next; + if (r->next) + r->next->prev = r->prev; + r->prev->size += r->size; + free(r); + } } diff --git a/src/gallium/drivers/nouveau/nouveau_heap.h b/src/gallium/drivers/nouveau/nouveau_heap.h index a3d64a65623..99f610ed4c8 100644 --- a/src/gallium/drivers/nouveau/nouveau_heap.h +++ b/src/gallium/drivers/nouveau/nouveau_heap.h @@ -44,15 +44,15 @@ * full size of the heap. 
*/ struct nouveau_heap { - struct nouveau_heap *prev; - struct nouveau_heap *next; + struct nouveau_heap *prev; + struct nouveau_heap *next; - void *priv; + void *priv; - unsigned start; - unsigned size; + unsigned start; + unsigned size; - int in_use; + int in_use; }; int diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index b2290e7e784..47603b0b7fd 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -30,211 +30,211 @@ int nouveau_mesa_debug = 0; static const char * nouveau_screen_get_name(struct pipe_screen *pscreen) { - struct nouveau_device *dev = nouveau_screen(pscreen)->device; - static char buffer[128]; + struct nouveau_device *dev = nouveau_screen(pscreen)->device; + static char buffer[128]; - util_snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); - return buffer; + util_snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; } static const char * nouveau_screen_get_vendor(struct pipe_screen *pscreen) { - return "nouveau"; + return "nouveau"; } static const char * nouveau_screen_get_device_vendor(struct pipe_screen *pscreen) { - return "NVIDIA"; + return "NVIDIA"; } static uint64_t nouveau_screen_get_timestamp(struct pipe_screen *pscreen) { - int64_t cpu_time = os_time_get() * 1000; + int64_t cpu_time = os_time_get() * 1000; - /* getparam of PTIMER_TIME takes about x10 as long (several usecs) */ + /* getparam of PTIMER_TIME takes about x10 as long (several usecs) */ - return cpu_time + nouveau_screen(pscreen)->cpu_gpu_time_delta; + return cpu_time + nouveau_screen(pscreen)->cpu_gpu_time_delta; } static void nouveau_screen_fence_ref(struct pipe_screen *pscreen, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *pfence) + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *pfence) { - nouveau_fence_ref(nouveau_fence(pfence), (struct nouveau_fence **)ptr); + nouveau_fence_ref(nouveau_fence(pfence), 
(struct nouveau_fence **)ptr); } static boolean nouveau_screen_fence_finish(struct pipe_screen *screen, - struct pipe_fence_handle *pfence, + struct pipe_fence_handle *pfence, uint64_t timeout) { - if (!timeout) - return nouveau_fence_signalled(nouveau_fence(pfence)); + if (!timeout) + return nouveau_fence_signalled(nouveau_fence(pfence)); - return nouveau_fence_wait(nouveau_fence(pfence)); + return nouveau_fence_wait(nouveau_fence(pfence)); } struct nouveau_bo * nouveau_screen_bo_from_handle(struct pipe_screen *pscreen, - struct winsys_handle *whandle, - unsigned *out_stride) + struct winsys_handle *whandle, + unsigned *out_stride) { - struct nouveau_device *dev = nouveau_screen(pscreen)->device; - struct nouveau_bo *bo = 0; - int ret; - - if (whandle->type != DRM_API_HANDLE_TYPE_SHARED && - whandle->type != DRM_API_HANDLE_TYPE_FD) { - debug_printf("%s: attempt to import unsupported handle type %d\n", - __FUNCTION__, whandle->type); - return NULL; - } - - if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) - ret = nouveau_bo_name_ref(dev, whandle->handle, &bo); - else - ret = nouveau_bo_prime_handle_ref(dev, whandle->handle, &bo); - - if (ret) { - debug_printf("%s: ref name 0x%08x failed with %d\n", - __FUNCTION__, whandle->handle, ret); - return NULL; - } - - *out_stride = whandle->stride; - return bo; + struct nouveau_device *dev = nouveau_screen(pscreen)->device; + struct nouveau_bo *bo = 0; + int ret; + + if (whandle->type != DRM_API_HANDLE_TYPE_SHARED && + whandle->type != DRM_API_HANDLE_TYPE_FD) { + debug_printf("%s: attempt to import unsupported handle type %d\n", + __FUNCTION__, whandle->type); + return NULL; + } + + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) + ret = nouveau_bo_name_ref(dev, whandle->handle, &bo); + else + ret = nouveau_bo_prime_handle_ref(dev, whandle->handle, &bo); + + if (ret) { + debug_printf("%s: ref name 0x%08x failed with %d\n", + __FUNCTION__, whandle->handle, ret); + return NULL; + } + + *out_stride = whandle->stride; + return 
bo; } bool nouveau_screen_bo_get_handle(struct pipe_screen *pscreen, - struct nouveau_bo *bo, - unsigned stride, - struct winsys_handle *whandle) + struct nouveau_bo *bo, + unsigned stride, + struct winsys_handle *whandle) { - whandle->stride = stride; - - if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { - return nouveau_bo_name_get(bo, &whandle->handle) == 0; - } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { - whandle->handle = bo->handle; - return true; - } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) { - return nouveau_bo_set_prime(bo, (int *)&whandle->handle) == 0; - } else { - return false; - } + whandle->stride = stride; + + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { + return nouveau_bo_name_get(bo, &whandle->handle) == 0; + } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { + whandle->handle = bo->handle; + return true; + } else if (whandle->type == DRM_API_HANDLE_TYPE_FD) { + return nouveau_bo_set_prime(bo, (int *)&whandle->handle) == 0; + } else { + return false; + } } int nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) { - struct pipe_screen *pscreen = &screen->base; - struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 }; - struct nvc0_fifo nvc0_data = { }; - uint64_t time; - int size, ret; - void *data; - union nouveau_bo_config mm_config; - - char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG"); - if (nv_dbg) - nouveau_mesa_debug = atoi(nv_dbg); - - /* - * this is initialized to 1 in nouveau_drm_screen_create after screen - * is fully constructed and added to the global screen list. 
- */ - screen->refcount = -1; - - if (dev->chipset < 0xc0) { - data = &nv04_data; - size = sizeof(nv04_data); - } else { - data = &nvc0_data; - size = sizeof(nvc0_data); - } - - /* - * Set default VRAM domain if not overridden - */ - if (!screen->vram_domain) { - if (dev->vram_size > 0) - screen->vram_domain = NOUVEAU_BO_VRAM; - else - screen->vram_domain = NOUVEAU_BO_GART; - } - - ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS, - data, size, &screen->channel); - if (ret) - return ret; - screen->device = dev; - - ret = nouveau_client_new(screen->device, &screen->client); - if (ret) - return ret; - ret = nouveau_pushbuf_new(screen->client, screen->channel, - 4, 512 * 1024, 1, - &screen->pushbuf); - if (ret) - return ret; - - /* getting CPU time first appears to be more accurate */ - screen->cpu_gpu_time_delta = os_time_get(); - - ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_PTIMER_TIME, &time); - if (!ret) - screen->cpu_gpu_time_delta = time - screen->cpu_gpu_time_delta * 1000; - - pscreen->get_name = nouveau_screen_get_name; - pscreen->get_vendor = nouveau_screen_get_vendor; - pscreen->get_device_vendor = nouveau_screen_get_device_vendor; - - pscreen->get_timestamp = nouveau_screen_get_timestamp; - - pscreen->fence_reference = nouveau_screen_fence_ref; - pscreen->fence_finish = nouveau_screen_fence_finish; - - util_format_s3tc_init(); - - screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* gallium limit */ - screen->vidmem_bindings = - PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL | - PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | - PIPE_BIND_CURSOR | - PIPE_BIND_SAMPLER_VIEW | - PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE | - PIPE_BIND_COMPUTE_RESOURCE | - PIPE_BIND_GLOBAL; - screen->sysmem_bindings = - PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT | - PIPE_BIND_COMMAND_ARGS_BUFFER; - - memset(&mm_config, 0, sizeof(mm_config)); - - screen->mm_GART = nouveau_mm_create(dev, - NOUVEAU_BO_GART | NOUVEAU_BO_MAP, - &mm_config); - 
screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config); - return 0; + struct pipe_screen *pscreen = &screen->base; + struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 }; + struct nvc0_fifo nvc0_data = { }; + uint64_t time; + int size, ret; + void *data; + union nouveau_bo_config mm_config; + + char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG"); + if (nv_dbg) + nouveau_mesa_debug = atoi(nv_dbg); + + /* + * this is initialized to 1 in nouveau_drm_screen_create after screen + * is fully constructed and added to the global screen list. + */ + screen->refcount = -1; + + if (dev->chipset < 0xc0) { + data = &nv04_data; + size = sizeof(nv04_data); + } else { + data = &nvc0_data; + size = sizeof(nvc0_data); + } + + /* + * Set default VRAM domain if not overridden + */ + if (!screen->vram_domain) { + if (dev->vram_size > 0) + screen->vram_domain = NOUVEAU_BO_VRAM; + else + screen->vram_domain = NOUVEAU_BO_GART; + } + + ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS, + data, size, &screen->channel); + if (ret) + return ret; + screen->device = dev; + + ret = nouveau_client_new(screen->device, &screen->client); + if (ret) + return ret; + ret = nouveau_pushbuf_new(screen->client, screen->channel, + 4, 512 * 1024, 1, + &screen->pushbuf); + if (ret) + return ret; + + /* getting CPU time first appears to be more accurate */ + screen->cpu_gpu_time_delta = os_time_get(); + + ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_PTIMER_TIME, &time); + if (!ret) + screen->cpu_gpu_time_delta = time - screen->cpu_gpu_time_delta * 1000; + + pscreen->get_name = nouveau_screen_get_name; + pscreen->get_vendor = nouveau_screen_get_vendor; + pscreen->get_device_vendor = nouveau_screen_get_device_vendor; + + pscreen->get_timestamp = nouveau_screen_get_timestamp; + + pscreen->fence_reference = nouveau_screen_fence_ref; + pscreen->fence_finish = nouveau_screen_fence_finish; + + util_format_s3tc_init(); + + screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* 
gallium limit */ + screen->vidmem_bindings = + PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | + PIPE_BIND_CURSOR | + PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE | + PIPE_BIND_COMPUTE_RESOURCE | + PIPE_BIND_GLOBAL; + screen->sysmem_bindings = + PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT | + PIPE_BIND_COMMAND_ARGS_BUFFER; + + memset(&mm_config, 0, sizeof(mm_config)); + + screen->mm_GART = nouveau_mm_create(dev, + NOUVEAU_BO_GART | NOUVEAU_BO_MAP, + &mm_config); + screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config); + return 0; } void nouveau_screen_fini(struct nouveau_screen *screen) { - nouveau_mm_destroy(screen->mm_GART); - nouveau_mm_destroy(screen->mm_VRAM); + nouveau_mm_destroy(screen->mm_GART); + nouveau_mm_destroy(screen->mm_VRAM); - nouveau_pushbuf_del(&screen->pushbuf); + nouveau_pushbuf_del(&screen->pushbuf); - nouveau_client_del(&screen->client); - nouveau_object_del(&screen->channel); + nouveau_client_del(&screen->client); + nouveau_object_del(&screen->channel); - nouveau_device_del(&screen->device); + nouveau_device_del(&screen->device); } diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index 4fdde9fbf3d..328646fe3ce 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -16,47 +16,47 @@ extern int nouveau_mesa_debug; struct nouveau_bo; struct nouveau_screen { - struct pipe_screen base; - struct nouveau_device *device; - struct nouveau_object *channel; - struct nouveau_client *client; - struct nouveau_pushbuf *pushbuf; + struct pipe_screen base; + struct nouveau_device *device; + struct nouveau_object *channel; + struct nouveau_client *client; + struct nouveau_pushbuf *pushbuf; - int refcount; + int refcount; - unsigned vidmem_bindings; /* PIPE_BIND_* where VRAM placement is desired */ - unsigned sysmem_bindings; /* PIPE_BIND_* where 
GART placement is desired */ - unsigned lowmem_bindings; /* PIPE_BIND_* that require an address < 4 GiB */ - /* - * For bindings with (vidmem & sysmem) bits set, PIPE_USAGE_* decides - * placement. - */ + unsigned vidmem_bindings; /* PIPE_BIND_* where VRAM placement is desired */ + unsigned sysmem_bindings; /* PIPE_BIND_* where GART placement is desired */ + unsigned lowmem_bindings; /* PIPE_BIND_* that require an address < 4 GiB */ + /* + * For bindings with (vidmem & sysmem) bits set, PIPE_USAGE_* decides + * placement. + */ - uint16_t class_3d; + uint16_t class_3d; - struct { - struct nouveau_fence *head; - struct nouveau_fence *tail; - struct nouveau_fence *current; - u32 sequence; - u32 sequence_ack; - void (*emit)(struct pipe_screen *, u32 *sequence); - u32 (*update)(struct pipe_screen *); - } fence; + struct { + struct nouveau_fence *head; + struct nouveau_fence *tail; + struct nouveau_fence *current; + u32 sequence; + u32 sequence_ack; + void (*emit)(struct pipe_screen *, u32 *sequence); + u32 (*update)(struct pipe_screen *); + } fence; - struct nouveau_mman *mm_VRAM; - struct nouveau_mman *mm_GART; + struct nouveau_mman *mm_VRAM; + struct nouveau_mman *mm_GART; - int64_t cpu_gpu_time_delta; + int64_t cpu_gpu_time_delta; - bool hint_buf_keep_sysmem_copy; + bool hint_buf_keep_sysmem_copy; - unsigned vram_domain; + unsigned vram_domain; - struct { - unsigned profiles_checked; - unsigned profiles_present; - } firmware_info; + struct { + unsigned profiles_checked; + unsigned profiles_present; + } firmware_info; #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS union { @@ -100,10 +100,10 @@ struct nouveau_screen { #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS # define NOUVEAU_DRV_STAT(s, n, v) do { \ - (s)->stats.named.n += (v); \ + (s)->stats.named.n += (v); \ } while(0) -# define NOUVEAU_DRV_STAT_RES(r, n, v) do { \ - nouveau_screen((r)->base.screen)->stats.named.n += (v); \ +# define NOUVEAU_DRV_STAT_RES(r, n, v) do { \ + nouveau_screen((r)->base.screen)->stats.named.n += 
(v); \ } while(0) # define NOUVEAU_DRV_STAT_IFD(x) x #else @@ -115,20 +115,20 @@ struct nouveau_screen { static inline struct nouveau_screen * nouveau_screen(struct pipe_screen *pscreen) { - return (struct nouveau_screen *)pscreen; + return (struct nouveau_screen *)pscreen; } bool nouveau_drm_screen_unref(struct nouveau_screen *screen); bool nouveau_screen_bo_get_handle(struct pipe_screen *pscreen, - struct nouveau_bo *bo, - unsigned stride, - struct winsys_handle *whandle); + struct nouveau_bo *bo, + unsigned stride, + struct winsys_handle *whandle); struct nouveau_bo * nouveau_screen_bo_from_handle(struct pipe_screen *pscreen, - struct winsys_handle *whandle, - unsigned *out_stride); + struct winsys_handle *whandle, + unsigned *out_stride); int nouveau_screen_init(struct nouveau_screen *, struct nouveau_device *); diff --git a/src/gallium/drivers/nouveau/nouveau_statebuf.h b/src/gallium/drivers/nouveau/nouveau_statebuf.h index f38014091ba..da5d7972d9c 100644 --- a/src/gallium/drivers/nouveau/nouveau_statebuf.h +++ b/src/gallium/drivers/nouveau/nouveau_statebuf.h @@ -6,9 +6,9 @@ struct nouveau_statebuf_builder { - uint32_t* p; + uint32_t* p; #ifdef DEBUG - uint32_t* pend; + uint32_t* pend; #endif }; @@ -22,7 +22,7 @@ struct nouveau_statebuf_builder static inline uint32_t sb_header(unsigned subc, unsigned mthd, unsigned size) { - return (size << 18) | (subc << 13) | mthd; + return (size << 18) | (subc << 13) | mthd; } #define sb_method(sb, v, n) sb_data(sb, sb_header(SUBC_3D(v), n)); diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c index e414a534418..8bb12b22ac1 100644 --- a/src/gallium/drivers/nouveau/nouveau_video.c +++ b/src/gallium/drivers/nouveau/nouveau_video.c @@ -831,7 +831,7 @@ error: static int nouveau_screen_get_video_param(struct pipe_screen *pscreen, enum pipe_video_profile profile, - enum pipe_video_entrypoint entrypoint, + enum pipe_video_entrypoint entrypoint, enum pipe_video_cap param) { switch 
(param) { diff --git a/src/gallium/drivers/nouveau/nouveau_video.h b/src/gallium/drivers/nouveau/nouveau_video.h index fd1bd527deb..3ef6f89ce28 100644 --- a/src/gallium/drivers/nouveau/nouveau_video.h +++ b/src/gallium/drivers/nouveau/nouveau_video.h @@ -83,7 +83,7 @@ BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size) static inline void PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd, struct nouveau_bo *bo, uint32_t offset, - struct nouveau_bufctx *ctx, int bin, uint32_t rw) + struct nouveau_bufctx *ctx, int bin, uint32_t rw) { nouveau_bufctx_mthd(ctx, bin, NV04_FIFO_PKHDR(subc, mthd, 1), bo, offset, diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video.h b/src/gallium/drivers/nouveau/nouveau_vp3_video.h index 33e3bef3df3..58df5ee847f 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video.h +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video.h @@ -117,22 +117,22 @@ struct nouveau_vp3_decoder { }; struct comm { - uint32_t bsp_cur_index; // 000 - uint32_t byte_ofs; // 004 - uint32_t status[0x10]; // 008 - uint32_t pos[0x10]; // 048 - uint8_t pad[0x100 - 0x88]; // 0a0 bool comm_encrypted - - uint32_t pvp_cur_index; // 100 - uint32_t acked_byte_ofs; // 104 - uint32_t status_vp[0x10]; // 108 - uint16_t mb_y[0x10]; //148 - uint32_t pvp_stage; // 168 0xeeXX - uint16_t parse_endpos_index; // 16c - uint16_t irq_index; // 16e - uint8_t irq_470[0x10]; // 170 - uint32_t irq_pos[0x10]; // 180 - uint32_t parse_endpos[0x10]; // 1c0 + uint32_t bsp_cur_index; // 000 + uint32_t byte_ofs; // 004 + uint32_t status[0x10]; // 008 + uint32_t pos[0x10]; // 048 + uint8_t pad[0x100 - 0x88]; // 0a0 bool comm_encrypted + + uint32_t pvp_cur_index; // 100 + uint32_t acked_byte_ofs; // 104 + uint32_t status_vp[0x10]; // 108 + uint16_t mb_y[0x10]; //148 + uint32_t pvp_stage; // 168 0xeeXX + uint16_t parse_endpos_index; // 16c + uint16_t irq_index; // 16e + uint8_t irq_470[0x10]; // 170 + uint32_t irq_pos[0x10]; // 180 + uint32_t parse_endpos[0x10]; // 
1c0 }; static inline uint32_t nouveau_vp3_video_align(uint32_t h) diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c index 6d968c18399..692772e49d1 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c @@ -23,90 +23,90 @@ #include "nouveau_vp3_video.h" struct strparm_bsp { - uint32_t w0[4]; // bits 0-23 length, bits 24-31 addr_hi - uint32_t w1[4]; // bit 8-24 addr_lo - uint32_t unk20; // should be idx * 0x8000000, bitstream offset - uint32_t do_crypto_crap; // set to 0 + uint32_t w0[4]; // bits 0-23 length, bits 24-31 addr_hi + uint32_t w1[4]; // bit 8-24 addr_lo + uint32_t unk20; // should be idx * 0x8000000, bitstream offset + uint32_t do_crypto_crap; // set to 0 }; struct mpeg12_picparm_bsp { - uint16_t width; - uint16_t height; - uint8_t picture_structure; - uint8_t picture_coding_type; - uint8_t intra_dc_precision; - uint8_t frame_pred_frame_dct; - uint8_t concealment_motion_vectors; - uint8_t intra_vlc_format; - uint16_t pad; - uint8_t f_code[2][2]; + uint16_t width; + uint16_t height; + uint8_t picture_structure; + uint8_t picture_coding_type; + uint8_t intra_dc_precision; + uint8_t frame_pred_frame_dct; + uint8_t concealment_motion_vectors; + uint8_t intra_vlc_format; + uint16_t pad; + uint8_t f_code[2][2]; }; struct mpeg4_picparm_bsp { - uint16_t width; - uint16_t height; - uint8_t vop_time_increment_size; - uint8_t interlaced; - uint8_t resync_marker_disable; + uint16_t width; + uint16_t height; + uint8_t vop_time_increment_size; + uint8_t interlaced; + uint8_t resync_marker_disable; }; struct vc1_picparm_bsp { - uint16_t width; - uint16_t height; - uint8_t profile; // 04 0 simple, 1 main, 2 advanced - uint8_t postprocflag; // 05 - uint8_t pulldown; // 06 - uint8_t interlaced; // 07 - uint8_t tfcntrflag; // 08 - uint8_t finterpflag; // 09 - uint8_t psf; // 0a - uint8_t pad; // 0b - uint8_t multires; // 0c - uint8_t 
syncmarker; // 0d - uint8_t rangered; // 0e - uint8_t maxbframes; // 0f - uint8_t dquant; // 10 - uint8_t panscan_flag; // 11 - uint8_t refdist_flag; // 12 - uint8_t quantizer; // 13 - uint8_t extended_mv; // 14 - uint8_t extended_dmv; // 15 - uint8_t overlap; // 16 - uint8_t vstransform; // 17 + uint16_t width; + uint16_t height; + uint8_t profile; // 04 0 simple, 1 main, 2 advanced + uint8_t postprocflag; // 05 + uint8_t pulldown; // 06 + uint8_t interlaced; // 07 + uint8_t tfcntrflag; // 08 + uint8_t finterpflag; // 09 + uint8_t psf; // 0a + uint8_t pad; // 0b + uint8_t multires; // 0c + uint8_t syncmarker; // 0d + uint8_t rangered; // 0e + uint8_t maxbframes; // 0f + uint8_t dquant; // 10 + uint8_t panscan_flag; // 11 + uint8_t refdist_flag; // 12 + uint8_t quantizer; // 13 + uint8_t extended_mv; // 14 + uint8_t extended_dmv; // 15 + uint8_t overlap; // 16 + uint8_t vstransform; // 17 }; struct h264_picparm_bsp { - // 00 - uint32_t unk00; - // 04 - uint32_t log2_max_frame_num_minus4; // 04 checked - uint32_t pic_order_cnt_type; // 08 checked - uint32_t log2_max_pic_order_cnt_lsb_minus4; // 0c checked - uint32_t delta_pic_order_always_zero_flag; // 10, or unknown + // 00 + uint32_t unk00; + // 04 + uint32_t log2_max_frame_num_minus4; // 04 checked + uint32_t pic_order_cnt_type; // 08 checked + uint32_t log2_max_pic_order_cnt_lsb_minus4; // 0c checked + uint32_t delta_pic_order_always_zero_flag; // 10, or unknown - uint32_t frame_mbs_only_flag; // 14, always 1? - uint32_t direct_8x8_inference_flag; // 18, always 1? - uint32_t width_mb; // 1c checked - uint32_t height_mb; // 20 checked - // 24 - //struct picparm2 - uint32_t entropy_coding_mode_flag; // 00, checked - uint32_t pic_order_present_flag; // 04 checked - uint32_t unk; // 08 seems to be 0? - uint32_t pad1; // 0c seems to be 0? - uint32_t pad2; // 10 always 0 ? - uint32_t num_ref_idx_l0_active_minus1; // 14 always 0? - uint32_t num_ref_idx_l1_active_minus1; // 18 always 0? 
- uint32_t weighted_pred_flag; // 1c checked - uint32_t weighted_bipred_idc; // 20 checked - uint32_t pic_init_qp_minus26; // 24 checked - uint32_t deblocking_filter_control_present_flag; // 28 always 1? - uint32_t redundant_pic_cnt_present_flag; // 2c always 0? - uint32_t transform_8x8_mode_flag; // 30 checked - uint32_t mb_adaptive_frame_field_flag; // 34 checked-ish - uint8_t field_pic_flag; // 38 checked - uint8_t bottom_field_flag; // 39 checked - uint8_t real_pad[0x1b]; // XX why? + uint32_t frame_mbs_only_flag; // 14, always 1? + uint32_t direct_8x8_inference_flag; // 18, always 1? + uint32_t width_mb; // 1c checked + uint32_t height_mb; // 20 checked + // 24 + //struct picparm2 + uint32_t entropy_coding_mode_flag; // 00, checked + uint32_t pic_order_present_flag; // 04 checked + uint32_t unk; // 08 seems to be 0? + uint32_t pad1; // 0c seems to be 0? + uint32_t pad2; // 10 always 0 ? + uint32_t num_ref_idx_l0_active_minus1; // 14 always 0? + uint32_t num_ref_idx_l1_active_minus1; // 18 always 0? + uint32_t weighted_pred_flag; // 1c checked + uint32_t weighted_bipred_idc; // 20 checked + uint32_t pic_init_qp_minus26; // 24 checked + uint32_t deblocking_filter_control_present_flag; // 28 always 1? + uint32_t redundant_pic_cnt_present_flag; // 2c always 0? + uint32_t transform_8x8_mode_flag; // 30 checked + uint32_t mb_adaptive_frame_field_flag; // 34 checked-ish + uint8_t field_pic_flag; // 38 checked + uint8_t bottom_field_flag; // 39 checked + uint8_t real_pad[0x1b]; // XX why? }; static uint32_t diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c index 25283b79952..53f5db0003d 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c @@ -23,147 +23,147 @@ #include "nouveau_vp3_video.h" struct mpeg12_picparm_vp { - uint16_t width; // 00 in mb units - uint16_t height; // 02 in mb units - - uint32_t unk04; // 04 stride for Y? 
- uint32_t unk08; // 08 stride for CbCr? - - uint32_t ofs[6]; // 1c..20 ofs - uint32_t bucket_size; // 24 - uint32_t inter_ring_data_size; // 28 - uint16_t unk2c; // 2c - uint16_t alternate_scan; // 2e - uint16_t unk30; // 30 not seen set yet - uint16_t picture_structure; // 32 - uint16_t pad2[3]; - uint16_t unk3a; // 3a set on I frame? - - uint32_t f_code[4]; // 3c - uint32_t picture_coding_type; // 4c - uint32_t intra_dc_precision; // 50 - uint32_t q_scale_type; // 54 - uint32_t top_field_first; // 58 - uint32_t full_pel_forward_vector; // 5c - uint32_t full_pel_backward_vector; // 60 - uint8_t intra_quantizer_matrix[0x40]; // 64 - uint8_t non_intra_quantizer_matrix[0x40]; // a4 + uint16_t width; // 00 in mb units + uint16_t height; // 02 in mb units + + uint32_t unk04; // 04 stride for Y? + uint32_t unk08; // 08 stride for CbCr? + + uint32_t ofs[6]; // 1c..20 ofs + uint32_t bucket_size; // 24 + uint32_t inter_ring_data_size; // 28 + uint16_t unk2c; // 2c + uint16_t alternate_scan; // 2e + uint16_t unk30; // 30 not seen set yet + uint16_t picture_structure; // 32 + uint16_t pad2[3]; + uint16_t unk3a; // 3a set on I frame? + + uint32_t f_code[4]; // 3c + uint32_t picture_coding_type; // 4c + uint32_t intra_dc_precision; // 50 + uint32_t q_scale_type; // 54 + uint32_t top_field_first; // 58 + uint32_t full_pel_forward_vector; // 5c + uint32_t full_pel_backward_vector; // 60 + uint8_t intra_quantizer_matrix[0x40]; // 64 + uint8_t non_intra_quantizer_matrix[0x40]; // a4 }; struct mpeg4_picparm_vp { - uint32_t width; // 00 in normal units - uint32_t height; // 04 in normal units - uint32_t unk08; // stride 1 - uint32_t unk0c; // stride 2 - uint32_t ofs[6]; // 10..24 ofs - uint32_t bucket_size; // 28 - uint32_t pad1; // 2c, pad - uint32_t pad2; // 30 - uint32_t inter_ring_data_size; // 34 - - uint32_t trd[2]; // 38, 3c - uint32_t trb[2]; // 40, 44 - uint32_t u48; // XXX codec selection? 
Should test with different values of VdpDecoderProfile - uint16_t f_code_fw; // 4c - uint16_t f_code_bw; // 4e - uint8_t interlaced; // 50 - - uint8_t quant_type; // bool, written to 528 - uint8_t quarter_sample; // bool, written to 548 - uint8_t short_video_header; // bool, negated written to 528 shifted by 1 - uint8_t u54; // bool, written to 0x740 - uint8_t vop_coding_type; // 55 - uint8_t rounding_control; // 56 - uint8_t alternate_vertical_scan_flag; // 57 bool - uint8_t top_field_first; // bool, written to vuc - - uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob - - uint32_t intra[0x10]; // 5c - uint32_t non_intra[0x10]; // 9c - uint32_t pad5[0x10]; // bc what does this do? - // udc..uff pad? + uint32_t width; // 00 in normal units + uint32_t height; // 04 in normal units + uint32_t unk08; // stride 1 + uint32_t unk0c; // stride 2 + uint32_t ofs[6]; // 10..24 ofs + uint32_t bucket_size; // 28 + uint32_t pad1; // 2c, pad + uint32_t pad2; // 30 + uint32_t inter_ring_data_size; // 34 + + uint32_t trd[2]; // 38, 3c + uint32_t trb[2]; // 40, 44 + uint32_t u48; // XXX codec selection? Should test with different values of VdpDecoderProfile + uint16_t f_code_fw; // 4c + uint16_t f_code_bw; // 4e + uint8_t interlaced; // 50 + + uint8_t quant_type; // bool, written to 528 + uint8_t quarter_sample; // bool, written to 548 + uint8_t short_video_header; // bool, negated written to 528 shifted by 1 + uint8_t u54; // bool, written to 0x740 + uint8_t vop_coding_type; // 55 + uint8_t rounding_control; // 56 + uint8_t alternate_vertical_scan_flag; // 57 bool + uint8_t top_field_first; // bool, written to vuc + + uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob + + uint32_t intra[0x10]; // 5c + uint32_t non_intra[0x10]; // 9c + uint32_t pad5[0x10]; // bc what does this do? + // udc..uff pad? 
}; // Full version, with data pumped from BSP struct vc1_picparm_vp { - uint32_t bucket_size; // 00 - uint32_t pad; // 04 - - uint32_t inter_ring_data_size; // 08 - uint32_t unk0c; // stride 1 - uint32_t unk10; // stride 2 - uint32_t ofs[6]; // 14..28 ofs - - uint16_t width; // 2c - uint16_t height; // 2e - - uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced - uint8_t loopfilter; // 31 written into vuc - uint8_t fastuvmc; // 32, written into vuc - uint8_t dquant; // 33 - - uint8_t overlap; // 34 - uint8_t quantizer; // 35 - uint8_t u36; // 36, bool - uint8_t pad2; // 37, to align to 0x38 + uint32_t bucket_size; // 00 + uint32_t pad; // 04 + + uint32_t inter_ring_data_size; // 08 + uint32_t unk0c; // stride 1 + uint32_t unk10; // stride 2 + uint32_t ofs[6]; // 14..28 ofs + + uint16_t width; // 2c + uint16_t height; // 2e + + uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced + uint8_t loopfilter; // 31 written into vuc + uint8_t fastuvmc; // 32, written into vuc + uint8_t dquant; // 33 + + uint8_t overlap; // 34 + uint8_t quantizer; // 35 + uint8_t u36; // 36, bool + uint8_t pad2; // 37, to align to 0x38 }; struct h264_picparm_vp { // 700..a00 - uint16_t width, height; - uint32_t stride1, stride2; // 04 08 - uint32_t ofs[6]; // 0c..24 in-image offset - - uint32_t tmp_stride; - uint32_t bucket_size; // 28 bucket size - uint32_t inter_ring_data_size; // 2c - - unsigned mb_adaptive_frame_field_flag : 1; // 0 - unsigned direct_8x8_inference_flag : 1; // 1 0x02: into vuc ofs 56 - unsigned weighted_pred_flag : 1; // 2 0x04 - unsigned constrained_intra_pred_flag : 1; // 3 0x08: into vuc ofs 68 - unsigned is_reference : 1; // 4 - unsigned interlace : 1; // 5 field_pic_flag - unsigned bottom_field_flag : 1; // 6 - unsigned second_field : 1; // 7 0x80: nfi yet - - signed log2_max_frame_num_minus4 : 4; // 31 0..3 - unsigned chroma_format_idc : 2; // 31 4..5 - unsigned pic_order_cnt_type : 2; // 31 6..7 - signed pic_init_qp_minus26 : 6; // 32 0..5 - signed 
chroma_qp_index_offset : 5; // 32 6..10 - signed second_chroma_qp_index_offset : 5; // 32 11..15 - - unsigned weighted_bipred_idc : 2; // 34 0..1 - unsigned fifo_dec_index : 7; // 34 2..8 - unsigned tmp_idx : 5; // 34 9..13 - unsigned frame_number : 16; // 34 14..29 - unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30] - unsigned u34_3131 : 1; // 34 31..31 pad? - - uint32_t field_order_cnt[2]; // 38, 3c - - struct { // 40 - unsigned fifo_idx : 7; // 00 0..6 - unsigned tmp_idx : 5; // 00 7..11 - unsigned top_is_reference : 1; // 00 12 - unsigned bottom_is_reference : 1; // 00 13 - unsigned is_long_term : 1; // 00 14 - unsigned notseenyet : 1; // 00 15 pad? - unsigned field_pic_flag : 1; // 00 16 - unsigned top_field_marking : 4; // 00 17..20 - unsigned bottom_field_marking : 4; // 00 21..24 - unsigned pad : 7; // 00 d25..31 - - uint32_t field_order_cnt[2]; // 04,08 - uint32_t frame_idx; // 0c - } refs[0x10]; - - uint8_t m4x4[6][16]; // 140 - uint8_t m8x8[2][64]; // 1a0 - uint32_t u220; // 220 number of extra reorder_list to append? - uint8_t u224[0x20]; // 224..244 reorder_list append ? 
- uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read + uint16_t width, height; + uint32_t stride1, stride2; // 04 08 + uint32_t ofs[6]; // 0c..24 in-image offset + + uint32_t tmp_stride; + uint32_t bucket_size; // 28 bucket size + uint32_t inter_ring_data_size; // 2c + + unsigned mb_adaptive_frame_field_flag : 1; // 0 + unsigned direct_8x8_inference_flag : 1; // 1 0x02: into vuc ofs 56 + unsigned weighted_pred_flag : 1; // 2 0x04 + unsigned constrained_intra_pred_flag : 1; // 3 0x08: into vuc ofs 68 + unsigned is_reference : 1; // 4 + unsigned interlace : 1; // 5 field_pic_flag + unsigned bottom_field_flag : 1; // 6 + unsigned second_field : 1; // 7 0x80: nfi yet + + signed log2_max_frame_num_minus4 : 4; // 31 0..3 + unsigned chroma_format_idc : 2; // 31 4..5 + unsigned pic_order_cnt_type : 2; // 31 6..7 + signed pic_init_qp_minus26 : 6; // 32 0..5 + signed chroma_qp_index_offset : 5; // 32 6..10 + signed second_chroma_qp_index_offset : 5; // 32 11..15 + + unsigned weighted_bipred_idc : 2; // 34 0..1 + unsigned fifo_dec_index : 7; // 34 2..8 + unsigned tmp_idx : 5; // 34 9..13 + unsigned frame_number : 16; // 34 14..29 + unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30] + unsigned u34_3131 : 1; // 34 31..31 pad? + + uint32_t field_order_cnt[2]; // 38, 3c + + struct { // 40 + unsigned fifo_idx : 7; // 00 0..6 + unsigned tmp_idx : 5; // 00 7..11 + unsigned top_is_reference : 1; // 00 12 + unsigned bottom_is_reference : 1; // 00 13 + unsigned is_long_term : 1; // 00 14 + unsigned notseenyet : 1; // 00 15 pad? + unsigned field_pic_flag : 1; // 00 16 + unsigned top_field_marking : 4; // 00 17..20 + unsigned bottom_field_marking : 4; // 00 21..24 + unsigned pad : 7; // 00 d25..31 + + uint32_t field_order_cnt[2]; // 04,08 + uint32_t frame_idx; // 0c + } refs[0x10]; + + uint8_t m4x4[6][16]; // 140 + uint8_t m8x8[2][64]; // 1a0 + uint32_t u220; // 220 number of extra reorder_list to append? + uint8_t u224[0x20]; // 224..244 reorder_list append ? 
+ uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read }; static void diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index a44fd3efcf7..1319c3290cf 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -65,18 +65,18 @@ PUSH_KICK(struct nouveau_pushbuf *push) static inline uint32_t nouveau_screen_transfer_flags(unsigned pipe) { - uint32_t flags = 0; - - if (!(pipe & PIPE_TRANSFER_UNSYNCHRONIZED)) { - if (pipe & PIPE_TRANSFER_READ) - flags |= NOUVEAU_BO_RD; - if (pipe & PIPE_TRANSFER_WRITE) - flags |= NOUVEAU_BO_WR; - if (pipe & PIPE_TRANSFER_DONTBLOCK) - flags |= NOUVEAU_BO_NOBLOCK; - } - - return flags; + uint32_t flags = 0; + + if (!(pipe & PIPE_TRANSFER_UNSYNCHRONIZED)) { + if (pipe & PIPE_TRANSFER_READ) + flags |= NOUVEAU_BO_RD; + if (pipe & PIPE_TRANSFER_WRITE) + flags |= NOUVEAU_BO_WR; + if (pipe & PIPE_TRANSFER_DONTBLOCK) + flags |= NOUVEAU_BO_NOBLOCK; + } + + return flags; } extern struct pipe_screen * diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index 03301649e38..bdecb0a32b3 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -172,6 +172,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index 69c121274a9..fb74a9748a3 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -16,6 +16,7 @@ #include "nv50/nv50_program.h" #include "nv50/nv50_resource.h" #include "nv50/nv50_transfer.h" +#include "nv50/nv50_query.h" #include 
"nouveau_context.h" #include "nouveau_debug.h" @@ -195,17 +196,6 @@ void nv50_default_kick_notify(struct nouveau_pushbuf *); /* nv50_draw.c */ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *); -/* nv50_query.c */ -void nv50_init_query_functions(struct nv50_context *); -void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method, - struct pipe_query *, unsigned result_offset); -void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *); -void nva0_so_target_save_offset(struct pipe_context *, - struct pipe_stream_output_target *, - unsigned index, bool seralize); - -#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) - /* nv50_shader_state.c */ void nv50_vertprog_validate(struct nv50_context *); void nv50_gmtyprog_validate(struct nv50_context *); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index eff4477472c..299629b6438 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -336,7 +336,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset) info->io.ucpCBSlot = 15; info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET; info->io.genUserClip = prog->vp.clpd_nr; - info->io.sampleInterp = prog->fp.sample_interp; info->io.resInfoCBSlot = 15; info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET; @@ -374,6 +373,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset) prog->code = info->bin.code; prog->code_size = info->bin.codeSize; prog->fixups = info->bin.relocData; + prog->interps = info->bin.interpData; prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); prog->tls_space = info->bin.tlsSpace; @@ -420,8 +420,8 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) switch (prog->type) { case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break; - case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break; - case 
PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break; + case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break; + case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break; default: assert(!"invalid program type"); return false; @@ -456,6 +456,10 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) if (prog->fixups) nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0); + if (prog->interps) + nv50_ir_change_interp(prog->interps, prog->code, + prog->fp.force_persample_interp, + false /* flatshade */); nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h index f4e8e9402ca..24cc96567d7 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h @@ -86,7 +86,7 @@ struct nv50_program { uint32_t interp; /* 0x1988 */ uint32_t colors; /* 0x1904 */ uint8_t has_samplemask; - uint8_t sample_interp; + uint8_t force_persample_interp; } fp; struct { @@ -99,6 +99,7 @@ struct nv50_program { } gp; void *fixups; /* relocation records */ + void *interps; /* interpolation records */ struct nouveau_heap *mem; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 5368ee73750..dd9b85b7208 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -25,356 +25,46 @@ #define NV50_PUSH_EXPLICIT_SPACE_CHECKING #include "nv50/nv50_context.h" -#include "nv_object.xml.h" - -#define NV50_QUERY_STATE_READY 0 -#define NV50_QUERY_STATE_ACTIVE 1 -#define NV50_QUERY_STATE_ENDED 2 -#define NV50_QUERY_STATE_FLUSHED 3 - -/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts - * (since we use only a single GPU channel per screen) will not work properly. 
- * - * The first is not that big of an issue because OpenGL does not allow nested - * queries anyway. - */ - -struct nv50_query { - uint32_t *data; - uint16_t type; - uint16_t index; - uint32_t sequence; - struct nouveau_bo *bo; - uint32_t base; - uint32_t offset; /* base + i * 32 */ - uint8_t state; - bool is64bit; - int nesting; /* only used for occlusion queries */ - struct nouveau_mm_allocation *mm; - struct nouveau_fence *fence; -}; - -#define NV50_QUERY_ALLOC_SPACE 256 - -static inline struct nv50_query * -nv50_query(struct pipe_query *pipe) -{ - return (struct nv50_query *)pipe; -} - -static bool -nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) -{ - struct nv50_screen *screen = nv50->screen; - int ret; - - if (q->bo) { - nouveau_bo_ref(NULL, &q->bo); - if (q->mm) { - if (q->state == NV50_QUERY_STATE_READY) - nouveau_mm_free(q->mm); - else - nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, - q->mm); - } - } - if (size) { - q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base); - if (!q->bo) - return false; - q->offset = q->base; - - ret = nouveau_bo_map(q->bo, 0, screen->base.client); - if (ret) { - nv50_query_allocate(nv50, q, 0); - return false; - } - q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base); - } - return true; -} - -static void -nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) -{ - nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); - nouveau_fence_ref(NULL, &nv50_query(pq)->fence); - FREE(nv50_query(pq)); -} +#include "nv50/nv50_query.h" +#include "nv50/nv50_query_hw.h" static struct pipe_query * -nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) +nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index) { struct nv50_context *nv50 = nv50_context(pipe); struct nv50_query *q; - q = CALLOC_STRUCT(nv50_query); - if (!q) - return NULL; - - if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) { - 
FREE(q); - return NULL; - } - - q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || - type == PIPE_QUERY_PRIMITIVES_EMITTED || - type == PIPE_QUERY_SO_STATISTICS || - type == PIPE_QUERY_PIPELINE_STATISTICS); - q->type = type; - - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - q->offset -= 32; - q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */ - } - + q = nv50_hw_create_query(nv50, type, index); return (struct pipe_query *)q; } static void -nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q, - unsigned offset, uint32_t get) +nv50_destroy_query(struct pipe_context *pipe, struct pipe_query *pq) { - offset += q->offset; - - PUSH_SPACE(push, 5); - PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4); - PUSH_DATAh(push, q->bo->offset + offset); - PUSH_DATA (push, q->bo->offset + offset); - PUSH_DATA (push, q->sequence); - PUSH_DATA (push, get); + struct nv50_query *q = nv50_query(pq); + q->funcs->destroy_query(nv50_context(pipe), q); } static boolean -nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +nv50_begin_query(struct pipe_context *pipe, struct pipe_query *pq) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); - - /* For occlusion queries we have to change the storage, because a previous - * query might set the initial render conition to false even *after* we re- - * initialized it to true. - */ - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - q->offset += 32; - q->data += 32 / sizeof(*q->data); - if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE) - nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE); - - /* XXX: can we do this with the GPU, and sync with respect to a previous - * query ? 
- */ - q->data[0] = q->sequence; /* initialize sequence */ - q->data[1] = 1; /* initial render condition = true */ - q->data[4] = q->sequence + 1; /* for comparison COND_MODE */ - q->data[5] = 0; - } - if (!q->is64bit) - q->data[0] = q->sequence++; /* the previously used one */ - - switch (q->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - q->nesting = nv50->screen->num_occlusion_queries_active++; - if (q->nesting) { - nv50_query_get(push, q, 0x10, 0x0100f002); - } else { - PUSH_SPACE(push, 4); - BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); - PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT); - BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); - PUSH_DATA (push, 1); - } - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - nv50_query_get(push, q, 0x10, 0x06805002); - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - nv50_query_get(push, q, 0x10, 0x05805002); - break; - case PIPE_QUERY_SO_STATISTICS: - nv50_query_get(push, q, 0x20, 0x05805002); - nv50_query_get(push, q, 0x30, 0x06805002); - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - nv50_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */ - nv50_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */ - nv50_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */ - nv50_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */ - nv50_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */ - nv50_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */ - nv50_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */ - nv50_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */ - break; - case PIPE_QUERY_TIME_ELAPSED: - nv50_query_get(push, q, 0x10, 0x00005002); - break; - default: - break; - } - q->state = NV50_QUERY_STATE_ACTIVE; - return true; + return q->funcs->begin_query(nv50_context(pipe), q); } static void -nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) +nv50_end_query(struct pipe_context *pipe, struct pipe_query *pq) { - struct nv50_context *nv50 = nv50_context(pipe); - struct 
nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); - - q->state = NV50_QUERY_STATE_ENDED; - - switch (q->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - nv50_query_get(push, q, 0, 0x0100f002); - if (--nv50->screen->num_occlusion_queries_active == 0) { - PUSH_SPACE(push, 2); - BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); - PUSH_DATA (push, 0); - } - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - nv50_query_get(push, q, 0, 0x06805002); - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - nv50_query_get(push, q, 0, 0x05805002); - break; - case PIPE_QUERY_SO_STATISTICS: - nv50_query_get(push, q, 0x00, 0x05805002); - nv50_query_get(push, q, 0x10, 0x06805002); - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - nv50_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */ - nv50_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */ - nv50_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */ - nv50_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */ - nv50_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */ - nv50_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */ - nv50_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */ - nv50_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */ - break; - case PIPE_QUERY_TIMESTAMP: - q->sequence++; - /* fall through */ - case PIPE_QUERY_TIME_ELAPSED: - nv50_query_get(push, q, 0, 0x00005002); - break; - case PIPE_QUERY_GPU_FINISHED: - q->sequence++; - nv50_query_get(push, q, 0, 0x1000f010); - break; - case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: - q->sequence++; - nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5)); - break; - case PIPE_QUERY_TIMESTAMP_DISJOINT: - /* This query is not issued on GPU because disjoint is forced to false */ - q->state = NV50_QUERY_STATE_READY; - break; - default: - assert(0); - break; - } - - if (q->is64bit) - nouveau_fence_ref(nv50->screen->base.fence.current, &q->fence); -} - -static inline void -nv50_query_update(struct 
nv50_query *q) -{ - if (q->is64bit) { - if (nouveau_fence_signalled(q->fence)) - q->state = NV50_QUERY_STATE_READY; - } else { - if (q->data[0] == q->sequence) - q->state = NV50_QUERY_STATE_READY; - } + q->funcs->end_query(nv50_context(pipe), q); } static boolean -nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, union pipe_query_result *result) +nv50_get_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, union pipe_query_result *result) { - struct nv50_context *nv50 = nv50_context(pipe); struct nv50_query *q = nv50_query(pq); - uint64_t *res64 = (uint64_t *)result; - uint32_t *res32 = (uint32_t *)result; - uint8_t *res8 = (uint8_t *)result; - uint64_t *data64 = (uint64_t *)q->data; - int i; - - if (q->state != NV50_QUERY_STATE_READY) - nv50_query_update(q); - - if (q->state != NV50_QUERY_STATE_READY) { - if (!wait) { - /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */ - if (q->state != NV50_QUERY_STATE_FLUSHED) { - q->state = NV50_QUERY_STATE_FLUSHED; - PUSH_KICK(nv50->base.pushbuf); - } - return false; - } - if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client)) - return false; - } - q->state = NV50_QUERY_STATE_READY; - - switch (q->type) { - case PIPE_QUERY_GPU_FINISHED: - res8[0] = true; - break; - case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ - res64[0] = q->data[1] - q->data[5]; - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ - case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ - res64[0] = data64[0] - data64[2]; - break; - case PIPE_QUERY_SO_STATISTICS: - res64[0] = data64[0] - data64[4]; - res64[1] = data64[2] - data64[6]; - break; - case PIPE_QUERY_PIPELINE_STATISTICS: - for (i = 0; i < 8; ++i) - res64[i] = data64[i * 2] - data64[16 + i * 2]; - break; - case PIPE_QUERY_TIMESTAMP: - res64[0] = data64[1]; - break; - case PIPE_QUERY_TIMESTAMP_DISJOINT: - res64[0] = 1000000000; - res8[8] = false; - break; - 
case PIPE_QUERY_TIME_ELAPSED: - res64[0] = data64[1] - data64[3]; - break; - case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: - res32[0] = q->data[1]; - break; - default: - return false; - } - - return true; -} - -void -nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq) -{ - struct nv50_query *q = nv50_query(pq); - unsigned offset = q->offset; - - PUSH_SPACE(push, 5); - PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); - PUSH_DATAh(push, q->bo->offset + offset); - PUSH_DATA (push, q->bo->offset + offset); - PUSH_DATA (push, q->sequence); - PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); + return q->funcs->get_query_result(nv50_context(pipe), q, wait, result); } static void @@ -384,7 +74,8 @@ nv50_render_condition(struct pipe_context *pipe, { struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_pushbuf *push = nv50->base.pushbuf; - struct nv50_query *q; + struct nv50_query *q = nv50_query(pq); + struct nv50_hw_query *hq = nv50_hw_query(q); uint32_t cond; bool wait = mode != PIPE_RENDER_COND_NO_WAIT && @@ -394,7 +85,6 @@ nv50_render_condition(struct pipe_context *pipe, cond = NV50_3D_COND_MODE_ALWAYS; } else { - q = nv50_query(pq); /* NOTE: comparison of 2 queries only works if both have completed */ switch (q->type) { case PIPE_QUERY_SO_OVERFLOW_PREDICATE: @@ -405,7 +95,7 @@ nv50_render_condition(struct pipe_context *pipe, case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: if (likely(!condition)) { - if (unlikely(q->nesting)) + if (unlikely(hq->nesting)) cond = wait ? 
NV50_3D_COND_MODE_NOT_EQUAL : NV50_3D_COND_MODE_ALWAYS; else @@ -440,48 +130,15 @@ nv50_render_condition(struct pipe_context *pipe, PUSH_DATA (push, 0); } - PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3); - PUSH_DATAh(push, q->bo->offset + q->offset); - PUSH_DATA (push, q->bo->offset + q->offset); + PUSH_DATAh(push, hq->bo->offset + hq->offset); + PUSH_DATA (push, hq->bo->offset + hq->offset); PUSH_DATA (push, cond); BEGIN_NV04(push, NV50_2D(COND_ADDRESS_HIGH), 2); - PUSH_DATAh(push, q->bo->offset + q->offset); - PUSH_DATA (push, q->bo->offset + q->offset); -} - -void -nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, - struct pipe_query *pq, unsigned result_offset) -{ - struct nv50_query *q = nv50_query(pq); - - nv50_query_update(q); - if (q->state != NV50_QUERY_STATE_READY) - nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client); - q->state = NV50_QUERY_STATE_READY; - - BEGIN_NV04(push, SUBC_3D(method), 1); - PUSH_DATA (push, q->data[result_offset / 4]); -} - -void -nva0_so_target_save_offset(struct pipe_context *pipe, - struct pipe_stream_output_target *ptarg, - unsigned index, bool serialize) -{ - struct nv50_so_target *targ = nv50_so_target(ptarg); - - if (serialize) { - struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf; - PUSH_SPACE(push, 2); - BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); - PUSH_DATA (push, 0); - } - - nv50_query(targ->pq)->index = index; - nv50_query_end(pipe, targ->pq); + PUSH_DATAh(push, hq->bo->offset + hq->offset); + PUSH_DATA (push, hq->bo->offset + hq->offset); } void @@ -489,10 +146,10 @@ nv50_init_query_functions(struct nv50_context *nv50) { struct pipe_context *pipe = &nv50->base.pipe; - pipe->create_query = nv50_query_create; - pipe->destroy_query = nv50_query_destroy; - pipe->begin_query = nv50_query_begin; - pipe->end_query = nv50_query_end; - pipe->get_query_result = 
nv50_query_result; + pipe->create_query = nv50_create_query; + pipe->destroy_query = nv50_destroy_query; + pipe->begin_query = nv50_begin_query; + pipe->end_query = nv50_end_query; + pipe->get_query_result = nv50_get_query_result; pipe->render_condition = nv50_render_condition; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h b/src/gallium/drivers/nouveau/nv50/nv50_query.h new file mode 100644 index 00000000000..d990285c857 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h @@ -0,0 +1,33 @@ +#ifndef __NV50_QUERY_H__ +#define __NV50_QUERY_H__ + +#include "pipe/p_context.h" + +#include "nouveau_context.h" + +struct nv50_context; +struct nv50_query; + +struct nv50_query_funcs { + void (*destroy_query)(struct nv50_context *, struct nv50_query *); + boolean (*begin_query)(struct nv50_context *, struct nv50_query *); + void (*end_query)(struct nv50_context *, struct nv50_query *); + boolean (*get_query_result)(struct nv50_context *, struct nv50_query *, + boolean, union pipe_query_result *); +}; + +struct nv50_query { + const struct nv50_query_funcs *funcs; + uint16_t type; + uint16_t index; +}; + +static inline struct nv50_query * +nv50_query(struct pipe_query *pipe) +{ + return (struct nv50_query *)pipe; +} + +void nv50_init_query_functions(struct nv50_context *); + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c new file mode 100644 index 00000000000..945ce7abe50 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -0,0 +1,406 @@ +/* + * Copyright 2011 Christoph Bumiller + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the 
Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define NV50_PUSH_EXPLICIT_SPACE_CHECKING + +#include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw.h" +#include "nv_object.xml.h" + +#define NV50_HW_QUERY_STATE_READY 0 +#define NV50_HW_QUERY_STATE_ACTIVE 1 +#define NV50_HW_QUERY_STATE_ENDED 2 +#define NV50_HW_QUERY_STATE_FLUSHED 3 + +/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts + * (since we use only a single GPU channel per screen) will not work properly. + * + * The first is not that big of an issue because OpenGL does not allow nested + * queries anyway. 
+ */ + +#define NV50_HW_QUERY_ALLOC_SPACE 256 + +static bool +nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q, + int size) +{ + struct nv50_screen *screen = nv50->screen; + struct nv50_hw_query *hq = nv50_hw_query(q); + int ret; + + if (hq->bo) { + nouveau_bo_ref(NULL, &hq->bo); + if (hq->mm) { + if (hq->state == NV50_HW_QUERY_STATE_READY) + nouveau_mm_free(hq->mm); + else + nouveau_fence_work(screen->base.fence.current, + nouveau_mm_free_work, hq->mm); + } + } + if (size) { + hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size, + &hq->bo, &hq->base_offset); + if (!hq->bo) + return false; + hq->offset = hq->base_offset; + + ret = nouveau_bo_map(hq->bo, 0, screen->base.client); + if (ret) { + nv50_hw_query_allocate(nv50, q, 0); + return false; + } + hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset); + } + return true; +} + +static void +nv50_hw_query_get(struct nouveau_pushbuf *push, struct nv50_query *q, + unsigned offset, uint32_t get) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + + offset += hq->offset; + + PUSH_SPACE(push, 5); + PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4); + PUSH_DATAh(push, hq->bo->offset + offset); + PUSH_DATA (push, hq->bo->offset + offset); + PUSH_DATA (push, hq->sequence); + PUSH_DATA (push, get); +} + +static inline void +nv50_hw_query_update(struct nv50_query *q) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + + if (hq->is64bit) { + if (nouveau_fence_signalled(hq->fence)) + hq->state = NV50_HW_QUERY_STATE_READY; + } else { + if (hq->data[0] == hq->sequence) + hq->state = NV50_HW_QUERY_STATE_READY; + } +} + +static void +nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + nv50_hw_query_allocate(nv50, q, 0); + nouveau_fence_ref(NULL, &hq->fence); + FREE(hq); +} + +static boolean +nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q) +{ + 
struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_hw_query *hq = nv50_hw_query(q); + + /* For occlusion queries we have to change the storage, because a previous + * query might set the initial render condition to false even *after* we re- + * initialized it to true. + */ + if (hq->rotate) { + hq->offset += hq->rotate; + hq->data += hq->rotate / sizeof(*hq->data); + if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE) + nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE); + + /* XXX: can we do this with the GPU, and sync with respect to a previous + * query ? + */ + hq->data[0] = hq->sequence; /* initialize sequence */ + hq->data[1] = 1; /* initial render condition = true */ + hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */ + hq->data[5] = 0; + } + if (!hq->is64bit) + hq->data[0] = hq->sequence++; /* the previously used one */ + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + hq->nesting = nv50->screen->num_occlusion_queries_active++; + if (hq->nesting) { + nv50_hw_query_get(push, q, 0x10, 0x0100f002); + } else { + PUSH_SPACE(push, 4); + BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); + PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT); + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); + PUSH_DATA (push, 1); + } + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nv50_hw_query_get(push, q, 0x10, 0x06805002); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nv50_hw_query_get(push, q, 0x10, 0x05805002); + break; + case PIPE_QUERY_SO_STATISTICS: + nv50_hw_query_get(push, q, 0x20, 0x05805002); + nv50_hw_query_get(push, q, 0x30, 0x06805002); + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */ + nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */ + nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */ + nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */ + nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, 
PRIMS_OUT */ + nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */ + nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */ + nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */ + break; + case PIPE_QUERY_TIME_ELAPSED: + nv50_hw_query_get(push, q, 0x10, 0x00005002); + break; + default: + assert(0); + return false; + } + hq->state = NV50_HW_QUERY_STATE_ACTIVE; + return true; +} + +static void +nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_hw_query *hq = nv50_hw_query(q); + + hq->state = NV50_HW_QUERY_STATE_ENDED; + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + nv50_hw_query_get(push, q, 0, 0x0100f002); + if (--nv50->screen->num_occlusion_queries_active == 0) { + PUSH_SPACE(push, 2); + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); + PUSH_DATA (push, 0); + } + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + nv50_hw_query_get(push, q, 0, 0x06805002); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + nv50_hw_query_get(push, q, 0, 0x05805002); + break; + case PIPE_QUERY_SO_STATISTICS: + nv50_hw_query_get(push, q, 0x00, 0x05805002); + nv50_hw_query_get(push, q, 0x10, 0x06805002); + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */ + nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */ + nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */ + nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */ + nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */ + nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */ + nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */ + nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */ + break; + case PIPE_QUERY_TIMESTAMP: + hq->sequence++; + /* fall through */ + case PIPE_QUERY_TIME_ELAPSED: + nv50_hw_query_get(push, q, 0, 0x00005002); + break; + 
case PIPE_QUERY_GPU_FINISHED: + hq->sequence++; + nv50_hw_query_get(push, q, 0, 0x1000f010); + break; + case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + hq->sequence++; + nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5)); + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + /* This query is not issued on GPU because disjoint is forced to false */ + hq->state = NV50_HW_QUERY_STATE_READY; + break; + default: + assert(0); + break; + } + if (hq->is64bit) + nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence); +} + +static boolean +nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q, + boolean wait, union pipe_query_result *result) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + uint64_t *res64 = (uint64_t *)result; + uint32_t *res32 = (uint32_t *)result; + uint8_t *res8 = (uint8_t *)result; + uint64_t *data64 = (uint64_t *)hq->data; + int i; + + if (hq->state != NV50_HW_QUERY_STATE_READY) + nv50_hw_query_update(q); + + if (hq->state != NV50_HW_QUERY_STATE_READY) { + if (!wait) { + /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */ + if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) { + hq->state = NV50_HW_QUERY_STATE_FLUSHED; + PUSH_KICK(nv50->base.pushbuf); + } + return false; + } + if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client)) + return false; + } + hq->state = NV50_HW_QUERY_STATE_READY; + + switch (q->type) { + case PIPE_QUERY_GPU_FINISHED: + res8[0] = true; + break; + case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ + res64[0] = hq->data[1] - hq->data[5]; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ + case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ + res64[0] = data64[0] - data64[2]; + break; + case PIPE_QUERY_SO_STATISTICS: + res64[0] = data64[0] - data64[4]; + res64[1] = data64[2] - data64[6]; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + for (i = 0; i < 8; ++i) + res64[i] = data64[i * 2] - data64[16 + i * 2]; + break; 
+ case PIPE_QUERY_TIMESTAMP: + res64[0] = data64[1]; + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + res64[0] = 1000000000; + res8[8] = false; + break; + case PIPE_QUERY_TIME_ELAPSED: + res64[0] = data64[1] - data64[3]; + break; + case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + res32[0] = hq->data[1]; + break; + default: + assert(0); + return false; + } + + return true; +} + +static const struct nv50_query_funcs hw_query_funcs = { + .destroy_query = nv50_hw_destroy_query, + .begin_query = nv50_hw_begin_query, + .end_query = nv50_hw_end_query, + .get_query_result = nv50_hw_get_query_result, +}; + +struct nv50_query * +nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) +{ + struct nv50_hw_query *hq; + struct nv50_query *q; + + hq = CALLOC_STRUCT(nv50_hw_query); + if (!hq) + return NULL; + + q = &hq->base; + q->funcs = &hw_query_funcs; + q->type = type; + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + hq->rotate = 32; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_PIPELINE_STATISTICS: + hq->is64bit = true; + break; + case PIPE_QUERY_TIME_ELAPSED: + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_GPU_FINISHED: + case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + break; + default: + debug_printf("invalid query type: %u\n", type); + FREE(q); + return NULL; + } + + if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) { + FREE(hq); + return NULL; + } + + if (hq->rotate) { + /* we advance before query_begin ! 
*/ + hq->offset -= hq->rotate; + hq->data -= hq->rotate / sizeof(*hq->data); + } + + return q; +} + +void +nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, + struct nv50_query *q, unsigned result_offset) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + + nv50_hw_query_update(q); + if (hq->state != NV50_HW_QUERY_STATE_READY) + nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client); + hq->state = NV50_HW_QUERY_STATE_READY; + + BEGIN_NV04(push, SUBC_3D(method), 1); + PUSH_DATA (push, hq->data[result_offset / 4]); +} + +void +nv84_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q) +{ + struct nv50_hw_query *hq = nv50_hw_query(q); + unsigned offset = hq->offset; + + PUSH_SPACE(push, 5); + PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); + PUSH_DATAh(push, hq->bo->offset + offset); + PUSH_DATA (push, hq->bo->offset + offset); + PUSH_DATA (push, hq->sequence); + PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h new file mode 100644 index 00000000000..294c67de9a4 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h @@ -0,0 +1,40 @@ +#ifndef __NV50_QUERY_HW_H__ +#define __NV50_QUERY_HW_H__ + +#include "nouveau_fence.h" +#include "nouveau_mm.h" + +#include "nv50_query.h" + +#define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) + +struct nv50_hw_query { + struct nv50_query base; + uint32_t *data; + uint32_t sequence; + struct nouveau_bo *bo; + uint32_t base_offset; + uint32_t offset; /* base + i * rotate */ + uint8_t state; + bool is64bit; + uint8_t rotate; + int nesting; /* only used for occlusion queries */ + struct nouveau_mm_allocation *mm; + struct nouveau_fence *fence; +}; + +static inline struct nv50_hw_query * +nv50_hw_query(struct nv50_query *q) +{ + return (struct nv50_hw_query 
*)q; +} + +struct nv50_query * +nv50_hw_create_query(struct nv50_context *, unsigned, unsigned); +void +nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t, + struct nv50_query *, unsigned); +void +nv84_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nv50_query *); + +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.c b/src/gallium/drivers/nouveau/nv50/nv50_resource.c index d289b4a24e8..325c19fb80c 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_resource.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.c @@ -32,8 +32,8 @@ nv50_resource_from_handle(struct pipe_screen * screen, struct pipe_surface * nv50_surface_from_buffer(struct pipe_context *pipe, - struct pipe_resource *pbuf, - const struct pipe_surface *templ) + struct pipe_resource *pbuf, + const struct pipe_surface *templ) { struct nv50_surface *sf = CALLOC_STRUCT(nv50_surface); if (!sf) @@ -65,8 +65,8 @@ nv50_surface_from_buffer(struct pipe_context *pipe, static struct pipe_surface * nv50_surface_create(struct pipe_context *pipe, - struct pipe_resource *pres, - const struct pipe_surface *templ) + struct pipe_resource *pres, + const struct pipe_surface *templ) { /* surfaces are assumed to be miptrees all over the place. 
*/ assert(pres->target != PIPE_BUFFER); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index ec51d00f266..a9e0c478322 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -180,6 +180,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + case PIPE_CAP_SHAREABLE_SHADERS: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return 1; /* class_3d >= NVA0_3D_CLASS; */ @@ -191,6 +193,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return class_3d >= NVA3_3D_CLASS; /* unsupported caps */ @@ -215,8 +218,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: - case PIPE_CAP_FORCE_PERSAMPLE_INTERP: - case PIPE_CAP_SHAREABLE_SHADERS: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index 941555ffbf8..9b911043132 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -27,6 +27,7 @@ #include "util/u_inlines.h" #include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw.h" void nv50_constbufs_validate(struct nv50_context *nv50) @@ -168,11 +169,23 @@ nv50_fragprog_validate(struct nv50_context *nv50) { struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_program *fp = nv50->fragprog; + struct pipe_rasterizer_state *rast = &nv50->rast->pipe; - fp->fp.sample_interp = nv50->min_samples > 1; + if 
(fp->fp.force_persample_interp != rast->force_persample_interp) { + /* Force the program to be reuploaded, which will trigger interp fixups + * to get applied + */ + if (fp->mem) + nouveau_heap_free(&fp->mem); + + fp->fp.force_persample_interp = rast->force_persample_interp; + } + + if (fp->mem && !(nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_MIN_SAMPLES))) + return; if (!nv50_program_validate(nv50, fp)) - return; + return; nv50_program_update_context_state(nv50, fp, 1); BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1); @@ -629,7 +642,7 @@ nv50_stream_output_validate(struct nv50_context *nv50) const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3; if (n == 4 && !targ->clean) - nv84_query_fifo_wait(push, targ->pq); + nv84_hw_query_fifo_wait(push, nv50_query(targ->pq)); BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n); PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset); PUSH_DATA (push, buf->address + targ->pipe.buffer_offset); @@ -638,8 +651,8 @@ nv50_stream_output_validate(struct nv50_context *nv50) PUSH_DATA(push, targ->pipe.buffer_size); if (!targ->clean) { assert(targ->pq); - nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i), - targ->pq, 0x4); + nv50_hw_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i), + nv50_query(targ->pq), 0x4); } else { BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1); PUSH_DATA(push, 0); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index 410e6311e60..6c8c9f0b4e6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -30,6 +30,7 @@ #include "nv50/nv50_stateobj.h" #include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw.h" #include "nv50/nv50_3d.xml.h" #include "nv50/nv50_texture.xml.h" @@ -725,6 +726,9 @@ nv50_sp_state_create(struct pipe_context *pipe, if (cso->stream_output.num_outputs) prog->pipe.stream_output = cso->stream_output; + prog->translated = nv50_program_translate( + 
prog, nv50_context(pipe)->screen->base.device->chipset); + return (void *)prog; } @@ -1033,7 +1037,7 @@ nv50_so_target_create(struct pipe_context *pipe, if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) { targ->pq = pipe->create_query(pipe, - NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, 0); + NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, 0); if (!targ->pq) { FREE(targ); return NULL; @@ -1057,6 +1061,24 @@ nv50_so_target_create(struct pipe_context *pipe, } static void +nva0_so_target_save_offset(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg, + unsigned index, bool serialize) +{ + struct nv50_so_target *targ = nv50_so_target(ptarg); + + if (serialize) { + struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf; + PUSH_SPACE(push, 2); + BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + } + + nv50_query(targ->pq)->index = index; + pipe->end_query(pipe, targ->pq); +} + +static void nv50_so_target_destroy(struct pipe_context *pipe, struct pipe_stream_output_target *ptarg) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index 66dcf43533b..b6181edf24f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -487,7 +487,7 @@ static struct state_validate { { nv50_validate_viewport, NV50_NEW_VIEWPORT }, { nv50_vertprog_validate, NV50_NEW_VERTPROG }, { nv50_gmtyprog_validate, NV50_NEW_GMTYPROG }, - { nv50_fragprog_validate, NV50_NEW_FRAGPROG | + { nv50_fragprog_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | NV50_NEW_MIN_SAMPLES }, { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER }, diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index 64348b3c378..237d76d6adb 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ 
b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -220,10 +220,14 @@ nv50_resource_copy_region(struct pipe_context *pipe, nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; if (m2mf) { + struct nv50_miptree *src_mt = nv50_miptree(src); + struct nv50_miptree *dst_mt = nv50_miptree(dst); struct nv50_m2mf_rect drect, srect; unsigned i; - unsigned nx = util_format_get_nblocksx(src->format, src_box->width); - unsigned ny = util_format_get_nblocksy(src->format, src_box->height); + unsigned nx = util_format_get_nblocksx(src->format, src_box->width) + << src_mt->ms_x; + unsigned ny = util_format_get_nblocksy(src->format, src_box->height) + << src_mt->ms_y; nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz); nv50_m2mf_rect_setup(&srect, src, src_level, @@ -232,15 +236,15 @@ nv50_resource_copy_region(struct pipe_context *pipe, for (i = 0; i < src_box->depth; ++i) { nv50_m2mf_transfer_rect(nv50, &drect, &srect, nx, ny); - if (nv50_miptree(dst)->layout_3d) + if (dst_mt->layout_3d) drect.z++; else - drect.base += nv50_miptree(dst)->layer_stride; + drect.base += dst_mt->layer_stride; - if (nv50_miptree(src)->layout_3d) + if (src_mt->layout_3d) srect.z++; else - srect.base += nv50_miptree(src)->layer_stride; + srect.base += src_mt->layer_stride; } return; } @@ -270,7 +274,7 @@ nv50_resource_copy_region(struct pipe_context *pipe, static void nv50_clear_render_target(struct pipe_context *pipe, struct pipe_surface *dst, - const union pipe_color_union *color, + const union pipe_color_union *color, unsigned dstx, unsigned dsty, unsigned width, unsigned height) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index f5f47087bef..9fa6fceeefa 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -27,6 +27,7 @@ #include "translate/translate.h" #include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw.h" #include "nv50/nv50_resource.h" #include 
"nv50/nv50_3d.xml.h" @@ -745,7 +746,8 @@ nva0_draw_stream_output(struct nv50_context *nv50, PUSH_DATA (push, 0); BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1); PUSH_DATA (push, so->stride); - nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, so->pq, 0x4); + nv50_hw_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, + nv50_query(so->pq), 0x4); BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1); PUSH_DATA (push, 0); diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c index 7780a179399..d13480c21d5 100644 --- a/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c +++ b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c @@ -27,33 +27,33 @@ static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq, struct nouveau_bo *inter_bo, unsigned slice_size) { - unsigned i, idx = comm->pvp_cur_index & 0xf; - debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage); + unsigned i, idx = comm->pvp_cur_index & 0xf; + debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage); #if 0 - debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs); - debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index); + debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs); + debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index); - for (i = 0; i != comm->irq_index; ++i) - debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]); - for (i = 0; i != comm->parse_endpos_index; ++i) - debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]); + for (i = 0; i != comm->irq_index; ++i) + debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]); + for (i = 0; i != comm->parse_endpos_index; ++i) + debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]); 
#endif - debug_printf("mb_y = %u\n", comm->mb_y[idx]); - if (comm->status_vp[idx] <= 1) - return; - - if ((comm->pvp_stage & 0xff) != 0xff) { - unsigned *map; - int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client); - assert(ret >= 0); - map = inter_bo->map; - for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) { - debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]); - } - munmap(inter_bo->map, inter_bo->size); - inter_bo->map = NULL; - } - assert((comm->pvp_stage & 0xff) == 0xff); + debug_printf("mb_y = %u\n", comm->mb_y[idx]); + if (comm->status_vp[idx] <= 1) + return; + + if ((comm->pvp_stage & 0xff) != 0xff) { + unsigned *map; + int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client); + assert(ret >= 0); + map = inter_bo->map; + for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) { + debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]); + } + munmap(inter_bo->map, inter_bo->size); + inter_bo->map = NULL; + } + assert((comm->pvp_stage & 0xff) == 0xff); } #endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index a168dd684ab..68048f9d6c0 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -252,10 +252,10 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) } } - vp->vp.clip_enable = info->io.clipDistanceMask; - for (i = 0; i < 8; ++i) - if (info->io.cullDistanceMask & (1 << i)) - vp->vp.clip_mode |= 1 << (i * 4); + vp->vp.clip_enable = + (1 << (info->io.clipDistances + info->io.cullDistances)) - 1; + for (i = 0; i < info->io.cullDistances; ++i) + vp->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4); if (info->io.genUserClip < 0) vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */ @@ -269,8 +269,6 @@ nvc0_vp_gen_header(struct nvc0_program *vp, 
struct nv50_ir_prog_info *info) vp->hdr[0] = 0x20061 | (1 << 10); vp->hdr[4] = 0xff000; - vp->hdr[18] = info->io.clipDistanceMask; - return nvc0_vtgp_gen_header(vp, info); } @@ -424,6 +422,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) for (i = 0; i < info->numInputs; ++i) { m = nvc0_hdr_interp_mode(&info->in[i]); + if (info->in[i].sn == TGSI_SEMANTIC_COLOR) { + fp->fp.colors |= 1 << info->in[i].si; + if (info->in[i].sc) + fp->fp.color_interp[info->in[i].si] = m | (info->in[i].mask << 4); + } for (c = 0; c < 4; ++c) { if (!(info->in[i].mask & (1 << c))) continue; @@ -531,7 +534,6 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) info->io.genUserClip = prog->vp.num_ucps; info->io.ucpBase = 256; info->io.ucpCBSlot = 15; - info->io.sampleInterp = prog->fp.sample_interp; if (prog->type == PIPE_SHADER_COMPUTE) { if (chipset >= NVISA_GK104_CHIPSET) { @@ -575,6 +577,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) prog->immd_data = info->immd.buf; prog->immd_size = info->immd.bufSize; prog->relocs = info->bin.relocData; + prog->interps = info->bin.interpData; prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); prog->num_barriers = info->numBarriers; @@ -713,6 +716,23 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) if (prog->relocs) nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0); + if (prog->interps) { + nv50_ir_change_interp(prog->interps, prog->code, + prog->fp.force_persample_interp, + prog->fp.flatshade); + for (int i = 0; i < 2; i++) { + unsigned mask = prog->fp.color_interp[i] >> 4; + unsigned interp = prog->fp.color_interp[i] & 3; + if (!mask) + continue; + prog->hdr[14] &= ~(0xff << (8 * i)); + if (prog->fp.flatshade) + interp = NVC0_INTERP_FLAT; + for (int c = 0; c < 4; c++) + if (mask & (1 << c)) + prog->hdr[14] |= interp << (2 * (4 * i + c)); + } + } #ifdef DEBUG if (debug_get_bool_option("NV50_PROG_DEBUG", false)) @@ -773,6 
+793,7 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) FREE(prog->code); /* may be 0 for hardcoded shaders */ FREE(prog->immd_data); FREE(prog->relocs); + FREE(prog->interps); if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms) FREE(prog->cp.syms); if (prog->tfb) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h index 390e0c7a4f0..9c45e7b3e31 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h @@ -45,8 +45,10 @@ struct nvc0_program { } vp; struct { uint8_t early_z; - uint8_t in_pos[PIPE_MAX_SHADER_INPUTS]; - uint8_t sample_interp; + uint8_t colors; + uint8_t color_interp[2]; + bool force_persample_interp; + bool flatshade; } fp; struct { uint32_t tess_mode; /* ~0 if defined by the other stage */ @@ -61,6 +63,7 @@ struct nvc0_program { uint8_t num_barriers; void *relocs; + void *interps; struct nvc0_transform_feedback_state *tfb; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index e4752e2dbc5..f53921092a5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -28,6 +28,7 @@ #include "nvc0/nvc0_query.h" #include "nvc0/nvc0_query_sw.h" #include "nvc0/nvc0_query_hw.h" +#include "nvc0/nvc0_query_hw_metric.h" #include "nvc0/nvc0_query_hw_sm.h" static struct pipe_query * @@ -188,7 +189,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, count++; } else if (screen->base.class_3d < NVE4_3D_CLASS) { - count++; + count += 2; } } } @@ -218,6 +219,17 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, return 1; } } + } else + if (id == NVC0_HW_METRIC_QUERY_GROUP) { + if (screen->compute) { + if (screen->base.class_3d < NVE4_3D_CLASS) { + info->name = "Performance metrics"; + info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; + info->max_active_queries = 1; + 
info->num_queries = NVC0_HW_METRIC_QUERY_COUNT; + return 1; + } + } } #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h index 6883ab6ab9d..c46361c31aa 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h @@ -32,7 +32,8 @@ nvc0_query(struct pipe_query *pipe) * Driver queries groups: */ #define NVC0_HW_SM_QUERY_GROUP 0 -#define NVC0_SW_QUERY_DRV_STAT_GROUP 1 +#define NVC0_HW_METRIC_QUERY_GROUP 1 +#define NVC0_SW_QUERY_DRV_STAT_GROUP 2 void nvc0_init_query_functions(struct nvc0_context *); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c index 25aa09be42a..fb2806a805e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c @@ -431,7 +431,7 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id, id = nvc0_hw_metric_get_next_query_id(queries, id); info->name = nvc0_hw_metric_names[id]; info->query_type = NVC0_HW_METRIC_QUERY(id); - info->group_id = -1; + info->group_id = NVC0_HW_METRIC_QUERY_GROUP; return 1; } } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c index 12b5a025064..15c803c4307 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c @@ -26,7 +26,8 @@ nvc0_resource_from_handle(struct pipe_screen * screen, } else { struct pipe_resource *res = nv50_miptree_from_handle(screen, templ, whandle); - nv04_resource(res)->vtbl = &nvc0_miptree_vtbl; + if (res) + nv04_resource(res)->vtbl = &nvc0_miptree_vtbl; return res; } } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index af8e5f72670..6ad3980911d 100644 --- 
a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -179,6 +179,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; @@ -201,8 +204,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - case PIPE_CAP_FORCE_PERSAMPLE_INTERP: - case PIPE_CAP_SHAREABLE_SHADERS: return 0; case PIPE_CAP_VENDOR_ID: @@ -352,45 +353,51 @@ static int nvc0_screen_get_compute_param(struct pipe_screen *pscreen, enum pipe_compute_cap param, void *data) { - uint64_t *data64 = (uint64_t *)data; - uint32_t *data32 = (uint32_t *)data; - const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass; + struct nvc0_screen *screen = nvc0_screen(pscreen); + const uint16_t obj_class = screen->compute->oclass; + +#define RET(x) do { \ + if (data) \ + memcpy(data, x, sizeof(x)); \ + return sizeof(x); \ +} while (0) switch (param) { case PIPE_COMPUTE_CAP_GRID_DIMENSION: - data64[0] = 3; - return 8; + RET((uint64_t []) { 3 }); case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: - data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 
0x7fffffff : 65535; - data64[1] = 65535; - data64[2] = 65535; - return 24; + if (obj_class >= NVE4_COMPUTE_CLASS) { + RET(((uint64_t []) { 0x7fffffff, 65535, 65535 })); + } else { + RET(((uint64_t []) { 65535, 65535, 65535 })); + } case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: - data64[0] = 1024; - data64[1] = 1024; - data64[2] = 64; - return 24; + RET(((uint64_t []) { 1024, 1024, 64 })); case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: - data64[0] = 1024; - return 8; + RET((uint64_t []) { 1024 }); case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */ - data64[0] = (uint64_t)1 << 40; - return 8; + RET((uint64_t []) { 1ULL << 40 }); case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */ - data64[0] = 48 << 10; - return 8; + RET((uint64_t []) { 48 << 10 }); case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */ - data64[0] = 512 << 10; - return 8; + RET((uint64_t []) { 512 << 10 }); case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ - data64[0] = 4096; - return 8; + RET((uint64_t []) { 4096 }); case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: - data32[0] = 32; - return 4; + RET((uint32_t []) { 32 }); + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: + RET((uint64_t []) { 1ULL << 40 }); + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + RET((uint32_t []) { 0 }); + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + RET((uint32_t []) { screen->mp_count_compute }); + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: + RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */ default: return 0; } + +#undef RET } static void @@ -827,6 +834,8 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, 1); BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1); PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1); + PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH); if (screen->eng3d->oclass < NVE4_3D_CLASS) { BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1); PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index 
857eb0316c7..8b73102b98b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -38,6 +38,7 @@ struct nvc0_graph_state { uint32_t constant_elts; int32_t index_bias; uint16_t scissor; + bool flatshade; uint8_t patch_vertices; uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */ uint8_t num_vtxbufs; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index af837fc4a33..8595800592c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -107,8 +107,54 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *fp = nvc0->fragprog; + struct pipe_rasterizer_state *rast = &nvc0->rast->pipe; - fp->fp.sample_interp = nvc0->min_samples > 1; + if (fp->fp.force_persample_interp != rast->force_persample_interp) { + /* Force the program to be reuploaded, which will trigger interp fixups + * to get applied + */ + if (fp->mem) + nouveau_heap_free(&fp->mem); + + fp->fp.force_persample_interp = rast->force_persample_interp; + } + + /* Shade model works well enough when both colors follow it. However if one + * (or both) is explicitly set, then we have to go the patching route. + */ + bool has_explicit_color = fp->fp.colors && + (((fp->fp.colors & 1) && !fp->fp.color_interp[0]) || + ((fp->fp.colors & 2) && !fp->fp.color_interp[1])); + bool hwflatshade = false; + if (has_explicit_color && fp->fp.flatshade != rast->flatshade) { + /* Force re-upload */ + if (fp->mem) + nouveau_heap_free(&fp->mem); + + fp->fp.flatshade = rast->flatshade; + + /* Always smooth-shade in this mode, the shader will decide on its own + * when to flat-shade. 
+ */ + } else if (!has_explicit_color) { + hwflatshade = rast->flatshade; + + /* No need to binary-patch the shader each time, make sure that it's set + * up for the default behaviour. + */ + fp->fp.flatshade = 0; + } + + if (hwflatshade != nvc0->state.flatshade) { + nvc0->state.flatshade = hwflatshade; + BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1); + PUSH_DATA (push, hwflatshade ? NVC0_3D_SHADE_MODEL_FLAT : + NVC0_3D_SHADE_MODEL_SMOOTH); + } + + if (fp->mem && !(nvc0->dirty & NVC0_NEW_FRAGPROG)) { + return; + } if (!nvc0_program_validate(nvc0, fp)) return; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 742bef39247..ba1714da010 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -212,9 +212,6 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, * always emit 16 commands, one for each scissor rectangle, here. */ - SB_BEGIN_3D(so, SHADE_MODEL, 1); - SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT : - NVC0_3D_SHADE_MODEL_SMOOTH); SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first); SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside); @@ -683,6 +680,9 @@ nvc0_sp_state_create(struct pipe_context *pipe, if (cso->stream_output.num_outputs) prog->pipe.stream_output = cso->stream_output; + prog->translated = nvc0_program_translate( + prog, nvc0_context(pipe)->screen->base.device->chipset); + return (void *)prog; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index aec06097bbd..205e7dc6ae9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -606,6 +606,9 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to) ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1; } + /* Reset tfb as the shader that owns it may have been deleted. 
*/ + ctx_to->state.tfb = NULL; + if (!ctx_to->vertex) ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS); if (!ctx_to->idxbuf.buffer) @@ -645,7 +648,7 @@ static struct state_validate { { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, { nvc0_validate_tess_state, NVC0_NEW_TESSFACTOR }, { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, - { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, + { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG | NVC0_NEW_RASTERIZER }, { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA | NVC0_NEW_RASTERIZER }, { nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER }, diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h index 8bc33c6a0e0..f9680f5a90f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h @@ -23,7 +23,7 @@ struct nvc0_blend_stateobj { struct nvc0_rasterizer_stateobj { struct pipe_rasterizer_state pipe; int size; - uint32_t state[44]; + uint32_t state[42]; }; struct nvc0_zsa_stateobj { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index dbdf292c862..be123349148 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -225,10 +225,14 @@ nvc0_resource_copy_region(struct pipe_context *pipe, nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; if (m2mf) { + struct nv50_miptree *src_mt = nv50_miptree(src); + struct nv50_miptree *dst_mt = nv50_miptree(dst); struct nv50_m2mf_rect drect, srect; unsigned i; - unsigned nx = util_format_get_nblocksx(src->format, src_box->width); - unsigned ny = util_format_get_nblocksy(src->format, src_box->height); + unsigned nx = util_format_get_nblocksx(src->format, src_box->width) + << src_mt->ms_x; + unsigned ny = util_format_get_nblocksy(src->format, src_box->height) + << src_mt->ms_y; nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, 
dsty, dstz); nv50_m2mf_rect_setup(&srect, src, src_level, @@ -237,15 +241,15 @@ nvc0_resource_copy_region(struct pipe_context *pipe, for (i = 0; i < src_box->depth; ++i) { nvc0->m2mf_copy_rect(nvc0, &drect, &srect, nx, ny); - if (nv50_miptree(dst)->layout_3d) + if (dst_mt->layout_3d) drect.z++; else - drect.base += nv50_miptree(dst)->layer_stride; + drect.base += dst_mt->layer_stride; - if (nv50_miptree(src)->layout_3d) + if (src_mt->layout_3d) srect.z++; else - srect.base += nv50_miptree(src)->layer_stride; + srect.base += src_mt->layer_stride; } return; } @@ -493,57 +497,57 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, unsigned dstx, unsigned dsty, unsigned width, unsigned height) { - struct nvc0_context *nvc0 = nvc0_context(pipe); - struct nouveau_pushbuf *push = nvc0->base.pushbuf; - struct nv50_miptree *mt = nv50_miptree(dst->texture); - struct nv50_surface *sf = nv50_surface(dst); - uint32_t mode = 0; - int unk = mt->base.base.target == PIPE_TEXTURE_2D; - unsigned z; - - if (!PUSH_SPACE(push, 32 + sf->depth)) - return; - - PUSH_REFN (push, mt->base.bo, mt->base.domain | NOUVEAU_BO_WR); - - if (clear_flags & PIPE_CLEAR_DEPTH) { - BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1); - PUSH_DATAf(push, depth); - mode |= NVC0_3D_CLEAR_BUFFERS_Z; - } - - if (clear_flags & PIPE_CLEAR_STENCIL) { - BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1); - PUSH_DATA (push, stencil & 0xff); - mode |= NVC0_3D_CLEAR_BUFFERS_S; - } - - BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2); - PUSH_DATA (push, ( width << 16) | dstx); - PUSH_DATA (push, (height << 16) | dsty); - - BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5); - PUSH_DATAh(push, mt->base.address + sf->offset); - PUSH_DATA (push, mt->base.address + sf->offset); - PUSH_DATA (push, nvc0_format_table[dst->format].rt); - PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); - PUSH_DATA (push, mt->layer_stride >> 2); - BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1); - PUSH_DATA (push, 1); - BEGIN_NVC0(push, 
NVC0_3D(ZETA_HORIZ), 3); - PUSH_DATA (push, sf->width); - PUSH_DATA (push, sf->height); - PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth)); - BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1); - PUSH_DATA (push, dst->u.tex.first_layer); - - BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); - for (z = 0; z < sf->depth; ++z) { - PUSH_DATA (push, mode | - (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT)); - } - - nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nv50_miptree *mt = nv50_miptree(dst->texture); + struct nv50_surface *sf = nv50_surface(dst); + uint32_t mode = 0; + int unk = mt->base.base.target == PIPE_TEXTURE_2D; + unsigned z; + + if (!PUSH_SPACE(push, 32 + sf->depth)) + return; + + PUSH_REFN (push, mt->base.bo, mt->base.domain | NOUVEAU_BO_WR); + + if (clear_flags & PIPE_CLEAR_DEPTH) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1); + PUSH_DATAf(push, depth); + mode |= NVC0_3D_CLEAR_BUFFERS_Z; + } + + if (clear_flags & PIPE_CLEAR_STENCIL) { + BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1); + PUSH_DATA (push, stencil & 0xff); + mode |= NVC0_3D_CLEAR_BUFFERS_S; + } + + BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2); + PUSH_DATA (push, ( width << 16) | dstx); + PUSH_DATA (push, (height << 16) | dsty); + + BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5); + PUSH_DATAh(push, mt->base.address + sf->offset); + PUSH_DATA (push, mt->base.address + sf->offset); + PUSH_DATA (push, nvc0_format_table[dst->format].rt); + PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode); + PUSH_DATA (push, mt->layer_stride >> 2); + BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3); + PUSH_DATA (push, sf->width); + PUSH_DATA (push, sf->height); + PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth)); + BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1); + PUSH_DATA (push, dst->u.tex.first_layer); + + 
BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); + for (z = 0; z < sf->depth; ++z) { + PUSH_DATA (push, mode | + (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT)); + } + + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; } void diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c index 8b23a4887da..9c19ba20a7e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c @@ -27,6 +27,7 @@ struct push_context { struct { bool enabled; bool value; + uint8_t width; unsigned stride; const uint8_t *data; } edgeflag; @@ -53,6 +54,7 @@ nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx) /* silence warnings */ ctx->edgeflag.data = NULL; ctx->edgeflag.stride = 0; + ctx->edgeflag.width = 0; } static inline void @@ -100,6 +102,7 @@ nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0, struct nv04_resource *buf = nv04_resource(vb->buffer); ctx->edgeflag.stride = vb->stride; + ctx->edgeflag.width = util_format_get_blocksize(ve->src_format); if (buf) { unsigned offset = vb->buffer_offset + ve->src_offset; ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base, @@ -137,10 +140,17 @@ prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index) } static inline bool -ef_value(const struct push_context *ctx, uint32_t index) +ef_value_8(const struct push_context *ctx, uint32_t index) { - float *pf = (float *)&ctx->edgeflag.data[index * ctx->edgeflag.stride]; - return *pf ? 
true : false; + uint8_t *pf = (uint8_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride]; + return !!*pf; +} + +static inline bool +ef_value_32(const struct push_context *ctx, uint32_t index) +{ + uint32_t *pf = (uint32_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride]; + return !!*pf; } static inline bool @@ -154,7 +164,11 @@ static inline unsigned ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n) { unsigned i; - for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i); + bool ef = ctx->edgeflag.value; + if (ctx->edgeflag.width == 1) + for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i); + else + for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i); return i; } @@ -162,7 +176,11 @@ static inline unsigned ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n) { unsigned i; - for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i); + bool ef = ctx->edgeflag.value; + if (ctx->edgeflag.width == 1) + for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i); + else + for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i); return i; } @@ -170,7 +188,11 @@ static inline unsigned ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n) { unsigned i; - for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i); + bool ef = ctx->edgeflag.value; + if (ctx->edgeflag.width == 1) + for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i); + else + for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i); return i; } @@ -178,7 +200,11 @@ static inline unsigned ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n) { unsigned i; - for (i = 0; i < n && ef_value(ctx, start++) == ctx->edgeflag.value; ++i); + bool ef = ctx->edgeflag.value; + if (ctx->edgeflag.width == 1) + for (i = 0; i < n && ef_value_8(ctx, start++) == ef; ++i); + else + for (i = 0; i < n && ef_value_32(ctx, start++) == ef; ++i); return i; } diff 
--git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c index 28bcb629e43..91543782dfc 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c @@ -27,33 +27,33 @@ static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq, struct nouveau_bo *inter_bo, unsigned slice_size) { - unsigned i, idx = comm->pvp_cur_index & 0xf; - debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage); + unsigned i, idx = comm->pvp_cur_index & 0xf; + debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage); #if 0 - debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs); - debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index); + debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs); + debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index); - for (i = 0; i != comm->irq_index; ++i) - debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]); - for (i = 0; i != comm->parse_endpos_index; ++i) - debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]); + for (i = 0; i != comm->irq_index; ++i) + debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]); + for (i = 0; i != comm->parse_endpos_index; ++i) + debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]); #endif - debug_printf("mb_y = %u\n", comm->mb_y[idx]); - if (comm->status_vp[idx] <= 1) - return; - - if ((comm->pvp_stage & 0xff) != 0xff) { - unsigned *map; - int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client); - assert(ret >= 0); - map = inter_bo->map; - for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) { - debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], 
map[i/4+2], map[i/4+3]); - } - munmap(inter_bo->map, inter_bo->size); - inter_bo->map = NULL; - } - assert((comm->pvp_stage & 0xff) == 0xff); + debug_printf("mb_y = %u\n", comm->mb_y[idx]); + if (comm->status_vp[idx] <= 1) + return; + + if ((comm->pvp_stage & 0xff) != 0xff) { + unsigned *map; + int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client); + assert(ret >= 0); + map = inter_bo->map; + for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) { + debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]); + } + munmap(inter_bo->map, inter_bo->size); + inter_bo->map = NULL; + } + assert((comm->pvp_stage & 0xff) == 0xff); } #endif diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index a576abdfaf2..d5981248a86 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -198,6 +198,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; /* SWTCL-only features. 
*/ diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index bc6980660a5..ee7beee3001 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -635,7 +635,7 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc, return 0; } -void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg) +void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg, unsigned abs) { switch(value) { case 0: @@ -655,11 +655,11 @@ void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *ne break; case 0xBF800000: /* -1.0f */ *sel = V_SQ_ALU_SRC_1; - *neg ^= 1; + *neg ^= !abs; break; case 0xBF000000: /* -0.5f */ *sel = V_SQ_ALU_SRC_0_5; - *neg ^= 1; + *neg ^= !abs; break; default: *sel = V_SQ_ALU_SRC_LITERAL; @@ -1208,7 +1208,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc, } if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) r600_bytecode_special_constants(nalu->src[i].value, - &nalu->src[i].sel, &nalu->src[i].neg); + &nalu->src[i].sel, &nalu->src[i].neg, nalu->src[i].abs); } if (nalu->dst.sel >= bc->ngpr) { bc->ngpr = nalu->dst.sel + 1; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 7cf3a090908..d48ad1ebf01 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -255,7 +255,7 @@ int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, unsigned type); void r600_bytecode_special_constants(uint32_t value, - unsigned *sel, unsigned *neg); + unsigned *sel, unsigned *neg, unsigned abs); void r600_bytecode_disasm(struct r600_bytecode *bc); void r600_bytecode_alu_read(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 9a97de9965e..9f4cda2c142 
100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -344,6 +344,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; /* Stream output. */ diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8efe902a329..fc6335ae8bc 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -162,10 +162,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx, goto error; } - /* disable SB for geom shaders on R6xx/R7xx due to some mysterious gs piglit regressions with it enabled. */ - if (rctx->b.chip_class <= R700) { - use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY); - } /* disable SB for shaders using doubles */ use_sb &= !shader->shader.uses_doubles; @@ -1008,7 +1004,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx, (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; - r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); + r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs); if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) return; } diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index f341ecb41a5..0dc6c918331 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -443,6 +443,27 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, return &rbuffer->b.b; } +struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen, + unsigned bind, + unsigned usage, + unsigned size, + unsigned alignment) +{ + struct pipe_resource 
buffer; + + memset(&buffer, 0, sizeof buffer); + buffer.target = PIPE_BUFFER; + buffer.format = PIPE_FORMAT_R8_UNORM; + buffer.bind = bind; + buffer.usage = usage; + buffer.flags = 0; + buffer.width0 = size; + buffer.height0 = 1; + buffer.depth0 = 1; + buffer.array_size = 1; + return r600_buffer_create(screen, &buffer, alignment); +} + struct pipe_resource * r600_buffer_from_user_memory(struct pipe_screen *screen, const struct pipe_resource *templ, diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 7ac94caad9f..0ad36849645 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -360,6 +360,8 @@ static const struct debug_named_value common_debug_options[] = { { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." }, { "nowc", DBG_NO_WC, "Disable GTT write combining" }, { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." }, + { "nodcc", DBG_NO_DCC, "Disable DCC." }, + { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." 
}, DEBUG_NAMED_VALUE_END /* must be last */ }; @@ -416,6 +418,7 @@ static const char* r600_get_chip_name(struct r600_common_screen *rscreen) case CHIP_ICELAND: return "AMD ICELAND"; case CHIP_CARRIZO: return "AMD CARRIZO"; case CHIP_FIJI: return "AMD FIJI"; + case CHIP_STONEY: return "AMD STONEY"; default: return "AMD unknown"; } } @@ -540,6 +543,11 @@ const char *r600_get_llvm_processor_name(enum radeon_family family) case CHIP_ICELAND: return "iceland"; case CHIP_CARRIZO: return "carrizo"; case CHIP_FIJI: return "fiji"; +#if HAVE_LLVM <= 0x0307 + case CHIP_STONEY: return "carrizo"; +#else + case CHIP_STONEY: return "stoney"; +#endif default: return ""; } } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index b58b500bd76..c300c0b3332 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -99,6 +99,8 @@ #define DBG_INFO (1llu << 40) #define DBG_NO_WC (1llu << 41) #define DBG_CHECK_VM (1llu << 42) +#define DBG_NO_DCC (1llu << 43) +#define DBG_NO_DCC_CLEAR (1llu << 44) #define R600_MAP_BUFFER_ALIGNMENT 64 @@ -214,6 +216,7 @@ struct r600_texture { struct r600_fmask_info fmask; struct r600_cmask_info cmask; struct r600_resource *cmask_buffer; + struct r600_resource *dcc_buffer; unsigned cb_color_info; /* fast clear enable bit */ unsigned color_clear_value[2]; @@ -243,6 +246,7 @@ struct r600_surface { unsigned cb_color_dim; /* EG only */ unsigned cb_color_pitch; /* EG and later */ unsigned cb_color_slice; /* EG and later */ + unsigned cb_dcc_base; /* VI and later */ unsigned cb_color_attrib; /* EG and later */ unsigned cb_dcc_control; /* VI and later */ unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */ @@ -489,6 +493,11 @@ bool r600_init_resource(struct r600_common_screen *rscreen, struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ, unsigned alignment); +struct 
pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen, + unsigned bind, + unsigned usage, + unsigned size, + unsigned alignment); struct pipe_resource * r600_buffer_from_user_memory(struct pipe_screen *screen, const struct pipe_resource *templ, diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index fc69f48bb70..edfdfe33187 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -268,6 +268,7 @@ static void r600_texture_destroy(struct pipe_screen *screen, if (rtex->cmask_buffer != &rtex->resource) { pipe_resource_reference((struct pipe_resource**)&rtex->cmask_buffer, NULL); } + pipe_resource_reference((struct pipe_resource**)&rtex->dcc_buffer, NULL); pb_reference(&resource->buf, NULL); FREE(rtex); } @@ -482,6 +483,25 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1); } +static void vi_texture_alloc_dcc_separate(struct r600_common_screen *rscreen, + struct r600_texture *rtex) +{ + if (rscreen->debug_flags & DBG_NO_DCC) + return; + + rtex->dcc_buffer = (struct r600_resource *) + r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, + PIPE_USAGE_DEFAULT, rtex->surface.dcc_size, rtex->surface.dcc_alignment); + if (rtex->dcc_buffer == NULL) { + return; + } + + r600_screen_clear_buffer(rscreen, &rtex->dcc_buffer->b.b, 0, rtex->surface.dcc_size, + 0xFFFFFFFF, true); + + rtex->cb_color_info |= VI_S_028C70_DCC_ENABLE(1); +} + static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, struct r600_texture *rtex) { @@ -621,6 +641,8 @@ r600_texture_create_object(struct pipe_screen *screen, return NULL; } } + if (rtex->surface.dcc_size) + vi_texture_alloc_dcc_separate(rscreen, rtex); } /* Now create the backing buffer. 
*/ @@ -1219,6 +1241,81 @@ static void evergreen_set_clear_color(struct r600_texture *rtex, memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t)); } +static void vi_get_fast_clear_parameters(enum pipe_format surface_format, + const union pipe_color_union *color, + uint32_t* reset_value, + bool* clear_words_needed) +{ + bool values[4] = {}; + int i; + bool main_value = false; + bool extra_value = false; + int extra_channel; + const struct util_format_description *desc = util_format_description(surface_format); + + *clear_words_needed = true; + *reset_value = 0x20202020U; + + /* If we want to clear without needing a fast clear eliminate step, we + * can set each channel to 0 or 1 (or 0/max for integer formats). We + * have two sets of flags, one for the last or first channel(extra) and + * one for the other channels(main). + */ + + if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT || + surface_format == PIPE_FORMAT_B5G6R5_UNORM || + surface_format == PIPE_FORMAT_B5G6R5_SRGB) { + extra_channel = -1; + } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { + if(r600_translate_colorswap(surface_format) <= 1) + extra_channel = desc->nr_channels - 1; + else + extra_channel = 0; + } else + return; + + for (i = 0; i < 4; ++i) { + int index = desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X; + + if (desc->swizzle[i] < UTIL_FORMAT_SWIZZLE_X || + desc->swizzle[i] > UTIL_FORMAT_SWIZZLE_W) + continue; + + if (util_format_is_pure_sint(surface_format)) { + values[i] = color->i[i] != 0; + if (color->i[i] != 0 && color->i[i] != INT32_MAX) + return; + } else if (util_format_is_pure_uint(surface_format)) { + values[i] = color->ui[i] != 0U; + if (color->ui[i] != 0U && color->ui[i] != UINT32_MAX) + return; + } else { + values[i] = color->f[i] != 0.0F; + if (color->f[i] != 0.0F && color->f[i] != 1.0F) + return; + } + + if (index == extra_channel) + extra_value = values[i]; + else + main_value = values[i]; + } + + for (int i = 0; i < 4; ++i) + if (values[i] != main_value && + desc->swizzle[i] 
- UTIL_FORMAT_SWIZZLE_X != extra_channel && + desc->swizzle[i] >= UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W) + return; + + *clear_words_needed = false; + if (main_value) + *reset_value |= 0x80808080U; + + if (extra_value) + *reset_value |= 0x40404040U; +} + void evergreen_do_fast_color_clear(struct r600_common_context *rctx, struct pipe_framebuffer_state *fb, struct r600_atom *fb_state, @@ -1272,18 +1369,36 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, continue; } - /* ensure CMASK is enabled */ - r600_texture_alloc_cmask_separate(rctx->screen, tex); - if (tex->cmask.size == 0) { - continue; + if (tex->dcc_buffer) { + uint32_t reset_value; + bool clear_words_needed; + + if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR) + continue; + + vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed); + + rctx->clear_buffer(&rctx->b, &tex->dcc_buffer->b.b, + 0, tex->surface.dcc_size, reset_value, true); + + if (clear_words_needed) + tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; + } else { + /* ensure CMASK is enabled */ + r600_texture_alloc_cmask_separate(rctx->screen, tex); + if (tex->cmask.size == 0) { + continue; + } + + /* Do the fast clear. */ + rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, + tex->cmask.offset, tex->cmask.size, 0, true); + + tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; } - /* Do the fast clear. 
*/ evergreen_set_clear_color(tex, fb->cbufs[i]->format, color); - rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, - tex->cmask.offset, tex->cmask.size, 0, true); - tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; if (dirty_cbufs) *dirty_cbufs |= 1 << i; rctx->set_atom_dirty(rctx, fb_state, true); diff --git a/src/gallium/drivers/radeon/r600d_common.h b/src/gallium/drivers/radeon/r600d_common.h index 115042d153e..a3d182cd30f 100644 --- a/src/gallium/drivers/radeon/r600d_common.h +++ b/src/gallium/drivers/radeon/r600d_common.h @@ -202,6 +202,7 @@ #define EG_S_028C70_FAST_CLEAR(x) (((x) & 0x1) << 17) #define SI_S_028C70_FAST_CLEAR(x) (((x) & 0x1) << 13) +#define VI_S_028C70_DCC_ENABLE(x) (((x) & 0x1) << 28) /*CIK+*/ #define R_0300FC_CP_STRMOUT_CNTL 0x0300FC diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index c3ac7e7f2ef..33b01361aa5 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -478,6 +478,8 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO) result.sps_info_flags |= 1 << 9; + if (pic->UseRefPicList == true) + result.sps_info_flags |= 1 << 10; result.chroma_format = pic->pps->sps->chroma_format_idc; result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; @@ -586,6 +588,11 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); + for (i = 0 ; i < 2 ; i++) { + for (int j = 0 ; j < 15 ; j++) + result.direct_reflist[i][j] = pic->RefPicList[i][j]; + } + /* TODO result.highestTid; result.isNonRef; diff --git a/src/gallium/drivers/radeon/radeon_uvd.h b/src/gallium/drivers/radeon/radeon_uvd.h index 
452fbd60880..9cc0a694c30 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.h +++ b/src/gallium/drivers/radeon/radeon_uvd.h @@ -233,6 +233,15 @@ struct ruvd_h265 { uint8_t highestTid; uint8_t isNonRef; + + uint8_t p010_mode; + uint8_t msb_mode; + uint8_t luma_10to8; + uint8_t chroma_10to8; + uint8_t sclr_luma10to8; + uint8_t sclr_chroma10to8; + + uint8_t direct_reflist[2][15]; }; struct ruvd_vc1 { diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c index 3a1834b948f..32bfc32073b 100644 --- a/src/gallium/drivers/radeon/radeon_video.c +++ b/src/gallium/drivers/radeon/radeon_video.c @@ -205,11 +205,12 @@ int rvid_get_video_param(struct pipe_screen *screen, enum pipe_video_cap param) { struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; + enum pipe_video_format codec = u_reduce_video_profile(profile); if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { switch (param) { case PIPE_VIDEO_CAP_SUPPORTED: - return u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC && + return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC && rvce_is_fw_version_supported(rscreen); case PIPE_VIDEO_CAP_NPOT_TEXTURES: return 1; @@ -232,38 +233,19 @@ int rvid_get_video_param(struct pipe_screen *screen, } } - /* UVD 2.x limits */ - if (rscreen->family < CHIP_PALM) { - enum pipe_video_format codec = u_reduce_video_profile(profile); - switch (param) { - case PIPE_VIDEO_CAP_SUPPORTED: - /* no support for MPEG4 */ - return codec != PIPE_VIDEO_FORMAT_MPEG4 && - /* FIXME: VC-1 simple/main profile is broken */ - profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE && - profile != PIPE_VIDEO_PROFILE_VC1_MAIN; - case PIPE_VIDEO_CAP_PREFERS_INTERLACED: - case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: - /* MPEG2 only with shaders and no support for - interlacing on R6xx style UVD */ - return codec != PIPE_VIDEO_FORMAT_MPEG12 && - rscreen->family > CHIP_RV770; - default: - break; - } - } - switch (param) { case PIPE_VIDEO_CAP_SUPPORTED: - switch 
(u_reduce_video_profile(profile)) { + switch (codec) { case PIPE_VIDEO_FORMAT_MPEG12: case PIPE_VIDEO_FORMAT_MPEG4: case PIPE_VIDEO_FORMAT_MPEG4_AVC: - return entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE; + if (rscreen->family < CHIP_PALM) + /* no support for MPEG4 */ + return codec != PIPE_VIDEO_FORMAT_MPEG4; + return true; case PIPE_VIDEO_FORMAT_VC1: /* FIXME: VC-1 simple/main profile is broken */ - return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED && - entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE; + return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED; case PIPE_VIDEO_FORMAT_HEVC: /* Carrizo only supports HEVC Main */ return rscreen->family >= CHIP_CARRIZO && @@ -280,13 +262,17 @@ int rvid_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_PREFERED_FORMAT: return PIPE_FORMAT_NV12; case PIPE_VIDEO_CAP_PREFERS_INTERLACED: - if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC) - return false; //The hardware doesn't support interlaced HEVC. - return true; case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: - if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC) - return false; //The hardware doesn't support interlaced HEVC. - return true; + if (rscreen->family < CHIP_PALM) { + /* MPEG2 only with shaders and no support for + interlacing on R6xx style UVD */ + return codec != PIPE_VIDEO_FORMAT_MPEG12 && + rscreen->family > CHIP_RV770; + } else { + if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC) + return false; //The firmware doesn't support interlaced HEVC. 
+ return true; + } case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: return true; case PIPE_VIDEO_CAP_MAX_LEVEL: diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index b91e1adf41d..8bf1e15f3be 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -137,6 +137,7 @@ enum radeon_family { CHIP_ICELAND, CHIP_CARRIZO, CHIP_FIJI, + CHIP_STONEY, CHIP_LAST, }; @@ -331,6 +332,7 @@ struct radeon_surf_level { uint32_t nblk_z; uint32_t pitch_bytes; uint32_t mode; + uint64_t dcc_offset; }; struct radeon_surf { @@ -366,6 +368,9 @@ struct radeon_surf { uint32_t stencil_tiling_index[RADEON_SURF_MAX_LEVEL]; uint32_t pipe_config; uint32_t num_banks; + + uint64_t dcc_size; + uint64_t dcc_alignment; }; struct radeon_bo_list_item { diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index 6454b8ce8c0..e53af1dd6b5 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -242,7 +242,8 @@ void cik_sdma_copy(struct pipe_context *ctx, if (src->format != dst->format || rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 || - (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level)) { + (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) || + rdst->dcc_buffer || rsrc->dcc_buffer) { goto fallback; } diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 082ea850675..fce014a1e6b 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -326,7 +326,7 @@ void si_decompress_color_textures(struct si_context *sctx, assert(view); tex = (struct r600_texture *)view->texture; - assert(tex->cmask.size || tex->fmask.size); + assert(tex->cmask.size || tex->fmask.size || tex->dcc_buffer); si_blit_decompress_color(&sctx->b.b, tex, view->u.tex.first_level, view->u.tex.last_level, @@ -455,7 
+455,7 @@ static void si_decompress_subresource(struct pipe_context *ctx, si_blit_decompress_depth_in_place(sctx, rtex, true, level, level, first_layer, last_layer); - } else if (rtex->fmask.size || rtex->cmask.size) { + } else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_buffer) { si_blit_decompress_color(ctx, rtex, level, level, first_layer, last_layer); } @@ -507,7 +507,7 @@ void si_resource_copy_region(struct pipe_context *ctx, util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz); util_blitter_default_src_texture(&src_templ, src, src_level); - if (util_format_is_compressed(src->format) && + if (util_format_is_compressed(src->format) || util_format_is_compressed(dst->format)) { unsigned blocksize = util_format_get_blocksize(src->format); @@ -536,7 +536,7 @@ void si_resource_copy_region(struct pipe_context *ctx, src_force_level = src_level; } else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src) || /* also *8_SNORM has precision issues, use UNORM instead */ - util_format_is_snorm(src->format)) { + util_format_is_snorm8(src->format)) { if (util_format_is_subsampled_422(src->format)) { src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT; dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT; @@ -675,7 +675,8 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx, info->src.box.depth == 1 && dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D && !(dst->surface.flags & RADEON_SURF_SCANOUT) && - (!dst->cmask.size || !dst->dirty_level_mask) /* dst cannot be fast-cleared */) { + (!dst->cmask.size || !dst->dirty_level_mask) && /* dst cannot be fast-cleared */ + !dst->dcc_buffer) { si_blitter_begin(ctx, SI_COLOR_RESOLVE | (info->render_condition_enable ? 
0 : SI_DISABLE_RENDER_COND)); util_blitter_custom_resolve_color(sctx->blitter, diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 13738da5e2c..a8ff6f27319 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -181,6 +181,11 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader, rview->resource, RADEON_USAGE_READ, r600_get_sampler_view_priority(rview->resource)); + if (rview->dcc_buffer && rview->dcc_buffer != rview->resource) + radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, + rview->dcc_buffer, RADEON_USAGE_READ, + RADEON_PRIO_DCC); + pipe_sampler_view_reference(&views->views[slot], view); memcpy(views->desc.list + slot*8, view_desc, 8*4); views->desc.enabled_mask |= 1llu << slot; @@ -229,7 +234,8 @@ static void si_set_sampler_views(struct pipe_context *ctx, } else { samplers->depth_texture_mask &= ~(1 << slot); } - if (rtex->cmask.size || rtex->fmask.size) { + if (rtex->cmask.size || rtex->fmask.size || + (rtex->dcc_buffer && rtex->dirty_level_mask)) { samplers->compressed_colortex_mask |= 1 << slot; } else { samplers->compressed_colortex_mask &= ~(1 << slot); diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index 31b0b41e5a4..581e89f42d8 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -248,7 +248,8 @@ void si_dma_copy(struct pipe_context *ctx, if (src->format != dst->format || src_box->depth > 1 || (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) || rdst->cmask.size || rdst->fmask.size || - rsrc->cmask.size || rsrc->fmask.size) { + rsrc->cmask.size || rsrc->fmask.size || + rdst->dcc_buffer || rsrc->dcc_buffer) { goto fallback; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 5f910c95ef3..60baad3d13c 100644 --- 
a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -55,8 +55,6 @@ static void si_destroy_context(struct pipe_context *context) if (sctx->pstipple_sampler_state) sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state); - if (sctx->dummy_pixel_shader) - sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader); if (sctx->fixed_func_tcs_shader.cso) sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso); if (sctx->custom_dsa_flush) @@ -300,6 +298,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: @@ -578,6 +577,33 @@ static bool si_initialize_pipe_config(struct si_screen *sscreen) return true; } +static bool si_init_gs_info(struct si_screen *sscreen) +{ + switch (sscreen->b.family) { + case CHIP_OLAND: + case CHIP_HAINAN: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: + case CHIP_ICELAND: + case CHIP_CARRIZO: + case CHIP_STONEY: + sscreen->gs_table_depth = 16; + return true; + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_TONGA: + case CHIP_FIJI: + sscreen->gs_table_depth = 32; + return true; + default: + return false; + } +} + struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); @@ -595,7 +621,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.b.resource_create = r600_resource_create_common; if (!r600_common_screen_init(&sscreen->b, ws) || - !si_initialize_pipe_config(sscreen)) { + !si_initialize_pipe_config(sscreen) || + !si_init_gs_info(sscreen)) { FREE(sscreen); return NULL; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 
d7a2282952a..42cd8803c36 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -42,6 +42,7 @@ #define SI_BASE_VERTEX_UNKNOWN INT_MIN #define SI_RESTART_INDEX_UNKNOWN INT_MIN #define SI_NUM_SMOOTH_AA_SAMPLES 8 +#define SI_GS_PER_ES 128 /* Instruction cache. */ #define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0) @@ -85,6 +86,7 @@ struct si_compute; struct si_screen { struct r600_common_screen b; + unsigned gs_table_depth; }; struct si_blend_color { @@ -96,6 +98,7 @@ struct si_sampler_view { struct pipe_sampler_view base; struct list_head list; struct r600_resource *resource; + struct r600_resource *dcc_buffer; /* [0..7] = image descriptor * [4..7] = buffer descriptor */ uint32_t state[8]; @@ -203,9 +206,6 @@ struct si_context { struct si_pm4_state *init_config; bool init_config_has_vgt_flush; struct si_pm4_state *vgt_shader_config[4]; - /* With rasterizer discard, there doesn't have to be a pixel shader. - * In that case, we bind this one: */ - void *dummy_pixel_shader; /* shaders */ struct si_shader_ctx_state ps_shader; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 243bdc6e6d7..18b64056bc7 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -266,6 +266,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at * Reproducible with Unigine Heaven 4.0 and drirc missing. */ if (blend->dual_src_blend && + sctx->ps_shader.cso && (sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3) mask = 0; @@ -697,6 +698,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, rs->clamp_fragment_color = state->clamp_fragment_color; rs->flatshade = state->flatshade; rs->sprite_coord_enable = state->sprite_coord_enable; + rs->rasterizer_discard = state->rasterizer_discard; rs->pa_sc_line_stipple = state->line_stipple_enable ? 
S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; @@ -1924,8 +1926,21 @@ static void si_initialize_color_surface(struct si_context *sctx, surf->cb_color_info = color_info; surf->cb_color_attrib = color_attrib; - if (sctx->b.chip_class >= VI) - surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1); + if (sctx->b.chip_class >= VI && rtex->dcc_buffer) { + unsigned max_uncompressed_block_size = 2; + uint64_t dcc_offset = rtex->surface.level[level].dcc_offset; + + if (rtex->surface.nsamples > 1) { + if (rtex->surface.bpe == 1) + max_uncompressed_block_size = 0; + else if (rtex->surface.bpe == 2) + max_uncompressed_block_size = 1; + } + + surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) | + S_028C78_INDEPENDENT_64B_BLOCKS(1); + surf->cb_dcc_base = (rtex->dcc_buffer->gpu_address + dcc_offset) >> 8; + } if (rtex->fmask.size) { surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8; @@ -2249,6 +2264,12 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom RADEON_PRIO_CMASK); } + if (tex->dcc_buffer && tex->dcc_buffer != &tex->resource) { + radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, + tex->dcc_buffer, RADEON_USAGE_READWRITE, + RADEON_PRIO_DCC); + } + radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, sctx->b.chip_class >= VI ? 
14 : 13); radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ @@ -2266,7 +2287,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ if (sctx->b.chip_class >= VI) - radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */ + radeon_emit(cs, cb->cb_dcc_base); /* R_028C94_CB_COLOR0_DCC_BASE */ } /* set CB_COLOR1_INFO for possible dual-src blending */ if (i == 1 && state->cbufs[0] && @@ -2633,8 +2654,18 @@ si_create_sampler_view_custom(struct pipe_context *ctx, view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1)); view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) | S_008F24_LAST_ARRAY(last_layer)); - view->state[6] = 0; - view->state[7] = 0; + + if (tmp->dcc_buffer) { + uint64_t dcc_offset = surflevel[base_level].dcc_offset; + unsigned swap = r600_translate_colorswap(pipe_format); + + view->state[6] = S_008F28_COMPRESSION_EN(1) | S_008F28_ALPHA_IS_ON_MSB(swap <= 1); + view->state[7] = (tmp->dcc_buffer->gpu_address + dcc_offset) >> 8; + view->dcc_buffer = tmp->dcc_buffer; + } else { + view->state[6] = 0; + view->state[7] = 0; + } /* Initialize the sampler view for FMASK. */ if (tmp->fmask.size) { @@ -3262,7 +3293,7 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); /* FIXME calculate these values somehow ??? 
*/ - si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80); + si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); @@ -3336,6 +3367,7 @@ static void si_init_config(struct si_context *sctx) break; case CHIP_KABINI: case CHIP_MULLINS: + case CHIP_STONEY: raster_config = 0x00000000; raster_config_1 = 0x00000000; break; @@ -3406,7 +3438,8 @@ static void si_init_config(struct si_context *sctx) if (sctx->b.chip_class >= VI) { si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL, - S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1)); + S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | + S_028424_OVERWRITE_COMBINER_WATERMARK(4)); si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index fba6619d2fd..8b9a311cd3f 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -61,6 +61,7 @@ struct si_state_rasterizer { bool poly_smooth; bool uses_poly_offset; bool clamp_fragment_color; + bool rasterizer_discard; }; struct si_dsa_stencil_ref_part { diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index ce6c98c3124..cf0891a2ab7 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -223,6 +223,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; unsigned prim = info->mode; unsigned primgroup_size = 128; /* recommended without a GS */ + unsigned max_primgroup_in_wave = 2; /* SWITCH_ON_EOP(0) is always preferable. 
*/ bool wd_switch_on_eop = false; @@ -246,13 +247,10 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, /* primgroup_size must be set to a multiple of NUM_PATCHES */ primgroup_size = (primgroup_size / num_patches) * num_patches; - /* SWITCH_ON_EOI must be set if PrimID is used. - * If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */ + /* SWITCH_ON_EOI must be set if PrimID is used. */ if ((sctx->tcs_shader.cso && sctx->tcs_shader.cso->info.uses_primid) || - sctx->tes_shader.cso->info.uses_primid) { + sctx->tes_shader.cso->info.uses_primid) ia_switch_on_eoi = true; - partial_es_wave = true; - } /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */ if ((sctx->b.family == CHIP_TAHITI || @@ -269,10 +267,6 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, wd_switch_on_eop = true; } - if (sctx->b.streamout.streamout_enabled || - sctx->b.streamout.prims_gen_query_enabled) - partial_vs_wave = true; - if (sctx->b.chip_class >= CIK) { /* WD_SWITCH_ON_EOP has no effect on GPUs with less than * 4 shader engines. Set 1 to pass the assertion below. @@ -282,7 +276,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, prim == PIPE_PRIM_LINE_LOOP || prim == PIPE_PRIM_TRIANGLE_FAN || prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY || - info->primitive_restart) + info->primitive_restart || + info->count_from_stream_output) wd_switch_on_eop = true; /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0. @@ -292,14 +287,34 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, (info->indirect || info->instance_count > 1)) wd_switch_on_eop = true; - /* USE_OPAQUE doesn't work when WD_SWITCH_ON_EOP is 0. */ - if (info->count_from_stream_output) - wd_switch_on_eop = true; + /* Required on CIK and later. */ + if (sctx->b.screen->info.max_se > 2 && !wd_switch_on_eop) + ia_switch_on_eoi = true; + + /* Required by Hawaii and, for some special cases, by VI. 
*/ + if (ia_switch_on_eoi && + (sctx->b.family == CHIP_HAWAII || + (sctx->b.chip_class == VI && + (sctx->gs_shader.cso || max_primgroup_in_wave != 2)))) + partial_vs_wave = true; + + /* Instancing bug on Bonaire. */ + if (sctx->b.family == CHIP_BONAIRE && ia_switch_on_eoi && + (info->indirect || info->instance_count > 1)) + partial_vs_wave = true; /* If the WD switch is false, the IA switch must be false too. */ assert(wd_switch_on_eop || !ia_switch_on_eop); } + /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */ + if (ia_switch_on_eoi) + partial_es_wave = true; + + /* GS requirement. */ + if (SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3) + partial_es_wave = true; + /* Hw bug with single-primitive instances and SWITCH_ON_EOI * on multi-SE chips. */ if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi && @@ -308,18 +323,14 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, u_prims_for_vertices(info->mode, info->count) <= 1))) sctx->b.flags |= SI_CONTEXT_VGT_FLUSH; - /* Instancing bug on 2 SE chips. */ - if (sctx->b.screen->info.max_se == 2 && ia_switch_on_eoi && - (info->indirect || info->instance_count > 1)) - partial_vs_wave = true; - return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) | S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) | S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) | S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) | - S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 2 : 0); + S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 
+ max_primgroup_in_wave : 0); } static unsigned si_get_ls_hs_config(struct si_context *sctx, @@ -636,6 +647,17 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom) S_0085F0_CB5_DEST_BASE_ENA(1) | S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1); + + /* Necessary for DCC */ + if (sctx->chip_class >= VI) { + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0) | compute); + radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) | + EVENT_INDEX(5)); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + } } if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) { cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | @@ -728,6 +750,7 @@ static void si_get_draw_start_count(struct si_context *sctx, void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct si_context *sctx = (struct si_context *)ctx; + struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct pipe_index_buffer ib = {}; unsigned mask; @@ -735,7 +758,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) (info->indexed || !info->count_from_stream_output)) return; - if (!sctx->ps_shader.cso || !sctx->vs_shader.cso) { + if (!sctx->vs_shader.cso) { + assert(0); + return; + } + if (!sctx->ps_shader.cso && (!rs || !rs->rasterizer_discard)) { assert(0); return; } diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index eea00e0fafc..4a3a04caa52 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -799,11 +799,11 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state) struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = state; - if (sctx->vs_shader.cso == sel || !sel) + if (sctx->vs_shader.cso == sel) return; sctx->vs_shader.cso = sel; - sctx->vs_shader.current = sel->first_variant; + sctx->vs_shader.current = sel ? 
sel->first_variant : NULL; si_mark_atom_dirty(sctx, &sctx->clip_regs); si_update_viewports_and_scissors(sctx); } @@ -864,16 +864,6 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state) si_update_viewports_and_scissors(sctx); } -static void si_make_dummy_ps(struct si_context *sctx) -{ - if (!sctx->dummy_pixel_shader) { - sctx->dummy_pixel_shader = - util_make_fragment_cloneinput_shader(&sctx->b.b, 0, - TGSI_SEMANTIC_GENERIC, - TGSI_INTERPOLATE_CONSTANT); - } -} - static void si_bind_ps_shader(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; @@ -883,14 +873,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) if (sctx->ps_shader.cso == sel) return; - /* use a dummy shader if binding a NULL shader */ - if (!sel) { - si_make_dummy_ps(sctx); - sel = sctx->dummy_pixel_shader; - } - sctx->ps_shader.cso = sel; - sctx->ps_shader.current = sel->first_variant; + sctx->ps_shader.current = sel ? sel->first_variant : NULL; si_mark_atom_dirty(sctx, &sctx->cb_target_mask); } @@ -956,13 +940,15 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom) struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; struct si_shader *ps = sctx->ps_shader.current; struct si_shader *vs = si_get_vs_state(sctx); - struct tgsi_shader_info *psinfo = &ps->selector->info; + struct tgsi_shader_info *psinfo; struct tgsi_shader_info *vsinfo = &vs->selector->info; unsigned i, j, tmp, num_written = 0; - if (!ps->nparam) + if (!ps || !ps->nparam) return; + psinfo = &ps->selector->info; + radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps->nparam); for (i = 0; i < psinfo->num_inputs; i++) { @@ -1025,7 +1011,12 @@ static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom { struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; struct si_shader *ps = sctx->ps_shader.current; - unsigned input_ena = ps->spi_ps_input_ena; + unsigned input_ena; + + if (!ps) + return; + + 
input_ena = ps->spi_ps_input_ena; /* we need to enable at least one of them, otherwise we hang the GPU */ assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) || @@ -1531,23 +1522,38 @@ bool si_update_shaders(struct si_context *sctx) si_update_vgt_shader_config(sctx); - r = si_shader_select(ctx, &sctx->ps_shader); - if (r) - return false; - si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4); - - if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || - sctx->sprite_coord_enable != rs->sprite_coord_enable || - sctx->flatshade != rs->flatshade) { - sctx->sprite_coord_enable = rs->sprite_coord_enable; - sctx->flatshade = rs->flatshade; - si_mark_atom_dirty(sctx, &sctx->spi_map); - } + if (sctx->ps_shader.cso) { + r = si_shader_select(ctx, &sctx->ps_shader); + if (r) + return false; + si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4); + + if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || + sctx->sprite_coord_enable != rs->sprite_coord_enable || + sctx->flatshade != rs->flatshade) { + sctx->sprite_coord_enable = rs->sprite_coord_enable; + sctx->flatshade = rs->flatshade; + si_mark_atom_dirty(sctx, &sctx->spi_map); + } + + if (si_pm4_state_changed(sctx, ps) || + sctx->force_persample_interp != rs->force_persample_interp) { + sctx->force_persample_interp = rs->force_persample_interp; + si_mark_atom_dirty(sctx, &sctx->spi_ps_input); + } + + if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) { + sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control; + si_mark_atom_dirty(sctx, &sctx->db_render_state); + } + + if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) { + sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing; + si_mark_atom_dirty(sctx, &sctx->msaa_config); - if (si_pm4_state_changed(sctx, ps) || - sctx->force_persample_interp != rs->force_persample_interp) { - sctx->force_persample_interp = rs->force_persample_interp; - 
si_mark_atom_dirty(sctx, &sctx->spi_ps_input); + if (sctx->b.chip_class == SI) + si_mark_atom_dirty(sctx, &sctx->db_render_state); + } } if (si_pm4_state_changed(sctx, ls) || @@ -1559,19 +1565,6 @@ bool si_update_shaders(struct si_context *sctx) if (!si_update_spi_tmpring_size(sctx)) return false; } - - if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) { - sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control; - si_mark_atom_dirty(sctx, &sctx->db_render_state); - } - - if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) { - sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing; - si_mark_atom_dirty(sctx, &sctx->msaa_config); - - if (sctx->b.chip_class == SI) - si_mark_atom_dirty(sctx, &sctx->db_render_state); - } return true; } diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index e7006d2fa0d..c0fc82b2f2c 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -249,6 +249,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index d7a3360713f..23ec4ef3cb6 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -214,10 +214,10 @@ prepare_shader_sampling( row_stride[j] = sp_tex->stride[j]; img_stride[j] = sp_tex->img_stride[j]; } - if (view->target == PIPE_TEXTURE_1D_ARRAY || - view->target == PIPE_TEXTURE_2D_ARRAY || - view->target == PIPE_TEXTURE_CUBE || - view->target == PIPE_TEXTURE_CUBE_ARRAY) { + if (tex->target == PIPE_TEXTURE_1D_ARRAY || + tex->target == 
PIPE_TEXTURE_2D_ARRAY || + tex->target == PIPE_TEXTURE_CUBE || + tex->target == PIPE_TEXTURE_CUBE_ARRAY) { num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1; for (j = first_level; j <= last_level; j++) { mip_offsets[j] += view->u.tex.first_layer * diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 8a0935062b6..e3e28a3ef32 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1033,6 +1033,7 @@ img_filter_2d_linear_repeat_POT(const struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; + addr.bits.z = sp_sview->base.u.tex.first_layer; /* Can we fetch all four at once: */ @@ -1081,6 +1082,7 @@ img_filter_2d_nearest_repeat_POT(const struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; + addr.bits.z = sp_sview->base.u.tex.first_layer; out = get_texel_2d_no_border(sp_sview, addr, x0, y0); for (c = 0; c < TGSI_QUAD_SIZE; c++) @@ -1111,6 +1113,7 @@ img_filter_2d_nearest_clamp_POT(const struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; + addr.bits.z = sp_sview->base.u.tex.first_layer; x0 = util_ifloor(u); if (x0 < 0) @@ -1154,7 +1157,8 @@ img_filter_1d_nearest(const struct sp_sampler_view *sp_sview, sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x); - out = get_texel_2d(sp_sview, sp_samp, addr, x, 0); + out = get_texel_1d_array(sp_sview, sp_samp, addr, x, + sp_sview->base.u.tex.first_layer); for (c = 0; c < TGSI_QUAD_SIZE; c++) rgba[TGSI_NUM_CHANNELS*c] = out[c]; @@ -1215,6 +1219,7 @@ img_filter_2d_nearest(const struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; + addr.bits.z = sp_sview->base.u.tex.first_layer; sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x); sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y); @@ -1396,8 +1401,10 @@ img_filter_1d_linear(const struct sp_sampler_view 
*sp_sview, sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw); - tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, 0); - tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, 0); + tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, + sp_sview->base.u.tex.first_layer); + tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1, + sp_sview->base.u.tex.first_layer); /* interpolate R, G, B, A */ for (c = 0; c < TGSI_QUAD_SIZE; c++) @@ -1523,6 +1530,7 @@ img_filter_2d_linear(const struct sp_sampler_view *sp_sview, addr.value = 0; addr.bits.level = args->level; + addr.bits.z = sp_sview->base.u.tex.first_layer; sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw); sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw); @@ -3252,10 +3260,22 @@ sp_get_texels(const struct sp_sampler_view *sp_sview, switch (sp_sview->base.target) { case PIPE_BUFFER: + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + const int x = CLAMP(v_i[j] + offset[0] + + sp_sview->base.u.buf.first_element, + sp_sview->base.u.buf.first_element, + sp_sview->base.u.buf.last_element); + tx = get_texel_2d_no_border(sp_sview, addr, x, 0); + for (c = 0; c < 4; c++) { + rgba[c][j] = tx[c]; + } + } + break; case PIPE_TEXTURE_1D: for (j = 0; j < TGSI_QUAD_SIZE; j++) { const int x = CLAMP(v_i[j] + offset[0], 0, width - 1); - tx = get_texel_2d_no_border(sp_sview, addr, x, 0); + tx = get_texel_2d_no_border(sp_sview, addr, x, + sp_sview->base.u.tex.first_layer); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; } @@ -3277,7 +3297,8 @@ sp_get_texels(const struct sp_sampler_view *sp_sview, for (j = 0; j < TGSI_QUAD_SIZE; j++) { const int x = CLAMP(v_i[j] + offset[0], 0, width - 1); const int y = CLAMP(v_j[j] + offset[1], 0, height - 1); - tx = get_texel_2d_no_border(sp_sview, addr, x, y); + tx = get_texel_3d_no_border(sp_sview, addr, x, y, + sp_sview->base.u.tex.first_layer); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; } @@ -3307,6 +3328,7 @@ sp_get_texels(const struct 
sp_sampler_view *sp_sview, } break; case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */ + case PIPE_TEXTURE_CUBE_ARRAY: default: assert(!"Unknown or CUBE texture type in TXF processing\n"); break; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index e1ea5df24ca..3347f5f1883 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -127,7 +127,8 @@ softpipe_can_create_resource(struct pipe_screen *screen, */ static boolean softpipe_displaytarget_layout(struct pipe_screen *screen, - struct softpipe_resource *spr) + struct softpipe_resource *spr, + const void *map_front_private) { struct sw_winsys *winsys = softpipe_screen(screen)->winsys; @@ -139,6 +140,7 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, spr->base.width0, spr->base.height0, 64, + map_front_private, &spr->stride[0] ); return spr->dt != NULL; @@ -149,8 +151,9 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, * Create new pipe_resource given the template information. 
*/ static struct pipe_resource * -softpipe_resource_create(struct pipe_screen *screen, - const struct pipe_resource *templat) +softpipe_resource_create_front(struct pipe_screen *screen, + const struct pipe_resource *templat, + const void *map_front_private) { struct softpipe_resource *spr = CALLOC_STRUCT(softpipe_resource); if (!spr) @@ -169,7 +172,7 @@ softpipe_resource_create(struct pipe_screen *screen, if (spr->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) { - if (!softpipe_displaytarget_layout(screen, spr)) + if (!softpipe_displaytarget_layout(screen, spr, map_front_private)) goto fail; } else { @@ -184,6 +187,12 @@ softpipe_resource_create(struct pipe_screen *screen, return NULL; } +static struct pipe_resource * +softpipe_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templat) +{ + return softpipe_resource_create_front(screen, templat, NULL); +} static void softpipe_resource_destroy(struct pipe_screen *pscreen, @@ -514,6 +523,7 @@ void softpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->resource_create = softpipe_resource_create; + screen->resource_create_front = softpipe_resource_create_front; screen->resource_destroy = softpipe_resource_destroy; screen->resource_from_handle = softpipe_resource_from_handle; screen->resource_get_handle = softpipe_resource_get_handle; diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c index d3cf52f08e2..0e1e332d6cb 100644 --- a/src/gallium/drivers/svga/svga_cmd.c +++ b/src/gallium/drivers/svga/svga_cmd.c @@ -1016,6 +1016,8 @@ SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc, *decls = declArray; *ranges = rangeArray; + swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED; + return PIPE_OK; } diff --git a/src/gallium/drivers/svga/svga_cmd_vgpu10.c b/src/gallium/drivers/svga/svga_cmd_vgpu10.c index 596ba953cd2..5c121089f91 100644 --- a/src/gallium/drivers/svga/svga_cmd_vgpu10.c +++ 
b/src/gallium/drivers/svga/svga_cmd_vgpu10.c @@ -535,6 +535,7 @@ SVGA3D_vgpu10_Draw(struct svga_winsys_context *swc, SVGA3D_COPY_BASIC_2(vertexCount, startVertexLocation); + swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED; swc->commit(swc); return PIPE_OK; } @@ -550,6 +551,7 @@ SVGA3D_vgpu10_DrawIndexed(struct svga_winsys_context *swc, SVGA3D_COPY_BASIC_3(indexCount, startIndexLocation, baseVertexLocation); + swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED; swc->commit(swc); return PIPE_OK; } @@ -566,6 +568,7 @@ SVGA3D_vgpu10_DrawInstanced(struct svga_winsys_context *swc, SVGA3D_COPY_BASIC_4(vertexCountPerInstance, instanceCount, startVertexLocation, startInstanceLocation); + swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED; swc->commit(swc); return PIPE_OK; } @@ -584,6 +587,8 @@ SVGA3D_vgpu10_DrawIndexedInstanced(struct svga_winsys_context *swc, startIndexLocation, baseVertexLocation, startInstanceLocation); + + swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED; swc->commit(swc); return PIPE_OK; } @@ -593,6 +598,7 @@ SVGA3D_vgpu10_DrawAuto(struct svga_winsys_context *swc) { SVGA3D_CREATE_COMMAND(DrawAuto, DRAW_AUTO); + swc->hints |= SVGA_HINT_FLAG_DRAW_EMITTED; swc->commit(swc); return PIPE_OK; } diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c index 5635411d938..caf4b17de16 100644 --- a/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/src/gallium/drivers/svga/svga_draw_arrays.c @@ -32,6 +32,7 @@ #include "svga_draw.h" #include "svga_draw_private.h" #include "svga_context.h" +#include "svga_shader.h" #define DBG 0 @@ -206,6 +207,32 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, unsigned gen_prim, gen_size, gen_nr, gen_type; u_generate_func gen_func; enum pipe_error ret = PIPE_OK; + unsigned api_pv = hwtnl->api_pv; + struct svga_context *svga = hwtnl->svga; + + if (svga->curr.rast->templ.flatshade && + svga->state.hw_draw.fs->constant_color_output) { + /* The fragment color is a constant, not per-vertex so the whole + * primitive 
will be the same color (except for possible blending). + * We can ignore the current provoking vertex state and use whatever + * the hardware wants. + */ + api_pv = hwtnl->hw_pv; + + if (hwtnl->api_fillmode == PIPE_POLYGON_MODE_FILL) { + /* Do some simple primitive conversions to avoid index buffer + * generation below. Note that polygons and quads are not directly + * supported by the svga device. Also note, we can only do this + * for flat/constant-colored rendering because of provoking vertex. + */ + if (prim == PIPE_PRIM_POLYGON) { + prim = PIPE_PRIM_TRIANGLE_FAN; + } + else if (prim == PIPE_PRIM_QUADS && count == 4) { + prim = PIPE_PRIM_TRIANGLE_FAN; + } + } + } if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL && prim >= PIPE_PRIM_TRIANGLES) { @@ -226,7 +253,7 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, prim, start, count, - hwtnl->api_pv, + api_pv, hwtnl->hw_pv, &gen_prim, &gen_size, &gen_nr, &gen_func); } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index f6fafca5c0b..5aa7b0d86eb 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -382,6 +382,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; } diff --git a/src/gallium/drivers/svga/svga_shader.h b/src/gallium/drivers/svga/svga_shader.h index efcac408626..f49fdb46d0e 100644 --- a/src/gallium/drivers/svga/svga_shader.h +++ b/src/gallium/drivers/svga/svga_shader.h @@ -155,6 +155,9 @@ struct svga_shader_variant * applied to any of the varyings. */ + /** Is the color output just a constant value? 
(fragment shader only) */ + boolean constant_color_output; + /** For FS-based polygon stipple */ unsigned pstipple_sampler_unit; diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c index 202eee276b7..4c16f4313a0 100644 --- a/src/gallium/drivers/svga/svga_tgsi.c +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -240,6 +240,13 @@ svga_tgsi_vgpu9_translate(struct svga_context *svga, variant->pstipple_sampler_unit = emit.pstipple_sampler_unit; + /* If there was exactly one write to a fragment shader output register + * and it came from a constant buffer, we know all fragments will have + * the same color (except for blending). + */ + variant->constant_color_output = + emit.constant_color_output && emit.num_output_writes == 1; + #if 0 if (!svga_shader_verify(variant->tokens, variant->nr_tokens) || SVGA_DEBUG & DEBUG_TGSI) { diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h index 0b82483ab2e..83f0c8bd4d0 100644 --- a/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -84,6 +84,9 @@ struct svga_shader_emitter int dynamic_branching_level; + unsigned num_output_writes; + boolean constant_color_output; + boolean in_main_func; boolean created_common_immediate; diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 00c91a4fa61..dbb90f7654e 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -99,6 +99,7 @@ translate_dst_register( struct svga_shader_emitter *emit, * Need to lookup a table built at decl time: */ dest = emit->output_map[reg->Register.Index]; + emit->num_output_writes++; break; default: @@ -2103,6 +2104,29 @@ emit_simple_instruction(struct svga_shader_emitter *emit, /** + * TGSI_OPCODE_MOVE is only special-cased here to detect the + * svga_fragment_shader::constant_color_output case. 
+ */ +static boolean +emit_mov(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + const struct tgsi_full_src_register *src = &insn->Src[0]; + const struct tgsi_full_dst_register *dst = &insn->Dst[0]; + + if (emit->unit == PIPE_SHADER_FRAGMENT && + dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == 0 && + src->Register.File == TGSI_FILE_CONSTANT && + !src->Register.Indirect) { + emit->constant_color_output = TRUE; + } + + return emit_simple_instruction(emit, SVGA3DOP_MOV, insn); +} + + +/** * Translate/emit TGSI DDX, DDY instructions. */ static boolean @@ -3045,6 +3069,9 @@ svga_emit_instruction(struct svga_shader_emitter *emit, case TGSI_OPCODE_SSG: return emit_ssg( emit, insn ); + case TGSI_OPCODE_MOV: + return emit_mov( emit, insn ); + default: { unsigned opcode = translate_opcode(insn->Instruction.Opcode); diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index d62f2bbcc96..e70ee689c59 100644 --- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -202,6 +202,9 @@ struct svga_shader_emitter_v10 /* user clip plane constant slot indexes */ unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES]; + unsigned num_output_writes; + boolean constant_color_output; + boolean uses_flat_interp; /* For all shaders: const reg index for RECT coord scaling */ @@ -913,6 +916,8 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, */ assert(sem_name == TGSI_SEMANTIC_COLOR); index = emit->info.output_semantic_index[index]; + + emit->num_output_writes++; } } } @@ -3097,7 +3102,7 @@ emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) unsigned i; unsigned clip_plane_enable = emit->key.clip_plane_enable; unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index; - unsigned num_written_clipdist = emit->info.num_written_clipdistance; + int num_written_clipdist = emit->info.num_written_clipdistance; 
assert(emit->clip_dist_out_index != INVALID_INDEX); assert(emit->clip_dist_tmp_index != INVALID_INDEX); @@ -3109,7 +3114,7 @@ emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) */ emit->clip_dist_tmp_index = INVALID_INDEX; - for (i = 0; i < 2 && num_written_clipdist; i++, num_written_clipdist-=4) { + for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) { tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i); @@ -5573,6 +5578,29 @@ emit_simple(struct svga_shader_emitter_v10 *emit, /** + * We only special case the MOV instruction to try to detect constant + * color writes in the fragment shader. + */ +static boolean +emit_mov(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const struct tgsi_full_src_register *src = &inst->Src[0]; + const struct tgsi_full_dst_register *dst = &inst->Dst[0]; + + if (emit->unit == PIPE_SHADER_FRAGMENT && + dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == 0 && + src->Register.File == TGSI_FILE_CONSTANT && + !src->Register.Indirect) { + emit->constant_color_output = TRUE; + } + + return emit_simple(emit, inst); +} + + +/** * Emit a simple VGPU10 instruction which writes to multiple dest registers, * where TGSI only uses one dest register. 
*/ @@ -5652,7 +5680,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, case TGSI_OPCODE_MAD: case TGSI_OPCODE_MAX: case TGSI_OPCODE_MIN: - case TGSI_OPCODE_MOV: case TGSI_OPCODE_MUL: case TGSI_OPCODE_NOP: case TGSI_OPCODE_NOT: @@ -5677,7 +5704,8 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, /* simple instructions */ return emit_simple(emit, inst); - + case TGSI_OPCODE_MOV: + return emit_mov(emit, inst); case TGSI_OPCODE_EMIT: return emit_vertex(emit, inst); case TGSI_OPCODE_ENDPRIM: @@ -6762,6 +6790,13 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; + /* If there was exactly one write to a fragment shader output register + * and it came from a constant buffer, we know all fragments will have + * the same color (except for blending). + */ + variant->constant_color_output = + emit->constant_color_output && emit->num_output_writes == 1; + /** keep track in the variant if flat interpolation is used * for any of the varyings. */ diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h index c750603989f..3129e46ed06 100644 --- a/src/gallium/drivers/svga/svga_winsys.h +++ b/src/gallium/drivers/svga/svga_winsys.h @@ -85,6 +85,8 @@ struct winsys_handle; #define SVGA_QUERY_FLAG_SET (1 << 0) #define SVGA_QUERY_FLAG_REF (1 << 1) +#define SVGA_HINT_FLAG_DRAW_EMITTED (1 << 0) + /** Opaque surface handle */ struct svga_winsys_surface; @@ -213,6 +215,11 @@ struct svga_winsys_context uint32 cid; /** + * Flags to hint the current context state + */ + uint32 hints; + + /** ** BEGIN new functions for guest-backed surfaces. 
**/ diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c index 6d748010baf..476d2b5b0b1 100644 --- a/src/gallium/drivers/vc4/vc4_cl_dump.c +++ b/src/gallium/drivers/vc4/vc4_cl_dump.c @@ -22,6 +22,7 @@ */ #include "util/u_math.h" +#include "util/u_prim.h" #include "util/macros.h" #include "vc4_context.h" @@ -163,6 +164,26 @@ dump_VC4_PACKET_LOAD_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_ } static void +dump_VC4_PACKET_GL_INDEXED_PRIMITIVE(void *cl, uint32_t offset, uint32_t hw_offset) +{ + uint8_t *b = cl + offset; + uint32_t *count = cl + offset + 1; + uint32_t *ib_offset = cl + offset + 5; + uint32_t *max_index = cl + offset + 9; + + fprintf(stderr, "0x%08x 0x%08x: 0x%02x %s %s\n", + offset, hw_offset, + b[0], (b[0] & VC4_INDEX_BUFFER_U16) ? "16-bit" : "8-bit", + u_prim_name(b[0] & 0x7)); + fprintf(stderr, "0x%08x 0x%08x: %d verts\n", + offset + 1, hw_offset + 1, *count); + fprintf(stderr, "0x%08x 0x%08x: 0x%08x IB offset\n", + offset + 5, hw_offset + 5, *ib_offset); + fprintf(stderr, "0x%08x 0x%08x: 0x%08x max index\n", + offset + 9, hw_offset + 9, *max_index); +} + +static void dump_VC4_PACKET_FLAT_SHADE_FLAGS(void *cl, uint32_t offset, uint32_t hw_offset) { uint32_t *bits = cl + offset; @@ -262,14 +283,14 @@ dump_VC4_PACKET_TILE_RENDERING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t h shorts[1]); const char *format = "???"; - switch ((bytes[0] >> 2) & 3) { - case 0: + switch (VC4_GET_FIELD(shorts[2], VC4_RENDER_CONFIG_FORMAT)) { + case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED: format = "BGR565_DITHERED"; break; - case 1: + case VC4_RENDER_CONFIG_FORMAT_RGBA8888: format = "RGBA8888"; break; - case 2: + case VC4_RENDER_CONFIG_FORMAT_BGR565: format = "BGR565"; break; } @@ -277,29 +298,31 @@ dump_VC4_PACKET_TILE_RENDERING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t h format = "64bit"; const char *tiling = "???"; - switch ((bytes[0] >> 6) & 3) { - case 0: + switch (VC4_GET_FIELD(shorts[2], 
VC4_RENDER_CONFIG_MEMORY_FORMAT)) { + case VC4_TILING_FORMAT_LINEAR: tiling = "linear"; break; - case 1: + case VC4_TILING_FORMAT_T: tiling = "T"; break; - case 2: + case VC4_TILING_FORMAT_LT: tiling = "LT"; break; } - fprintf(stderr, "0x%08x 0x%08x: 0x%02x %s %s %s\n", + fprintf(stderr, "0x%08x 0x%08x: 0x%02x %s %s %s %s\n", offset + 8, hw_offset + 8, bytes[0], format, tiling, - (bytes[0] & VC4_RENDER_CONFIG_MS_MODE_4X) ? "ms" : "ss"); + (shorts[2] & VC4_RENDER_CONFIG_MS_MODE_4X) ? "ms" : "ss", + (shorts[2] & VC4_RENDER_CONFIG_DECIMATE_MODE_4X) ? + "ms_decimate" : "ss_decimate"); const char *earlyz = ""; - if (bytes[1] & (1 << 3)) { + if (shorts[2] & VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE) { earlyz = "early_z disabled"; } else { - if (bytes[1] & (1 << 2)) + if (shorts[2] & VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G) earlyz = "early_z >"; else earlyz = "early_z <"; @@ -356,7 +379,7 @@ static const struct packet_info { PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL), PACKET_DUMP(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL), - PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE), + PACKET_DUMP(VC4_PACKET_GL_INDEXED_PRIMITIVE), PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE), PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE), diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index c7698422951..86f2ce5e608 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -250,10 +250,10 @@ struct vc4_context { bool needs_flush; /** - * Set when needs_flush, and the queued rendering is not just composed - * of full-buffer clears. + * Number of draw calls (not counting full buffer clears) queued in + * the current job. */ - bool draw_call_queued; + uint32_t draw_calls_queued; /** Maximum index buffer valid for the current shader_rec. 
*/ uint32_t max_index; @@ -291,7 +291,10 @@ struct vc4_context { struct vc4_vertex_stateobj *vtx; - struct pipe_blend_color blend_color; + struct { + struct pipe_blend_color f; + uint8_t ub[4]; + } blend_color; struct pipe_stencil_ref stencil_ref; unsigned sample_mask; struct pipe_framebuffer_state framebuffer; diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index a4e5e092b1a..624a236c573 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -25,6 +25,7 @@ #include "util/u_prim.h" #include "util/u_format.h" #include "util/u_pack_color.h" +#include "util/u_upload_mgr.h" #include "indices/u_primconvert.h" #include "vc4_context.h" @@ -100,7 +101,7 @@ vc4_start_draw(struct vc4_context *vc4) VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES)); vc4->needs_flush = true; - vc4->draw_call_queued = true; + vc4->draw_calls_queued++; vc4->draw_width = width; vc4->draw_height = height; @@ -226,6 +227,38 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *i vc4->max_index = max_index; } +/** + * HW-2116 workaround: Flush the batch before triggering the hardware state + * counter wraparound behavior. + * + * State updates are tracked by a global counter which increments at the first + * state update after a draw or a START_BINNING. Tiles can then have their + * state updated at draw time with a set of cheap checks for whether the + * state's copy of the global counter matches the global counter the last time + * that state was written to the tile. + * + * The state counters are relatively small and wrap around quickly, so you + * could get false negatives for needing to update a particular state in the + * tile. To avoid this, the hardware attempts to write all of the state in + * the tile at wraparound time. This apparently is broken, so we just flush + * everything before that behavior is triggered. 
A batch flush is sufficient + * to get our current contents drawn and reset the counters to 0. + * + * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the + * tiles with VC4_PACKET_RETURN_FROM_LIST. + */ +static void +vc4_hw_2116_workaround(struct pipe_context *pctx) +{ + struct vc4_context *vc4 = vc4_context(pctx); + + if (vc4->draw_calls_queued == 0x1ef0) { + perf_debug("Flushing batch due to HW-2116 workaround " + "(too many draw calls per scene\n"); + vc4_flush(pctx); + } +} + static void vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) { @@ -244,6 +277,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) vc4_update_shadow_textures(pctx, &vc4->verttex); vc4_update_shadow_textures(pctx, &vc4->fragtex); + vc4_hw_2116_workaround(pctx); + vc4_get_draw_cl_space(vc4); if (vc4->prim_mode != info->mode) { @@ -285,7 +320,15 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) info->count, &offset); index_size = 2; } else { - prsc = vc4->indexbuf.buffer; + if (vc4->indexbuf.user_buffer) { + prsc = NULL; + u_upload_data(vc4->uploader, 0, + info->count * index_size, + vc4->indexbuf.user_buffer, + &offset, &prsc); + } else { + prsc = vc4->indexbuf.buffer; + } } struct vc4_resource *rsc = vc4_resource(prsc); @@ -300,7 +343,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset); cl_u32(&bcl, vc4->max_index); - if (vc4->indexbuf.index_size == 4) + if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer) pipe_resource_reference(&prsc, NULL); } else { cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE); @@ -343,8 +386,8 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers, /* We can't flag new buffers for clearing once we've queued draws. We * could avoid this by using the 3d engine to clear. 
*/ - if (vc4->draw_call_queued) { - perf_debug("Flushing rendering to process new clear."); + if (vc4->draw_calls_queued) { + perf_debug("Flushing rendering to process new clear.\n"); vc4_flush(pctx); } diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c index 7ebd9f160eb..9ad79c2ea10 100644 --- a/src/gallium/drivers/vc4/vc4_job.c +++ b/src/gallium/drivers/vc4/vc4_job.c @@ -55,7 +55,7 @@ vc4_job_reset(struct vc4_context *vc4) vc4->shader_rec_count = 0; vc4->needs_flush = false; - vc4->draw_call_queued = false; + vc4->draw_calls_queued = 0; /* We have no hardware context saved between our draw calls, so we * need to flag the next draw as needing all state emitted. Emitting diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c index 17b524653bb..373c9e12d11 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c @@ -86,11 +86,11 @@ vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear) } static nir_ssa_def * -vc4_blend_channel(nir_builder *b, - nir_ssa_def **src, - nir_ssa_def **dst, - unsigned factor, - int channel) +vc4_blend_channel_f(nir_builder *b, + nir_ssa_def **src, + nir_ssa_def **dst, + unsigned factor, + int channel) { switch(factor) { case PIPE_BLENDFACTOR_ONE: @@ -146,8 +146,75 @@ vc4_blend_channel(nir_builder *b, } static nir_ssa_def * -vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst, - unsigned func) +vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1, + int chan) +{ + unsigned chan_mask = 0xff << (chan * 8); + return nir_ior(b, + nir_iand(b, src0, nir_imm_int(b, ~chan_mask)), + nir_iand(b, src1, nir_imm_int(b, chan_mask))); +} + +static nir_ssa_def * +vc4_blend_channel_i(nir_builder *b, + nir_ssa_def *src, + nir_ssa_def *dst, + nir_ssa_def *src_a, + nir_ssa_def *dst_a, + unsigned factor, + int a_chan) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + return 
nir_imm_int(b, ~0); + case PIPE_BLENDFACTOR_SRC_COLOR: + return src; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return src_a; + case PIPE_BLENDFACTOR_DST_ALPHA: + return dst_a; + case PIPE_BLENDFACTOR_DST_COLOR: + return dst; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return vc4_nir_set_packed_chan(b, + nir_umin_4x8(b, + src_a, + nir_inot(b, dst_a)), + nir_imm_int(b, ~0), + a_chan); + case PIPE_BLENDFACTOR_CONST_COLOR: + return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA); + case PIPE_BLENDFACTOR_CONST_ALPHA: + return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA); + case PIPE_BLENDFACTOR_ZERO: + return nir_imm_int(b, 0); + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return nir_inot(b, src); + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return nir_inot(b, src_a); + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return nir_inot(b, dst_a); + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return nir_inot(b, dst); + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA)); + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA)); + + default: + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* Unsupported. 
*/ + fprintf(stderr, "Unknown blend factor %d\n", factor); + return nir_imm_int(b, ~0); + } +} + +static nir_ssa_def * +vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst, + unsigned func) { switch (func) { case PIPE_BLEND_ADD: @@ -169,9 +236,33 @@ vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst, } } +static nir_ssa_def * +vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst, + unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + return nir_usadd_4x8(b, src, dst); + case PIPE_BLEND_SUBTRACT: + return nir_ussub_4x8(b, src, dst); + case PIPE_BLEND_REVERSE_SUBTRACT: + return nir_ussub_4x8(b, dst, src); + case PIPE_BLEND_MIN: + return nir_umin_4x8(b, src, dst); + case PIPE_BLEND_MAX: + return nir_umax_4x8(b, src, dst); + + default: + /* Unsupported. */ + fprintf(stderr, "Unknown blend func %d\n", func); + return src; + + } +} + static void -vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result, - nir_ssa_def **src_color, nir_ssa_def **dst_color) +vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result, + nir_ssa_def **src_color, nir_ssa_def **dst_color) { struct pipe_rt_blend_state *blend = &c->fs_key->blend; @@ -192,20 +283,106 @@ vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result, int dst_factor = ((i != 3) ? blend->rgb_dst_factor : blend->alpha_dst_factor); src_blend[i] = nir_fmul(b, src_color[i], - vc4_blend_channel(b, - src_color, dst_color, - src_factor, i)); + vc4_blend_channel_f(b, + src_color, dst_color, + src_factor, i)); dst_blend[i] = nir_fmul(b, dst_color[i], - vc4_blend_channel(b, - src_color, dst_color, - dst_factor, i)); + vc4_blend_channel_f(b, + src_color, dst_color, + dst_factor, i)); } for (int i = 0; i < 4; i++) { - result[i] = vc4_blend_func(b, src_blend[i], dst_blend[i], - ((i != 3) ? blend->rgb_func : - blend->alpha_func)); + result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i], + ((i != 3) ? 
blend->rgb_func : + blend->alpha_func)); + } +} + +static nir_ssa_def * +vc4_nir_splat(nir_builder *b, nir_ssa_def *src) +{ + nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8))); + return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16))); +} + +static nir_ssa_def * +vc4_do_blending_i(struct vc4_compile *c, nir_builder *b, + nir_ssa_def *src_color, nir_ssa_def *dst_color, + nir_ssa_def *src_float_a) +{ + struct pipe_rt_blend_state *blend = &c->fs_key->blend; + + if (!blend->blend_enable) + return src_color; + + enum pipe_format color_format = c->fs_key->color_format; + const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); + nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff); + nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a); + nir_ssa_def *dst_a; + int alpha_chan; + for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) { + if (format_swiz[alpha_chan] == 3) + break; + } + if (alpha_chan != 4) { + nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8); + dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color, + shift), imm_0xff)); + } else { + dst_a = nir_imm_int(b, ~0); + } + + nir_ssa_def *src_factor = vc4_blend_channel_i(b, + src_color, dst_color, + src_a, dst_a, + blend->rgb_src_factor, + alpha_chan); + nir_ssa_def *dst_factor = vc4_blend_channel_i(b, + src_color, dst_color, + src_a, dst_a, + blend->rgb_dst_factor, + alpha_chan); + + if (alpha_chan != 4 && + blend->alpha_src_factor != blend->rgb_src_factor) { + nir_ssa_def *src_alpha_factor = + vc4_blend_channel_i(b, + src_color, dst_color, + src_a, dst_a, + blend->alpha_src_factor, + alpha_chan); + src_factor = vc4_nir_set_packed_chan(b, src_factor, + src_alpha_factor, + alpha_chan); + } + if (alpha_chan != 4 && + blend->alpha_dst_factor != blend->rgb_dst_factor) { + nir_ssa_def *dst_alpha_factor = + vc4_blend_channel_i(b, + src_color, dst_color, + src_a, dst_a, + blend->alpha_dst_factor, + alpha_chan); + dst_factor = vc4_nir_set_packed_chan(b, dst_factor, + dst_alpha_factor, 
+ alpha_chan); + } + nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor); + nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor); + + nir_ssa_def *result = + vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func); + if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) { + nir_ssa_def *result_a = vc4_blend_func_i(b, + src_blend, + dst_blend, + blend->alpha_func); + result = vc4_nir_set_packed_chan(b, result, result_a, + alpha_chan); } + return result; } static nir_ssa_def * @@ -299,12 +476,33 @@ vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b, nir_builder_instr_insert(b, &discard->instr); } +static nir_ssa_def * +vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b, + nir_ssa_def **colors) +{ + enum pipe_format color_format = c->fs_key->color_format; + const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); + + nir_ssa_def *swizzled[4]; + for (int i = 0; i < 4; i++) { + swizzled[i] = vc4_nir_get_swizzled_channel(b, colors, + format_swiz[i]); + } + + return nir_pack_unorm_4x8(b, + nir_vec4(b, + swizzled[0], swizzled[1], + swizzled[2], swizzled[3])); + +} + static void vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr *intr) { enum pipe_format color_format = c->fs_key->color_format; const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); + bool srgb = util_format_is_srgb(color_format); /* Pull out the float src/dst color components. */ nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b); @@ -315,45 +513,39 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b, unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false); } - /* Unswizzle the destination color. */ - nir_ssa_def *dst_color[4]; - for (unsigned i = 0; i < 4; i++) { - dst_color[i] = vc4_nir_get_swizzled_channel(b, - unpacked_dst_color, - format_swiz[i]); - } - vc4_nir_emit_alpha_test_discard(c, b, src_color[3]); - /* Turn dst color to linear. 
*/ - if (util_format_is_srgb(color_format)) { + nir_ssa_def *packed_color; + if (srgb) { + /* Unswizzle the destination color. */ + nir_ssa_def *dst_color[4]; + for (unsigned i = 0; i < 4; i++) { + dst_color[i] = vc4_nir_get_swizzled_channel(b, + unpacked_dst_color, + format_swiz[i]); + } + + /* Turn dst color to linear. */ for (int i = 0; i < 3; i++) dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]); - } - nir_ssa_def *blend_color[4]; - vc4_do_blending(c, b, blend_color, src_color, dst_color); + nir_ssa_def *blend_color[4]; + vc4_do_blending_f(c, b, blend_color, src_color, dst_color); - /* sRGB encode the output color */ - if (util_format_is_srgb(color_format)) { + /* sRGB encode the output color */ for (int i = 0; i < 3; i++) blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]); - } - nir_ssa_def *swizzled_outputs[4]; - for (int i = 0; i < 4; i++) { - swizzled_outputs[i] = - vc4_nir_get_swizzled_channel(b, blend_color, - format_swiz[i]); - } + packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color); + } else { + nir_ssa_def *packed_src_color = + vc4_nir_swizzle_and_pack(c, b, src_color); - nir_ssa_def *packed_color = - nir_pack_unorm_4x8(b, - nir_vec4(b, - swizzled_outputs[0], - swizzled_outputs[1], - swizzled_outputs[2], - swizzled_outputs[3])); + packed_color = + vc4_do_blending_i(c, b, + packed_src_color, packed_dst_color, + src_color[3]); + } packed_color = vc4_logicop(b, c->fs_key->logicop_func, packed_color, packed_dst_color); diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index caf706aa2a6..7ea263afb68 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -406,6 +406,7 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b, case nir_intrinsic_load_uniform: case nir_intrinsic_load_uniform_indirect: + case nir_intrinsic_load_user_clip_plane: vc4_nir_lower_uniform(c, b, intr); break; diff --git 
a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c index 5b435832b92..f1bab810eff 100644 --- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c +++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c @@ -64,6 +64,7 @@ is_constant_value(struct vc4_compile *c, struct qreg reg, uint32_t val) { if (reg.file == QFILE_UNIF && + !reg.pack && c->uniform_contents[reg.index] == QUNIFORM_CONSTANT && c->uniform_data[reg.index] == val) { return true; diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c index fd2539aed95..0eee5c34e1d 100644 --- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c +++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c @@ -41,34 +41,77 @@ qir_opt_copy_propagation(struct vc4_compile *c) bool debug = false; list_for_each_entry(struct qinst, inst, &c->instructions, link) { - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { - int index = inst->src[i].index; - if (inst->src[i].file == QFILE_TEMP && - c->defs[index] && - c->defs[index]->op == QOP_MOV && - (c->defs[index]->src[0].file == QFILE_TEMP || - c->defs[index]->src[0].file == QFILE_UNIF)) { - /* If it has a pack, it shouldn't be an SSA - * def. + int nsrc = qir_get_op_nsrc(inst->op); + for (int i = 0; i < nsrc; i++) { + if (inst->src[i].file != QFILE_TEMP) + continue; + + struct qinst *mov = c->defs[inst->src[i].index]; + if (!mov || + (mov->op != QOP_MOV && + mov->op != QOP_FMOV && + mov->op != QOP_MMOV)) { + continue; + } + + if (mov->src[0].file != QFILE_TEMP && + mov->src[0].file != QFILE_UNIF) { + continue; + } + + if (mov->dst.pack) + continue; + + uint8_t unpack; + if (mov->src[0].pack) { + /* Make sure that the meaning of the unpack + * would be the same between the two + * instructions. 
*/ - assert(!c->defs[index]->dst.pack); + if (qir_is_float_input(inst) != + qir_is_float_input(mov)) { + continue; + } - if (debug) { - fprintf(stderr, "Copy propagate: "); - qir_dump_inst(c, inst); - fprintf(stderr, "\n"); + /* There's only one unpack field, so make sure + * this instruction doesn't already use it. + */ + bool already_has_unpack = false; + for (int j = 0; j < nsrc; j++) { + if (inst->src[j].pack) + already_has_unpack = true; } + if (already_has_unpack) + continue; - inst->src[i] = c->defs[index]->src[0]; + /* A destination pack requires the PM bit to + * be set to a specific value already, which + * may be different from ours. + */ + if (inst->dst.pack) + continue; - if (debug) { - fprintf(stderr, "to: "); - qir_dump_inst(c, inst); - fprintf(stderr, "\n"); - } + unpack = mov->src[0].pack; + } else { + unpack = inst->src[i].pack; + } - progress = true; + if (debug) { + fprintf(stderr, "Copy propagate: "); + qir_dump_inst(c, inst); + fprintf(stderr, "\n"); } + + inst->src[i] = mov->src[0]; + inst->src[i].pack = unpack; + + if (debug) { + fprintf(stderr, "to: "); + qir_dump_inst(c, inst); + fprintf(stderr, "\n"); + } + + progress = true; } } return progress; diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c index 0e5480ea781..8b4d429074c 100644 --- a/src/gallium/drivers/vc4/vc4_opt_cse.c +++ b/src/gallium/drivers/vc4/vc4_opt_cse.c @@ -65,6 +65,7 @@ vc4_find_cse(struct vc4_compile *c, struct hash_table *ht, struct qinst *inst, uint32_t sf_count) { if (inst->dst.file != QFILE_TEMP || + !c->defs[inst->dst.index] || inst->op == QOP_MOV || qir_get_op_nsrc(inst->op) > 4) { return NULL; diff --git a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c index d6e98f0aebf..e61562171aa 100644 --- a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c +++ b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c @@ -56,6 +56,7 @@ qir_opt_small_immediates(struct vc4_compile *c) 
struct qreg src = qir_follow_movs(c, inst->src[i]); if (src.file != QFILE_UNIF || + src.pack || c->uniform_contents[src.index] != QUNIFORM_CONSTANT) { continue; @@ -72,9 +73,6 @@ qir_opt_small_immediates(struct vc4_compile *c) continue; } - if (qir_src_needs_a_file(inst)) - continue; - uint32_t imm = c->uniform_data[src.index]; uint32_t small_imm = qpu_encode_small_immediate(imm); if (small_imm == ~0) diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c index f2cdf8f694f..73ded766db9 100644 --- a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c +++ b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c @@ -58,7 +58,7 @@ qir_opt_vpm_writes(struct vc4_compile *c) } for (int i = 0; i < vpm_write_count; i++) { - if (vpm_writes[i]->op != QOP_MOV || + if (!qir_is_raw_mov(vpm_writes[i]) || vpm_writes[i]->src[0].file != QFILE_TEMP) { continue; } diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 6e9ec6530c6..a48dad804e2 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -738,6 +738,20 @@ ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr) vec4 = nir_instr_as_alu(instr->src[0].src.ssa->parent_instr); } + /* If the pack is replicating the same channel 4 times, use the 8888 + * pack flag. This is common for blending using the alpha + * channel. 
+ */ + if (instr->src[0].swizzle[0] == instr->src[0].swizzle[1] && + instr->src[0].swizzle[0] == instr->src[0].swizzle[2] && + instr->src[0].swizzle[0] == instr->src[0].swizzle[3]) { + struct qreg *dest = ntq_get_dest(c, &instr->dest.dest); + *dest = qir_PACK_8888_F(c, + ntq_get_src(c, instr->src[0].src, + instr->src[0].swizzle[0])); + return; + } + for (int i = 0; i < 4; i++) { int swiz = instr->src[0].swizzle[i]; struct qreg src; @@ -1040,41 +1054,37 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) *dest = ntq_emit_ubfe(c, src[0], src[1], src[2]); break; - default: - fprintf(stderr, "unknown NIR ALU inst: "); - nir_print_instr(&instr->instr, stderr); - fprintf(stderr, "\n"); - abort(); - } -} + case nir_op_usadd_4x8: + *dest = qir_V8ADDS(c, src[0], src[1]); + break; -static void -clip_distance_discard(struct vc4_compile *c) -{ - for (int i = 0; i < PIPE_MAX_CLIP_PLANES; i++) { - if (!(c->key->ucp_enables & (1 << i))) - continue; + case nir_op_ussub_4x8: + *dest = qir_V8SUBS(c, src[0], src[1]); + break; - struct qreg dist = - emit_fragment_varying(c, - VARYING_SLOT_CLIP_DIST0 + (i / 4), - i % 4); + case nir_op_umin_4x8: + *dest = qir_V8MIN(c, src[0], src[1]); + break; - qir_SF(c, dist); + case nir_op_umax_4x8: + *dest = qir_V8MAX(c, src[0], src[1]); + break; - if (c->discard.file == QFILE_NULL) - c->discard = qir_uniform_ui(c, 0); + case nir_op_umul_unorm_4x8: + *dest = qir_V8MULD(c, src[0], src[1]); + break; - c->discard = qir_SEL_X_Y_NS(c, qir_uniform_ui(c, ~0), - c->discard); + default: + fprintf(stderr, "unknown NIR ALU inst: "); + nir_print_instr(&instr->instr, stderr); + fprintf(stderr, "\n"); + abort(); } } static void emit_frag_end(struct vc4_compile *c) { - clip_distance_discard(c); - struct qreg color; if (c->output_color_index != -1) { color = c->outputs[c->output_color_index]; @@ -1190,45 +1200,6 @@ emit_stub_vpm_read(struct vc4_compile *c) } static void -emit_ucp_clipdistance(struct vc4_compile *c) -{ - unsigned cv; - if 
(c->output_clipvertex_index != -1) - cv = c->output_clipvertex_index; - else if (c->output_position_index != -1) - cv = c->output_position_index; - else - return; - - for (int plane = 0; plane < PIPE_MAX_CLIP_PLANES; plane++) { - if (!(c->key->ucp_enables & (1 << plane))) - continue; - - /* Pick the next outputs[] that hasn't been written to, since - * there are no other program writes left to be processed at - * this point. If something had been declared but not written - * (like a w component), we'll just smash over the top of it. - */ - uint32_t output_index = c->num_outputs++; - add_output(c, output_index, - VARYING_SLOT_CLIP_DIST0 + plane / 4, - plane % 4); - - - struct qreg dist = qir_uniform_f(c, 0.0); - for (int i = 0; i < 4; i++) { - struct qreg pos_chan = c->outputs[cv + i]; - struct qreg ucp = - qir_uniform(c, QUNIFORM_USER_CLIP_PLANE, - plane * 4 + i); - dist = qir_FADD(c, dist, qir_FMUL(c, pos_chan, ucp)); - } - - c->outputs[output_index] = dist; - } -} - -static void emit_vert_end(struct vc4_compile *c, struct vc4_varying_slot *fs_inputs, uint32_t num_fs_inputs) @@ -1236,7 +1207,6 @@ emit_vert_end(struct vc4_compile *c, struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]); emit_stub_vpm_read(c); - emit_ucp_clipdistance(c); emit_scaled_viewport_write(c, rcp_w); emit_zs_write(c, rcp_w); @@ -1391,9 +1361,6 @@ ntq_setup_outputs(struct vc4_compile *c) case VARYING_SLOT_POS: c->output_position_index = loc; break; - case VARYING_SLOT_CLIP_VERTEX: - c->output_clipvertex_index = loc; - break; case VARYING_SLOT_PSIZ: c->output_point_size_index = loc; break; @@ -1486,6 +1453,11 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) break; + case nir_intrinsic_load_user_clip_plane: + *dest = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE, + instr->const_index[0]); + break; + case nir_intrinsic_load_input: assert(instr->num_components == 1); if (instr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) { @@ -1683,10 +1655,18 @@ 
vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, c->s = tgsi_to_nir(tokens, &nir_options); nir_opt_global_to_local(c->s); nir_convert_to_ssa(c->s); + if (stage == QSTAGE_FRAG) vc4_nir_lower_blend(c); + if (c->fs_key && c->fs_key->light_twoside) nir_lower_two_sided_color(c->s); + + if (stage == QSTAGE_FRAG) + nir_lower_clip_fs(c->s, c->key->ucp_enables); + else + nir_lower_clip_vs(c->s, c->key->ucp_enables); + vc4_nir_lower_io(c); nir_lower_idiv(c->s); nir_lower_load_const_to_scalar(c->s); diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index e385fbb65ae..7894b081b19 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -36,10 +36,17 @@ struct qir_op_info { static const struct qir_op_info qir_op_info[] = { [QOP_MOV] = { "mov", 1, 1 }, + [QOP_FMOV] = { "fmov", 1, 1 }, + [QOP_MMOV] = { "mmov", 1, 1 }, [QOP_FADD] = { "fadd", 1, 2 }, [QOP_FSUB] = { "fsub", 1, 2 }, [QOP_FMUL] = { "fmul", 1, 2 }, [QOP_MUL24] = { "mul24", 1, 2 }, + [QOP_V8MULD] = {"v8muld", 1, 2 }, + [QOP_V8MIN] = {"v8min", 1, 2 }, + [QOP_V8MAX] = {"v8max", 1, 2 }, + [QOP_V8ADDS] = {"v8adds", 1, 2 }, + [QOP_V8SUBS] = {"v8subs", 1, 2 }, [QOP_FMIN] = { "fmin", 1, 2 }, [QOP_FMAX] = { "fmax", 1, 2 }, [QOP_FMINABS] = { "fminabs", 1, 2 }, @@ -71,11 +78,6 @@ static const struct qir_op_info qir_op_info[] = { [QOP_RSQ] = { "rsq", 1, 1, false, true }, [QOP_EXP2] = { "exp2", 1, 2, false, true }, [QOP_LOG2] = { "log2", 1, 2, false, true }, - [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1 }, - [QOP_PACK_8A_F] = { "pack_8a_f", 1, 1 }, - [QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 }, - [QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 }, - [QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 }, [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true }, [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true }, [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true }, @@ -95,18 +97,6 @@ static const struct qir_op_info qir_op_info[] = { [QOP_TEX_B] = { "tex_b", 0, 2 }, [QOP_TEX_DIRECT] = { 
"tex_direct", 0, 2 }, [QOP_TEX_RESULT] = { "tex_result", 1, 0, true }, - [QOP_UNPACK_8A_F] = { "unpack_8a_f", 1, 1 }, - [QOP_UNPACK_8B_F] = { "unpack_8b_f", 1, 1 }, - [QOP_UNPACK_8C_F] = { "unpack_8c_f", 1, 1 }, - [QOP_UNPACK_8D_F] = { "unpack_8d_f", 1, 1 }, - [QOP_UNPACK_16A_F] = { "unpack_16a_f", 1, 1 }, - [QOP_UNPACK_16B_F] = { "unpack_16b_f", 1, 1 }, - [QOP_UNPACK_8A_I] = { "unpack_8a_i", 1, 1 }, - [QOP_UNPACK_8B_I] = { "unpack_8b_i", 1, 1 }, - [QOP_UNPACK_8C_I] = { "unpack_8c_i", 1, 1 }, - [QOP_UNPACK_8D_I] = { "unpack_8d_i", 1, 1 }, - [QOP_UNPACK_16A_I] = { "unpack_16a_i", 1, 1 }, - [QOP_UNPACK_16B_I] = { "unpack_16b_i", 1, 1 }, }; static const char * @@ -171,8 +161,14 @@ bool qir_is_mul(struct qinst *inst) { switch (inst->op) { + case QOP_MMOV: case QOP_FMUL: case QOP_MUL24: + case QOP_V8MULD: + case QOP_V8MIN: + case QOP_V8MAX: + case QOP_V8ADDS: + case QOP_V8SUBS: return true; default: return false; @@ -180,6 +176,35 @@ qir_is_mul(struct qinst *inst) } bool +qir_is_float_input(struct qinst *inst) +{ + switch (inst->op) { + case QOP_FMOV: + case QOP_FMUL: + case QOP_FADD: + case QOP_FSUB: + case QOP_FMIN: + case QOP_FMAX: + case QOP_FMINABS: + case QOP_FMAXABS: + case QOP_FTOI: + return true; + default: + return false; + } +} + +bool +qir_is_raw_mov(struct qinst *inst) +{ + return ((inst->op == QOP_MOV || + inst->op == QOP_FMOV || + inst->op == QOP_MMOV) && + !inst->dst.pack && + !inst->src[0].pack); +} + +bool qir_is_tex(struct qinst *inst) { return inst->op >= QOP_TEX_S && inst->op <= QOP_TEX_DIRECT; @@ -204,28 +229,6 @@ qir_depends_on_flags(struct qinst *inst) } bool -qir_src_needs_a_file(struct qinst *inst) -{ - switch (inst->op) { - case QOP_UNPACK_8A_F: - case QOP_UNPACK_8B_F: - case QOP_UNPACK_8C_F: - case QOP_UNPACK_8D_F: - case QOP_UNPACK_16A_F: - case QOP_UNPACK_16B_F: - case QOP_UNPACK_8A_I: - case QOP_UNPACK_8B_I: - case QOP_UNPACK_8C_I: - case QOP_UNPACK_8D_I: - case QOP_UNPACK_16A_I: - case QOP_UNPACK_16B_I: - return true; - default: - return 
false; - } -} - -bool qir_writes_r4(struct qinst *inst) { switch (inst->op) { @@ -295,6 +298,7 @@ qir_dump_inst(struct vc4_compile *c, struct qinst *inst) for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { fprintf(stderr, ", "); qir_print_reg(c, inst->src[i], false); + vc4_qpu_disasm_unpack(stderr, inst->src[i].pack); } } @@ -385,7 +389,6 @@ qir_compile_init(void) list_inithead(&c->instructions); c->output_position_index = -1; - c->output_clipvertex_index = -1; c->output_color_index = -1; c->output_point_size_index = -1; @@ -411,7 +414,8 @@ qir_follow_movs(struct vc4_compile *c, struct qreg reg) { while (reg.file == QFILE_TEMP && c->defs[reg.index] && - c->defs[reg.index]->op == QOP_MOV) { + c->defs[reg.index]->op == QOP_MOV && + !c->defs[reg.index]->dst.pack) { reg = c->defs[reg.index]->src[0]; } diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index ddde96db6b4..a92ad93ee07 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -37,6 +37,7 @@ #include "util/u_math.h" #include "vc4_screen.h" +#include "vc4_qpu_defines.h" #include "pipe/p_state.h" struct nir_builder; @@ -64,9 +65,16 @@ struct qreg { enum qop { QOP_UNDEF, QOP_MOV, + QOP_FMOV, + QOP_MMOV, QOP_FADD, QOP_FSUB, QOP_FMUL, + QOP_V8MULD, + QOP_V8MIN, + QOP_V8MAX, + QOP_V8ADDS, + QOP_V8SUBS, QOP_MUL24, QOP_FMIN, QOP_FMAX, @@ -105,11 +113,6 @@ enum qop { QOP_LOG2, QOP_VW_SETUP, QOP_VR_SETUP, - QOP_PACK_8888_F, - QOP_PACK_8A_F, - QOP_PACK_8B_F, - QOP_PACK_8C_F, - QOP_PACK_8D_F, QOP_TLB_DISCARD_SETUP, QOP_TLB_STENCIL_SETUP, QOP_TLB_Z_WRITE, @@ -123,20 +126,6 @@ enum qop { QOP_FRAG_W, QOP_FRAG_REV_FLAG, - QOP_UNPACK_8A_F, - QOP_UNPACK_8B_F, - QOP_UNPACK_8C_F, - QOP_UNPACK_8D_F, - QOP_UNPACK_16A_F, - QOP_UNPACK_16B_F, - - QOP_UNPACK_8A_I, - QOP_UNPACK_8B_I, - QOP_UNPACK_8C_I, - QOP_UNPACK_8D_I, - QOP_UNPACK_16A_I, - QOP_UNPACK_16B_I, - /** Texture x coordinate parameter write */ QOP_TEX_S, /** Texture y coordinate parameter write */ @@ 
-248,6 +237,8 @@ enum quniform_contents { QUNIFORM_BLEND_CONST_COLOR_Y, QUNIFORM_BLEND_CONST_COLOR_Z, QUNIFORM_BLEND_CONST_COLOR_W, + QUNIFORM_BLEND_CONST_COLOR_RGBA, + QUNIFORM_BLEND_CONST_COLOR_AAAA, QUNIFORM_STENCIL, @@ -399,7 +390,6 @@ struct vc4_compile { uint32_t num_outputs; uint32_t num_texture_samples; uint32_t output_position_index; - uint32_t output_clipvertex_index; uint32_t output_color_index; uint32_t output_point_size_index; @@ -457,10 +447,11 @@ bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst); bool qir_is_multi_instruction(struct qinst *inst); bool qir_is_mul(struct qinst *inst); +bool qir_is_raw_mov(struct qinst *inst); bool qir_is_tex(struct qinst *inst); +bool qir_is_float_input(struct qinst *inst); bool qir_depends_on_flags(struct qinst *inst); bool qir_writes_r4(struct qinst *inst); -bool qir_src_needs_a_file(struct qinst *inst); struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg); void qir_dump(struct vc4_compile *c); @@ -561,9 +552,16 @@ qir_##name(struct vc4_compile *c, struct qreg dest, struct qreg a) \ } QIR_ALU1(MOV) +QIR_ALU1(FMOV) +QIR_ALU1(MMOV) QIR_ALU2(FADD) QIR_ALU2(FSUB) QIR_ALU2(FMUL) +QIR_ALU2(V8MULD) +QIR_ALU2(V8MIN) +QIR_ALU2(V8MAX) +QIR_ALU2(V8ADDS) +QIR_ALU2(V8SUBS) QIR_ALU2(MUL24) QIR_ALU1(SEL_X_0_ZS) QIR_ALU1(SEL_X_0_ZC) @@ -596,11 +594,6 @@ QIR_ALU1(RCP) QIR_ALU1(RSQ) QIR_ALU1(EXP2) QIR_ALU1(LOG2) -QIR_ALU1(PACK_8888_F) -QIR_PACK(PACK_8A_F) -QIR_PACK(PACK_8B_F) -QIR_PACK(PACK_8C_F) -QIR_PACK(PACK_8D_F) QIR_ALU1(VARY_ADD_C) QIR_NODST_2(TEX_S) QIR_NODST_2(TEX_T) @@ -622,41 +615,50 @@ QIR_NODST_1(TLB_STENCIL_SETUP) static inline struct qreg qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i) { - struct qreg t = qir_get_temp(c); - qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef)); + struct qreg t = qir_FMOV(c, src); + c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i; return t; } static inline 
struct qreg qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i) { - struct qreg t = qir_get_temp(c); - qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef)); + struct qreg t = qir_MOV(c, src); + c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i; return t; } static inline struct qreg qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i) { - struct qreg t = qir_get_temp(c); - qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef)); + struct qreg t = qir_FMOV(c, src); + c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i; return t; } static inline struct qreg qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i) { - struct qreg t = qir_get_temp(c); - qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef)); + struct qreg t = qir_MOV(c, src); + c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i; return t; } -static inline struct qreg +static inline void qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan) { - qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, dest, val, c->undef)); + assert(!dest.pack); + dest.pack = QPU_PACK_MUL_8A + chan; + qir_emit(c, qir_inst(QOP_MMOV, dest, val, c->undef)); if (dest.file == QFILE_TEMP) c->defs[dest.index] = NULL; +} + +static inline struct qreg +qir_PACK_8888_F(struct vc4_compile *c, struct qreg val) +{ + struct qreg dest = qir_MMOV(c, val); + c->defs[dest.index]->dst.pack = QPU_PACK_MUL_8888; return dest; } diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h index 0719d2828b5..866ca5c1300 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.h +++ b/src/gallium/drivers/vc4/vc4_qpu.h @@ -213,6 +213,9 @@ void vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack); void +vc4_qpu_disasm_unpack(FILE *out, uint32_t pack); + +void vc4_qpu_validate(uint64_t *insts, uint32_t num_inst); #endif /* VC4_QPU_H */ diff --git a/src/gallium/drivers/vc4/vc4_qpu_defines.h b/src/gallium/drivers/vc4/vc4_qpu_defines.h index eb3dfb33827..626dc3be6be 100644 --- 
a/src/gallium/drivers/vc4/vc4_qpu_defines.h +++ b/src/gallium/drivers/vc4/vc4_qpu_defines.h @@ -200,8 +200,8 @@ enum qpu_pack_a { enum qpu_unpack { QPU_UNPACK_NOP, - QPU_UNPACK_16A_TO_F32, - QPU_UNPACK_16B_TO_F32, + QPU_UNPACK_16A, + QPU_UNPACK_16B, QPU_UNPACK_8D_REP, QPU_UNPACK_8A, QPU_UNPACK_8B, diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c index 0879787ec03..c46fd1a0e3f 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c +++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c @@ -98,8 +98,8 @@ static const char *qpu_pack_mul[] = { */ static const char *qpu_unpack[] = { [QPU_UNPACK_NOP] = "", - [QPU_UNPACK_16A_TO_F32] = "16a", - [QPU_UNPACK_16B_TO_F32] = "16b", + [QPU_UNPACK_16A] = "16a", + [QPU_UNPACK_16B] = "16b", [QPU_UNPACK_8D_REP] = "8d_rep", [QPU_UNPACK_8A] = "8a", [QPU_UNPACK_8B] = "8b", @@ -257,6 +257,13 @@ vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack) fprintf(out, "%s", DESC(qpu_pack_a, pack)); } +void +vc4_qpu_disasm_unpack(FILE *out, uint32_t unpack) +{ + if (unpack != QPU_UNPACK_NOP) + fprintf(out, ".%s", DESC(qpu_unpack, unpack)); +} + static void print_alu_dst(uint64_t inst, bool is_mul) { @@ -315,10 +322,9 @@ print_alu_src(uint64_t inst, uint32_t mux) fprintf(stderr, "%s", DESC(special_read_b, raddr - 32)); } - if (unpack != QPU_UNPACK_NOP && - ((mux == QPU_MUX_A && !(inst & QPU_PM)) || + if (((mux == QPU_MUX_A && !(inst & QPU_PM)) || (mux == QPU_MUX_R4 && (inst & QPU_PM)))) { - fprintf(stderr, ".%s", DESC(qpu_unpack, unpack)); + vc4_qpu_disasm_unpack(stderr, unpack); } } diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index adf3a8b3658..133e1385178 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -101,7 +101,8 @@ swap_file(struct qpu_reg *src) static void fixup_raddr_conflict(struct vc4_compile *c, struct qpu_reg dst, - struct qpu_reg *src0, struct qpu_reg *src1) + struct qpu_reg *src0, struct qpu_reg *src1, 
+ struct qinst *inst, uint64_t *unpack) { uint32_t mux0 = src0->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src0->mux; uint32_t mux1 = src1->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src1->mux; @@ -117,7 +118,21 @@ fixup_raddr_conflict(struct vc4_compile *c, return; if (mux0 == QPU_MUX_A) { - queue(c, qpu_a_MOV(qpu_rb(31), *src0)); + /* Make sure we use the same type of MOV as the instruction, + * in case of unpacks. + */ + if (qir_is_float_input(inst)) + queue(c, qpu_a_FMAX(qpu_rb(31), *src0, *src0)); + else + queue(c, qpu_a_MOV(qpu_rb(31), *src0)); + + /* If we had an unpack on this A-file source, we need to put + * it into this MOV, not into the later move from regfile B. + */ + if (inst->src[0].pack) { + *last_inst(c) |= *unpack; + *unpack = 0; + } *src0 = qpu_rb(31); } else { queue(c, qpu_a_MOV(qpu_ra(31), *src0)); @@ -125,6 +140,27 @@ fixup_raddr_conflict(struct vc4_compile *c, } } +static void +set_last_dst_pack(struct vc4_compile *c, struct qinst *inst) +{ + bool had_pm = *last_inst(c) & QPU_PM; + bool had_ws = *last_inst(c) & QPU_WS; + uint32_t unpack = QPU_GET_FIELD(*last_inst(c), QPU_UNPACK); + + if (!inst->dst.pack) + return; + + *last_inst(c) |= QPU_SET_FIELD(inst->dst.pack, QPU_PACK); + + if (qir_is_mul(inst)) { + assert(!unpack || had_pm); + *last_inst(c) |= QPU_PM; + } else { + assert(!unpack || !had_pm); + assert(!had_ws); /* dst must be a-file to pack. */ + } +} + void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) { @@ -134,15 +170,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) uint32_t vpm_read_fifo_count = 0; uint32_t vpm_read_offset = 0; int last_vpm_read_index = -1; - /* Map from the QIR ops enum order to QPU unpack bits. 
*/ - static const uint32_t unpack_map[] = { - QPU_UNPACK_8A, - QPU_UNPACK_8B, - QPU_UNPACK_8C, - QPU_UNPACK_8D, - QPU_UNPACK_16A_TO_F32, - QPU_UNPACK_16B_TO_F32, - }; list_inithead(&c->qpu_inst_list); @@ -203,9 +230,22 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) A(NOT), M(FMUL), + M(V8MULD), + M(V8MIN), + M(V8MAX), + M(V8ADDS), + M(V8SUBS), M(MUL24), + + /* If we replicate src[0] out to src[1], this works + * out the same as a MOV. + */ + [QOP_MOV] = { QPU_A_OR }, + [QOP_FMOV] = { QPU_A_FMAX }, + [QOP_MMOV] = { QPU_M_V8MIN }, }; + uint64_t unpack = 0; struct qpu_reg src[4]; for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) { int index = qinst->src[i].index; @@ -215,6 +255,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; case QFILE_TEMP: src[i] = temp_registers[index]; + if (qinst->src[i].pack) { + assert(!unpack || + unpack == qinst->src[i].pack); + unpack = QPU_SET_FIELD(qinst->src[i].pack, + QPU_UNPACK); + if (src[i].mux == QPU_MUX_R4) + unpack |= QPU_PM; + } break; case QFILE_UNIF: src[i] = qpu_unif(); @@ -259,19 +307,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) } switch (qinst->op) { - case QOP_MOV: - /* Skip emitting the MOV if it's a no-op. 
*/ - if (dst.mux == QPU_MUX_A || dst.mux == QPU_MUX_B || - dst.mux != src[0].mux || dst.addr != src[0].addr) { - queue(c, qpu_a_MOV(dst, src[0])); - } - break; - case QOP_SEL_X_0_ZS: case QOP_SEL_X_0_ZC: case QOP_SEL_X_0_NS: case QOP_SEL_X_0_NC: - queue(c, qpu_a_MOV(dst, src[0])); + queue(c, qpu_a_MOV(dst, src[0]) | unpack); set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS + QPU_COND_ZS); @@ -285,10 +325,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QOP_SEL_X_Y_NS: case QOP_SEL_X_Y_NC: queue(c, qpu_a_MOV(dst, src[0])); + if (qinst->src[0].pack) + *(last_inst(c)) |= unpack; set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS + QPU_COND_ZS); queue(c, qpu_a_MOV(dst, src[1])); + if (qinst->src[1].pack) + *(last_inst(c)) |= unpack; set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^ 1) + QPU_COND_ZS); @@ -301,19 +345,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) switch (qinst->op) { case QOP_RCP: queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIP), - src[0])); + src[0]) | unpack); break; case QOP_RSQ: queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIPSQRT), - src[0])); + src[0]) | unpack); break; case QOP_EXP2: queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_EXP), - src[0])); + src[0]) | unpack); break; case QOP_LOG2: queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_LOG), - src[0])); + src[0]) | unpack); break; default: abort(); @@ -324,25 +368,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; - case QOP_PACK_8888_F: - queue(c, qpu_m_MOV(dst, src[0])); - *last_inst(c) |= QPU_PM; - *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8888, - QPU_PACK); - break; - - case QOP_PACK_8A_F: - case QOP_PACK_8B_F: - case QOP_PACK_8C_F: - case QOP_PACK_8D_F: - queue(c, - qpu_m_MOV(dst, src[0]) | - QPU_PM | - QPU_SET_FIELD(QPU_PACK_MUL_8A + - qinst->op - QOP_PACK_8A_F, - QPU_PACK)); - break; - case QOP_FRAG_X: queue(c, qpu_a_ITOF(dst, qpu_ra(QPU_R_XY_PIXEL_COORD))); @@ -367,16 +392,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) 
case QOP_TLB_DISCARD_SETUP: discard = true; - queue(c, qpu_a_MOV(src[0], src[0])); + queue(c, qpu_a_MOV(src[0], src[0]) | unpack); *last_inst(c) |= QPU_SF; break; case QOP_TLB_STENCIL_SETUP: - queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP), src[0])); + assert(!unpack); + queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP), + src[0]) | unpack); break; case QOP_TLB_Z_WRITE: - queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), src[0])); + queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), + src[0]) | unpack); if (discard) { set_last_cond_add(c, QPU_COND_ZS); } @@ -392,14 +420,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; case QOP_TLB_COLOR_WRITE: - queue(c, qpu_a_MOV(qpu_tlbc(), src[0])); + queue(c, qpu_a_MOV(qpu_tlbc(), src[0]) | unpack); if (discard) { set_last_cond_add(c, QPU_COND_ZS); } break; case QOP_VARY_ADD_C: - queue(c, qpu_a_FADD(dst, src[0], qpu_r5())); + queue(c, qpu_a_FADD(dst, src[0], qpu_r5()) | unpack); break; case QOP_TEX_S: @@ -408,12 +436,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QOP_TEX_B: queue(c, qpu_a_MOV(qpu_rb(QPU_W_TMU0_S + (qinst->op - QOP_TEX_S)), - src[0])); + src[0]) | unpack); break; case QOP_TEX_DIRECT: - fixup_raddr_conflict(c, dst, &src[0], &src[1]); - queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), src[0], src[1])); + fixup_raddr_conflict(c, dst, &src[0], &src[1], + qinst, &unpack); + queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), + src[0], src[1]) | unpack); break; case QOP_TEX_RESULT: @@ -424,67 +454,16 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) queue(c, qpu_a_MOV(dst, qpu_r4())); break; - case QOP_UNPACK_8A_F: - case QOP_UNPACK_8B_F: - case QOP_UNPACK_8C_F: - case QOP_UNPACK_8D_F: - case QOP_UNPACK_16A_F: - case QOP_UNPACK_16B_F: { - if (src[0].mux == QPU_MUX_R4) { - queue(c, qpu_a_MOV(dst, src[0])); - *last_inst(c) |= QPU_PM; - *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A + - (qinst->op - - QOP_UNPACK_8A_F), - QPU_UNPACK); - } else { - assert(src[0].mux == QPU_MUX_A); 
- - /* Since we're setting the pack bits, if the - * destination is in A it would get re-packed. - */ - queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ? - qpu_rb(31) : dst), - src[0], src[0])); - *last_inst(c) |= - QPU_SET_FIELD(unpack_map[qinst->op - - QOP_UNPACK_8A_F], - QPU_UNPACK); - - if (dst.mux == QPU_MUX_A) { - queue(c, qpu_a_MOV(dst, qpu_rb(31))); - } - } - } - break; - - case QOP_UNPACK_8A_I: - case QOP_UNPACK_8B_I: - case QOP_UNPACK_8C_I: - case QOP_UNPACK_8D_I: - case QOP_UNPACK_16A_I: - case QOP_UNPACK_16B_I: { - assert(src[0].mux == QPU_MUX_A); - - /* Since we're setting the pack bits, if the - * destination is in A it would get re-packed. - */ - queue(c, qpu_a_MOV((dst.mux == QPU_MUX_A ? - qpu_rb(31) : dst), src[0])); - *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op - - QOP_UNPACK_8A_I], - QPU_UNPACK); - - if (dst.mux == QPU_MUX_A) { - queue(c, qpu_a_MOV(dst, qpu_rb(31))); - } - } - break; - default: assert(qinst->op < ARRAY_SIZE(translate)); assert(translate[qinst->op].op != 0); /* NOPs */ + /* Skip emitting the MOV if it's a no-op. */ + if (qir_is_raw_mov(qinst) && + dst.mux == src[0].mux && dst.addr == src[0].addr) { + break; + } + /* If we have only one source, put it in the second * argument slot as well so that we don't take up * another raddr just to get unused data. 
@@ -492,27 +471,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) if (qir_get_op_nsrc(qinst->op) == 1) src[1] = src[0]; - fixup_raddr_conflict(c, dst, &src[0], &src[1]); + fixup_raddr_conflict(c, dst, &src[0], &src[1], + qinst, &unpack); if (qir_is_mul(qinst)) { queue(c, qpu_m_alu2(translate[qinst->op].op, dst, - src[0], src[1])); - if (qinst->dst.pack) { - *last_inst(c) |= QPU_PM; - *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack, - QPU_PACK); - } + src[0], src[1]) | unpack); } else { queue(c, qpu_a_alu2(translate[qinst->op].op, dst, - src[0], src[1])); - if (qinst->dst.pack) { - assert(dst.mux == QPU_MUX_A); - *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack, - QPU_PACK); - } + src[0], src[1]) | unpack); } + set_last_dst_pack(c, qinst); break; } diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index 3ced50f3a44..bca36c3e7f4 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -282,23 +282,23 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) class_bits[inst->dst.index] &= CLASS_BIT_A; } - if (qir_src_needs_a_file(inst)) { - switch (inst->op) { - case QOP_UNPACK_8A_F: - case QOP_UNPACK_8B_F: - case QOP_UNPACK_8C_F: - case QOP_UNPACK_8D_F: - /* Special case: these can be done as R4 - * unpacks, as well. - */ - class_bits[inst->src[0].index] &= (CLASS_BIT_A | - CLASS_BIT_R4); - break; - default: - class_bits[inst->src[0].index] &= CLASS_BIT_A; - break; + /* Apply restrictions for src unpacks. The integer unpacks + * can only be done from regfile A, while float unpacks can be + * either A or R4. 
+ */ + for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + if (inst->src[i].file == QFILE_TEMP && + inst->src[i].pack) { + if (qir_is_float_input(inst)) { + class_bits[inst->src[i].index] &= + CLASS_BIT_A | CLASS_BIT_R4; + } else { + class_bits[inst->src[i].index] &= + CLASS_BIT_A; + } } } + ip++; } diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 5d5166fd818..122bda0bac6 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -667,11 +667,16 @@ vc4_get_shadow_index_buffer(struct pipe_context *pctx, shadow_offset, &shadow_rsc, &data); uint16_t *dst = data; - struct pipe_transfer *src_transfer; - uint32_t *src = pipe_buffer_map_range(pctx, &orig->base.b, - ib->offset, - count * 4, - PIPE_TRANSFER_READ, &src_transfer); + struct pipe_transfer *src_transfer = NULL; + uint32_t *src; + if (ib->user_buffer) { + src = ib->user_buffer; + } else { + src = pipe_buffer_map_range(pctx, &orig->base.b, + ib->offset, + count * 4, + PIPE_TRANSFER_READ, &src_transfer); + } for (int i = 0; i < count; i++) { uint32_t src_index = src[i]; @@ -679,7 +684,8 @@ vc4_get_shadow_index_buffer(struct pipe_context *pctx, dst[i] = src_index; } - pctx->transfer_unmap(pctx, src_transfer); + if (src_transfer) + pctx->transfer_unmap(pctx, src_transfer); return shadow_rsc; } diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 774ec095652..bb867611804 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -94,6 +94,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_USER_INDEX_BUFFERS: return 1; /* lying for GL 2.0 */ @@ -152,7 +153,6 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: case PIPE_CAP_VERTEX_COLOR_CLAMPED: 
case PIPE_CAP_USER_VERTEX_BUFFERS: - case PIPE_CAP_USER_INDEX_BUFFERS: case PIPE_CAP_QUERY_PIPELINE_STATISTICS: case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: @@ -183,6 +183,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 0; /* Stream output. */ diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 76980ca32af..10dabd09f5e 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -32,6 +32,11 @@ #include "vc4_simulator_validate.h" #include "simpenrose/simpenrose.h" +/* A marker placed just after each BO, then checked after rendering to make + * sure it's still there. + */ +#define BO_SENTINEL 0xfedcba98 + #define OVERFLOW_SIZE (32 * 1024 * 1024) static struct drm_gem_cma_object * @@ -49,10 +54,12 @@ vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo) obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next; obj->paddr = simpenrose_hw_addr(obj->vaddr); - dev->simulator_mem_next += size; + dev->simulator_mem_next += size + sizeof(uint32_t); dev->simulator_mem_next = align(dev->simulator_mem_next, 4096); assert(dev->simulator_mem_next <= screen->simulator_mem_size); + *(uint32_t *)(obj->vaddr + bo->size) = BO_SENTINEL; + return obj; } @@ -109,6 +116,7 @@ vc4_simulator_unpin_bos(struct vc4_exec_info *exec) struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base); struct vc4_bo *bo = drm_bo->bo; + assert(*(uint32_t *)(obj->vaddr + bo->size) == BO_SENTINEL); memcpy(bo->map, obj->vaddr, bo->size); if (drm_bo->validated_shader) { @@ -197,6 +205,8 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec.unref_list, unref_head) { list_del(&bo->unref_head); + assert(*(uint32_t 
*)(bo->base.vaddr + bo->bo->size) == + BO_SENTINEL); vc4_bo_unreference(&bo->bo); free(bo); } diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index 8a759c2ca4c..78aa344ab1d 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -51,7 +51,9 @@ vc4_set_blend_color(struct pipe_context *pctx, const struct pipe_blend_color *blend_color) { struct vc4_context *vc4 = vc4_context(pctx); - vc4->blend_color = *blend_color; + vc4->blend_color.f = *blend_color; + for (int i = 0; i < 4; i++) + vc4->blend_color.ub[i] = float_to_ubyte(blend_color->color[i]); vc4->dirty |= VC4_DIRTY_BLEND_COLOR; } @@ -303,10 +305,10 @@ vc4_set_index_buffer(struct pipe_context *pctx, struct vc4_context *vc4 = vc4_context(pctx); if (ib) { - assert(!ib->user_buffer); pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer); vc4->indexbuf.index_size = ib->index_size; vc4->indexbuf.offset = ib->offset; + vc4->indexbuf.user_buffer = ib->user_buffer; } else { pipe_resource_reference(&vc4->indexbuf.buffer, NULL); } diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c index 85d6998205e..f5ad481f186 100644 --- a/src/gallium/drivers/vc4/vc4_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_uniforms.c @@ -262,11 +262,35 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader, case QUNIFORM_BLEND_CONST_COLOR_Z: case QUNIFORM_BLEND_CONST_COLOR_W: cl_aligned_f(&uniforms, - CLAMP(vc4->blend_color.color[uinfo->contents[i] - - QUNIFORM_BLEND_CONST_COLOR_X], + CLAMP(vc4->blend_color.f.color[uinfo->contents[i] - + QUNIFORM_BLEND_CONST_COLOR_X], 0, 1)); break; + case QUNIFORM_BLEND_CONST_COLOR_RGBA: { + const uint8_t *format_swiz = + vc4_get_format_swizzle(vc4->framebuffer.cbufs[0]->format); + uint32_t color = 0; + for (int i = 0; i < 4; i++) { + if (format_swiz[i] >= 4) + continue; + + color |= (vc4->blend_color.ub[format_swiz[i]] << + (i * 8)); + } + cl_aligned_u32(&uniforms, 
color); + break; + } + + case QUNIFORM_BLEND_CONST_COLOR_AAAA: { + uint8_t a = vc4->blend_color.ub[3]; + cl_aligned_u32(&uniforms, ((a) | + (a << 8) | + (a << 16) | + (a << 24))); + break; + } + case QUNIFORM_STENCIL: cl_aligned_u32(&uniforms, vc4->zsa->stencil_uniforms[uinfo->data[i]] | @@ -330,6 +354,8 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader) case QUNIFORM_BLEND_CONST_COLOR_Y: case QUNIFORM_BLEND_CONST_COLOR_Z: case QUNIFORM_BLEND_CONST_COLOR_W: + case QUNIFORM_BLEND_CONST_COLOR_RGBA: + case QUNIFORM_BLEND_CONST_COLOR_AAAA: dirty |= VC4_DIRTY_BLEND_COLOR; break; diff --git a/src/gallium/drivers/virgl/Automake.inc b/src/gallium/drivers/virgl/Automake.inc new file mode 100644 index 00000000000..b05d3e314c8 --- /dev/null +++ b/src/gallium/drivers/virgl/Automake.inc @@ -0,0 +1,11 @@ +if HAVE_GALLIUM_VIRGL + +TARGET_DRIVERS += virtio_gpu +TARGET_CPPFLAGS += -DGALLIUM_VIRGL +TARGET_LIB_DEPS += \ + $(top_builddir)/src/gallium/drivers/virgl/libvirgl.la \ + $(top_builddir)/src/gallium/winsys/virgl/drm/libvirgldrm.la \ + $(top_builddir)/src/gallium/winsys/virgl/vtest/libvirglvtest.la \ + $(LIBDRM_LIBS) + +endif diff --git a/src/gallium/drivers/virgl/Makefile.am b/src/gallium/drivers/virgl/Makefile.am new file mode 100644 index 00000000000..82d9756143f --- /dev/null +++ b/src/gallium/drivers/virgl/Makefile.am @@ -0,0 +1,32 @@ +# Copyright © 2014, 2015 Red Hat. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CPPFLAGS = \ + $(GALLIUM_DRIVER_CFLAGS) \ + $(LIBDRM_CFLAGS) + +noinst_LTLIBRARIES = libvirgl.la + +libvirgl_la_SOURCES = $(C_SOURCES) diff --git a/src/gallium/drivers/virgl/Makefile.sources b/src/gallium/drivers/virgl/Makefile.sources new file mode 100644 index 00000000000..c27d284e248 --- /dev/null +++ b/src/gallium/drivers/virgl/Makefile.sources @@ -0,0 +1,18 @@ +C_SOURCES := \ + virgl_buffer.c \ + virgl_context.c \ + virgl_context.h \ + virgl_encode.c \ + virgl_encode.h \ + virgl_hw.h \ + virgl_protocol.h \ + virgl_public.h \ + virgl_query.c \ + virgl_resource.c \ + virgl_resource.h \ + virgl_screen.c \ + virgl_screen.h \ + virgl_streamout.c \ + virgl_texture.c \ + virgl_tgsi.c \ + virgl_winsys.h diff --git a/src/gallium/drivers/virgl/virgl_buffer.c b/src/gallium/drivers/virgl/virgl_buffer.c new file mode 100644 index 00000000000..ce19fb949d0 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_buffer.c @@ -0,0 +1,172 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "virgl_context.h" +#include "virgl_resource.h" +#include "virgl_screen.h" + +static void virgl_buffer_destroy(struct pipe_screen *screen, + struct pipe_resource *buf) +{ + struct virgl_screen *vs = virgl_screen(screen); + struct virgl_buffer *vbuf = virgl_buffer(buf); + + util_range_destroy(&vbuf->valid_buffer_range); + vs->vws->resource_unref(vs->vws, vbuf->base.hw_res); + FREE(vbuf); +} + +static void *virgl_buffer_transfer_map(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **transfer) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_screen *vs = virgl_screen(ctx->screen); + struct virgl_buffer *vbuf = virgl_buffer(resource); + struct virgl_transfer *trans; + void *ptr; + bool readback; + uint32_t offset; + bool doflushwait = false; + + if ((usage & PIPE_TRANSFER_READ) && (vbuf->on_list == TRUE)) + doflushwait = true; + else + doflushwait = virgl_res_needs_flush_wait(vctx, &vbuf->base, usage); + + if (doflushwait) + ctx->flush(ctx, NULL, 0); + + trans = util_slab_alloc(&vctx->texture_transfer_pool); + if (trans == NULL) + return NULL; + + trans->base.resource = resource; + trans->base.level = level; + trans->base.usage = usage; + trans->base.box = *box; + trans->base.stride = 0; + trans->base.layer_stride = 0; + + offset = box->x; + + readback = virgl_res_needs_readback(vctx, &vbuf->base, usage); + if (readback) + vs->vws->transfer_get(vs->vws, vbuf->base.hw_res, box, trans->base.stride, trans->base.layer_stride, offset, level); + + if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) + doflushwait = 
true; + + if (doflushwait || readback) + vs->vws->resource_wait(vs->vws, vbuf->base.hw_res); + + ptr = vs->vws->resource_map(vs->vws, vbuf->base.hw_res); + if (!ptr) { + /* Mapping failed: hand the transfer object back to the slab pool, otherwise it is leaked because the caller never receives it. */ + util_slab_free(&vctx->texture_transfer_pool, trans); + return NULL; + } + + trans->offset = offset; + *transfer = &trans->base; + + return ptr + trans->offset; +} + +static void virgl_buffer_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer *transfer) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_transfer *trans = virgl_transfer(transfer); + struct virgl_buffer *vbuf = virgl_buffer(transfer->resource); + + if (trans->base.usage & PIPE_TRANSFER_WRITE) { + if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { + struct virgl_screen *vs = virgl_screen(ctx->screen); + vbuf->base.clean = FALSE; + vctx->num_transfers++; + vs->vws->transfer_put(vs->vws, vbuf->base.hw_res, + &transfer->box, trans->base.stride, trans->base.layer_stride, trans->offset, transfer->level); + + } + } + + util_slab_free(&vctx->texture_transfer_pool, trans); +} + +static void virgl_buffer_transfer_flush_region(struct pipe_context *ctx, + struct pipe_transfer *transfer, + const struct pipe_box *box) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_buffer *vbuf = virgl_buffer(transfer->resource); + + if (!vbuf->on_list) { + struct pipe_resource *res = NULL; + + list_addtail(&vbuf->flush_list, &vctx->to_flush_bufs); + vbuf->on_list = TRUE; + pipe_resource_reference(&res, &vbuf->base.u.b); + } + + util_range_add(&vbuf->valid_buffer_range, transfer->box.x + box->x, + transfer->box.x + box->x + box->width); + + vbuf->base.clean = FALSE; +} + +static const struct u_resource_vtbl virgl_buffer_vtbl = +{ + u_default_resource_get_handle, /* get_handle */ + virgl_buffer_destroy, /* resource_destroy */ + virgl_buffer_transfer_map, /* transfer_map */ + virgl_buffer_transfer_flush_region, /* transfer_flush_region */ + virgl_buffer_transfer_unmap, /* transfer_unmap */ + virgl_transfer_inline_write /* transfer_inline_write */ +}; + +struct 
pipe_resource *virgl_buffer_create(struct virgl_screen *vs, + const struct pipe_resource *template) +{ + struct virgl_buffer *buf; + uint32_t size; + uint32_t vbind; + buf = CALLOC_STRUCT(virgl_buffer); + /* Allocation can fail; report it to the caller instead of dereferencing NULL below. */ + if (!buf) + return NULL; + + buf->base.clean = TRUE; + buf->base.u.b = *template; + buf->base.u.b.screen = &vs->base; + buf->base.u.vtbl = &virgl_buffer_vtbl; + pipe_reference_init(&buf->base.u.b.reference, 1); + util_range_init(&buf->valid_buffer_range); + + vbind = pipe_to_virgl_bind(template->bind); + size = template->width0; + + buf->base.hw_res = vs->vws->resource_create(vs->vws, template->target, template->format, vbind, template->width0, 1, 1, 1, 0, 0, size); + + util_range_set_empty(&buf->valid_buffer_range); + return &buf->base.u.b; +} diff --git a/src/gallium/drivers/virgl/virgl_context.c b/src/gallium/drivers/virgl/virgl_context.c new file mode 100644 index 00000000000..e4f02ba1096 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_context.c @@ -0,0 +1,963 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_shader_tokens.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_transfer.h" +#include "util/u_helpers.h" +#include "util/u_slab.h" +#include "util/u_upload_mgr.h" +#include "util/u_blitter.h" +#include "tgsi/tgsi_text.h" +#include "indices/u_primconvert.h" + +#include "pipebuffer/pb_buffer.h" +#include "state_tracker/graw.h" + +#include "virgl_encode.h" +#include "virgl_context.h" +#include "virgl_protocol.h" +#include "virgl_resource.h" +#include "virgl_screen.h" + +static uint32_t next_handle; +uint32_t virgl_object_assign_handle(void) +{ + return ++next_handle; +} + +static void virgl_buffer_flush(struct virgl_context *vctx, + struct virgl_buffer *vbuf) +{ + struct virgl_screen *rs = virgl_screen(vctx->base.screen); + struct pipe_box box; + + assert(vbuf->on_list); + + box.height = 1; + box.depth = 1; + box.y = 0; + box.z = 0; + + box.x = vbuf->valid_buffer_range.start; + box.width = MIN2(vbuf->valid_buffer_range.end - vbuf->valid_buffer_range.start, vbuf->base.u.b.width0); + + vctx->num_transfers++; + rs->vws->transfer_put(rs->vws, vbuf->base.hw_res, + &box, 0, 0, box.x, 0); + + util_range_set_empty(&vbuf->valid_buffer_range); +} + +static void virgl_attach_res_framebuffer(struct virgl_context *vctx) +{ + struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws; + struct pipe_surface *surf; + struct virgl_resource *res; + unsigned i; + + surf = vctx->framebuffer.zsbuf; + if (surf) { + res = virgl_resource(surf->texture); + if (res) + vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); + } 
+ for (i = 0; i < vctx->framebuffer.nr_cbufs; i++) { + surf = vctx->framebuffer.cbufs[i]; + if (surf) { + res = virgl_resource(surf->texture); + if (res) + vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); + } + } +} + +static void virgl_attach_res_sampler_views(struct virgl_context *vctx, + unsigned shader_type) +{ + struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws; + struct virgl_textures_info *tinfo = &vctx->samplers[shader_type]; + struct virgl_resource *res; + uint32_t remaining_mask = tinfo->enabled_mask; + unsigned i; + while (remaining_mask) { + i = u_bit_scan(&remaining_mask); + assert(tinfo->views[i]); + + res = virgl_resource(tinfo->views[i]->base.texture); + if (res) + vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); + } +} + +static void virgl_attach_res_vertex_buffers(struct virgl_context *vctx) +{ + struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws; + struct virgl_resource *res; + unsigned i; + + for (i = 0; i < vctx->num_vertex_buffers; i++) { + res = virgl_resource(vctx->vertex_buffer[i].buffer); + if (res) + vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); + } +} + +static void virgl_attach_res_index_buffer(struct virgl_context *vctx) +{ + struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws; + struct virgl_resource *res; + + res = virgl_resource(vctx->index_buffer.buffer); + if (res) + vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); +} + +static void virgl_attach_res_so_targets(struct virgl_context *vctx) +{ + struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws; + struct virgl_resource *res; + unsigned i; + + for (i = 0; i < vctx->num_so_targets; i++) { + res = virgl_resource(vctx->so_targets[i].base.buffer); + if (res) + vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); + } +} + +static void virgl_attach_res_uniform_buffers(struct virgl_context *vctx, + unsigned shader_type) +{ + struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws; + struct virgl_resource *res; + 
unsigned i; + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + res = virgl_resource(vctx->ubos[shader_type][i]); + if (res) { + vws->emit_res(vws, vctx->cbuf, res->hw_res, FALSE); + } + } +} + +/* + * after flushing, the hw context still has a bunch of + * resources bound, so we need to rebind those here. + */ +static void virgl_reemit_res(struct virgl_context *vctx) +{ + unsigned shader_type; + + /* reattach any flushed resources */ + /* framebuffer, sampler views, vertex/index/uniform/stream buffers */ + virgl_attach_res_framebuffer(vctx); + + for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) { + virgl_attach_res_sampler_views(vctx, shader_type); + virgl_attach_res_uniform_buffers(vctx, shader_type); + } + virgl_attach_res_index_buffer(vctx); + virgl_attach_res_vertex_buffers(vctx); + virgl_attach_res_so_targets(vctx); +} + +static struct pipe_surface *virgl_create_surface(struct pipe_context *ctx, + struct pipe_resource *resource, + const struct pipe_surface *templ) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_surface *surf; + struct virgl_resource *res = virgl_resource(resource); + uint32_t handle; + + surf = CALLOC_STRUCT(virgl_surface); + if (surf == NULL) + return NULL; + + res->clean = FALSE; + handle = virgl_object_assign_handle(); + pipe_reference_init(&surf->base.reference, 1); + pipe_resource_reference(&surf->base.texture, resource); + surf->base.context = ctx; + surf->base.format = templ->format; + if (resource->target != PIPE_BUFFER) { + surf->base.width = u_minify(resource->width0, templ->u.tex.level); + surf->base.height = u_minify(resource->height0, templ->u.tex.level); + surf->base.u.tex.level = templ->u.tex.level; + surf->base.u.tex.first_layer = templ->u.tex.first_layer; + surf->base.u.tex.last_layer = templ->u.tex.last_layer; + } else { + surf->base.width = templ->u.buf.last_element - templ->u.buf.first_element + 1; + surf->base.height = resource->height0; + surf->base.u.buf.first_element = 
templ->u.buf.first_element; + surf->base.u.buf.last_element = templ->u.buf.last_element; + } + virgl_encoder_create_surface(vctx, handle, res, &surf->base); + surf->handle = handle; + return &surf->base; +} + +static void virgl_surface_destroy(struct pipe_context *ctx, + struct pipe_surface *psurf) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_surface *surf = virgl_surface(psurf); + + pipe_resource_reference(&surf->base.texture, NULL); + virgl_encode_delete_object(vctx, surf->handle, VIRGL_OBJECT_SURFACE); + FREE(surf); +} + +static void *virgl_create_blend_state(struct pipe_context *ctx, + const struct pipe_blend_state *blend_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle; + handle = virgl_object_assign_handle(); + + virgl_encode_blend_state(vctx, handle, blend_state); + return (void *)(unsigned long)handle; + +} + +static void virgl_bind_blend_state(struct pipe_context *ctx, + void *blend_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)blend_state; + virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_BLEND); +} + +static void virgl_delete_blend_state(struct pipe_context *ctx, + void *blend_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)blend_state; + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_BLEND); +} + +static void *virgl_create_depth_stencil_alpha_state(struct pipe_context *ctx, + const struct pipe_depth_stencil_alpha_state *blend_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle; + handle = virgl_object_assign_handle(); + + virgl_encode_dsa_state(vctx, handle, blend_state); + return (void *)(unsigned long)handle; +} + +static void virgl_bind_depth_stencil_alpha_state(struct pipe_context *ctx, + void *blend_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)blend_state; + virgl_encode_bind_object(vctx, handle, 
VIRGL_OBJECT_DSA); +} + +static void virgl_delete_depth_stencil_alpha_state(struct pipe_context *ctx, + void *dsa_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)dsa_state; + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_DSA); +} + +static void *virgl_create_rasterizer_state(struct pipe_context *ctx, + const struct pipe_rasterizer_state *rs_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle; + handle = virgl_object_assign_handle(); + + virgl_encode_rasterizer_state(vctx, handle, rs_state); + return (void *)(unsigned long)handle; +} + +static void virgl_bind_rasterizer_state(struct pipe_context *ctx, + void *rs_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)rs_state; + + virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_RASTERIZER); +} + +static void virgl_delete_rasterizer_state(struct pipe_context *ctx, + void *rs_state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)rs_state; + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_RASTERIZER); +} + +static void virgl_set_framebuffer_state(struct pipe_context *ctx, + const struct pipe_framebuffer_state *state) +{ + struct virgl_context *vctx = virgl_context(ctx); + + vctx->framebuffer = *state; + virgl_encoder_set_framebuffer_state(vctx, state); + virgl_attach_res_framebuffer(vctx); +} + +static void virgl_set_viewport_states(struct pipe_context *ctx, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *state) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_viewport_states(vctx, start_slot, num_viewports, state); +} + +static void *virgl_create_vertex_elements_state(struct pipe_context *ctx, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = virgl_object_assign_handle(); + 
virgl_encoder_create_vertex_elements(vctx, handle, + num_elements, elements); + return (void*)(unsigned long)handle; + +} + +static void virgl_delete_vertex_elements_state(struct pipe_context *ctx, + void *ve) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)ve; + + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_VERTEX_ELEMENTS); +} + +static void virgl_bind_vertex_elements_state(struct pipe_context *ctx, + void *ve) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)ve; + virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_VERTEX_ELEMENTS); +} + +static void virgl_set_vertex_buffers(struct pipe_context *ctx, + unsigned start_slot, + unsigned num_buffers, + const struct pipe_vertex_buffer *buffers) +{ + struct virgl_context *vctx = virgl_context(ctx); + + util_set_vertex_buffers_count(vctx->vertex_buffer, + &vctx->num_vertex_buffers, + buffers, start_slot, num_buffers); + + vctx->vertex_array_dirty = TRUE; +} + +/* Encode the currently bound vertex buffers if they changed since the last time they were sent. */ +static void virgl_hw_set_vertex_buffers(struct pipe_context *ctx) +{ + struct virgl_context *vctx = virgl_context(ctx); + + if (vctx->vertex_array_dirty) { + virgl_encoder_set_vertex_buffers(vctx, vctx->num_vertex_buffers, vctx->vertex_buffer); + virgl_attach_res_vertex_buffers(vctx); + /* The state has been encoded; clear the flag so later draws do not re-send it until virgl_set_vertex_buffers() marks it dirty again. */ + vctx->vertex_array_dirty = FALSE; + } +} + +static void virgl_set_stencil_ref(struct pipe_context *ctx, + const struct pipe_stencil_ref *ref) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_stencil_ref(vctx, ref); +} + +static void virgl_set_blend_color(struct pipe_context *ctx, + const struct pipe_blend_color *color) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_blend_color(vctx, color); +} + +static void virgl_set_index_buffer(struct pipe_context *ctx, + const struct pipe_index_buffer *ib) +{ + struct virgl_context *vctx = virgl_context(ctx); + + if (ib) { + pipe_resource_reference(&vctx->index_buffer.buffer, ib->buffer); + memcpy(&vctx->index_buffer, ib, sizeof(*ib)); + 
} else { + pipe_resource_reference(&vctx->index_buffer.buffer, NULL); + } +} + +static void virgl_hw_set_index_buffer(struct pipe_context *ctx, + struct pipe_index_buffer *ib) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_index_buffer(vctx, ib); + virgl_attach_res_index_buffer(vctx); +} + +static void virgl_set_constant_buffer(struct pipe_context *ctx, + uint shader, uint index, + struct pipe_constant_buffer *buf) +{ + struct virgl_context *vctx = virgl_context(ctx); + + if (buf) { + if (!buf->user_buffer){ + struct virgl_resource *res = virgl_resource(buf->buffer); + virgl_encoder_set_uniform_buffer(vctx, shader, index, buf->buffer_offset, + buf->buffer_size, res); + pipe_resource_reference(&vctx->ubos[shader][index], buf->buffer); + return; + } + pipe_resource_reference(&vctx->ubos[shader][index], NULL); + virgl_encoder_write_constant_buffer(vctx, shader, index, buf->buffer_size / 4, buf->user_buffer); + } else { + virgl_encoder_write_constant_buffer(vctx, shader, index, 0, NULL); + pipe_resource_reference(&vctx->ubos[shader][index], NULL); + } +} + +void virgl_transfer_inline_write(struct pipe_context *ctx, + struct pipe_resource *res, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_screen *vs = virgl_screen(ctx->screen); + struct virgl_resource *grres = virgl_resource(res); + struct virgl_buffer *vbuf = virgl_buffer(res); + + grres->clean = FALSE; + + if (virgl_res_needs_flush_wait(vctx, &vbuf->base, usage)) { + ctx->flush(ctx, NULL, 0); + + vs->vws->resource_wait(vs->vws, vbuf->base.hw_res); + } + + virgl_encoder_inline_write(vctx, grres, level, usage, + box, data, stride, layer_stride); +} + +static void *virgl_shader_encoder(struct pipe_context *ctx, + const struct pipe_shader_state *shader, + unsigned type) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle; + 
struct tgsi_token *new_tokens; + int ret; + + new_tokens = virgl_tgsi_transform(shader->tokens); + if (!new_tokens) + return NULL; + + handle = virgl_object_assign_handle(); + /* encode the shader state for the requested stage (type) */ + ret = virgl_encode_shader_state(vctx, handle, type, + &shader->stream_output, + new_tokens); + /* The transformed tokens are not used after this point; free them before the error check so the failure path does not leak them. */ + FREE(new_tokens); + if (ret) { + return NULL; + } + + return (void *)(unsigned long)handle; + +} +static void *virgl_create_vs_state(struct pipe_context *ctx, + const struct pipe_shader_state *shader) +{ + return virgl_shader_encoder(ctx, shader, PIPE_SHADER_VERTEX); +} + +static void *virgl_create_gs_state(struct pipe_context *ctx, + const struct pipe_shader_state *shader) +{ + return virgl_shader_encoder(ctx, shader, PIPE_SHADER_GEOMETRY); +} + +static void *virgl_create_fs_state(struct pipe_context *ctx, + const struct pipe_shader_state *shader) +{ + return virgl_shader_encoder(ctx, shader, PIPE_SHADER_FRAGMENT); +} + +static void +virgl_delete_fs_state(struct pipe_context *ctx, + void *fs) +{ + uint32_t handle = (unsigned long)fs; + struct virgl_context *vctx = virgl_context(ctx); + + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SHADER); +} + +static void +virgl_delete_gs_state(struct pipe_context *ctx, + void *gs) +{ + uint32_t handle = (unsigned long)gs; + struct virgl_context *vctx = virgl_context(ctx); + + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SHADER); +} + +static void +virgl_delete_vs_state(struct pipe_context *ctx, + void *vs) +{ + uint32_t handle = (unsigned long)vs; + struct virgl_context *vctx = virgl_context(ctx); + + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SHADER); +} + +static void virgl_bind_vs_state(struct pipe_context *ctx, + void *vss) +{ + uint32_t handle = (unsigned long)vss; + struct virgl_context *vctx = virgl_context(ctx); + + virgl_encode_bind_shader(vctx, handle, PIPE_SHADER_VERTEX); +} + +static void virgl_bind_gs_state(struct pipe_context *ctx, + void *vss) +{ + uint32_t handle = (unsigned long)vss; + struct 
virgl_context *vctx = virgl_context(ctx); + + virgl_encode_bind_shader(vctx, handle, PIPE_SHADER_GEOMETRY); +} + + +static void virgl_bind_fs_state(struct pipe_context *ctx, + void *vss) +{ + uint32_t handle = (unsigned long)vss; + struct virgl_context *vctx = virgl_context(ctx); + + virgl_encode_bind_shader(vctx, handle, PIPE_SHADER_FRAGMENT); +} + +static void virgl_clear(struct pipe_context *ctx, + unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil) +{ + struct virgl_context *vctx = virgl_context(ctx); + + virgl_encode_clear(vctx, buffers, color, depth, stencil); +} + +static void virgl_draw_vbo(struct pipe_context *ctx, + const struct pipe_draw_info *dinfo) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_screen *rs = virgl_screen(ctx->screen); + struct pipe_index_buffer ib = {}; + struct pipe_draw_info info = *dinfo; + + if (!(rs->caps.caps.v1.prim_mask & (1 << dinfo->mode))) { + util_primconvert_save_index_buffer(vctx->primconvert, &vctx->index_buffer); + util_primconvert_draw_vbo(vctx->primconvert, dinfo); + return; + } + if (info.indexed) { + pipe_resource_reference(&ib.buffer, vctx->index_buffer.buffer); + ib.user_buffer = vctx->index_buffer.user_buffer; + ib.index_size = vctx->index_buffer.index_size; + ib.offset = vctx->index_buffer.offset + info.start * ib.index_size; + + if (ib.user_buffer) { + u_upload_data(vctx->uploader, 0, info.count * ib.index_size, + ib.user_buffer, &ib.offset, &ib.buffer); + ib.user_buffer = NULL; + } + } + + u_upload_unmap(vctx->uploader); + + vctx->num_draws++; + virgl_hw_set_vertex_buffers(ctx); + if (info.indexed) + virgl_hw_set_index_buffer(ctx, &ib); + + virgl_encoder_draw_vbo(vctx, &info); + + pipe_resource_reference(&ib.buffer, NULL); + +} + +static void virgl_flush_eq(struct virgl_context *ctx, void *closure) +{ + struct virgl_screen *rs = virgl_screen(ctx->base.screen); + + /* send the buffer to the remote side for decoding */ + ctx->num_transfers = 
ctx->num_draws = 0; + rs->vws->submit_cmd(rs->vws, ctx->cbuf); + + virgl_encoder_set_sub_ctx(ctx, ctx->hw_sub_ctx_id); + + /* add back current framebuffer resources to reference list? */ + virgl_reemit_res(ctx); +} + +static void virgl_flush_from_st(struct pipe_context *ctx, + struct pipe_fence_handle **fence, + enum pipe_flush_flags flags) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_screen *rs = virgl_screen(ctx->screen); + struct virgl_buffer *buf, *tmp; + + if (fence) + *fence = rs->vws->cs_create_fence(rs->vws); + + LIST_FOR_EACH_ENTRY_SAFE(buf, tmp, &vctx->to_flush_bufs, flush_list) { + struct pipe_resource *res = &buf->base.u.b; + virgl_buffer_flush(vctx, buf); + list_del(&buf->flush_list); + buf->on_list = FALSE; + pipe_resource_reference(&res, NULL); + + } + virgl_flush_eq(vctx, vctx); +} + +static struct pipe_sampler_view *virgl_create_sampler_view(struct pipe_context *ctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *state) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_sampler_view *grview; + uint32_t handle; + struct virgl_resource *res; + + if (state == NULL) + return NULL; + + grview = CALLOC_STRUCT(virgl_sampler_view); + if (!grview) + return NULL; + + res = virgl_resource(texture); + handle = virgl_object_assign_handle(); + virgl_encode_sampler_view(vctx, handle, res, state); + + grview->base = *state; + grview->base.reference.count = 1; + + grview->base.texture = NULL; + grview->base.context = ctx; + pipe_resource_reference(&grview->base.texture, texture); + grview->handle = handle; + return &grview->base; +} + +static void virgl_set_sampler_views(struct pipe_context *ctx, + unsigned shader_type, + unsigned start_slot, + unsigned num_views, + struct pipe_sampler_view **views) +{ + struct virgl_context *vctx = virgl_context(ctx); + int i; + uint32_t disable_mask = ~((1ull << num_views) - 1); + struct virgl_textures_info *tinfo = &vctx->samplers[shader_type]; + uint32_t new_mask = 
0; + uint32_t remaining_mask; + + remaining_mask = tinfo->enabled_mask & disable_mask; + + while (remaining_mask) { + i = u_bit_scan(&remaining_mask); + assert(tinfo->views[i]); + + pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], NULL); + } + + for (i = 0; i < num_views; i++) { + struct virgl_sampler_view *grview = virgl_sampler_view(views[i]); + + if (views[i] == (struct pipe_sampler_view *)tinfo->views[i]) + continue; + + if (grview) { + new_mask |= 1 << i; + pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], views[i]); + } else { + pipe_sampler_view_reference((struct pipe_sampler_view **)&tinfo->views[i], NULL); + disable_mask |= 1 << i; + } + } + + tinfo->enabled_mask &= ~disable_mask; + tinfo->enabled_mask |= new_mask; + virgl_encode_set_sampler_views(vctx, shader_type, start_slot, num_views, tinfo->views); + virgl_attach_res_sampler_views(vctx, shader_type); +} + +static void virgl_destroy_sampler_view(struct pipe_context *ctx, + struct pipe_sampler_view *view) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_sampler_view *grview = virgl_sampler_view(view); + + virgl_encode_delete_object(vctx, grview->handle, VIRGL_OBJECT_SAMPLER_VIEW); + pipe_resource_reference(&view->texture, NULL); + FREE(view); +} + +static void *virgl_create_sampler_state(struct pipe_context *ctx, + const struct pipe_sampler_state *state) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle; + + handle = virgl_object_assign_handle(); + + virgl_encode_sampler_state(vctx, handle, state); + return (void *)(unsigned long)handle; +} + +static void virgl_delete_sampler_state(struct pipe_context *ctx, + void *ss) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handle = (unsigned long)ss; + + virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_SAMPLER_STATE); +} + +static void virgl_bind_sampler_states(struct pipe_context *ctx, + unsigned shader, unsigned start_slot, + unsigned 
num_samplers, + void **samplers) +{ + struct virgl_context *vctx = virgl_context(ctx); + uint32_t handles[32]; + int i; + for (i = 0; i < num_samplers; i++) { + handles[i] = (unsigned long)(samplers[i]); + } + virgl_encode_bind_sampler_states(vctx, shader, start_slot, num_samplers, handles); +} + +static void virgl_set_polygon_stipple(struct pipe_context *ctx, + const struct pipe_poly_stipple *ps) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_polygon_stipple(vctx, ps); +} + +static void virgl_set_scissor_states(struct pipe_context *ctx, + unsigned start_slot, + unsigned num_scissor, + const struct pipe_scissor_state *ss) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_scissor_state(vctx, start_slot, num_scissor, ss); +} + +static void virgl_set_sample_mask(struct pipe_context *ctx, + unsigned sample_mask) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_sample_mask(vctx, sample_mask); +} + +static void virgl_set_clip_state(struct pipe_context *ctx, + const struct pipe_clip_state *clip) +{ + struct virgl_context *vctx = virgl_context(ctx); + virgl_encoder_set_clip_state(vctx, clip); +} + +static void virgl_resource_copy_region(struct pipe_context *ctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_resource *dres = virgl_resource(dst); + struct virgl_resource *sres = virgl_resource(src); + + dres->clean = FALSE; + virgl_encode_resource_copy_region(vctx, dres, + dst_level, dstx, dsty, dstz, + sres, src_level, + src_box); +} + +static void +virgl_flush_resource(struct pipe_context *pipe, + struct pipe_resource *resource) +{ +} + +static void virgl_blit(struct pipe_context *ctx, + const struct pipe_blit_info *blit) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct 
virgl_resource *dres = virgl_resource(blit->dst.resource); + struct virgl_resource *sres = virgl_resource(blit->src.resource); + + dres->clean = FALSE; + virgl_encode_blit(vctx, dres, sres, + blit); +} + +static void +virgl_context_destroy( struct pipe_context *ctx ) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_screen *rs = virgl_screen(ctx->screen); + + vctx->framebuffer.zsbuf = NULL; + vctx->framebuffer.nr_cbufs = 0; + virgl_encoder_destroy_sub_ctx(vctx, vctx->hw_sub_ctx_id); + virgl_flush_eq(vctx, vctx); + + rs->vws->cmd_buf_destroy(vctx->cbuf); + if (vctx->uploader) + u_upload_destroy(vctx->uploader); + util_primconvert_destroy(vctx->primconvert); + + util_slab_destroy(&vctx->texture_transfer_pool); + FREE(vctx); +} + +struct pipe_context *virgl_context_create(struct pipe_screen *pscreen, + void *priv, + unsigned flags) +{ + struct virgl_context *vctx; + struct virgl_screen *rs = virgl_screen(pscreen); + vctx = CALLOC_STRUCT(virgl_context); + + vctx->cbuf = rs->vws->cmd_buf_create(rs->vws); + if (!vctx->cbuf) { + FREE(vctx); + return NULL; + } + + vctx->base.destroy = virgl_context_destroy; + vctx->base.create_surface = virgl_create_surface; + vctx->base.surface_destroy = virgl_surface_destroy; + vctx->base.set_framebuffer_state = virgl_set_framebuffer_state; + vctx->base.create_blend_state = virgl_create_blend_state; + vctx->base.bind_blend_state = virgl_bind_blend_state; + vctx->base.delete_blend_state = virgl_delete_blend_state; + vctx->base.create_depth_stencil_alpha_state = virgl_create_depth_stencil_alpha_state; + vctx->base.bind_depth_stencil_alpha_state = virgl_bind_depth_stencil_alpha_state; + vctx->base.delete_depth_stencil_alpha_state = virgl_delete_depth_stencil_alpha_state; + vctx->base.create_rasterizer_state = virgl_create_rasterizer_state; + vctx->base.bind_rasterizer_state = virgl_bind_rasterizer_state; + vctx->base.delete_rasterizer_state = virgl_delete_rasterizer_state; + + vctx->base.set_viewport_states = 
virgl_set_viewport_states; + vctx->base.create_vertex_elements_state = virgl_create_vertex_elements_state; + vctx->base.bind_vertex_elements_state = virgl_bind_vertex_elements_state; + vctx->base.delete_vertex_elements_state = virgl_delete_vertex_elements_state; + vctx->base.set_vertex_buffers = virgl_set_vertex_buffers; + vctx->base.set_index_buffer = virgl_set_index_buffer; + vctx->base.set_constant_buffer = virgl_set_constant_buffer; + vctx->base.transfer_inline_write = virgl_transfer_inline_write; + + vctx->base.create_vs_state = virgl_create_vs_state; + vctx->base.create_gs_state = virgl_create_gs_state; + vctx->base.create_fs_state = virgl_create_fs_state; + + vctx->base.bind_vs_state = virgl_bind_vs_state; + vctx->base.bind_gs_state = virgl_bind_gs_state; + vctx->base.bind_fs_state = virgl_bind_fs_state; + + vctx->base.delete_vs_state = virgl_delete_vs_state; + vctx->base.delete_gs_state = virgl_delete_gs_state; + vctx->base.delete_fs_state = virgl_delete_fs_state; + + vctx->base.clear = virgl_clear; + vctx->base.draw_vbo = virgl_draw_vbo; + vctx->base.flush = virgl_flush_from_st; + vctx->base.screen = pscreen; + vctx->base.create_sampler_view = virgl_create_sampler_view; + vctx->base.sampler_view_destroy = virgl_destroy_sampler_view; + vctx->base.set_sampler_views = virgl_set_sampler_views; + + vctx->base.create_sampler_state = virgl_create_sampler_state; + vctx->base.delete_sampler_state = virgl_delete_sampler_state; + vctx->base.bind_sampler_states = virgl_bind_sampler_states; + + vctx->base.set_polygon_stipple = virgl_set_polygon_stipple; + vctx->base.set_scissor_states = virgl_set_scissor_states; + vctx->base.set_sample_mask = virgl_set_sample_mask; + vctx->base.set_stencil_ref = virgl_set_stencil_ref; + vctx->base.set_clip_state = virgl_set_clip_state; + + vctx->base.set_blend_color = virgl_set_blend_color; + + vctx->base.resource_copy_region = virgl_resource_copy_region; + vctx->base.flush_resource = virgl_flush_resource; + vctx->base.blit = 
virgl_blit; + + virgl_init_context_resource_functions(&vctx->base); + virgl_init_query_functions(vctx); + virgl_init_so_functions(vctx); + + list_inithead(&vctx->to_flush_bufs); + util_slab_create(&vctx->texture_transfer_pool, sizeof(struct virgl_transfer), + 16, UTIL_SLAB_SINGLETHREADED); + + vctx->primconvert = util_primconvert_create(&vctx->base, rs->caps.caps.v1.prim_mask); + vctx->uploader = u_upload_create(&vctx->base, 1024 * 1024, 256, + PIPE_BIND_INDEX_BUFFER); + if (!vctx->uploader) + goto fail; + + vctx->hw_sub_ctx_id = rs->sub_ctx_id++; + virgl_encoder_create_sub_ctx(vctx, vctx->hw_sub_ctx_id); + + virgl_encoder_set_sub_ctx(vctx, vctx->hw_sub_ctx_id); + return &vctx->base; +fail: + return NULL; +} diff --git a/src/gallium/drivers/virgl/virgl_context.h b/src/gallium/drivers/virgl/virgl_context.h new file mode 100644 index 00000000000..adb8adef33c --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_context.h @@ -0,0 +1,115 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VIRGL_CONTEXT_H +#define VIRGL_CONTEXT_H + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "util/u_slab.h" +#include "util/list.h" + +struct pipe_screen; +struct tgsi_token; +struct u_upload_mgr; +struct virgl_cmd_buf; + +struct virgl_sampler_view { + struct pipe_sampler_view base; + uint32_t handle; +}; + +struct virgl_so_target { + struct pipe_stream_output_target base; + uint32_t handle; +}; + +struct virgl_textures_info { + struct virgl_sampler_view *views[16]; + uint32_t enabled_mask; +}; + +struct virgl_context { + struct pipe_context base; + struct virgl_cmd_buf *cbuf; + + struct virgl_textures_info samplers[PIPE_SHADER_TYPES]; + + struct pipe_framebuffer_state framebuffer; + + struct util_slab_mempool texture_transfer_pool; + + struct pipe_index_buffer index_buffer; + struct u_upload_mgr *uploader; + + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned num_vertex_buffers; + boolean vertex_array_dirty; + + struct virgl_so_target so_targets[PIPE_MAX_SO_BUFFERS]; + unsigned num_so_targets; + + struct pipe_resource *ubos[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; + int num_transfers; + int num_draws; + struct list_head to_flush_bufs; + + struct primconvert_context *primconvert; + uint32_t hw_sub_ctx_id; +}; + +static inline struct virgl_sampler_view * +virgl_sampler_view(struct pipe_sampler_view *view) +{ + return (struct virgl_sampler_view *)view; +}; + +static inline struct virgl_so_target * +virgl_so_target(struct pipe_stream_output_target *target) +{ + return (struct virgl_so_target *)target; +} + +static inline struct virgl_context *virgl_context(struct pipe_context *ctx) +{ + return (struct virgl_context *)ctx; +} + +struct 
pipe_context *virgl_context_create(struct pipe_screen *pscreen, + void *priv, unsigned flags); + +void virgl_init_blit_functions(struct virgl_context *vctx); +void virgl_init_query_functions(struct virgl_context *vctx); +void virgl_init_so_functions(struct virgl_context *vctx); + +void virgl_transfer_inline_write(struct pipe_context *ctx, + struct pipe_resource *res, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride); + +struct tgsi_token *virgl_tgsi_transform(const struct tgsi_token *tokens_in); + +#endif diff --git a/src/gallium/drivers/virgl/virgl_encode.c b/src/gallium/drivers/virgl/virgl_encode.c new file mode 100644 index 00000000000..22fb5292819 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_encode.c @@ -0,0 +1,867 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include <stdint.h> + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "pipe/p_state.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" + +#include "virgl_context.h" +#include "virgl_encode.h" +#include "virgl_protocol.h" +#include "virgl_resource.h" +#include "virgl_screen.h" + +static int virgl_encoder_write_cmd_dword(struct virgl_context *ctx, + uint32_t dword) +{ + int len = (dword >> 16); + + if ((ctx->cbuf->cdw + len + 1) > VIRGL_MAX_CMDBUF_DWORDS) + ctx->base.flush(&ctx->base, NULL, 0); + + virgl_encoder_write_dword(ctx->cbuf, dword); + return 0; +} + +static void virgl_encoder_write_res(struct virgl_context *ctx, + struct virgl_resource *res) +{ + struct virgl_winsys *vws = virgl_screen(ctx->base.screen)->vws; + + if (res && res->hw_res) + vws->emit_res(vws, ctx->cbuf, res->hw_res, TRUE); + else { + virgl_encoder_write_dword(ctx->cbuf, 0); + } +} + +int virgl_encode_bind_object(struct virgl_context *ctx, + uint32_t handle, uint32_t object) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BIND_OBJECT, object, 1)); + virgl_encoder_write_dword(ctx->cbuf, handle); + return 0; +} + +int virgl_encode_delete_object(struct virgl_context *ctx, + uint32_t handle, uint32_t object) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_DESTROY_OBJECT, object, 1)); + virgl_encoder_write_dword(ctx->cbuf, handle); + return 0; +} + +int virgl_encode_blend_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_blend_state *blend_state) +{ + uint32_t tmp; + int i; + + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_BLEND, VIRGL_OBJ_BLEND_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + + tmp = + VIRGL_OBJ_BLEND_S0_INDEPENDENT_BLEND_ENABLE(blend_state->independent_blend_enable) | + VIRGL_OBJ_BLEND_S0_LOGICOP_ENABLE(blend_state->logicop_enable) | + VIRGL_OBJ_BLEND_S0_DITHER(blend_state->dither) | + 
VIRGL_OBJ_BLEND_S0_ALPHA_TO_COVERAGE(blend_state->alpha_to_coverage) | + VIRGL_OBJ_BLEND_S0_ALPHA_TO_ONE(blend_state->alpha_to_one); + + virgl_encoder_write_dword(ctx->cbuf, tmp); + + tmp = VIRGL_OBJ_BLEND_S1_LOGICOP_FUNC(blend_state->logicop_func); + virgl_encoder_write_dword(ctx->cbuf, tmp); + + for (i = 0; i < VIRGL_MAX_COLOR_BUFS; i++) { + tmp = + VIRGL_OBJ_BLEND_S2_RT_BLEND_ENABLE(blend_state->rt[i].blend_enable) | + VIRGL_OBJ_BLEND_S2_RT_RGB_FUNC(blend_state->rt[i].rgb_func) | + VIRGL_OBJ_BLEND_S2_RT_RGB_SRC_FACTOR(blend_state->rt[i].rgb_src_factor) | + VIRGL_OBJ_BLEND_S2_RT_RGB_DST_FACTOR(blend_state->rt[i].rgb_dst_factor)| + VIRGL_OBJ_BLEND_S2_RT_ALPHA_FUNC(blend_state->rt[i].alpha_func) | + VIRGL_OBJ_BLEND_S2_RT_ALPHA_SRC_FACTOR(blend_state->rt[i].alpha_src_factor) | + VIRGL_OBJ_BLEND_S2_RT_ALPHA_DST_FACTOR(blend_state->rt[i].alpha_dst_factor) | + VIRGL_OBJ_BLEND_S2_RT_COLORMASK(blend_state->rt[i].colormask); + virgl_encoder_write_dword(ctx->cbuf, tmp); + } + return 0; +} + +int virgl_encode_dsa_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_depth_stencil_alpha_state *dsa_state) +{ + uint32_t tmp; + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_DSA, VIRGL_OBJ_DSA_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + + tmp = VIRGL_OBJ_DSA_S0_DEPTH_ENABLE(dsa_state->depth.enabled) | + VIRGL_OBJ_DSA_S0_DEPTH_WRITEMASK(dsa_state->depth.writemask) | + VIRGL_OBJ_DSA_S0_DEPTH_FUNC(dsa_state->depth.func) | + VIRGL_OBJ_DSA_S0_ALPHA_ENABLED(dsa_state->alpha.enabled) | + VIRGL_OBJ_DSA_S0_ALPHA_FUNC(dsa_state->alpha.func); + virgl_encoder_write_dword(ctx->cbuf, tmp); + + for (i = 0; i < 2; i++) { + tmp = VIRGL_OBJ_DSA_S1_STENCIL_ENABLED(dsa_state->stencil[i].enabled) | + VIRGL_OBJ_DSA_S1_STENCIL_FUNC(dsa_state->stencil[i].func) | + VIRGL_OBJ_DSA_S1_STENCIL_FAIL_OP(dsa_state->stencil[i].fail_op) | + VIRGL_OBJ_DSA_S1_STENCIL_ZPASS_OP(dsa_state->stencil[i].zpass_op) | + 
VIRGL_OBJ_DSA_S1_STENCIL_ZFAIL_OP(dsa_state->stencil[i].zfail_op) | + VIRGL_OBJ_DSA_S1_STENCIL_VALUEMASK(dsa_state->stencil[i].valuemask) | + VIRGL_OBJ_DSA_S1_STENCIL_WRITEMASK(dsa_state->stencil[i].writemask); + virgl_encoder_write_dword(ctx->cbuf, tmp); + } + + virgl_encoder_write_dword(ctx->cbuf, fui(dsa_state->alpha.ref_value)); + return 0; +} +int virgl_encode_rasterizer_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_rasterizer_state *state) +{ + uint32_t tmp; + + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_RASTERIZER, VIRGL_OBJ_RS_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + + tmp = VIRGL_OBJ_RS_S0_FLATSHADE(state->flatshade) | + VIRGL_OBJ_RS_S0_DEPTH_CLIP(state->depth_clip) | + VIRGL_OBJ_RS_S0_CLIP_HALFZ(state->clip_halfz) | + VIRGL_OBJ_RS_S0_RASTERIZER_DISCARD(state->rasterizer_discard) | + VIRGL_OBJ_RS_S0_FLATSHADE_FIRST(state->flatshade_first) | + VIRGL_OBJ_RS_S0_LIGHT_TWOSIZE(state->light_twoside) | + VIRGL_OBJ_RS_S0_SPRITE_COORD_MODE(state->sprite_coord_mode) | + VIRGL_OBJ_RS_S0_POINT_QUAD_RASTERIZATION(state->point_quad_rasterization) | + VIRGL_OBJ_RS_S0_CULL_FACE(state->cull_face) | + VIRGL_OBJ_RS_S0_FILL_FRONT(state->fill_front) | + VIRGL_OBJ_RS_S0_FILL_BACK(state->fill_back) | + VIRGL_OBJ_RS_S0_SCISSOR(state->scissor) | + VIRGL_OBJ_RS_S0_FRONT_CCW(state->front_ccw) | + VIRGL_OBJ_RS_S0_CLAMP_VERTEX_COLOR(state->clamp_vertex_color) | + VIRGL_OBJ_RS_S0_CLAMP_FRAGMENT_COLOR(state->clamp_fragment_color) | + VIRGL_OBJ_RS_S0_OFFSET_LINE(state->offset_line) | + VIRGL_OBJ_RS_S0_OFFSET_POINT(state->offset_point) | + VIRGL_OBJ_RS_S0_OFFSET_TRI(state->offset_tri) | + VIRGL_OBJ_RS_S0_POLY_SMOOTH(state->poly_smooth) | + VIRGL_OBJ_RS_S0_POLY_STIPPLE_ENABLE(state->poly_stipple_enable) | + VIRGL_OBJ_RS_S0_POINT_SMOOTH(state->point_smooth) | + VIRGL_OBJ_RS_S0_POINT_SIZE_PER_VERTEX(state->point_size_per_vertex) | + VIRGL_OBJ_RS_S0_MULTISAMPLE(state->multisample) | + 
VIRGL_OBJ_RS_S0_LINE_SMOOTH(state->line_smooth) | + VIRGL_OBJ_RS_S0_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | + VIRGL_OBJ_RS_S0_LINE_LAST_PIXEL(state->line_last_pixel) | + VIRGL_OBJ_RS_S0_HALF_PIXEL_CENTER(state->half_pixel_center) | + VIRGL_OBJ_RS_S0_BOTTOM_EDGE_RULE(state->bottom_edge_rule); + + virgl_encoder_write_dword(ctx->cbuf, tmp); /* S0 */ + virgl_encoder_write_dword(ctx->cbuf, fui(state->point_size)); /* S1 */ + virgl_encoder_write_dword(ctx->cbuf, state->sprite_coord_enable); /* S2 */ + tmp = VIRGL_OBJ_RS_S3_LINE_STIPPLE_PATTERN(state->line_stipple_pattern) | + VIRGL_OBJ_RS_S3_LINE_STIPPLE_FACTOR(state->line_stipple_factor) | + VIRGL_OBJ_RS_S3_CLIP_PLANE_ENABLE(state->clip_plane_enable); + virgl_encoder_write_dword(ctx->cbuf, tmp); /* S3 */ + virgl_encoder_write_dword(ctx->cbuf, fui(state->line_width)); /* S4 */ + virgl_encoder_write_dword(ctx->cbuf, fui(state->offset_units)); /* S5 */ + virgl_encoder_write_dword(ctx->cbuf, fui(state->offset_scale)); /* S6 */ + virgl_encoder_write_dword(ctx->cbuf, fui(state->offset_clamp)); /* S7 */ + return 0; +} + +static void virgl_emit_shader_header(struct virgl_context *ctx, + uint32_t handle, uint32_t len, + uint32_t type, uint32_t offlen, + uint32_t num_tokens) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_SHADER, len)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_dword(ctx->cbuf, type); + virgl_encoder_write_dword(ctx->cbuf, offlen); + virgl_encoder_write_dword(ctx->cbuf, num_tokens); +} + +static void virgl_emit_shader_streamout(struct virgl_context *ctx, + const struct pipe_stream_output_info *so_info) +{ + int num_outputs = 0; + int i; + uint32_t tmp; + + if (so_info) + num_outputs = so_info->num_outputs; + + virgl_encoder_write_dword(ctx->cbuf, num_outputs); + if (num_outputs) { + for (i = 0; i < 4; i++) + virgl_encoder_write_dword(ctx->cbuf, so_info->stride[i]); + + for (i = 0; i < so_info->num_outputs; i++) { + tmp = + 
VIRGL_OBJ_SHADER_SO_OUTPUT_REGISTER_INDEX(so_info->output[i].register_index) | + VIRGL_OBJ_SHADER_SO_OUTPUT_START_COMPONENT(so_info->output[i].start_component) | + VIRGL_OBJ_SHADER_SO_OUTPUT_NUM_COMPONENTS(so_info->output[i].num_components) | + VIRGL_OBJ_SHADER_SO_OUTPUT_BUFFER(so_info->output[i].output_buffer) | + VIRGL_OBJ_SHADER_SO_OUTPUT_DST_OFFSET(so_info->output[i].dst_offset); + virgl_encoder_write_dword(ctx->cbuf, tmp); + virgl_encoder_write_dword(ctx->cbuf, 0); + } + } +} + +int virgl_encode_shader_state(struct virgl_context *ctx, + uint32_t handle, + uint32_t type, + const struct pipe_stream_output_info *so_info, + const struct tgsi_token *tokens) +{ + char *str, *sptr; + uint32_t shader_len, len; + bool bret; + int num_tokens = tgsi_num_tokens(tokens); + int str_total_size = 65536; + int retry_size = 1; + uint32_t left_bytes, base_hdr_size, strm_hdr_size, thispass; + bool first_pass; + str = CALLOC(1, str_total_size); + if (!str) + return -1; + + do { + int old_size; + + bret = tgsi_dump_str(tokens, TGSI_DUMP_FLOAT_AS_HEX, str, str_total_size); + if (bret == false) { + fprintf(stderr, "Failed to translate shader in available space - trying again\n"); + old_size = str_total_size; + str_total_size = 65536 * ++retry_size; + str = REALLOC(str, old_size, str_total_size); + if (!str) + return -1; + } + } while (bret == false && retry_size < 10); + + if (bret == false) + return -1; + + shader_len = strlen(str) + 1; + + left_bytes = shader_len; + + base_hdr_size = 5; + strm_hdr_size = so_info->num_outputs ? so_info->num_outputs * 2 + 4 : 0; + first_pass = true; + sptr = str; + while (left_bytes) { + uint32_t length, offlen; + int hdr_len = base_hdr_size + (first_pass ? 
strm_hdr_size : 0); + if (ctx->cbuf->cdw + hdr_len + 1 > VIRGL_MAX_CMDBUF_DWORDS) + ctx->base.flush(&ctx->base, NULL, 0); + + thispass = (VIRGL_MAX_CMDBUF_DWORDS - ctx->cbuf->cdw - hdr_len - 1) * 4; + + length = MIN2(thispass, left_bytes); + len = ((length + 3) / 4) + hdr_len; + + if (first_pass) + offlen = VIRGL_OBJ_SHADER_OFFSET_VAL(shader_len); + else + offlen = VIRGL_OBJ_SHADER_OFFSET_VAL((uintptr_t)sptr - (uintptr_t)str) | VIRGL_OBJ_SHADER_OFFSET_CONT; + + virgl_emit_shader_header(ctx, handle, len, type, offlen, num_tokens); + + virgl_emit_shader_streamout(ctx, first_pass ? so_info : NULL); + + virgl_encoder_write_block(ctx->cbuf, (uint8_t *)sptr, length); + + sptr += length; + first_pass = false; + left_bytes -= length; + } + + FREE(str); + return 0; +} + + +int virgl_encode_clear(struct virgl_context *ctx, + unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil) +{ + int i; + + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CLEAR, 0, VIRGL_OBJ_CLEAR_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, buffers); + for (i = 0; i < 4; i++) + virgl_encoder_write_dword(ctx->cbuf, color->ui[i]); + virgl_encoder_write_qword(ctx->cbuf, *(uint64_t *)&depth); + virgl_encoder_write_dword(ctx->cbuf, stencil); + return 0; +} + +int virgl_encoder_set_framebuffer_state(struct virgl_context *ctx, + const struct pipe_framebuffer_state *state) +{ + struct virgl_surface *zsurf = virgl_surface(state->zsbuf); + int i; + + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_FRAMEBUFFER_STATE, 0, VIRGL_SET_FRAMEBUFFER_STATE_SIZE(state->nr_cbufs))); + virgl_encoder_write_dword(ctx->cbuf, state->nr_cbufs); + virgl_encoder_write_dword(ctx->cbuf, zsurf ? zsurf->handle : 0); + for (i = 0; i < state->nr_cbufs; i++) { + struct virgl_surface *surf = virgl_surface(state->cbufs[i]); + virgl_encoder_write_dword(ctx->cbuf, surf ? 
surf->handle : 0); + } + + return 0; +} + +int virgl_encoder_set_viewport_states(struct virgl_context *ctx, + int start_slot, + int num_viewports, + const struct pipe_viewport_state *states) +{ + int i,v; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_VIEWPORT_STATE, 0, VIRGL_SET_VIEWPORT_STATE_SIZE(num_viewports))); + virgl_encoder_write_dword(ctx->cbuf, start_slot); + for (v = 0; v < num_viewports; v++) { + for (i = 0; i < 3; i++) + virgl_encoder_write_dword(ctx->cbuf, fui(states[v].scale[i])); + for (i = 0; i < 3; i++) + virgl_encoder_write_dword(ctx->cbuf, fui(states[v].translate[i])); + } + return 0; +} + +int virgl_encoder_create_vertex_elements(struct virgl_context *ctx, + uint32_t handle, + unsigned num_elements, + const struct pipe_vertex_element *element) +{ + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_VERTEX_ELEMENTS, VIRGL_OBJ_VERTEX_ELEMENTS_SIZE(num_elements))); + virgl_encoder_write_dword(ctx->cbuf, handle); + for (i = 0; i < num_elements; i++) { + virgl_encoder_write_dword(ctx->cbuf, element[i].src_offset); + virgl_encoder_write_dword(ctx->cbuf, element[i].instance_divisor); + virgl_encoder_write_dword(ctx->cbuf, element[i].vertex_buffer_index); + virgl_encoder_write_dword(ctx->cbuf, element[i].src_format); + } + return 0; +} + +int virgl_encoder_set_vertex_buffers(struct virgl_context *ctx, + unsigned num_buffers, + const struct pipe_vertex_buffer *buffers) +{ + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_VERTEX_BUFFERS, 0, VIRGL_SET_VERTEX_BUFFERS_SIZE(num_buffers))); + for (i = 0; i < num_buffers; i++) { + struct virgl_resource *res = virgl_resource(buffers[i].buffer); + virgl_encoder_write_dword(ctx->cbuf, buffers[i].stride); + virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_offset); + virgl_encoder_write_res(ctx, res); + } + return 0; +} + +int virgl_encoder_set_index_buffer(struct virgl_context *ctx, + const struct pipe_index_buffer *ib) +{ + 
int length = VIRGL_SET_INDEX_BUFFER_SIZE(ib); + struct virgl_resource *res = NULL; + if (ib) + res = virgl_resource(ib->buffer); + + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_INDEX_BUFFER, 0, length)); + virgl_encoder_write_res(ctx, res); + if (ib) { + virgl_encoder_write_dword(ctx->cbuf, ib->index_size); + virgl_encoder_write_dword(ctx->cbuf, ib->offset); + } + return 0; +} + +int virgl_encoder_draw_vbo(struct virgl_context *ctx, + const struct pipe_draw_info *info) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_DRAW_VBO, 0, VIRGL_DRAW_VBO_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, info->start); + virgl_encoder_write_dword(ctx->cbuf, info->count); + virgl_encoder_write_dword(ctx->cbuf, info->mode); + virgl_encoder_write_dword(ctx->cbuf, info->indexed); + virgl_encoder_write_dword(ctx->cbuf, info->instance_count); + virgl_encoder_write_dword(ctx->cbuf, info->index_bias); + virgl_encoder_write_dword(ctx->cbuf, info->start_instance); + virgl_encoder_write_dword(ctx->cbuf, info->primitive_restart); + virgl_encoder_write_dword(ctx->cbuf, info->restart_index); + virgl_encoder_write_dword(ctx->cbuf, info->min_index); + virgl_encoder_write_dword(ctx->cbuf, info->max_index); + if (info->count_from_stream_output) + virgl_encoder_write_dword(ctx->cbuf, info->count_from_stream_output->buffer_size); + else + virgl_encoder_write_dword(ctx->cbuf, 0); + return 0; +} + +int virgl_encoder_create_surface(struct virgl_context *ctx, + uint32_t handle, + struct virgl_resource *res, + const struct pipe_surface *templat) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_SURFACE, VIRGL_OBJ_SURFACE_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_res(ctx, res); + virgl_encoder_write_dword(ctx->cbuf, templat->format); + if (templat->texture->target == PIPE_BUFFER) { + virgl_encoder_write_dword(ctx->cbuf, templat->u.buf.first_element); + virgl_encoder_write_dword(ctx->cbuf, 
templat->u.buf.last_element); + + } else { + virgl_encoder_write_dword(ctx->cbuf, templat->u.tex.level); + virgl_encoder_write_dword(ctx->cbuf, templat->u.tex.first_layer | (templat->u.tex.last_layer << 16)); + } + return 0; +} + +int virgl_encoder_create_so_target(struct virgl_context *ctx, + uint32_t handle, + struct virgl_resource *res, + unsigned buffer_offset, + unsigned buffer_size) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_STREAMOUT_TARGET, VIRGL_OBJ_STREAMOUT_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_res(ctx, res); + virgl_encoder_write_dword(ctx->cbuf, buffer_offset); + virgl_encoder_write_dword(ctx->cbuf, buffer_size); + return 0; +} + +static void virgl_encoder_iw_emit_header_1d(struct virgl_context *ctx, + struct virgl_resource *res, + unsigned level, unsigned usage, + const struct pipe_box *box, + unsigned stride, unsigned layer_stride) +{ + virgl_encoder_write_res(ctx, res); + virgl_encoder_write_dword(ctx->cbuf, level); + virgl_encoder_write_dword(ctx->cbuf, usage); + virgl_encoder_write_dword(ctx->cbuf, stride); + virgl_encoder_write_dword(ctx->cbuf, layer_stride); + virgl_encoder_write_dword(ctx->cbuf, box->x); + virgl_encoder_write_dword(ctx->cbuf, box->y); + virgl_encoder_write_dword(ctx->cbuf, box->z); + virgl_encoder_write_dword(ctx->cbuf, box->width); + virgl_encoder_write_dword(ctx->cbuf, box->height); + virgl_encoder_write_dword(ctx->cbuf, box->depth); +} + +int virgl_encoder_inline_write(struct virgl_context *ctx, + struct virgl_resource *res, + unsigned level, unsigned usage, + const struct pipe_box *box, + const void *data, unsigned stride, + unsigned layer_stride) +{ + uint32_t size = (stride ? 
stride : box->width) * box->height; + uint32_t length, thispass, left_bytes; + struct pipe_box mybox = *box; + + length = 11 + (size + 3) / 4; + if ((ctx->cbuf->cdw + length + 1) > VIRGL_MAX_CMDBUF_DWORDS) { + if (box->height > 1 || box->depth > 1) { + debug_printf("inline transfer failed due to multi dimensions and too large\n"); + assert(0); + } + } + + left_bytes = size; + while (left_bytes) { + if (ctx->cbuf->cdw + 12 > VIRGL_MAX_CMDBUF_DWORDS) + ctx->base.flush(&ctx->base, NULL, 0); + + thispass = (VIRGL_MAX_CMDBUF_DWORDS - ctx->cbuf->cdw - 12) * 4; + + length = MIN2(thispass, left_bytes); + + mybox.width = length; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_RESOURCE_INLINE_WRITE, 0, ((length + 3) / 4) + 11)); + virgl_encoder_iw_emit_header_1d(ctx, res, level, usage, &mybox, stride, layer_stride); + virgl_encoder_write_block(ctx->cbuf, data, length); + left_bytes -= length; + mybox.x += length; + data += length; + } + return 0; +} + +int virgl_encoder_flush_frontbuffer(struct virgl_context *ctx, + struct virgl_resource *res) +{ +// virgl_encoder_write_dword(ctx->cbuf, VIRGL_CMD0(VIRGL_CCMD_FLUSH_FRONTUBFFER, 0, 1)); +// virgl_encoder_write_dword(ctx->cbuf, res_handle); + return 0; +} + +int virgl_encode_sampler_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_sampler_state *state) +{ + uint32_t tmp; + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_SAMPLER_STATE, VIRGL_OBJ_SAMPLER_STATE_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + + tmp = VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_S(state->wrap_s) | + VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_T(state->wrap_t) | + VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_R(state->wrap_r) | + VIRGL_OBJ_SAMPLE_STATE_S0_MIN_IMG_FILTER(state->min_img_filter) | + VIRGL_OBJ_SAMPLE_STATE_S0_MIN_MIP_FILTER(state->min_mip_filter) | + VIRGL_OBJ_SAMPLE_STATE_S0_MAG_IMG_FILTER(state->mag_img_filter) | + VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_MODE(state->compare_mode) | + 
VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_FUNC(state->compare_func); + + virgl_encoder_write_dword(ctx->cbuf, tmp); + virgl_encoder_write_dword(ctx->cbuf, fui(state->lod_bias)); + virgl_encoder_write_dword(ctx->cbuf, fui(state->min_lod)); + virgl_encoder_write_dword(ctx->cbuf, fui(state->max_lod)); + for (i = 0; i < 4; i++) + virgl_encoder_write_dword(ctx->cbuf, state->border_color.ui[i]); + return 0; +} + + +int virgl_encode_sampler_view(struct virgl_context *ctx, + uint32_t handle, + struct virgl_resource *res, + const struct pipe_sampler_view *state) +{ + uint32_t tmp; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_SAMPLER_VIEW, VIRGL_OBJ_SAMPLER_VIEW_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_res(ctx, res); + virgl_encoder_write_dword(ctx->cbuf, state->format); + if (res->u.b.target == PIPE_BUFFER) { + virgl_encoder_write_dword(ctx->cbuf, state->u.buf.first_element); + virgl_encoder_write_dword(ctx->cbuf, state->u.buf.last_element); + } else { + virgl_encoder_write_dword(ctx->cbuf, state->u.tex.first_layer | state->u.tex.last_layer << 16); + virgl_encoder_write_dword(ctx->cbuf, state->u.tex.first_level | state->u.tex.last_level << 8); + } + tmp = VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_R(state->swizzle_r) | + VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_G(state->swizzle_g) | + VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_B(state->swizzle_b) | + VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_A(state->swizzle_a); + virgl_encoder_write_dword(ctx->cbuf, tmp); + return 0; +} + +int virgl_encode_set_sampler_views(struct virgl_context *ctx, + uint32_t shader_type, + uint32_t start_slot, + uint32_t num_views, + struct virgl_sampler_view **views) +{ + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_SAMPLER_VIEWS, 0, VIRGL_SET_SAMPLER_VIEWS_SIZE(num_views))); + virgl_encoder_write_dword(ctx->cbuf, shader_type); + virgl_encoder_write_dword(ctx->cbuf, start_slot); + for (i = 0; i < num_views; i++) { + uint32_t handle = views[i] ? 
views[i]->handle : 0; + virgl_encoder_write_dword(ctx->cbuf, handle); + } + return 0; +} + +int virgl_encode_bind_sampler_states(struct virgl_context *ctx, + uint32_t shader_type, + uint32_t start_slot, + uint32_t num_handles, + uint32_t *handles) +{ + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BIND_SAMPLER_STATES, 0, VIRGL_BIND_SAMPLER_STATES(num_handles))); + virgl_encoder_write_dword(ctx->cbuf, shader_type); + virgl_encoder_write_dword(ctx->cbuf, start_slot); + for (i = 0; i < num_handles; i++) + virgl_encoder_write_dword(ctx->cbuf, handles[i]); + return 0; +} + +int virgl_encoder_write_constant_buffer(struct virgl_context *ctx, + uint32_t shader, + uint32_t index, + uint32_t size, + const void *data) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_CONSTANT_BUFFER, 0, size + 2)); + virgl_encoder_write_dword(ctx->cbuf, shader); + virgl_encoder_write_dword(ctx->cbuf, index); + if (data) + virgl_encoder_write_block(ctx->cbuf, data, size * 4); + return 0; +} + +int virgl_encoder_set_uniform_buffer(struct virgl_context *ctx, + uint32_t shader, + uint32_t index, + uint32_t offset, + uint32_t length, + struct virgl_resource *res) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_UNIFORM_BUFFER, 0, VIRGL_SET_UNIFORM_BUFFER_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, shader); + virgl_encoder_write_dword(ctx->cbuf, index); + virgl_encoder_write_dword(ctx->cbuf, offset); + virgl_encoder_write_dword(ctx->cbuf, length); + virgl_encoder_write_res(ctx, res); + return 0; +} + + +int virgl_encoder_set_stencil_ref(struct virgl_context *ctx, + const struct pipe_stencil_ref *ref) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_STENCIL_REF, 0, VIRGL_SET_STENCIL_REF_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, VIRGL_STENCIL_REF_VAL(ref->ref_value[0] , (ref->ref_value[1]))); + return 0; +} + +int virgl_encoder_set_blend_color(struct virgl_context *ctx, + const struct pipe_blend_color *color) +{ + int i; + 
virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_BLEND_COLOR, 0, VIRGL_SET_BLEND_COLOR_SIZE)); + for (i = 0; i < 4; i++) + virgl_encoder_write_dword(ctx->cbuf, fui(color->color[i])); + return 0; +} + +int virgl_encoder_set_scissor_state(struct virgl_context *ctx, + unsigned start_slot, + int num_scissors, + const struct pipe_scissor_state *ss) +{ + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_SCISSOR_STATE, 0, VIRGL_SET_SCISSOR_STATE_SIZE(num_scissors))); + virgl_encoder_write_dword(ctx->cbuf, start_slot); + for (i = 0; i < num_scissors; i++) { + virgl_encoder_write_dword(ctx->cbuf, (ss[i].minx | ss[i].miny << 16)); + virgl_encoder_write_dword(ctx->cbuf, (ss[i].maxx | ss[i].maxy << 16)); + } + return 0; +} + +void virgl_encoder_set_polygon_stipple(struct virgl_context *ctx, + const struct pipe_poly_stipple *ps) +{ + int i; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_POLYGON_STIPPLE, 0, VIRGL_POLYGON_STIPPLE_SIZE)); + for (i = 0; i < VIRGL_POLYGON_STIPPLE_SIZE; i++) { + virgl_encoder_write_dword(ctx->cbuf, ps->stipple[i]); + } +} + +void virgl_encoder_set_sample_mask(struct virgl_context *ctx, + unsigned sample_mask) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_SAMPLE_MASK, 0, VIRGL_SET_SAMPLE_MASK_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, sample_mask); +} + +void virgl_encoder_set_clip_state(struct virgl_context *ctx, + const struct pipe_clip_state *clip) +{ + int i, j; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_CLIP_STATE, 0, VIRGL_SET_CLIP_STATE_SIZE)); + for (i = 0; i < VIRGL_MAX_CLIP_PLANES; i++) { + for (j = 0; j < 4; j++) { + virgl_encoder_write_dword(ctx->cbuf, fui(clip->ucp[i][j])); + } + } +} + +int virgl_encode_resource_copy_region(struct virgl_context *ctx, + struct virgl_resource *dst_res, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct virgl_resource *src_res, + unsigned src_level, + const struct pipe_box *src_box) +{ + 
virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_RESOURCE_COPY_REGION, 0, VIRGL_CMD_RESOURCE_COPY_REGION_SIZE)); + virgl_encoder_write_res(ctx, dst_res); + virgl_encoder_write_dword(ctx->cbuf, dst_level); + virgl_encoder_write_dword(ctx->cbuf, dstx); + virgl_encoder_write_dword(ctx->cbuf, dsty); + virgl_encoder_write_dword(ctx->cbuf, dstz); + virgl_encoder_write_res(ctx, src_res); + virgl_encoder_write_dword(ctx->cbuf, src_level); + virgl_encoder_write_dword(ctx->cbuf, src_box->x); + virgl_encoder_write_dword(ctx->cbuf, src_box->y); + virgl_encoder_write_dword(ctx->cbuf, src_box->z); + virgl_encoder_write_dword(ctx->cbuf, src_box->width); + virgl_encoder_write_dword(ctx->cbuf, src_box->height); + virgl_encoder_write_dword(ctx->cbuf, src_box->depth); + return 0; +} + +int virgl_encode_blit(struct virgl_context *ctx, + struct virgl_resource *dst_res, + struct virgl_resource *src_res, + const struct pipe_blit_info *blit) +{ + uint32_t tmp; + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BLIT, 0, VIRGL_CMD_BLIT_SIZE)); + tmp = VIRGL_CMD_BLIT_S0_MASK(blit->mask) | + VIRGL_CMD_BLIT_S0_FILTER(blit->filter) | + VIRGL_CMD_BLIT_S0_SCISSOR_ENABLE(blit->scissor_enable); + virgl_encoder_write_dword(ctx->cbuf, tmp); + virgl_encoder_write_dword(ctx->cbuf, (blit->scissor.minx | blit->scissor.miny << 16)); + virgl_encoder_write_dword(ctx->cbuf, (blit->scissor.maxx | blit->scissor.maxy << 16)); + + virgl_encoder_write_res(ctx, dst_res); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.level); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.format); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.x); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.y); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.z); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.width); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.height); + virgl_encoder_write_dword(ctx->cbuf, blit->dst.box.depth); + + virgl_encoder_write_res(ctx, src_res); + 
virgl_encoder_write_dword(ctx->cbuf, blit->src.level); + virgl_encoder_write_dword(ctx->cbuf, blit->src.format); + virgl_encoder_write_dword(ctx->cbuf, blit->src.box.x); + virgl_encoder_write_dword(ctx->cbuf, blit->src.box.y); + virgl_encoder_write_dword(ctx->cbuf, blit->src.box.z); + virgl_encoder_write_dword(ctx->cbuf, blit->src.box.width); + virgl_encoder_write_dword(ctx->cbuf, blit->src.box.height); + virgl_encoder_write_dword(ctx->cbuf, blit->src.box.depth); + return 0; +} + +int virgl_encoder_create_query(struct virgl_context *ctx, + uint32_t handle, + uint query_type, + uint query_index, + struct virgl_resource *res, + uint32_t offset) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_OBJECT, VIRGL_OBJECT_QUERY, VIRGL_OBJ_QUERY_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_dword(ctx->cbuf, ((query_type & 0xffff) | (query_index << 16))); + virgl_encoder_write_dword(ctx->cbuf, offset); + virgl_encoder_write_res(ctx, res); + return 0; +} + +int virgl_encoder_begin_query(struct virgl_context *ctx, + uint32_t handle) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BEGIN_QUERY, 0, 1)); + virgl_encoder_write_dword(ctx->cbuf, handle); + return 0; +} + +int virgl_encoder_end_query(struct virgl_context *ctx, + uint32_t handle) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_END_QUERY, 0, 1)); + virgl_encoder_write_dword(ctx->cbuf, handle); + return 0; +} + +int virgl_encoder_get_query_result(struct virgl_context *ctx, + uint32_t handle, boolean wait) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_GET_QUERY_RESULT, 0, 2)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_dword(ctx->cbuf, wait ? 
1 : 0); + return 0; +} + +int virgl_encoder_render_condition(struct virgl_context *ctx, + uint32_t handle, boolean condition, + uint mode) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_RENDER_CONDITION, 0, VIRGL_RENDER_CONDITION_SIZE)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_dword(ctx->cbuf, condition); + virgl_encoder_write_dword(ctx->cbuf, mode); + return 0; +} +/* Emit SET_STREAMOUT_TARGETS: the append bitmask, then one handle dword per target (num_targets + 1 payload dwords). Always returns 0. */ +int virgl_encoder_set_so_targets(struct virgl_context *ctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_bitmask) +{ + int i; + + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_STREAMOUT_TARGETS, 0, num_targets + 1)); + virgl_encoder_write_dword(ctx->cbuf, append_bitmask); + for (i = 0; i < num_targets; i++) { /* a NULL entry is encoded as handle 0 instead of being dereferenced */ + uint32_t handle = targets[i] ? virgl_so_target(targets[i])->handle : 0; + virgl_encoder_write_dword(ctx->cbuf, handle); + } + return 0; +} + + +int virgl_encoder_set_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_SUB_CTX, 0, 1)); + virgl_encoder_write_dword(ctx->cbuf, sub_ctx_id); + return 0; +} + +int virgl_encoder_create_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CREATE_SUB_CTX, 0, 1)); + virgl_encoder_write_dword(ctx->cbuf, sub_ctx_id); + return 0; +} + +int virgl_encoder_destroy_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_DESTROY_SUB_CTX, 0, 1)); + virgl_encoder_write_dword(ctx->cbuf, sub_ctx_id); + return 0; +} + +int virgl_encode_bind_shader(struct virgl_context *ctx, + uint32_t handle, uint32_t type) +{ + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BIND_SHADER, 0, 2)); + virgl_encoder_write_dword(ctx->cbuf, handle); + virgl_encoder_write_dword(ctx->cbuf, type); + return 0; +} diff --git a/src/gallium/drivers/virgl/virgl_encode.h 
b/src/gallium/drivers/virgl/virgl_encode.h new file mode 100644 index 00000000000..030bcd6d16e --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_encode.h @@ -0,0 +1,247 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef VIRGL_ENCODE_H +#define VIRGL_ENCODE_H + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "virgl_winsys.h" + +struct tgsi_token; + +struct virgl_context; +struct virgl_resource; +struct virgl_sampler_view; + +struct virgl_surface { + struct pipe_surface base; + uint32_t handle; +}; + +static inline struct virgl_surface *virgl_surface(struct pipe_surface *surf) +{ + return (struct virgl_surface *)surf; +} + +static inline void virgl_encoder_write_dword(struct virgl_cmd_buf *state, + uint32_t dword) +{ + state->buf[state->cdw++] = dword; +} + +static inline void virgl_encoder_write_qword(struct virgl_cmd_buf *state, + uint64_t qword) +{ + memcpy(state->buf + state->cdw, &qword, sizeof(uint64_t)); + state->cdw += 2; +} +/* Copy len bytes from ptr into the command buffer at cdw, zero the unused tail of the final dword, and advance cdw by (len + 3) / 4 dwords. */ +static inline void virgl_encoder_write_block(struct virgl_cmd_buf *state, + const uint8_t *ptr, uint32_t len) +{ + int x; + memcpy(state->buf + state->cdw, ptr, len); + x = (len % 4); +// fprintf(stderr, "[%d] block %d x is %d\n", state->cdw, len, x); + if (x) { + uint8_t *mp = (uint8_t *)(state->buf + state->cdw); + mp += len; + memset(mp, 0, 4 - x); /* x bytes of the last dword hold data, so the padding is the remaining 4 - x bytes */ + } + state->cdw += (len + 3) / 4; +} + +extern int virgl_encode_blend_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_blend_state *blend_state); +extern int virgl_encode_rasterizer_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_rasterizer_state *state); + +extern int virgl_encode_shader_state(struct virgl_context *ctx, + uint32_t handle, + uint32_t type, + const struct pipe_stream_output_info *so_info, + const struct tgsi_token *tokens); + +int virgl_encode_stream_output_info(struct virgl_context *ctx, + uint32_t handle, + uint32_t type, + const struct pipe_shader_state *shader); + +int virgl_encoder_set_so_targets(struct virgl_context *ctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_bitmask); + +int virgl_encoder_create_so_target(struct virgl_context *ctx, + uint32_t handle, + 
struct virgl_resource *res, + unsigned buffer_offset, + unsigned buffer_size); + +int virgl_encode_clear(struct virgl_context *ctx, + unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil); + +int virgl_encode_bind_object(struct virgl_context *ctx, + uint32_t handle, uint32_t object); +int virgl_encode_delete_object(struct virgl_context *ctx, + uint32_t handle, uint32_t object); + +int virgl_encoder_set_framebuffer_state(struct virgl_context *ctx, + const struct pipe_framebuffer_state *state); +int virgl_encoder_set_viewport_states(struct virgl_context *ctx, + int start_slot, + int num_viewports, + const struct pipe_viewport_state *states); + +int virgl_encoder_draw_vbo(struct virgl_context *ctx, + const struct pipe_draw_info *info); + + +int virgl_encoder_create_surface(struct virgl_context *ctx, + uint32_t handle, + struct virgl_resource *res, + const struct pipe_surface *templat); + +int virgl_encoder_flush_frontbuffer(struct virgl_context *ctx, + struct virgl_resource *res); + +int virgl_encoder_create_vertex_elements(struct virgl_context *ctx, + uint32_t handle, + unsigned num_elements, + const struct pipe_vertex_element *element); + +int virgl_encoder_set_vertex_buffers(struct virgl_context *ctx, + unsigned num_buffers, + const struct pipe_vertex_buffer *buffers); + + +int virgl_encoder_inline_write(struct virgl_context *ctx, + struct virgl_resource *res, + unsigned level, unsigned usage, + const struct pipe_box *box, + const void *data, unsigned stride, + unsigned layer_stride); +int virgl_encode_sampler_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_sampler_state *state); +int virgl_encode_sampler_view(struct virgl_context *ctx, + uint32_t handle, + struct virgl_resource *res, + const struct pipe_sampler_view *state); + +int virgl_encode_set_sampler_views(struct virgl_context *ctx, + uint32_t shader_type, + uint32_t start_slot, + uint32_t num_views, + struct virgl_sampler_view **views); + +int 
virgl_encode_bind_sampler_states(struct virgl_context *ctx, + uint32_t shader_type, + uint32_t start_slot, + uint32_t num_handles, + uint32_t *handles); + +int virgl_encoder_set_index_buffer(struct virgl_context *ctx, + const struct pipe_index_buffer *ib); + +uint32_t virgl_object_assign_handle(void); + +int virgl_encoder_write_constant_buffer(struct virgl_context *ctx, + uint32_t shader, + uint32_t index, + uint32_t size, + const void *data); + +int virgl_encoder_set_uniform_buffer(struct virgl_context *ctx, + uint32_t shader, + uint32_t index, + uint32_t offset, + uint32_t length, + struct virgl_resource *res); +int virgl_encode_dsa_state(struct virgl_context *ctx, + uint32_t handle, + const struct pipe_depth_stencil_alpha_state *dsa_state); + +int virgl_encoder_set_stencil_ref(struct virgl_context *ctx, + const struct pipe_stencil_ref *ref); + +int virgl_encoder_set_blend_color(struct virgl_context *ctx, + const struct pipe_blend_color *color); + +int virgl_encoder_set_scissor_state(struct virgl_context *ctx, + unsigned start_slot, + int num_scissors, + const struct pipe_scissor_state *ss); + +void virgl_encoder_set_polygon_stipple(struct virgl_context *ctx, + const struct pipe_poly_stipple *ps); + +void virgl_encoder_set_sample_mask(struct virgl_context *ctx, + unsigned sample_mask); + +void virgl_encoder_set_clip_state(struct virgl_context *ctx, + const struct pipe_clip_state *clip); + +int virgl_encode_resource_copy_region(struct virgl_context *ctx, + struct virgl_resource *dst_res, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct virgl_resource *src_res, + unsigned src_level, + const struct pipe_box *src_box); + +int virgl_encode_blit(struct virgl_context *ctx, + struct virgl_resource *dst_res, + struct virgl_resource *src_res, + const struct pipe_blit_info *blit); + +int virgl_encoder_create_query(struct virgl_context *ctx, + uint32_t handle, + uint query_type, + uint query_index, + struct virgl_resource *res, + uint32_t 
offset); + +int virgl_encoder_begin_query(struct virgl_context *ctx, + uint32_t handle); +int virgl_encoder_end_query(struct virgl_context *ctx, + uint32_t handle); +int virgl_encoder_get_query_result(struct virgl_context *ctx, + uint32_t handle, boolean wait); + +int virgl_encoder_render_condition(struct virgl_context *ctx, + uint32_t handle, boolean condition, + uint mode); + +int virgl_encoder_set_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id); +int virgl_encoder_create_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id); +int virgl_encoder_destroy_sub_ctx(struct virgl_context *ctx, uint32_t sub_ctx_id); + +int virgl_encode_bind_shader(struct virgl_context *ctx, + uint32_t handle, uint32_t type); +#endif diff --git a/src/gallium/drivers/virgl/virgl_hw.h b/src/gallium/drivers/virgl/virgl_hw.h new file mode 100644 index 00000000000..e3c56db2ac6 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_hw.h @@ -0,0 +1,286 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VIRGL_HW_H +#define VIRGL_HW_H + +struct virgl_box { + uint32_t x, y, z; + uint32_t w, h, d; +}; + +/* formats known by the HW device - based on gallium subset */ +enum virgl_formats { + VIRGL_FORMAT_B8G8R8A8_UNORM = 1, + VIRGL_FORMAT_B8G8R8X8_UNORM = 2, + VIRGL_FORMAT_A8R8G8B8_UNORM = 3, + VIRGL_FORMAT_X8R8G8B8_UNORM = 4, + VIRGL_FORMAT_B5G5R5A1_UNORM = 5, + VIRGL_FORMAT_B4G4R4A4_UNORM = 6, + VIRGL_FORMAT_B5G6R5_UNORM = 7, + VIRGL_FORMAT_L8_UNORM = 9, /**< ubyte luminance */ + VIRGL_FORMAT_A8_UNORM = 10, /**< ubyte alpha */ + VIRGL_FORMAT_L8A8_UNORM = 12, /**< ubyte alpha, luminance */ + VIRGL_FORMAT_L16_UNORM = 13, /**< ushort luminance */ + + VIRGL_FORMAT_Z16_UNORM = 16, + VIRGL_FORMAT_Z32_UNORM = 17, + VIRGL_FORMAT_Z32_FLOAT = 18, + VIRGL_FORMAT_Z24_UNORM_S8_UINT = 19, + VIRGL_FORMAT_S8_UINT_Z24_UNORM = 20, + VIRGL_FORMAT_Z24X8_UNORM = 21, + VIRGL_FORMAT_S8_UINT = 23, /**< ubyte stencil */ + + VIRGL_FORMAT_R32_FLOAT = 28, + VIRGL_FORMAT_R32G32_FLOAT = 29, + VIRGL_FORMAT_R32G32B32_FLOAT = 30, + VIRGL_FORMAT_R32G32B32A32_FLOAT = 31, + + VIRGL_FORMAT_R16_UNORM = 48, + VIRGL_FORMAT_R16G16_UNORM = 49, + + VIRGL_FORMAT_R16G16B16A16_UNORM = 51, + + VIRGL_FORMAT_R16_SNORM = 56, + VIRGL_FORMAT_R16G16_SNORM = 57, + VIRGL_FORMAT_R16G16B16A16_SNORM = 59, + + VIRGL_FORMAT_R8_UNORM = 64, + VIRGL_FORMAT_R8G8_UNORM = 65, + + VIRGL_FORMAT_R8G8B8A8_UNORM = 67, + + VIRGL_FORMAT_R8_SNORM = 74, + VIRGL_FORMAT_R8G8_SNORM = 75, + VIRGL_FORMAT_R8G8B8_SNORM = 76, + VIRGL_FORMAT_R8G8B8A8_SNORM = 77, + + VIRGL_FORMAT_R16_FLOAT = 91, + VIRGL_FORMAT_R16G16_FLOAT = 92, + VIRGL_FORMAT_R16G16B16_FLOAT = 93, + VIRGL_FORMAT_R16G16B16A16_FLOAT = 94, + + VIRGL_FORMAT_L8_SRGB = 95, + VIRGL_FORMAT_L8A8_SRGB = 
96, + VIRGL_FORMAT_B8G8R8A8_SRGB = 100, + VIRGL_FORMAT_B8G8R8X8_SRGB = 101, + + /* compressed formats */ + VIRGL_FORMAT_DXT1_RGB = 105, + VIRGL_FORMAT_DXT1_RGBA = 106, + VIRGL_FORMAT_DXT3_RGBA = 107, + VIRGL_FORMAT_DXT5_RGBA = 108, + + /* sRGB, compressed */ + VIRGL_FORMAT_DXT1_SRGB = 109, + VIRGL_FORMAT_DXT1_SRGBA = 110, + VIRGL_FORMAT_DXT3_SRGBA = 111, + VIRGL_FORMAT_DXT5_SRGBA = 112, + + /* rgtc compressed */ + VIRGL_FORMAT_RGTC1_UNORM = 113, + VIRGL_FORMAT_RGTC1_SNORM = 114, + VIRGL_FORMAT_RGTC2_UNORM = 115, + VIRGL_FORMAT_RGTC2_SNORM = 116, + + VIRGL_FORMAT_A8B8G8R8_UNORM = 121, + VIRGL_FORMAT_B5G5R5X1_UNORM = 122, + VIRGL_FORMAT_R11G11B10_FLOAT = 124, + VIRGL_FORMAT_R9G9B9E5_FLOAT = 125, + VIRGL_FORMAT_Z32_FLOAT_S8X24_UINT = 126, + + VIRGL_FORMAT_B10G10R10A2_UNORM = 131, + VIRGL_FORMAT_R8G8B8X8_UNORM = 134, + VIRGL_FORMAT_B4G4R4X4_UNORM = 135, + VIRGL_FORMAT_B2G3R3_UNORM = 139, + + VIRGL_FORMAT_L16A16_UNORM = 140, + VIRGL_FORMAT_A16_UNORM = 141, + + VIRGL_FORMAT_A8_SNORM = 147, + VIRGL_FORMAT_L8_SNORM = 148, + VIRGL_FORMAT_L8A8_SNORM = 149, + + VIRGL_FORMAT_A16_SNORM = 151, + VIRGL_FORMAT_L16_SNORM = 152, + VIRGL_FORMAT_L16A16_SNORM = 153, + + VIRGL_FORMAT_A16_FLOAT = 155, + VIRGL_FORMAT_L16_FLOAT = 156, + VIRGL_FORMAT_L16A16_FLOAT = 157, + + VIRGL_FORMAT_A32_FLOAT = 159, + VIRGL_FORMAT_L32_FLOAT = 160, + VIRGL_FORMAT_L32A32_FLOAT = 161, + + VIRGL_FORMAT_R8_UINT = 177, + VIRGL_FORMAT_R8G8_UINT = 178, + VIRGL_FORMAT_R8G8B8_UINT = 179, + VIRGL_FORMAT_R8G8B8A8_UINT = 180, + + VIRGL_FORMAT_R8_SINT = 181, + VIRGL_FORMAT_R8G8_SINT = 182, + VIRGL_FORMAT_R8G8B8_SINT = 183, + VIRGL_FORMAT_R8G8B8A8_SINT = 184, + + VIRGL_FORMAT_R16_UINT = 185, + VIRGL_FORMAT_R16G16_UINT = 186, + VIRGL_FORMAT_R16G16B16_UINT = 187, + VIRGL_FORMAT_R16G16B16A16_UINT = 188, + + VIRGL_FORMAT_R16_SINT = 189, + VIRGL_FORMAT_R16G16_SINT = 190, + VIRGL_FORMAT_R16G16B16_SINT = 191, + VIRGL_FORMAT_R16G16B16A16_SINT = 192, + VIRGL_FORMAT_R32_UINT = 193, + VIRGL_FORMAT_R32G32_UINT = 194, + 
VIRGL_FORMAT_R32G32B32_UINT = 195, + VIRGL_FORMAT_R32G32B32A32_UINT = 196, + + VIRGL_FORMAT_R32_SINT = 197, + VIRGL_FORMAT_R32G32_SINT = 198, + VIRGL_FORMAT_R32G32B32_SINT = 199, + VIRGL_FORMAT_R32G32B32A32_SINT = 200, + + VIRGL_FORMAT_A8_UINT = 201, + VIRGL_FORMAT_L8_UINT = 203, + VIRGL_FORMAT_L8A8_UINT = 204, + + VIRGL_FORMAT_A8_SINT = 205, + VIRGL_FORMAT_L8_SINT = 207, + VIRGL_FORMAT_L8A8_SINT = 208, + + VIRGL_FORMAT_A16_UINT = 209, + VIRGL_FORMAT_L16_UINT = 211, + VIRGL_FORMAT_L16A16_UINT = 212, + + VIRGL_FORMAT_A16_SINT = 213, + VIRGL_FORMAT_L16_SINT = 215, + VIRGL_FORMAT_L16A16_SINT = 216, + + VIRGL_FORMAT_A32_UINT = 217, + VIRGL_FORMAT_L32_UINT = 219, + VIRGL_FORMAT_L32A32_UINT = 220, + + VIRGL_FORMAT_A32_SINT = 221, + VIRGL_FORMAT_L32_SINT = 223, + VIRGL_FORMAT_L32A32_SINT = 224, + + VIRGL_FORMAT_B10G10R10A2_UINT = 225, + VIRGL_FORMAT_R8G8B8X8_SNORM = 229, + + VIRGL_FORMAT_R8G8B8X8_SRGB = 230, + + VIRGL_FORMAT_B10G10R10X2_UNORM = 233, + VIRGL_FORMAT_R16G16B16X16_UNORM = 234, + VIRGL_FORMAT_R16G16B16X16_SNORM = 235, + VIRGL_FORMAT_MAX, +}; + +#define VIRGL_BIND_DEPTH_STENCIL (1 << 0) +#define VIRGL_BIND_RENDER_TARGET (1 << 1) +#define VIRGL_BIND_SAMPLER_VIEW (1 << 3) +#define VIRGL_BIND_VERTEX_BUFFER (1 << 4) +#define VIRGL_BIND_INDEX_BUFFER (1 << 5) +#define VIRGL_BIND_CONSTANT_BUFFER (1 << 6) +#define VIRGL_BIND_DISPLAY_TARGET (1 << 7) +#define VIRGL_BIND_STREAM_OUTPUT (1 << 11) +#define VIRGL_BIND_CURSOR (1 << 16) +#define VIRGL_BIND_CUSTOM (1 << 17) +#define VIRGL_BIND_SCANOUT (1 << 18) + +struct virgl_caps_bool_set1 { + unsigned indep_blend_enable:1; + unsigned indep_blend_func:1; + unsigned cube_map_array:1; + unsigned shader_stencil_export:1; + unsigned conditional_render:1; + unsigned start_instance:1; + unsigned primitive_restart:1; + unsigned blend_eq_sep:1; + unsigned instanceid:1; + unsigned vertex_element_instance_divisor:1; + unsigned seamless_cube_map:1; + unsigned occlusion_query:1; + unsigned timer_query:1; + unsigned 
streamout_pause_resume:1; + unsigned texture_multisample:1; + unsigned fragment_coord_conventions:1; + unsigned depth_clip_disable:1; + unsigned seamless_cube_map_per_texture:1; + unsigned ubo:1; + unsigned color_clamping:1; /* not in GL 3.1 core profile */ + unsigned poly_stipple:1; /* not in GL 3.1 core profile */ + unsigned mirror_clamp:1; + unsigned texture_query_lod:1; +}; + +/* endless expansion capabilities - current gallium has 252 formats; bitmask[16] provides 16 * 32 = 512 bits */ +struct virgl_supported_format_mask { + uint32_t bitmask[16]; +}; +/* capabilities set 2 - version 1 - 32-bit and float values */ +struct virgl_caps_v1 { + uint32_t max_version; + struct virgl_supported_format_mask sampler; + struct virgl_supported_format_mask render; + struct virgl_supported_format_mask depthstencil; + struct virgl_supported_format_mask vertexbuffer; + struct virgl_caps_bool_set1 bset; + uint32_t glsl_level; + uint32_t max_texture_array_layers; + uint32_t max_streamout_buffers; + uint32_t max_dual_source_render_targets; + uint32_t max_render_targets; + uint32_t max_samples; + uint32_t prim_mask; + uint32_t max_tbo_size; + uint32_t max_uniform_blocks; + uint32_t max_viewports; + uint32_t max_texture_gather_components; +}; + +union virgl_caps { + uint32_t max_version; + struct virgl_caps_v1 v1; +}; + +enum virgl_errors { + VIRGL_ERROR_NONE, + VIRGL_ERROR_UNKNOWN, + VIRGL_ERROR_UNKNOWN_RESOURCE_FORMAT, +}; + +enum virgl_ctx_errors { + VIRGL_ERROR_CTX_NONE, + VIRGL_ERROR_CTX_UNKNOWN, + VIRGL_ERROR_CTX_ILLEGAL_SHADER, + VIRGL_ERROR_CTX_ILLEGAL_HANDLE, + VIRGL_ERROR_CTX_ILLEGAL_RESOURCE, + VIRGL_ERROR_CTX_ILLEGAL_SURFACE, + VIRGL_ERROR_CTX_ILLEGAL_VERTEX_FORMAT, + VIRGL_ERROR_CTX_ILLEGAL_CMD_BUFFER, +}; + + +#define VIRGL_RESOURCE_Y_0_TOP (1 << 0) +#endif diff --git a/src/gallium/drivers/virgl/virgl_protocol.h b/src/gallium/drivers/virgl/virgl_protocol.h new file mode 100644 index 00000000000..ca3142f5f72 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_protocol.h @@ -0,0 +1,468 @@ +/* + * Copyright 2014, 
2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef VIRGL_PROTOCOL_H +#define VIRGL_PROTOCOL_H + +#define VIRGL_QUERY_STATE_NEW 0 +#define VIRGL_QUERY_STATE_DONE 1 +#define VIRGL_QUERY_STATE_WAIT_HOST 2 + +struct virgl_host_query_state { + uint32_t query_state; + uint32_t result_size; + uint64_t result; +}; + +enum virgl_object_type { + VIRGL_OBJECT_NULL, + VIRGL_OBJECT_BLEND, + VIRGL_OBJECT_RASTERIZER, + VIRGL_OBJECT_DSA, + VIRGL_OBJECT_SHADER, + VIRGL_OBJECT_VERTEX_ELEMENTS, + VIRGL_OBJECT_SAMPLER_VIEW, + VIRGL_OBJECT_SAMPLER_STATE, + VIRGL_OBJECT_SURFACE, + VIRGL_OBJECT_QUERY, + VIRGL_OBJECT_STREAMOUT_TARGET, + VIRGL_MAX_OBJECTS, +}; + +/* context cmds to be encoded in the command stream */ +enum virgl_context_cmd { + VIRGL_CCMD_NOP = 0, + VIRGL_CCMD_CREATE_OBJECT = 1, + VIRGL_CCMD_BIND_OBJECT, + VIRGL_CCMD_DESTROY_OBJECT, + VIRGL_CCMD_SET_VIEWPORT_STATE, + VIRGL_CCMD_SET_FRAMEBUFFER_STATE, + VIRGL_CCMD_SET_VERTEX_BUFFERS, + VIRGL_CCMD_CLEAR, + VIRGL_CCMD_DRAW_VBO, + VIRGL_CCMD_RESOURCE_INLINE_WRITE, + VIRGL_CCMD_SET_SAMPLER_VIEWS, + VIRGL_CCMD_SET_INDEX_BUFFER, + VIRGL_CCMD_SET_CONSTANT_BUFFER, + VIRGL_CCMD_SET_STENCIL_REF, + VIRGL_CCMD_SET_BLEND_COLOR, + VIRGL_CCMD_SET_SCISSOR_STATE, + VIRGL_CCMD_BLIT, + VIRGL_CCMD_RESOURCE_COPY_REGION, + VIRGL_CCMD_BIND_SAMPLER_STATES, + VIRGL_CCMD_BEGIN_QUERY, + VIRGL_CCMD_END_QUERY, + VIRGL_CCMD_GET_QUERY_RESULT, + VIRGL_CCMD_SET_POLYGON_STIPPLE, + VIRGL_CCMD_SET_CLIP_STATE, + VIRGL_CCMD_SET_SAMPLE_MASK, + VIRGL_CCMD_SET_STREAMOUT_TARGETS, + VIRGL_CCMD_SET_RENDER_CONDITION, + VIRGL_CCMD_SET_UNIFORM_BUFFER, + + VIRGL_CCMD_SET_SUB_CTX, + VIRGL_CCMD_CREATE_SUB_CTX, + VIRGL_CCMD_DESTROY_SUB_CTX, + VIRGL_CCMD_BIND_SHADER, +}; + +/* + 8-bit cmd headers + 8-bit object type + 16-bit length +*/ + +#define VIRGL_CMD0(cmd, obj, len) ((cmd) | ((obj) << 8) | ((len) << 16)) + +/* hw specification */ +#define VIRGL_MAX_COLOR_BUFS 8 +#define VIRGL_MAX_CLIP_PLANES 8 + +#define VIRGL_OBJ_CREATE_HEADER 0 +#define VIRGL_OBJ_CREATE_HANDLE 1 + +#define VIRGL_OBJ_BIND_HEADER 0 
+#define VIRGL_OBJ_BIND_HANDLE 1 + +#define VIRGL_OBJ_DESTROY_HANDLE 1 + +/* some of these defines are a specification - not used in the code */ +/* bit offsets for blend state object */ +#define VIRGL_OBJ_BLEND_SIZE (VIRGL_MAX_COLOR_BUFS + 3) +#define VIRGL_OBJ_BLEND_HANDLE 1 +#define VIRGL_OBJ_BLEND_S0 2 +#define VIRGL_OBJ_BLEND_S0_INDEPENDENT_BLEND_ENABLE(x) (((x) & 0x1) << 0) /* mask first, then shift, like the other S0 fields */ +#define VIRGL_OBJ_BLEND_S0_LOGICOP_ENABLE(x) (((x) & 0x1) << 1) +#define VIRGL_OBJ_BLEND_S0_DITHER(x) (((x) & 0x1) << 2) +#define VIRGL_OBJ_BLEND_S0_ALPHA_TO_COVERAGE(x) (((x) & 0x1) << 3) +#define VIRGL_OBJ_BLEND_S0_ALPHA_TO_ONE(x) (((x) & 0x1) << 4) +#define VIRGL_OBJ_BLEND_S1 3 +#define VIRGL_OBJ_BLEND_S1_LOGICOP_FUNC(x) (((x) & 0xf) << 0) +/* repeated once per number of cbufs */ + +#define VIRGL_OBJ_BLEND_S2(cbuf) (4 + (cbuf)) +#define VIRGL_OBJ_BLEND_S2_RT_BLEND_ENABLE(x) (((x) & 0x1) << 0) +#define VIRGL_OBJ_BLEND_S2_RT_RGB_FUNC(x) (((x) & 0x7) << 1) +#define VIRGL_OBJ_BLEND_S2_RT_RGB_SRC_FACTOR(x) (((x) & 0x1f) << 4) +#define VIRGL_OBJ_BLEND_S2_RT_RGB_DST_FACTOR(x) (((x) & 0x1f) << 9) +#define VIRGL_OBJ_BLEND_S2_RT_ALPHA_FUNC(x) (((x) & 0x7) << 14) +#define VIRGL_OBJ_BLEND_S2_RT_ALPHA_SRC_FACTOR(x) (((x) & 0x1f) << 17) +#define VIRGL_OBJ_BLEND_S2_RT_ALPHA_DST_FACTOR(x) (((x) & 0x1f) << 22) +#define VIRGL_OBJ_BLEND_S2_RT_COLORMASK(x) (((x) & 0xf) << 27) + +/* bit offsets for DSA state */ +#define VIRGL_OBJ_DSA_SIZE 5 +#define VIRGL_OBJ_DSA_HANDLE 1 +#define VIRGL_OBJ_DSA_S0 2 +#define VIRGL_OBJ_DSA_S0_DEPTH_ENABLE(x) (((x) & 0x1) << 0) +#define VIRGL_OBJ_DSA_S0_DEPTH_WRITEMASK(x) (((x) & 0x1) << 1) +#define VIRGL_OBJ_DSA_S0_DEPTH_FUNC(x) (((x) & 0x7) << 2) +#define VIRGL_OBJ_DSA_S0_ALPHA_ENABLED(x) (((x) & 0x1) << 8) +#define VIRGL_OBJ_DSA_S0_ALPHA_FUNC(x) (((x) & 0x7) << 9) +#define VIRGL_OBJ_DSA_S1 3 +#define VIRGL_OBJ_DSA_S2 4 +#define VIRGL_OBJ_DSA_S1_STENCIL_ENABLED(x) (((x) & 0x1) << 0) +#define VIRGL_OBJ_DSA_S1_STENCIL_FUNC(x) (((x) & 0x7) << 1) +#define 
VIRGL_OBJ_DSA_S1_STENCIL_FAIL_OP(x) (((x) & 0x7) << 4) +#define VIRGL_OBJ_DSA_S1_STENCIL_ZPASS_OP(x) (((x) & 0x7) << 7) +#define VIRGL_OBJ_DSA_S1_STENCIL_ZFAIL_OP(x) (((x) & 0x7) << 10) +#define VIRGL_OBJ_DSA_S1_STENCIL_VALUEMASK(x) (((x) & 0xff) << 13) +#define VIRGL_OBJ_DSA_S1_STENCIL_WRITEMASK(x) (((x) & 0xff) << 21) +#define VIRGL_OBJ_DSA_ALPHA_REF 5 + +/* offsets for rasterizer state */ +#define VIRGL_OBJ_RS_SIZE 9 +#define VIRGL_OBJ_RS_HANDLE 1 +#define VIRGL_OBJ_RS_S0 2 +#define VIRGL_OBJ_RS_S0_FLATSHADE(x) (((x) & 0x1) << 0) +#define VIRGL_OBJ_RS_S0_DEPTH_CLIP(x) (((x) & 0x1) << 1) +#define VIRGL_OBJ_RS_S0_CLIP_HALFZ(x) (((x) & 0x1) << 2) +#define VIRGL_OBJ_RS_S0_RASTERIZER_DISCARD(x) (((x) & 0x1) << 3) +#define VIRGL_OBJ_RS_S0_FLATSHADE_FIRST(x) (((x) & 0x1) << 4) +#define VIRGL_OBJ_RS_S0_LIGHT_TWOSIZE(x) (((x) & 0x1) << 5) +#define VIRGL_OBJ_RS_S0_SPRITE_COORD_MODE(x) (((x) & 0x1) << 6) +#define VIRGL_OBJ_RS_S0_POINT_QUAD_RASTERIZATION(x) (((x) & 0x1) << 7) +#define VIRGL_OBJ_RS_S0_CULL_FACE(x) (((x) & 0x3) << 8) +#define VIRGL_OBJ_RS_S0_FILL_FRONT(x) (((x) & 0x3) << 10) +#define VIRGL_OBJ_RS_S0_FILL_BACK(x) (((x) & 0x3) << 12) +#define VIRGL_OBJ_RS_S0_SCISSOR(x) (((x) & 0x1) << 14) +#define VIRGL_OBJ_RS_S0_FRONT_CCW(x) (((x) & 0x1) << 15) +#define VIRGL_OBJ_RS_S0_CLAMP_VERTEX_COLOR(x) (((x) & 0x1) << 16) +#define VIRGL_OBJ_RS_S0_CLAMP_FRAGMENT_COLOR(x) (((x) & 0x1) << 17) +#define VIRGL_OBJ_RS_S0_OFFSET_LINE(x) (((x) & 0x1) << 18) +#define VIRGL_OBJ_RS_S0_OFFSET_POINT(x) (((x) & 0x1) << 19) +#define VIRGL_OBJ_RS_S0_OFFSET_TRI(x) (((x) & 0x1) << 20) +#define VIRGL_OBJ_RS_S0_POLY_SMOOTH(x) (((x) & 0x1) << 21) +#define VIRGL_OBJ_RS_S0_POLY_STIPPLE_ENABLE(x) (((x) & 0x1) << 22) +#define VIRGL_OBJ_RS_S0_POINT_SMOOTH(x) (((x) & 0x1) << 23) +#define VIRGL_OBJ_RS_S0_POINT_SIZE_PER_VERTEX(x) (((x) & 0x1) << 24) +#define VIRGL_OBJ_RS_S0_MULTISAMPLE(x) (((x) & 0x1) << 25) +#define VIRGL_OBJ_RS_S0_LINE_SMOOTH(x) (((x) & 0x1) << 26) +#define 
VIRGL_OBJ_RS_S0_LINE_STIPPLE_ENABLE(x) (((x) & 0x1) << 27) +#define VIRGL_OBJ_RS_S0_LINE_LAST_PIXEL(x) (((x) & 0x1) << 28) +#define VIRGL_OBJ_RS_S0_HALF_PIXEL_CENTER(x) (((x) & 0x1) << 29) +#define VIRGL_OBJ_RS_S0_BOTTOM_EDGE_RULE(x) (((x) & 0x1) << 30) + +#define VIRGL_OBJ_RS_POINT_SIZE 3 +#define VIRGL_OBJ_RS_SPRITE_COORD_ENABLE 4 +#define VIRGL_OBJ_RS_S3 5 + +#define VIRGL_OBJ_RS_S3_LINE_STIPPLE_PATTERN(x) (((x) & 0xffff) << 0) +#define VIRGL_OBJ_RS_S3_LINE_STIPPLE_FACTOR(x) (((x) & 0xff) << 16) +#define VIRGL_OBJ_RS_S3_CLIP_PLANE_ENABLE(x) (((x) & 0xff) << 24) +#define VIRGL_OBJ_RS_LINE_WIDTH 6 +#define VIRGL_OBJ_RS_OFFSET_UNITS 7 +#define VIRGL_OBJ_RS_OFFSET_SCALE 8 +#define VIRGL_OBJ_RS_OFFSET_CLAMP 9 + +#define VIRGL_OBJ_CLEAR_SIZE 8 +#define VIRGL_OBJ_CLEAR_BUFFERS 1 +#define VIRGL_OBJ_CLEAR_COLOR_0 2 /* color is 4 * u32/f32/i32 */ +#define VIRGL_OBJ_CLEAR_COLOR_1 3 +#define VIRGL_OBJ_CLEAR_COLOR_2 4 +#define VIRGL_OBJ_CLEAR_COLOR_3 5 +#define VIRGL_OBJ_CLEAR_DEPTH_0 6 /* depth is a double precision float */ +#define VIRGL_OBJ_CLEAR_DEPTH_1 7 +#define VIRGL_OBJ_CLEAR_STENCIL 8 + +/* shader object */ +#define VIRGL_OBJ_SHADER_HDR_SIZE(nso) (5 + ((nso) ? 
(2 * nso) + 4 : 0)) +#define VIRGL_OBJ_SHADER_HANDLE 1 +#define VIRGL_OBJ_SHADER_TYPE 2 +#define VIRGL_OBJ_SHADER_OFFSET 3 +#define VIRGL_OBJ_SHADER_OFFSET_VAL(x) (((x) & 0x7fffffff) << 0) +/* start contains full length in VAL - also implies continuations */ +/* continuation contains offset in VAL */ +#define VIRGL_OBJ_SHADER_OFFSET_CONT (0x1 << 31) +#define VIRGL_OBJ_SHADER_NUM_TOKENS 4 +#define VIRGL_OBJ_SHADER_SO_NUM_OUTPUTS 5 +#define VIRGL_OBJ_SHADER_SO_STRIDE(x) (6 + (x)) +#define VIRGL_OBJ_SHADER_SO_OUTPUT0(x) (10 + (x * 2)) +#define VIRGL_OBJ_SHADER_SO_OUTPUT_REGISTER_INDEX(x) (((x) & 0xff) << 0) +#define VIRGL_OBJ_SHADER_SO_OUTPUT_START_COMPONENT(x) (((x) & 0x3) << 8) +#define VIRGL_OBJ_SHADER_SO_OUTPUT_NUM_COMPONENTS(x) (((x) & 0x7) << 10) +#define VIRGL_OBJ_SHADER_SO_OUTPUT_BUFFER(x) (((x) & 0x7) << 13) +#define VIRGL_OBJ_SHADER_SO_OUTPUT_DST_OFFSET(x) (((x) & 0xffff) << 16) +#define VIRGL_OBJ_SHADER_SO_OUTPUT0_SO(x) (11 + (x * 2)) +#define VIRGL_OBJ_SHADER_SO_OUTPUT_STREAM(x) (((x) & 0x03) << 0) + +/* viewport state */ +#define VIRGL_SET_VIEWPORT_STATE_SIZE(num_viewports) ((6 * num_viewports) + 1) +#define VIRGL_SET_VIEWPORT_START_SLOT 1 +#define VIRGL_SET_VIEWPORT_STATE_SCALE_0(x) (2 + (x * 6)) +#define VIRGL_SET_VIEWPORT_STATE_SCALE_1(x) (3 + (x * 6)) +#define VIRGL_SET_VIEWPORT_STATE_SCALE_2(x) (4 + (x * 6)) +#define VIRGL_SET_VIEWPORT_STATE_TRANSLATE_0(x) (5 + (x * 6)) +#define VIRGL_SET_VIEWPORT_STATE_TRANSLATE_1(x) (6 + (x * 6)) +#define VIRGL_SET_VIEWPORT_STATE_TRANSLATE_2(x) (7 + (x * 6)) + +/* framebuffer state */ +#define VIRGL_SET_FRAMEBUFFER_STATE_SIZE(nr_cbufs) (nr_cbufs + 2) +#define VIRGL_SET_FRAMEBUFFER_STATE_NR_CBUFS 1 +#define VIRGL_SET_FRAMEBUFFER_STATE_NR_ZSURF_HANDLE 2 +#define VIRGL_SET_FRAMEBUFFER_STATE_CBUF_HANDLE(x) ((x) + 3) + +/* vertex elements object */ +#define VIRGL_OBJ_VERTEX_ELEMENTS_SIZE(num_elements) (((num_elements) * 4) + 1) +#define VIRGL_OBJ_VERTEX_ELEMENTS_HANDLE 1 +#define 
VIRGL_OBJ_VERTEX_ELEMENTS_V0_SRC_OFFSET(x) (((x) * 4) + 2) /* repeated per VE */ +#define VIRGL_OBJ_VERTEX_ELEMENTS_V0_INSTANCE_DIVISOR(x) (((x) * 4) + 3) +#define VIRGL_OBJ_VERTEX_ELEMENTS_V0_VERTEX_BUFFER_INDEX(x) (((x) * 4) + 4) +#define VIRGL_OBJ_VERTEX_ELEMENTS_V0_SRC_FORMAT(x) (((x) * 4) + 5) + +/* vertex buffers */ +#define VIRGL_SET_VERTEX_BUFFERS_SIZE(num_buffers) ((num_buffers) * 3) +#define VIRGL_SET_VERTEX_BUFFER_STRIDE(x) (((x) * 3) + 1) +#define VIRGL_SET_VERTEX_BUFFER_OFFSET(x) (((x) * 3) + 2) +#define VIRGL_SET_VERTEX_BUFFER_HANDLE(x) (((x) * 3) + 3) + +/* index buffer */ +#define VIRGL_SET_INDEX_BUFFER_SIZE(ib) (((ib) ? 2 : 0) + 1) +#define VIRGL_SET_INDEX_BUFFER_HANDLE 1 +#define VIRGL_SET_INDEX_BUFFER_INDEX_SIZE 2 /* only if sending an IB handle */ +#define VIRGL_SET_INDEX_BUFFER_OFFSET 3 /* only if sending an IB handle */ + +/* constant buffer */ +#define VIRGL_SET_CONSTANT_BUFFER_SHADER_TYPE 1 +#define VIRGL_SET_CONSTANT_BUFFER_INDEX 2 +#define VIRGL_SET_CONSTANT_BUFFER_DATA_START 3 + +#define VIRGL_SET_UNIFORM_BUFFER_SIZE 5 +#define VIRGL_SET_UNIFORM_BUFFER_SHADER_TYPE 1 +#define VIRGL_SET_UNIFORM_BUFFER_INDEX 2 +#define VIRGL_SET_UNIFORM_BUFFER_OFFSET 3 +#define VIRGL_SET_UNIFORM_BUFFER_LENGTH 4 +#define VIRGL_SET_UNIFORM_BUFFER_RES_HANDLE 5 + +/* draw VBO */ +#define VIRGL_DRAW_VBO_SIZE 12 +#define VIRGL_DRAW_VBO_START 1 +#define VIRGL_DRAW_VBO_COUNT 2 +#define VIRGL_DRAW_VBO_MODE 3 +#define VIRGL_DRAW_VBO_INDEXED 4 +#define VIRGL_DRAW_VBO_INSTANCE_COUNT 5 +#define VIRGL_DRAW_VBO_INDEX_BIAS 6 +#define VIRGL_DRAW_VBO_START_INSTANCE 7 +#define VIRGL_DRAW_VBO_PRIMITIVE_RESTART 8 +#define VIRGL_DRAW_VBO_RESTART_INDEX 9 +#define VIRGL_DRAW_VBO_MIN_INDEX 10 +#define VIRGL_DRAW_VBO_MAX_INDEX 11 +#define VIRGL_DRAW_VBO_COUNT_FROM_SO 12 + +/* create surface */ +#define VIRGL_OBJ_SURFACE_SIZE 5 +#define VIRGL_OBJ_SURFACE_HANDLE 1 +#define VIRGL_OBJ_SURFACE_RES_HANDLE 2 +#define VIRGL_OBJ_SURFACE_FORMAT 3 +#define VIRGL_OBJ_SURFACE_BUFFER_FIRST_ELEMENT 
4 +#define VIRGL_OBJ_SURFACE_BUFFER_LAST_ELEMENT 5 +#define VIRGL_OBJ_SURFACE_TEXTURE_LEVEL 4 +#define VIRGL_OBJ_SURFACE_TEXTURE_LAYERS 5 + +/* create streamout target */ +#define VIRGL_OBJ_STREAMOUT_SIZE 4 +#define VIRGL_OBJ_STREAMOUT_HANDLE 1 +#define VIRGL_OBJ_STREAMOUT_RES_HANDLE 2 +#define VIRGL_OBJ_STREAMOUT_BUFFER_OFFSET 3 +#define VIRGL_OBJ_STREAMOUT_BUFFER_SIZE 4 + +/* sampler state */ +#define VIRGL_OBJ_SAMPLER_STATE_SIZE 9 +#define VIRGL_OBJ_SAMPLER_STATE_HANDLE 1 +#define VIRGL_OBJ_SAMPLER_STATE_S0 2 +#define VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_S(x) (((x) & 0x7) << 0) +#define VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_T(x) (((x) & 0x7) << 3) +#define VIRGL_OBJ_SAMPLE_STATE_S0_WRAP_R(x) (((x) & 0x7) << 6) +#define VIRGL_OBJ_SAMPLE_STATE_S0_MIN_IMG_FILTER(x) (((x) & 0x3) << 9) +#define VIRGL_OBJ_SAMPLE_STATE_S0_MIN_MIP_FILTER(x) (((x) & 0x3) << 11) +#define VIRGL_OBJ_SAMPLE_STATE_S0_MAG_IMG_FILTER(x) (((x) & 0x3) << 13) +#define VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_MODE(x) (((x) & 0x1) << 15) +#define VIRGL_OBJ_SAMPLE_STATE_S0_COMPARE_FUNC(x) (((x) & 0x7) << 16) + +#define VIRGL_OBJ_SAMPLER_STATE_LOD_BIAS 3 +#define VIRGL_OBJ_SAMPLER_STATE_MIN_LOD 4 +#define VIRGL_OBJ_SAMPLER_STATE_MAX_LOD 5 +#define VIRGL_OBJ_SAMPLER_STATE_BORDER_COLOR(x) ((x) + 6) /* 6 - 9 */ + + +/* sampler view */ +#define VIRGL_OBJ_SAMPLER_VIEW_SIZE 6 +#define VIRGL_OBJ_SAMPLER_VIEW_HANDLE 1 +#define VIRGL_OBJ_SAMPLER_VIEW_RES_HANDLE 2 +#define VIRGL_OBJ_SAMPLER_VIEW_FORMAT 3 +#define VIRGL_OBJ_SAMPLER_VIEW_BUFFER_FIRST_ELEMENT 4 +#define VIRGL_OBJ_SAMPLER_VIEW_BUFFER_LAST_ELEMENT 5 +#define VIRGL_OBJ_SAMPLER_VIEW_TEXTURE_LAYER 4 +#define VIRGL_OBJ_SAMPLER_VIEW_TEXTURE_LEVEL 5 +#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE 6 +#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_R(x) (((x) & 0x7) << 0) +#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_G(x) (((x) & 0x7) << 3) +#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_B(x) (((x) & 0x7) << 6) +#define VIRGL_OBJ_SAMPLER_VIEW_SWIZZLE_A(x) (((x) & 0x7) << 9) + +/* set sampler views */ 
+#define VIRGL_SET_SAMPLER_VIEWS_SIZE(num_views) ((num_views) + 2) +#define VIRGL_SET_SAMPLER_VIEWS_SHADER_TYPE 1 +#define VIRGL_SET_SAMPLER_VIEWS_START_SLOT 2 +#define VIRGL_SET_SAMPLER_VIEWS_V0_HANDLE 3 + +/* bind sampler states */ +#define VIRGL_BIND_SAMPLER_STATES(num_states) ((num_states) + 2) +#define VIRGL_BIND_SAMPLER_STATES_SHADER_TYPE 1 +#define VIRGL_BIND_SAMPLER_STATES_START_SLOT 2 +#define VIRGL_BIND_SAMPLER_STATES_S0_HANDLE 3 + +/* set stencil reference */ +#define VIRGL_SET_STENCIL_REF_SIZE 1 +#define VIRGL_SET_STENCIL_REF 1 +#define VIRGL_STENCIL_REF_VAL(f, s) ((f & 0xff) | (((s & 0xff) << 8))) + +/* set blend color */ +#define VIRGL_SET_BLEND_COLOR_SIZE 4 +#define VIRGL_SET_BLEND_COLOR(x) ((x) + 1) + +/* set scissor state */ +#define VIRGL_SET_SCISSOR_STATE_SIZE(x) (1 + 2 * x) +#define VIRGL_SET_SCISSOR_START_SLOT 1 +#define VIRGL_SET_SCISSOR_MINX_MINY(x) (2 + (x * 2)) +#define VIRGL_SET_SCISSOR_MAXX_MAXY(x) (3 + (x * 2)) + +/* resource copy region */ +#define VIRGL_CMD_RESOURCE_COPY_REGION_SIZE 13 +#define VIRGL_CMD_RCR_DST_RES_HANDLE 1 +#define VIRGL_CMD_RCR_DST_LEVEL 2 +#define VIRGL_CMD_RCR_DST_X 3 +#define VIRGL_CMD_RCR_DST_Y 4 +#define VIRGL_CMD_RCR_DST_Z 5 +#define VIRGL_CMD_RCR_SRC_RES_HANDLE 6 +#define VIRGL_CMD_RCR_SRC_LEVEL 7 +#define VIRGL_CMD_RCR_SRC_X 8 +#define VIRGL_CMD_RCR_SRC_Y 9 +#define VIRGL_CMD_RCR_SRC_Z 10 +#define VIRGL_CMD_RCR_SRC_W 11 +#define VIRGL_CMD_RCR_SRC_H 12 +#define VIRGL_CMD_RCR_SRC_D 13 + +/* blit */ +#define VIRGL_CMD_BLIT_SIZE 21 +#define VIRGL_CMD_BLIT_S0 1 +#define VIRGL_CMD_BLIT_S0_MASK(x) (((x) & 0xff) << 0) +#define VIRGL_CMD_BLIT_S0_FILTER(x) (((x) & 0x3) << 8) +#define VIRGL_CMD_BLIT_S0_SCISSOR_ENABLE(x) (((x) & 0x1) << 10) +#define VIRGL_CMD_BLIT_SCISSOR_MINX_MINY 2 +#define VIRGL_CMD_BLIT_SCISSOR_MAXX_MAXY 3 +#define VIRGL_CMD_BLIT_DST_RES_HANDLE 4 +#define VIRGL_CMD_BLIT_DST_LEVEL 5 +#define VIRGL_CMD_BLIT_DST_FORMAT 6 +#define VIRGL_CMD_BLIT_DST_X 7 +#define VIRGL_CMD_BLIT_DST_Y 8 +#define 
VIRGL_CMD_BLIT_DST_Z 9 +#define VIRGL_CMD_BLIT_DST_W 10 +#define VIRGL_CMD_BLIT_DST_H 11 +#define VIRGL_CMD_BLIT_DST_D 12 +#define VIRGL_CMD_BLIT_SRC_RES_HANDLE 13 +#define VIRGL_CMD_BLIT_SRC_LEVEL 14 +#define VIRGL_CMD_BLIT_SRC_FORMAT 15 +#define VIRGL_CMD_BLIT_SRC_X 16 +#define VIRGL_CMD_BLIT_SRC_Y 17 +#define VIRGL_CMD_BLIT_SRC_Z 18 +#define VIRGL_CMD_BLIT_SRC_W 19 +#define VIRGL_CMD_BLIT_SRC_H 20 +#define VIRGL_CMD_BLIT_SRC_D 21 + +/* query object */ +#define VIRGL_OBJ_QUERY_SIZE 4 +#define VIRGL_OBJ_QUERY_HANDLE 1 +#define VIRGL_OBJ_QUERY_TYPE_INDEX 2 +#define VIRGL_OBJ_QUERY_TYPE(x) (x & 0xffff) +#define VIRGL_OBJ_QUERY_INDEX(x) ((x & 0xffff) << 16) +#define VIRGL_OBJ_QUERY_OFFSET 3 +#define VIRGL_OBJ_QUERY_RES_HANDLE 4 + +#define VIRGL_QUERY_BEGIN_HANDLE 1 + +#define VIRGL_QUERY_END_HANDLE 1 + +#define VIRGL_QUERY_RESULT_HANDLE 1 +#define VIRGL_QUERY_RESULT_WAIT 2 + +/* render condition */ +#define VIRGL_RENDER_CONDITION_SIZE 3 +#define VIRGL_RENDER_CONDITION_HANDLE 1 +#define VIRGL_RENDER_CONDITION_CONDITION 2 +#define VIRGL_RENDER_CONDITION_MODE 3 + +/* resource inline write */ +#define VIRGL_RESOURCE_IW_RES_HANDLE 1 +#define VIRGL_RESOURCE_IW_LEVEL 2 +#define VIRGL_RESOURCE_IW_USAGE 3 +#define VIRGL_RESOURCE_IW_STRIDE 4 +#define VIRGL_RESOURCE_IW_LAYER_STRIDE 5 +#define VIRGL_RESOURCE_IW_X 6 +#define VIRGL_RESOURCE_IW_Y 7 +#define VIRGL_RESOURCE_IW_Z 8 +#define VIRGL_RESOURCE_IW_W 9 +#define VIRGL_RESOURCE_IW_H 10 +#define VIRGL_RESOURCE_IW_D 11 +#define VIRGL_RESOURCE_IW_DATA_START 12 + +/* set streamout targets */ +#define VIRGL_SET_STREAMOUT_TARGETS_APPEND_BITMASK 1 +#define VIRGL_SET_STREAMOUT_TARGETS_H0 2 + +/* set sample mask */ +#define VIRGL_SET_SAMPLE_MASK_SIZE 1 +#define VIRGL_SET_SAMPLE_MASK_MASK 1 + +/* set clip state */ +#define VIRGL_SET_CLIP_STATE_SIZE 32 +#define VIRGL_SET_CLIP_STATE_C0 1 + +/* polygon stipple */ +#define VIRGL_POLYGON_STIPPLE_SIZE 32 +#define VIRGL_POLYGON_STIPPLE_P0 1 + +#define VIRGL_BIND_SHADER_SIZE 2 +#define 
VIRGL_BIND_SHADER_HANDLE 1 +#define VIRGL_BIND_SHADER_TYPE 2 + +#endif diff --git a/src/gallium/drivers/virgl/virgl_public.h b/src/gallium/drivers/virgl/virgl_public.h new file mode 100644 index 00000000000..a3ea560df7b --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_public.h @@ -0,0 +1,31 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VIRGL_PUBLIC_H +#define VIRGL_PUBLIC_H + +struct pipe_screen; +struct virgl_winsys; + +struct pipe_screen * +virgl_create_screen(struct virgl_winsys *vws); +#endif diff --git a/src/gallium/drivers/virgl/virgl_query.c b/src/gallium/drivers/virgl/virgl_query.c new file mode 100644 index 00000000000..b0200556342 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_query.c @@ -0,0 +1,175 @@ +/* + * Copyright 2014, 2015 Red Hat. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "virgl_context.h" +#include "virgl_encode.h" +#include "virgl_protocol.h" +#include "virgl_resource.h" + +struct virgl_query { + uint32_t handle; + struct virgl_resource *buf; + + unsigned index; + unsigned type; + unsigned result_size; + unsigned result_gotten_sent; +}; + +static inline struct virgl_query *virgl_query(struct pipe_query *q) +{ + return (struct virgl_query *)q; +} + +static void virgl_render_condition(struct pipe_context *ctx, + struct pipe_query *q, + boolean condition, + uint mode) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_query *query = virgl_query(q); + uint32_t handle = 0; + if (q) + handle = query->handle; + virgl_encoder_render_condition(vctx, handle, condition, mode); +} + +static struct pipe_query *virgl_create_query(struct pipe_context *ctx, + unsigned query_type, unsigned index) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_query *query; + uint32_t handle; + + query = CALLOC_STRUCT(virgl_query); + if (!query) + return NULL; + + query->buf = (struct virgl_resource *)pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, + PIPE_USAGE_STAGING, sizeof(struct virgl_host_query_state)); + if (!query->buf) { + FREE(query); + return NULL; + } + + handle = virgl_object_assign_handle(); + query->type = query_type; + query->index = index; + query->handle = handle; + query->buf->clean = FALSE; + virgl_encoder_create_query(vctx, handle, query_type, index, query->buf, 0); + + return (struct pipe_query *)query; +} + +static void virgl_destroy_query(struct pipe_context *ctx, + struct pipe_query *q) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_query *query = virgl_query(q); + + virgl_encode_delete_object(vctx, query->handle, VIRGL_OBJECT_QUERY); + + pipe_resource_reference((struct pipe_resource **)&query->buf, NULL); + FREE(query); +} + +static boolean virgl_begin_query(struct pipe_context *ctx, + struct pipe_query 
*q) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_query *query = virgl_query(q); + + query->buf->clean = FALSE; + virgl_encoder_begin_query(vctx, query->handle); + return true; +} + +static void virgl_end_query(struct pipe_context *ctx, + struct pipe_query *q) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_query *query = virgl_query(q); + struct pipe_box box; + + uint32_t qs = VIRGL_QUERY_STATE_WAIT_HOST; + u_box_1d(0, 4, &box); + virgl_transfer_inline_write(ctx, &query->buf->u.b, 0, PIPE_TRANSFER_WRITE, + &box, &qs, 0, 0); + + + virgl_encoder_end_query(vctx, query->handle); +} + +static boolean virgl_get_query_result(struct pipe_context *ctx, + struct pipe_query *q, + boolean wait, + union pipe_query_result *result) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_query *query = virgl_query(q); + struct pipe_transfer *transfer; + struct virgl_host_query_state *host_state; + + /* ask host for query result */ + if (!query->result_gotten_sent) { + query->result_gotten_sent = 1; + virgl_encoder_get_query_result(vctx, query->handle, 0); + ctx->flush(ctx, NULL, 0); + } + + /* do we have to flush? */ + /* now we can do the transfer to get the result back? 
*/ + remap: + host_state = pipe_buffer_map(ctx, &query->buf->u.b, + PIPE_TRANSFER_READ, &transfer); + + if (host_state->query_state != VIRGL_QUERY_STATE_DONE) { + pipe_buffer_unmap(ctx, transfer); + if (wait) + goto remap; + else + return FALSE; + } + + if (query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIME_ELAPSED) + result->u64 = host_state->result; + else + result->u64 = (uint32_t)host_state->result; + + pipe_buffer_unmap(ctx, transfer); + query->result_gotten_sent = 0; + return TRUE; +} + +void virgl_init_query_functions(struct virgl_context *vctx) +{ + vctx->base.render_condition = virgl_render_condition; + vctx->base.create_query = virgl_create_query; + vctx->base.destroy_query = virgl_destroy_query; + vctx->base.begin_query = virgl_begin_query; + vctx->base.end_query = virgl_end_query; + vctx->base.get_query_result = virgl_get_query_result; +} diff --git a/src/gallium/drivers/virgl/virgl_resource.c b/src/gallium/drivers/virgl/virgl_resource.c new file mode 100644 index 00000000000..0b2fc4ec497 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_resource.c @@ -0,0 +1,90 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "util/u_inlines.h" +#include "virgl_context.h" +#include "virgl_resource.h" +#include "virgl_screen.h" + +bool virgl_res_needs_flush_wait(struct virgl_context *vctx, + struct virgl_resource *res, + unsigned usage) +{ + struct virgl_screen *vs = virgl_screen(vctx->base.screen); + + if ((!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) && vs->vws->res_is_referenced(vs->vws, vctx->cbuf, res->hw_res)) { + return true; + } + return false; +} + +bool virgl_res_needs_readback(struct virgl_context *vctx, + struct virgl_resource *res, + unsigned usage) +{ + bool readback = true; + if (res->clean) + readback = false; + else if (usage & PIPE_TRANSFER_DISCARD_RANGE) + readback = false; + else if ((usage & (PIPE_TRANSFER_WRITE | PIPE_TRANSFER_FLUSH_EXPLICIT)) == + (PIPE_TRANSFER_WRITE | PIPE_TRANSFER_FLUSH_EXPLICIT)) + readback = false; + return readback; +} + +static struct pipe_resource *virgl_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + struct virgl_screen *vs = virgl_screen(screen); + if (templ->target == PIPE_BUFFER) + return virgl_buffer_create(vs, templ); + else + return virgl_texture_create(vs, templ); +} + +static struct pipe_resource *virgl_resource_from_handle(struct pipe_screen *screen, + const struct pipe_resource *templ, + struct winsys_handle *whandle) +{ + struct virgl_screen *vs = virgl_screen(screen); + if (templ->target == PIPE_BUFFER) + return NULL; + else + return virgl_texture_from_handle(vs, templ, whandle); +} + +void 
virgl_init_screen_resource_functions(struct pipe_screen *screen) +{ + screen->resource_create = virgl_resource_create; + screen->resource_from_handle = virgl_resource_from_handle; + screen->resource_get_handle = u_resource_get_handle_vtbl; + screen->resource_destroy = u_resource_destroy_vtbl; +} + +void virgl_init_context_resource_functions(struct pipe_context *ctx) +{ + ctx->transfer_map = u_transfer_map_vtbl; + ctx->transfer_flush_region = u_transfer_flush_region_vtbl; + ctx->transfer_unmap = u_transfer_unmap_vtbl; + ctx->transfer_inline_write = u_transfer_inline_write_vtbl; +} diff --git a/src/gallium/drivers/virgl/virgl_resource.h b/src/gallium/drivers/virgl/virgl_resource.h new file mode 100644 index 00000000000..bab9bcb9b4e --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_resource.h @@ -0,0 +1,146 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef VIRGL_RESOURCE_H +#define VIRGL_RESOURCE_H + +#include "util/u_resource.h" +#include "util/u_range.h" +#include "util/list.h" +#include "util/u_transfer.h" + +#include "virgl_hw.h" +#define VR_MAX_TEXTURE_2D_LEVELS 15 + +struct winsys_handle; +struct virgl_screen; +struct virgl_context; + +struct virgl_resource { + struct u_resource u; + struct virgl_hw_res *hw_res; + boolean clean; +}; + +struct virgl_buffer { + struct virgl_resource base; + + struct list_head flush_list; + boolean on_list; + + /* The buffer range which is initialized (with a write transfer, + * streamout, DMA, or as a random access target). The rest of + * the buffer is considered invalid and can be mapped unsynchronized. + * + * This allows unsychronized mapping of a buffer range which hasn't + * been used yet. It's for applications which forget to use + * the unsynchronized map flag and expect the driver to figure it out. + */ + struct util_range valid_buffer_range; +}; + +struct virgl_texture { + struct virgl_resource base; + + unsigned long level_offset[VR_MAX_TEXTURE_2D_LEVELS]; + unsigned stride[VR_MAX_TEXTURE_2D_LEVELS]; +}; + +struct virgl_transfer { + struct pipe_transfer base; + uint32_t offset; + struct virgl_resource *resolve_tmp; +}; + +void virgl_resource_destroy(struct pipe_screen *screen, + struct pipe_resource *resource); + +void virgl_init_screen_resource_functions(struct pipe_screen *screen); + +void virgl_init_context_resource_functions(struct pipe_context *ctx); + +struct pipe_resource *virgl_texture_create(struct virgl_screen *vs, + const struct pipe_resource *templ); + +struct pipe_resource *virgl_texture_from_handle(struct virgl_screen *vs, + const struct pipe_resource *templ, + struct winsys_handle *whandle); + +static inline struct virgl_resource *virgl_resource(struct pipe_resource *r) +{ + return (struct virgl_resource *)r; +} + +static inline struct virgl_buffer *virgl_buffer(struct pipe_resource *r) +{ + return (struct virgl_buffer *)r; +} + +static 
inline struct virgl_texture *virgl_texture(struct pipe_resource *r) +{ + return (struct virgl_texture *)r; +} + +static inline struct virgl_transfer *virgl_transfer(struct pipe_transfer *trans) +{ + return (struct virgl_transfer *)trans; +} + +struct pipe_resource *virgl_buffer_create(struct virgl_screen *vs, + const struct pipe_resource *templ); + +static inline unsigned pipe_to_virgl_bind(unsigned pbind) +{ + unsigned outbind = 0; + if (pbind & PIPE_BIND_DEPTH_STENCIL) + outbind |= VIRGL_BIND_DEPTH_STENCIL; + if (pbind & PIPE_BIND_RENDER_TARGET) + outbind |= VIRGL_BIND_RENDER_TARGET; + if (pbind & PIPE_BIND_SAMPLER_VIEW) + outbind |= VIRGL_BIND_SAMPLER_VIEW; + if (pbind & PIPE_BIND_VERTEX_BUFFER) + outbind |= VIRGL_BIND_VERTEX_BUFFER; + if (pbind & PIPE_BIND_INDEX_BUFFER) + outbind |= VIRGL_BIND_INDEX_BUFFER; + if (pbind & PIPE_BIND_CONSTANT_BUFFER) + outbind |= VIRGL_BIND_CONSTANT_BUFFER; + if (pbind & PIPE_BIND_DISPLAY_TARGET) + outbind |= VIRGL_BIND_DISPLAY_TARGET; + if (pbind & PIPE_BIND_STREAM_OUTPUT) + outbind |= VIRGL_BIND_STREAM_OUTPUT; + if (pbind & PIPE_BIND_CURSOR) + outbind |= VIRGL_BIND_CURSOR; + if (pbind & PIPE_BIND_CUSTOM) + outbind |= VIRGL_BIND_CUSTOM; + if (pbind & PIPE_BIND_SCANOUT) + outbind |= VIRGL_BIND_SCANOUT; + return outbind; +} + +bool virgl_res_needs_flush_wait(struct virgl_context *vctx, + struct virgl_resource *res, + unsigned usage); +bool virgl_res_needs_readback(struct virgl_context *vctx, + struct virgl_resource *res, + unsigned usage); +#endif diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c new file mode 100644 index 00000000000..cca379d47ab --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_screen.c @@ -0,0 +1,553 @@ +/* + * Copyright 2014, 2015 Red Hat. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "util/u_video.h"
+#include "os/os_time.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "draw/draw_context.h"
+
+#include "tgsi/tgsi_exec.h"
+
+#include "virgl_screen.h"
+#include "virgl_resource.h"
+#include "virgl_public.h"
+#include "virgl_context.h"
+
+/* Mip level limits reported by virgl_get_param(); the trailing comment on
+ * each line gives the matching largest texture size. */
+#define SP_MAX_TEXTURE_2D_LEVELS 15 /* 16K x 16K */
+#define SP_MAX_TEXTURE_3D_LEVELS 9 /* 512 x 512 x 512 */
+#define SP_MAX_TEXTURE_CUBE_LEVELS 13 /* 4K x 4K */
+
+/* Return the constant vendor string; the screen argument is not used. */
+static const char *
+virgl_get_vendor(struct pipe_screen *screen)
+{
+   return "Red Hat";
+}
+
+
+/* Return the constant driver name; the screen argument is not used. */
+static const char *
+virgl_get_name(struct pipe_screen *screen)
+{
+   return "virgl";
+}
+
+/*
+ * Answer a PIPE_CAP_* query for this screen.
+ *
+ * Fixed capabilities return constants.  The others are read from
+ * vscreen->caps.caps.v1 (presumably the capability set reported by the
+ * host - confirm).
+ *
+ * The switch has no default label.  A cap that is not listed falls out of
+ * the switch, is logged with debug_printf() and is reported as 0.
+ */
+static int
+virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
+{
+   struct virgl_screen *vscreen = virgl_screen(screen);
+   switch (param) {
+   case PIPE_CAP_NPOT_TEXTURES:
+      return 1;
+   case PIPE_CAP_TWO_SIDED_STENCIL:
+      return 1;
+   case PIPE_CAP_SM3:
+      return 1;
+   case PIPE_CAP_ANISOTROPIC_FILTER:
+      return 1;
+   case PIPE_CAP_POINT_SPRITE:
+      return 1;
+   case PIPE_CAP_MAX_RENDER_TARGETS:
+      return vscreen->caps.caps.v1.max_render_targets;
+   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+      return vscreen->caps.caps.v1.max_dual_source_render_targets;
+   case PIPE_CAP_OCCLUSION_QUERY:
+      return vscreen->caps.caps.v1.bset.occlusion_query;
+   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+      return vscreen->caps.caps.v1.bset.mirror_clamp;
+   case PIPE_CAP_TEXTURE_SHADOW_MAP:
+      return 1;
+   case PIPE_CAP_TEXTURE_SWIZZLE:
+      return 1;
+   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+      return SP_MAX_TEXTURE_2D_LEVELS;
+   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+      return SP_MAX_TEXTURE_3D_LEVELS;
+   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+      return SP_MAX_TEXTURE_CUBE_LEVELS;
+   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+      return 1;
+   case PIPE_CAP_INDEP_BLEND_ENABLE:
+      return vscreen->caps.caps.v1.bset.indep_blend_enable;
+   case PIPE_CAP_INDEP_BLEND_FUNC:
+      return vscreen->caps.caps.v1.bset.indep_blend_func;
+   /* all four coordinate-convention caps share one capability bit */
+   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+      return vscreen->caps.caps.v1.bset.fragment_coord_conventions;
+   case PIPE_CAP_DEPTH_CLIP_DISABLE:
+      return vscreen->caps.caps.v1.bset.depth_clip_disable;
+   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+      return vscreen->caps.caps.v1.max_streamout_buffers;
+   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+      return 16*4;
+   case PIPE_CAP_PRIMITIVE_RESTART:
+      return vscreen->caps.caps.v1.bset.primitive_restart;
+   case PIPE_CAP_SHADER_STENCIL_EXPORT:
+      return vscreen->caps.caps.v1.bset.shader_stencil_export;
+   case PIPE_CAP_TGSI_INSTANCEID:
+   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+      return 1;
+   case PIPE_CAP_SEAMLESS_CUBE_MAP:
+      return vscreen->caps.caps.v1.bset.seamless_cube_map;
+   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+      return vscreen->caps.caps.v1.bset.seamless_cube_map_per_texture;
+   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+      return vscreen->caps.caps.v1.max_texture_array_layers;
+   case PIPE_CAP_MIN_TEXEL_OFFSET:
+   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+      return -8;
+   case PIPE_CAP_MAX_TEXEL_OFFSET:
+   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+      return 7;
+   case PIPE_CAP_CONDITIONAL_RENDER:
+      return vscreen->caps.caps.v1.bset.conditional_render;
+   case PIPE_CAP_TEXTURE_BARRIER:
+      return 0;
+   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+      return 1;
+   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+   case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+      return vscreen->caps.caps.v1.bset.color_clamping;
+   case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+      return 1;
+   case PIPE_CAP_GLSL_FEATURE_LEVEL:
+      return vscreen->caps.caps.v1.glsl_level;
+   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+      return 0;
+   case PIPE_CAP_COMPUTE:
+      return 0;
+   case PIPE_CAP_USER_VERTEX_BUFFERS:
+      return 0;
+   case PIPE_CAP_USER_INDEX_BUFFERS:
+   case PIPE_CAP_USER_CONSTANT_BUFFERS:
+      return 1;
+   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+      return 16;
+   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+      return vscreen->caps.caps.v1.bset.streamout_pause_resume;
+   case PIPE_CAP_START_INSTANCE:
+      return vscreen->caps.caps.v1.bset.start_instance;
+   case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+      return 0;
+   case PIPE_CAP_QUERY_TIMESTAMP:
+      return 1;
+   case PIPE_CAP_QUERY_TIME_ELAPSED:
+      return 0;
+   case PIPE_CAP_TGSI_TEXCOORD:
+      return 0;
+   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+      return VIRGL_MAP_BUFFER_ALIGNMENT;
+   /* reported as supported whenever max_tbo_size is non-zero */
+   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+      return vscreen->caps.caps.v1.max_tbo_size > 0;
+   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+      return 0;
+   case PIPE_CAP_CUBE_MAP_ARRAY:
+      return vscreen->caps.caps.v1.bset.cube_map_array;
+   case PIPE_CAP_TEXTURE_MULTISAMPLE:
+      return vscreen->caps.caps.v1.bset.texture_multisample;
+   case PIPE_CAP_MAX_VIEWPORTS:
+      return vscreen->caps.caps.v1.max_viewports;
+   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+      return vscreen->caps.caps.v1.max_tbo_size;
+   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+   case PIPE_CAP_ENDIANNESS:
+      return 0;
+   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+      return 1;
+   case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+      return 0;
+   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+      return 1024;
+   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+      return 16384;
+   case PIPE_CAP_TEXTURE_QUERY_LOD:
+      return vscreen->caps.caps.v1.bset.texture_query_lod;
+   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+      return vscreen->caps.caps.v1.max_texture_gather_components;
+   /* every cap in the following group is reported as 0 */
+   case PIPE_CAP_TEXTURE_GATHER_SM5:
+   case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+   case PIPE_CAP_SAMPLE_SHADING:
+   case PIPE_CAP_FAKE_SW_MSAA:
+   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+   case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+   case PIPE_CAP_MAX_VERTEX_STREAMS:
+   case PIPE_CAP_DRAW_INDIRECT:
+   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+   case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+   case PIPE_CAP_SAMPLER_VIEW_TARGET:
+   case PIPE_CAP_CLIP_HALFZ:
+   case PIPE_CAP_VERTEXID_NOBASE:
+   case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+   case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+   case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+   case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+   case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
+   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+   case PIPE_CAP_SHAREABLE_SHADERS:
+      return 0;
+   case PIPE_CAP_VENDOR_ID:
+      return 0x1af4;
+   case PIPE_CAP_DEVICE_ID:
+      return 0x1010;
+   case PIPE_CAP_ACCELERATED:
+      return 1;
+   case PIPE_CAP_UMA:
+   case PIPE_CAP_VIDEO_MEMORY:
+      return 0;
+   }
+   /* should only get here on unhandled cases */
+   debug_printf("Unexpected PIPE_CAP %d query\n", param);
+   return 0;
+}
+
+static int
+virgl_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
+{
+   struct virgl_screen *vscreen = virgl_screen(screen);
+   switch(shader)
+   {
+   case PIPE_SHADER_FRAGMENT:
+   case PIPE_SHADER_VERTEX:
+   case PIPE_SHADER_GEOMETRY:
+      switch (param) {
+      case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+      case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+      case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+      case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+         return INT_MAX;
+      case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+      case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+      case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+         return 1;
+      case PIPE_SHADER_CAP_MAX_INPUTS:
+         if (vscreen->caps.caps.v1.glsl_level < 150)
+            return 16;
+         return shader == PIPE_SHADER_VERTEX ? 
16 : 32; + case PIPE_SHADER_CAP_MAX_OUTPUTS: + return 128; + // case PIPE_SHADER_CAP_MAX_CONSTS: + // return 4096; + case PIPE_SHADER_CAP_MAX_TEMPS: + return 256; + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return vscreen->caps.caps.v1.max_uniform_blocks; + // case PIPE_SHADER_CAP_MAX_ADDRS: + // return 1; + case PIPE_SHADER_CAP_MAX_PREDS: + return 0; + case PIPE_SHADER_CAP_SUBROUTINES: + return 1; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + return 16; + case PIPE_SHADER_CAP_INTEGERS: + return vscreen->caps.caps.v1.glsl_level >= 130; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 32; + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return 4096 * sizeof(float[4]); + default: + return 0; + } + default: + return 0; + } +} + +static float +virgl_get_paramf(struct pipe_screen *screen, enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + case PIPE_CAPF_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 255.0; /* arbitrary */ + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + case PIPE_CAPF_GUARD_BAND_LEFT: + case PIPE_CAPF_GUARD_BAND_TOP: + case PIPE_CAPF_GUARD_BAND_RIGHT: + case PIPE_CAPF_GUARD_BAND_BOTTOM: + return 0.0; + } + /* should only get here on unhandled cases */ + debug_printf("Unexpected PIPE_CAPF %d query\n", param); + return 0.0; +} + +static boolean +virgl_is_vertex_format_supported(struct pipe_screen *screen, + enum pipe_format format) +{ + struct virgl_screen *vscreen = virgl_screen(screen); + const struct util_format_description *format_desc; + int i; + + format_desc = util_format_description(format); + if (!format_desc) + return FALSE; + + if (format == PIPE_FORMAT_R11G11B10_FLOAT) { + int vformat = VIRGL_FORMAT_R11G11B10_FLOAT; + int big = vformat / 32; + int small = vformat % 32; + if 
(!(vscreen->caps.caps.v1.vertexbuffer.bitmask[big] & (1 << small))) + return FALSE; + return TRUE; + } + + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + if (i == 4) + return FALSE; + + if (format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) + return FALSE; + + if (format_desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED) + return FALSE; + return TRUE; +} + +/** + * Query format support for creating a texture, drawing surface, etc. + * \param format the format to test + * \param type one of PIPE_TEXTURE, PIPE_SURFACE + */ +static boolean +virgl_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned bind) +{ + struct virgl_screen *vscreen = virgl_screen(screen); + const struct util_format_description *format_desc; + int i; + + assert(target == PIPE_BUFFER || + target == PIPE_TEXTURE_1D || + target == PIPE_TEXTURE_1D_ARRAY || + target == PIPE_TEXTURE_2D || + target == PIPE_TEXTURE_2D_ARRAY || + target == PIPE_TEXTURE_RECT || + target == PIPE_TEXTURE_3D || + target == PIPE_TEXTURE_CUBE || + target == PIPE_TEXTURE_CUBE_ARRAY); + + format_desc = util_format_description(format); + if (!format_desc) + return FALSE; + + if (util_format_is_intensity(format)) + return FALSE; + + if (sample_count > 1) { + if (!vscreen->caps.caps.v1.bset.texture_multisample) + return FALSE; + if (sample_count > vscreen->caps.caps.v1.max_samples) + return FALSE; + } + + if (bind & PIPE_BIND_VERTEX_BUFFER) { + return virgl_is_vertex_format_supported(screen, format); + } + + if (bind & PIPE_BIND_RENDER_TARGET) { + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + + /* + * Although possible, it is unnatural to render into compressed or YUV + * surfaces. So disable these here to avoid going into weird paths + * inside the state trackers. 
+ */ + if (format_desc->block.width != 1 || + format_desc->block.height != 1) + return FALSE; + + { + int big = format / 32; + int small = format % 32; + if (!(vscreen->caps.caps.v1.render.bitmask[big] & (1 << small))) + return FALSE; + } + } + + if (bind & PIPE_BIND_DEPTH_STENCIL) { + if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + } + + /* + * All other operations (sampling, transfer, etc). + */ + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + if (util_format_s3tc_enabled) + goto out_lookup; + return FALSE; + } + if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + goto out_lookup; + } + + if (format == PIPE_FORMAT_R11G11B10_FLOAT) { + goto out_lookup; + } else if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { + goto out_lookup; + } + + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + + if (i == 4) + return FALSE; + + /* no L4A4 */ + if (format_desc->nr_channels < 4 && format_desc->channel[i].size == 4) + return FALSE; + + out_lookup: + { + int big = format / 32; + int small = format % 32; + if (!(vscreen->caps.caps.v1.sampler.bitmask[big] & (1 << small))) + return FALSE; + } + /* + * Everything else should be supported by u_format. 
+ */ + return TRUE; +} + +static void virgl_flush_frontbuffer(struct pipe_screen *screen, + struct pipe_resource *res, + unsigned level, unsigned layer, + void *winsys_drawable_handle, struct pipe_box *sub_box) +{ + struct virgl_screen *vscreen = virgl_screen(screen); + struct virgl_winsys *vws = vscreen->vws; + struct virgl_resource *vres = virgl_resource(res); + + if (vws->flush_frontbuffer) + vws->flush_frontbuffer(vws, vres->hw_res, level, layer, winsys_drawable_handle, + sub_box); +} + +static void virgl_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct virgl_screen *vscreen = virgl_screen(screen); + struct virgl_winsys *vws = vscreen->vws; + + vws->fence_reference(vws, ptr, fence); +} + +static boolean virgl_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + uint64_t timeout) +{ + struct virgl_screen *vscreen = virgl_screen(screen); + struct virgl_winsys *vws = vscreen->vws; + + return vws->fence_wait(vws, fence, timeout); +} + +static uint64_t +virgl_get_timestamp(struct pipe_screen *_screen) +{ + return os_time_get_nano(); +} + +static void +virgl_destroy_screen(struct pipe_screen *screen) +{ + struct virgl_screen *vscreen = virgl_screen(screen); + struct virgl_winsys *vws = vscreen->vws; + + if (vws) + vws->destroy(vws); + FREE(vscreen); +} + +struct pipe_screen * +virgl_create_screen(struct virgl_winsys *vws) +{ + struct virgl_screen *screen = CALLOC_STRUCT(virgl_screen); + + if (!screen) + return NULL; + + screen->vws = vws; + screen->base.get_name = virgl_get_name; + screen->base.get_vendor = virgl_get_vendor; + screen->base.get_param = virgl_get_param; + screen->base.get_shader_param = virgl_get_shader_param; + screen->base.get_paramf = virgl_get_paramf; + screen->base.is_format_supported = virgl_is_format_supported; + screen->base.destroy = virgl_destroy_screen; + screen->base.context_create = virgl_context_create; + screen->base.flush_frontbuffer = 
virgl_flush_frontbuffer; + screen->base.get_timestamp = virgl_get_timestamp; + screen->base.fence_reference = virgl_fence_reference; + //screen->base.fence_signalled = virgl_fence_signalled; + screen->base.fence_finish = virgl_fence_finish; + + virgl_init_screen_resource_functions(&screen->base); + + vws->get_caps(vws, &screen->caps); + + + util_format_s3tc_init(); + return &screen->base; +} diff --git a/src/gallium/drivers/virgl/virgl_screen.h b/src/gallium/drivers/virgl/virgl_screen.h new file mode 100644 index 00000000000..52e72ca4958 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_screen.h @@ -0,0 +1,47 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef VIRGL_H +#define VIRGL_H + +#include "pipe/p_screen.h" +#include "virgl_winsys.h" + +/* Driver-side screen object. "base" has to stay the first member: virgl_screen() below converts a pipe_screen pointer to this type with a plain cast. */ struct virgl_screen { + struct pipe_screen base; + /* winsys backend this screen talks to */ struct virgl_winsys *vws; + + /* capability block queried from the winsys */ struct virgl_drm_caps caps; + + /* NOTE(review): presumably the id handed to the next sub-context - confirm against virgl_context.c */ uint32_t sub_ctx_id; +}; + + +/* Downcast a pipe_screen to the virgl_screen that embeds it. */ static inline struct virgl_screen * +virgl_screen(struct pipe_screen *pipe) +{ + return (struct virgl_screen *)pipe; +} + +/* Value reported for PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT. */ +#define VIRGL_MAP_BUFFER_ALIGNMENT 64 + +#endif diff --git a/src/gallium/drivers/virgl/virgl_streamout.c b/src/gallium/drivers/virgl/virgl_streamout.c new file mode 100644 index 00000000000..b6a65fff29e --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_streamout.c @@ -0,0 +1,88 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "virgl_context.h" +#include "virgl_encode.h" +#include "virgl_protocol.h" +#include "virgl_resource.h" + +static struct pipe_stream_output_target *virgl_create_so_target( + struct pipe_context *ctx, + struct pipe_resource *buffer, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_resource *res = virgl_resource(buffer); + struct virgl_so_target *t = CALLOC_STRUCT(virgl_so_target); + uint32_t handle; + + if (!t) + return NULL; + handle = virgl_object_assign_handle(); + + t->base.reference.count = 1; + t->base.context = ctx; + pipe_resource_reference(&t->base.buffer, buffer); + t->base.buffer_offset = buffer_offset; + t->base.buffer_size = buffer_size; + t->handle = handle; + res->clean = FALSE; + virgl_encoder_create_so_target(vctx, handle, res, buffer_offset, buffer_size); + return &t->base; +} + +static void virgl_destroy_so_target(struct pipe_context *ctx, + struct pipe_stream_output_target *target) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_so_target *t = virgl_so_target(target); + + pipe_resource_reference(&t->base.buffer, NULL); + virgl_encode_delete_object(vctx, t->handle, VIRGL_OBJECT_STREAMOUT_TARGET); + FREE(t); +} + +static void virgl_set_so_targets(struct pipe_context *ctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offset) +{ + struct virgl_context *vctx = virgl_context(ctx); + int i; + for (i = 0; i < num_targets; i++) { + pipe_resource_reference(&vctx->so_targets[i].base.buffer, targets[i]->buffer); + } + for (i = num_targets; i < vctx->num_so_targets; i++) + pipe_resource_reference(&vctx->so_targets[i].base.buffer, NULL); + vctx->num_so_targets = num_targets; + virgl_encoder_set_so_targets(vctx, num_targets, targets, 0);//append_bitmask); +} + +void virgl_init_so_functions(struct virgl_context *vctx) +{ + vctx->base.create_stream_output_target 
= virgl_create_so_target; + vctx->base.stream_output_target_destroy = virgl_destroy_so_target; + vctx->base.set_stream_output_targets = virgl_set_so_targets; +} diff --git a/src/gallium/drivers/virgl/virgl_texture.c b/src/gallium/drivers/virgl/virgl_texture.c new file mode 100644 index 00000000000..31189626144 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_texture.c @@ -0,0 +1,351 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" + +#include "virgl_context.h" +#include "virgl_resource.h" +#include "virgl_screen.h" + +static void virgl_copy_region_with_blit(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct pipe_blit_info blit; + + memset(&blit, 0, sizeof(blit)); + blit.src.resource = src; + blit.src.format = src->format; + blit.src.level = src_level; + blit.src.box = *src_box; + blit.dst.resource = dst; + blit.dst.format = dst->format; + blit.dst.level = dst_level; + blit.dst.box.x = dstx; + blit.dst.box.y = dsty; + blit.dst.box.z = dstz; + blit.dst.box.width = src_box->width; + blit.dst.box.height = src_box->height; + blit.dst.box.depth = src_box->depth; + blit.mask = util_format_get_mask(src->format) & + util_format_get_mask(dst->format); + blit.filter = PIPE_TEX_FILTER_NEAREST; + + if (blit.mask) { + pipe->blit(pipe, &blit); + } +} +static void virgl_init_temp_resource_from_box(struct pipe_resource *res, + struct pipe_resource *orig, + const struct pipe_box *box, + unsigned level, unsigned flags) +{ + memset(res, 0, sizeof(*res)); + res->format = orig->format; + res->width0 = box->width; + res->height0 = box->height; + res->depth0 = 1; + res->array_size = 1; + res->usage = PIPE_USAGE_STAGING; + res->flags = flags; + + /* We must set the correct texture target and dimensions for a 3D box. 
*/ + if (box->depth > 1 && util_max_layer(orig, level) > 0) + res->target = orig->target; + else + res->target = PIPE_TEXTURE_2D; + + switch (res->target) { + case PIPE_TEXTURE_1D_ARRAY: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_CUBE_ARRAY: + res->array_size = box->depth; + break; + case PIPE_TEXTURE_3D: + res->depth0 = box->depth; + break; + default: + break; + } +} + +static unsigned +vrend_get_tex_image_offset(const struct virgl_texture *res, + unsigned level, unsigned layer) +{ + const struct pipe_resource *pres = &res->base.u.b; + const unsigned hgt = u_minify(pres->height0, level); + const unsigned nblocksy = util_format_get_nblocksy(pres->format, hgt); + unsigned offset = res->level_offset[level]; + + if (pres->target == PIPE_TEXTURE_CUBE || + pres->target == PIPE_TEXTURE_CUBE_ARRAY || + pres->target == PIPE_TEXTURE_3D || + pres->target == PIPE_TEXTURE_2D_ARRAY) { + offset += layer * nblocksy * res->stride[level]; + } + else if (pres->target == PIPE_TEXTURE_1D_ARRAY) { + offset += layer * res->stride[level]; + } + else { + assert(layer == 0); + } + + return offset; +} + +static void *virgl_texture_transfer_map(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **transfer) +{ + struct virgl_context *vctx = virgl_context(ctx); + struct virgl_screen *vs = virgl_screen(ctx->screen); + struct virgl_texture *vtex = virgl_texture(resource); + enum pipe_format format = resource->format; + struct virgl_transfer *trans; + void *ptr; + boolean readback = TRUE; + uint32_t offset; + struct virgl_hw_res *hw_res; + const unsigned h = u_minify(vtex->base.u.b.height0, level); + const unsigned nblocksy = util_format_get_nblocksy(format, h); + bool is_depth = util_format_has_depth(util_format_description(resource->format)); + uint32_t l_stride; + bool doflushwait; + + doflushwait = virgl_res_needs_flush_wait(vctx, &vtex->base, usage); + if (doflushwait) + ctx->flush(ctx, 
NULL, 0); + + trans = util_slab_alloc(&vctx->texture_transfer_pool); + if (trans == NULL) + return NULL; + + trans->base.resource = resource; + trans->base.level = level; + trans->base.usage = usage; + trans->base.box = *box; + trans->base.stride = vtex->stride[level]; + trans->base.layer_stride = trans->base.stride * nblocksy; + + if (resource->target != PIPE_TEXTURE_3D && + resource->target != PIPE_TEXTURE_CUBE && + resource->target != PIPE_TEXTURE_1D_ARRAY && + resource->target != PIPE_TEXTURE_2D_ARRAY && + resource->target != PIPE_TEXTURE_CUBE_ARRAY) + l_stride = 0; + else + l_stride = trans->base.layer_stride; + + if (is_depth && resource->nr_samples > 1) { + struct pipe_resource tmp_resource; + virgl_init_temp_resource_from_box(&tmp_resource, resource, box, + level, 0); + + trans->resolve_tmp = (struct virgl_resource *)ctx->screen->resource_create(ctx->screen, &tmp_resource); + + virgl_copy_region_with_blit(ctx, &trans->resolve_tmp->u.b, 0, 0, 0, 0, resource, level, box); + ctx->flush(ctx, NULL, 0); + /* we want to do a resolve blit into the temporary */ + hw_res = trans->resolve_tmp->hw_res; + offset = 0; + } else { + offset = vrend_get_tex_image_offset(vtex, level, box->z); + + offset += box->y / util_format_get_blockheight(format) * trans->base.stride + + box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); + hw_res = vtex->base.hw_res; + trans->resolve_tmp = NULL; + } + + readback = virgl_res_needs_readback(vctx, &vtex->base, usage); + if (readback) + vs->vws->transfer_get(vs->vws, hw_res, box, trans->base.stride, l_stride, offset, level); + + if (doflushwait || readback) + vs->vws->resource_wait(vs->vws, vtex->base.hw_res); + + ptr = vs->vws->resource_map(vs->vws, hw_res); + if (!ptr) { + return NULL; + } + + trans->offset = offset; + *transfer = &trans->base; + + return ptr + trans->offset; +} + +static void virgl_texture_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer *transfer) +{ + struct virgl_context 
*vctx = virgl_context(ctx); + struct virgl_transfer *trans = virgl_transfer(transfer); + struct virgl_texture *vtex = virgl_texture(transfer->resource); + uint32_t l_stride; + + if (transfer->resource->target != PIPE_TEXTURE_3D && + transfer->resource->target != PIPE_TEXTURE_CUBE && + transfer->resource->target != PIPE_TEXTURE_1D_ARRAY && + transfer->resource->target != PIPE_TEXTURE_2D_ARRAY && + transfer->resource->target != PIPE_TEXTURE_CUBE_ARRAY) + l_stride = 0; + else + l_stride = trans->base.layer_stride; + + if (trans->base.usage & PIPE_TRANSFER_WRITE) { + if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { + struct virgl_screen *vs = virgl_screen(ctx->screen); + vtex->base.clean = FALSE; + vctx->num_transfers++; + vs->vws->transfer_put(vs->vws, vtex->base.hw_res, + &transfer->box, trans->base.stride, l_stride, trans->offset, transfer->level); + + } + } + + if (trans->resolve_tmp) + pipe_resource_reference((struct pipe_resource **)&trans->resolve_tmp, NULL); + + util_slab_free(&vctx->texture_transfer_pool, trans); +} + + +static boolean +vrend_resource_layout(struct virgl_texture *res, + uint32_t *total_size) +{ + struct pipe_resource *pt = &res->base.u.b; + unsigned level; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; + unsigned buffer_size = 0; + + for (level = 0; level <= pt->last_level; level++) { + unsigned slices; + + if (pt->target == PIPE_TEXTURE_CUBE) + slices = 6; + else if (pt->target == PIPE_TEXTURE_3D) + slices = depth; + else + slices = pt->array_size; + + res->stride[level] = util_format_get_stride(pt->format, width); + res->level_offset[level] = buffer_size; + + buffer_size += (util_format_get_nblocksy(pt->format, height) * + slices * res->stride[level]); + + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + } + + if (pt->nr_samples <= 1) + *total_size = buffer_size; + else /* don't create guest backing store for MSAA */ + *total_size = 0; + return 
TRUE; +} + +static boolean virgl_texture_get_handle(struct pipe_screen *screen, + struct pipe_resource *ptex, + struct winsys_handle *whandle) +{ + struct virgl_screen *vs = virgl_screen(screen); + struct virgl_texture *vtex = virgl_texture(ptex); + + return vs->vws->resource_get_handle(vs->vws, vtex->base.hw_res, vtex->stride[0], whandle); +} + +static void virgl_texture_destroy(struct pipe_screen *screen, + struct pipe_resource *res) +{ + struct virgl_screen *vs = virgl_screen(screen); + struct virgl_texture *vtex = virgl_texture(res); + vs->vws->resource_unref(vs->vws, vtex->base.hw_res); + FREE(vtex); +} + +static const struct u_resource_vtbl virgl_texture_vtbl = +{ + virgl_texture_get_handle, /* get_handle */ + virgl_texture_destroy, /* resource_destroy */ + virgl_texture_transfer_map, /* transfer_map */ + NULL, /* transfer_flush_region */ + virgl_texture_transfer_unmap, /* transfer_unmap */ + NULL /* transfer_inline_write */ +}; + +struct pipe_resource * +virgl_texture_from_handle(struct virgl_screen *vs, + const struct pipe_resource *template, + struct winsys_handle *whandle) +{ + struct virgl_texture *tex; + uint32_t size; + + tex = CALLOC_STRUCT(virgl_texture); + tex->base.u.b = *template; + tex->base.u.b.screen = &vs->base; + pipe_reference_init(&tex->base.u.b.reference, 1); + tex->base.u.vtbl = &virgl_texture_vtbl; + vrend_resource_layout(tex, &size); + + tex->base.hw_res = vs->vws->resource_create_from_handle(vs->vws, whandle); + return &tex->base.u.b; +} + +struct pipe_resource *virgl_texture_create(struct virgl_screen *vs, + const struct pipe_resource *template) +{ + struct virgl_texture *tex; + uint32_t size; + unsigned vbind; + + tex = CALLOC_STRUCT(virgl_texture); + tex->base.clean = TRUE; + tex->base.u.b = *template; + tex->base.u.b.screen = &vs->base; + pipe_reference_init(&tex->base.u.b.reference, 1); + tex->base.u.vtbl = &virgl_texture_vtbl; + vrend_resource_layout(tex, &size); + + vbind = pipe_to_virgl_bind(template->bind); + tex->base.hw_res 
= vs->vws->resource_create(vs->vws, template->target, template->format, vbind, template->width0, template->height0, template->depth0, template->array_size, template->last_level, template->nr_samples, size); + if (!tex->base.hw_res) { + FREE(tex); + return NULL; + } + return &tex->base.u.b; +} diff --git a/src/gallium/drivers/virgl/virgl_tgsi.c b/src/gallium/drivers/virgl/virgl_tgsi.c new file mode 100644 index 00000000000..641b0b3e3b5 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_tgsi.c @@ -0,0 +1,66 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* the virgl hw tgsi vs what the current gallium want will diverge over time. + so add a transform stage to remove things we don't want to send unless + the receiver supports it. 
+*/ +#include "tgsi/tgsi_transform.h" +#include "virgl_context.h" +struct virgl_transform_context { + struct tgsi_transform_context base; +}; + +/* for now just strip out the new properties the remote doesn't understand + yet */ +static void +virgl_tgsi_transform_property(struct tgsi_transform_context *ctx, + struct tgsi_full_property *prop) +{ + switch (prop->Property.PropertyName) { + case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: + case TGSI_PROPERTY_NUM_CULLDIST_ENABLED: + break; + default: + ctx->emit_property(ctx, prop); + break; + } +} + +struct tgsi_token *virgl_tgsi_transform(const struct tgsi_token *tokens_in) +{ + + struct virgl_transform_context transform; + const uint newLen = tgsi_num_tokens(tokens_in); + struct tgsi_token *new_tokens; + + new_tokens = tgsi_alloc_tokens(newLen); + if (!new_tokens) + return NULL; + + memset(&transform, 0, sizeof(transform)); + transform.base.transform_property = virgl_tgsi_transform_property; + tgsi_transform_shader(tokens_in, new_tokens, newLen, &transform.base); + + return new_tokens; +} diff --git a/src/gallium/drivers/virgl/virgl_winsys.h b/src/gallium/drivers/virgl/virgl_winsys.h new file mode 100644 index 00000000000..ea21f2b6712 --- /dev/null +++ b/src/gallium/drivers/virgl/virgl_winsys.h @@ -0,0 +1,113 @@ +/* + * Copyright 2014, 2015 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VIRGL_WINSYS_H +#define VIRGL_WINSYS_H + +#include "pipe/p_defines.h" +#include "virgl_hw.h" + +struct pipe_box; +struct pipe_fence_handle; +struct winsys_handle; +struct virgl_hw_res; + +/* NOTE(review): presumably the capacity of a command buffer in 32-bit words - confirm against the winsys implementations. */ +#define VIRGL_MAX_CMDBUF_DWORDS (16*1024) + +/* Thin wrapper around the virgl_caps union; filled in by the get_caps hook below. */ struct virgl_drm_caps { + union virgl_caps caps; +}; + +/* Command buffer handed to the encoder. buf points at the 32-bit word storage; cdw is presumably the number of words written so far - TODO confirm. */ struct virgl_cmd_buf { + unsigned cdw; + uint32_t *buf; +}; + +/* Table of callbacks a winsys backend provides to the virgl driver. Every hook takes the winsys itself as its first argument, except cmd_buf_destroy. */ struct virgl_winsys { + unsigned pci_id; + + /* Tear down the winsys. */ void (*destroy)(struct virgl_winsys *vws); + + /* Transfer 'box' of 'res' at 'level' using the given strides and buffer offset; put is the write direction (used by transfer unmap for PIPE_TRANSFER_WRITE maps). */ int (*transfer_put)(struct virgl_winsys *vws, + struct virgl_hw_res *res, + const struct pipe_box *box, + uint32_t stride, uint32_t layer_stride, + uint32_t buf_offset, uint32_t level); + + /* Read direction of the above (used by transfer map when a readback is needed). */ int (*transfer_get)(struct virgl_winsys *vws, + struct virgl_hw_res *res, + const struct pipe_box *box, + uint32_t stride, uint32_t layer_stride, + uint32_t buf_offset, uint32_t level); + + /* Create a hardware resource; callers treat a NULL return as failure. */ struct virgl_hw_res *(*resource_create)(struct virgl_winsys *vws, + enum pipe_texture_target target, + uint32_t format, uint32_t bind, + uint32_t width, uint32_t height, + uint32_t depth, uint32_t array_size, + uint32_t last_level, uint32_t nr_samples, + uint32_t size); + + void (*resource_unref)(struct virgl_winsys *vws, struct virgl_hw_res *res); + + /* Map a resource for CPU access; callers treat a NULL return as failure. */ void *(*resource_map)(struct virgl_winsys *vws, struct virgl_hw_res *res); + void (*resource_wait)(struct virgl_winsys *vws, struct virgl_hw_res *res); + + struct virgl_hw_res *(*resource_create_from_handle)(struct virgl_winsys *vws, + struct winsys_handle *whandle); + boolean (*resource_get_handle)(struct 
virgl_winsys *vws, + struct virgl_hw_res *res, + uint32_t stride, + struct winsys_handle *whandle); + + struct virgl_cmd_buf *(*cmd_buf_create)(struct virgl_winsys *ws); + void (*cmd_buf_destroy)(struct virgl_cmd_buf *buf); + + void (*emit_res)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf, struct virgl_hw_res *res, boolean write_buffer); + int (*submit_cmd)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf); + + boolean (*res_is_referenced)(struct virgl_winsys *vws, + struct virgl_cmd_buf *buf, + struct virgl_hw_res *res); + + /* Fill 'caps' with the capability block the screen answers PIPE_CAP queries from. */ int (*get_caps)(struct virgl_winsys *vws, struct virgl_drm_caps *caps); + + /* fence */ + struct pipe_fence_handle *(*cs_create_fence)(struct virgl_winsys *vws); + bool (*fence_wait)(struct virgl_winsys *vws, + struct pipe_fence_handle *fence, + uint64_t timeout); + + void (*fence_reference)(struct virgl_winsys *vws, + struct pipe_fence_handle **dst, + struct pipe_fence_handle *src); + + /* for sw paths */ + /* Optional hook: the screen checks it for NULL before calling. */ void (*flush_frontbuffer)(struct virgl_winsys *vws, + struct virgl_hw_res *res, + unsigned level, unsigned layer, + void *winsys_drawable_handle, + struct pipe_box *sub_box); +}; + + +#endif |