diff options
author | Glenn Kennard <[email protected]> | 2015-02-24 15:59:16 +0100 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2015-02-24 21:21:04 +0100 |
commit | d80701df8af4a5d74c4f4eb09a4b3cef6970104b (patch) | |
tree | 7c8eec19f14608872e5c22f75c648ee901a6a4f1 | |
parent | dd70e786747f7e4800f4bba245373c5ffa3baeee (diff) |
r600g: Implement GL_ARB_draw_indirect for EG/CM
Requires Evergreen/Cayman and radeon kernel module
2.41.0 or newer.
Expected piglit fails due to hardware limitations:
* arb_draw_indirect-draw-arrays-prim-restart
Restarts not applied for DrawArrays commands
* arb_draw_indirect-vertexid
Base vertex offset is not included in vertex id
Marek: bump vgt_state num_dw by 3 (= space needed for one register write)
Signed-off-by: Glenn Kennard <[email protected]>
Signed-off-by: Marek Olšák <[email protected]>
-rw-r--r-- | docs/GL3.txt | 6 | ||||
-rw-r--r-- | docs/relnotes/10.6.0.html | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreend.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_sq.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 128 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600d.h | 8 |
12 files changed, 133 insertions, 38 deletions
diff --git a/docs/GL3.txt b/docs/GL3.txt index a5b81754564..43bbf854e9d 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -95,7 +95,7 @@ GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, soft GL 4.0, GLSL 4.00: GL_ARB_draw_buffers_blend DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) - GL_ARB_draw_indirect DONE (i965, nvc0, radeonsi, llvmpipe, softpipe) + GL_ARB_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_gpu_shader5 DONE (i965, nvc0) - 'precise' qualifier DONE - Dynamically uniform sampler array indices DONE (r600) @@ -159,7 +159,7 @@ GL 4.3, GLSL 4.30: GL_ARB_framebuffer_no_attachments not started GL_ARB_internalformat_query2 not started GL_ARB_invalidate_subdata DONE (all drivers) - GL_ARB_multi_draw_indirect DONE (i965, nvc0, radeonsi, llvmpipe, softpipe) + GL_ARB_multi_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_program_interface_query not started GL_ARB_robust_buffer_access_behavior not started GL_ARB_shader_image_size not started @@ -212,7 +212,7 @@ These are the extensions cherry-picked to make GLES 3.1 GLES3.1, GLSL ES 3.1 GL_ARB_arrays_of_arrays started (Timothy) GL_ARB_compute_shader in progress (jljusten) - GL_ARB_draw_indirect DONE (i965, nvc0, radeonsi, llvmpipe, softpipe) + GL_ARB_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) GL_ARB_framebuffer_no_attachments not started GL_ARB_program_interface_query not started diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index 056d3b00def..ceeca07ff38 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -49,6 +49,7 @@ Note: some of the new features are only available with certain drivers. <li>GL_ARB_gpu_shader_fp64 on nvc0, softpipe</li> <li>GL_ARB_instanced_arrays on freedreno</li> <li>GL_ARB_pipeline_statistics_query on i965, nv50, nvc0, r600, radeonsi, softpipe</li> +<li>GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600</li> </ul> <h2>Bug fixes</h2> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index ea58aeab0b4..bc634fe5e36 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -3438,7 +3438,7 @@ void evergreen_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views.atom, id++, evergreen_emit_gs_sampler_views, 0); r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views.atom, id++, evergreen_emit_ps_sampler_views, 0); - r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 7); + r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 10); if (rctx->b.chip_class == EVERGREEN) { r600_init_atom(rctx, &rctx->sample_mask.atom, id++, evergreen_emit_sample_mask, 3); diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 49899960367..cd4ff46b103 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -72,7 +72,6 @@ #define PKT3_REG_RMW 0x21 #define PKT3_COND_EXEC 0x22 #define PKT3_PRED_EXEC 0x23 -#define PKT3_START_3D_CMDBUF 0x24 #define PKT3_DRAW_INDEX_2 0x27 #define PKT3_CONTEXT_CONTROL 0x28 #define PKT3_DRAW_INDEX_IMMD_BE 0x29 diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 79e7f748cdf..762cc7fac44 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2411,7 +2411,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, memset(&vtx, 0, sizeof(vtx)); vtx.buffer_id = elements[i].vertex_buffer_index + fetch_resource_start; - vtx.fetch_type = elements[i].instance_divisor ? 1 : 0; + vtx.fetch_type = elements[i].instance_divisor ? SQ_VTX_FETCH_INSTANCE_DATA : SQ_VTX_FETCH_VERTEX_DATA; vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0; vtx.mega_fetch_count = 0x1F; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 06dca25924a..c8a0e9cb624 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -317,6 +317,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return family >= CHIP_CEDAR ? 1 : 0; case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: return family >= CHIP_CEDAR ? 4 : 0; + case PIPE_CAP_DRAW_INDIRECT: + /* kernel command checker support is also required */ + return family >= CHIP_CEDAR && rscreen->b.info.drm_minor >= 41; /* Unsupported features. */ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: @@ -326,7 +329,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_VERTEX_COLOR_CLAMPED: case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: - case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_VERTEXID_NOBASE: diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index e110efe7aea..72378548733 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -55,7 +55,7 @@ /* the number of CS dwords for flushing and drawing */ #define R600_MAX_FLUSH_CS_DWORDS 16 -#define R600_MAX_DRAW_CS_DWORDS 40 +#define R600_MAX_DRAW_CS_DWORDS 47 #define R600_TRACE_CS_DWORDS 7 #define R600_MAX_USER_CONST_BUFFERS 13 @@ -145,6 +145,7 @@ struct r600_vgt_state { uint32_t vgt_multi_prim_ib_reset_en; uint32_t vgt_multi_prim_ib_reset_indx; uint32_t vgt_indx_offset; + bool last_draw_was_indirect; }; struct r600_blend_color { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 77c9909f5d8..2ee59c8bac0 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -938,7 +938,7 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_ memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); vtx.op = FETCH_OP_VFETCH; vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER; - vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; if (sample_id == NULL) { vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w; vtx.src_sel_x = 3; @@ -1095,7 +1095,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, memset(&vtx, 0, sizeof(vtx)); vtx.buffer_id = cb_idx; - vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; vtx.src_gpr = ar_reg; vtx.src_sel_x = ar_chan; vtx.mega_fetch_count = 16; @@ -1173,7 +1173,7 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi memset(&vtx, 0, sizeof(vtx)); vtx.buffer_id = R600_GS_RING_CONST_BUFFER; - vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; vtx.src_gpr = offset_reg; vtx.src_sel_x = offset_chan; vtx.offset = index * 16; /*bytes*/ @@ -1539,7 +1539,7 @@ static int generate_gs_copy_shader(struct r600_context *rctx, memset(&vtx, 0, sizeof(vtx)); vtx.op = FETCH_OP_VFETCH; vtx.buffer_id = R600_GS_RING_CONST_BUFFER; - vtx.fetch_type = 2; + vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; vtx.offset = out->ring_offset; vtx.dst_gpr = out->gpr; vtx.dst_sel_x = 0; @@ -5025,7 +5025,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l memset(&vtx, 0, sizeof(vtx)); vtx.op = FETCH_OP_VFETCH; vtx.buffer_id = id + R600_MAX_CONST_BUFFERS; - vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; vtx.src_gpr = src_gpr; vtx.mega_fetch_count = 16; vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 587f88deb9e..1545cf1b521 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -481,4 +481,8 @@ #define INDEX_MODE_AR_W 3 #define INDEX_MODE_LOOP 4 +#define SQ_VTX_FETCH_VERTEX_DATA 0 +#define SQ_VTX_FETCH_INSTANCE_DATA 1 +#define SQ_VTX_FETCH_NO_INDEX_OFFSET 2 + #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 3c2fdfaed42..9d35c171708 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -3048,7 +3048,7 @@ void r600_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views.atom, id++, r600_emit_ps_sampler_views, 0); r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, r600_emit_vertex_buffers, 0); - r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 7); + r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 10); r600_init_atom(rctx, &rctx->seamless_cube_map.atom, id++, r600_emit_seamless_cube_map, 3); r600_init_atom(rctx, &rctx->sample_mask.atom, id++, r600_emit_sample_mask, 3); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index b498d00b5b5..a08124bed16 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -196,6 +196,10 @@ void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom) r600_write_context_reg_seq(cs, R_028408_VGT_INDX_OFFSET, 2); radeon_emit(cs, a->vgt_indx_offset); /* R_028408_VGT_INDX_OFFSET */ radeon_emit(cs, a->vgt_multi_prim_ib_reset_indx); /* R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX */ + if (a->last_draw_was_indirect) { + a->last_draw_was_indirect = false; + r600_write_ctl_const(cs, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0); + } } static void r600_set_clip_state(struct pipe_context *ctx, @@ -1353,7 +1357,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info unsigned i; struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; - if (!info.count && (info.indexed || !info.count_from_stream_output)) { + if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) { return; } @@ -1379,19 +1383,44 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info pipe_resource_reference(&ib.buffer, rctx->index_buffer.buffer); ib.user_buffer = rctx->index_buffer.user_buffer; ib.index_size = rctx->index_buffer.index_size; - ib.offset = rctx->index_buffer.offset + info.start * ib.index_size; + ib.offset = rctx->index_buffer.offset; + if (!info.indirect) { + ib.offset += info.start * ib.index_size; + } /* Translate 8-bit indices to 16-bit. */ - if (ib.index_size == 1) { + if (unlikely(ib.index_size == 1)) { struct pipe_resource *out_buffer = NULL; unsigned out_offset; void *ptr; + unsigned start, count; + + if (likely(!info.indirect)) { + start = 0; + count = info.count; + } + else { + /* Have to get start/count from indirect buffer, slow path ahead... */ + struct r600_resource *indirect_resource = (struct r600_resource *)info.indirect; + unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource, + PIPE_TRANSFER_READ); + if (data) { + data += info.indirect_offset / sizeof(unsigned); + start = data[2] * ib.index_size; + count = data[0]; + rctx->b.ws->buffer_unmap(indirect_resource->cs_buf); + } + else { + start = 0; + count = 0; + } + } - u_upload_alloc(rctx->b.uploader, 0, info.count * 2, + u_upload_alloc(rctx->b.uploader, start, count * 2, &out_offset, &out_buffer, &ptr); util_shorten_ubyte_elts_to_userptr( - &rctx->b.b, &ib, 0, ib.offset, info.count, ptr); + &rctx->b.b, &ib, 0, ib.offset + start, count, ptr); pipe_resource_reference(&ib.buffer, NULL); ib.user_buffer = NULL; @@ -1403,9 +1432,11 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info /* Upload the index buffer. * The upload is skipped for small index counts on little-endian machines * and the indices are emitted via PKT3_DRAW_INDEX_IMMD. + * Indirect draws never use immediate indices. * Note: Instanced rendering in combination with immediate indices hangs. */ - if (ib.user_buffer && (R600_BIG_ENDIAN || info.instance_count > 1 || - info.count*ib.index_size > 20)) { + if (ib.user_buffer && (R600_BIG_ENDIAN || info.indirect || + info.instance_count > 1 || + info.count*ib.index_size > 20)) { u_upload_data(rctx->b.uploader, 0, info.count * ib.index_size, ib.user_buffer, &ib.offset, &ib.buffer); ib.user_buffer = NULL; @@ -1417,7 +1448,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info /* Set the index offset and primitive restart. */ if (rctx->vgt_state.vgt_multi_prim_ib_reset_en != info.primitive_restart || rctx->vgt_state.vgt_multi_prim_ib_reset_indx != info.restart_index || - rctx->vgt_state.vgt_indx_offset != info.index_bias) { + rctx->vgt_state.vgt_indx_offset != info.index_bias || + (rctx->vgt_state.last_draw_was_indirect && !info.indirect)) { rctx->vgt_state.vgt_multi_prim_ib_reset_en = info.primitive_restart; rctx->vgt_state.vgt_multi_prim_ib_reset_indx = info.restart_index; rctx->vgt_state.vgt_indx_offset = info.index_bias; @@ -1485,7 +1517,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info } /* Update start instance. */ - if (rctx->last_start_instance != info.start_instance) { + if (!info.indirect && rctx->last_start_instance != info.start_instance) { r600_write_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance); rctx->last_start_instance = info.start_instance; } @@ -1510,8 +1542,30 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info } /* Draw packets. */ - cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, rctx->b.predicate_drawing); - cs->buf[cs->cdw++] = info.instance_count; + if (!info.indirect) { + cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = info.instance_count; + } + + if (unlikely(info.indirect)) { + uint64_t va = r600_resource(info.indirect)->gpu_address; + assert(rctx->b.chip_class >= EVERGREEN); + + // Invalidate so non-indirect draw calls reset this state + rctx->vgt_state.last_draw_was_indirect = true; + rctx->last_start_instance = -1; + + cs->buf[cs->cdw++] = PKT3(EG_PKT3_SET_BASE, 2, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = EG_DRAW_INDEX_INDIRECT_PATCH_TABLE_BASE; + cs->buf[cs->cdw++] = va; + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; + + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, + (struct r600_resource*)info.indirect, + RADEON_USAGE_READ, RADEON_PRIO_MIN); + } + if (info.indexed) { cs->buf[cs->cdw++] = PKT3(PKT3_INDEX_TYPE, 0, rctx->b.predicate_drawing); cs->buf[cs->cdw++] = ib.index_size == 4 ? @@ -1528,18 +1582,40 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->cdw += size_dw; } else { uint64_t va = r600_resource(ib.buffer)->gpu_address + ib.offset; - cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, rctx->b.predicate_drawing); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; - cs->buf[cs->cdw++] = info.count; - cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA; - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing); - cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, - (struct r600_resource*)ib.buffer, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + + if (likely(!info.indirect)) { + cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = va; + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; + cs->buf[cs->cdw++] = info.count; + cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA; + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, + (struct r600_resource*)ib.buffer, + RADEON_USAGE_READ, RADEON_PRIO_MIN); + } + else { + uint32_t max_size = (ib.buffer->width0 - ib.offset) / ib.index_size; + + cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BASE, 1, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = va; + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; + + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, + (struct r600_resource*)ib.buffer, + RADEON_USAGE_READ, RADEON_PRIO_MIN); + + cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = max_size; + + cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = info.indirect_offset; + cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA; + } } } else { - if (info.count_from_stream_output) { + if (unlikely(info.count_from_stream_output)) { struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output; uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset; @@ -1558,8 +1634,14 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info RADEON_PRIO_MIN); } - cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->b.predicate_drawing); - cs->buf[cs->cdw++] = info.count; + if (likely(!info.indirect)) { + cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = info.count; + } + else { + cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDIRECT, 1, rctx->b.predicate_drawing); + cs->buf[cs->cdw++] = info.indirect_offset; + } cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX | (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0); } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 6a5b9640ee0..bce8b4ea065 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -52,12 +52,18 @@ #define PKT3_NOP 0x10 +#define EG_PKT3_SET_BASE 0x11 /* >= evergreen */ +#define EG_DRAW_INDEX_INDIRECT_PATCH_TABLE_BASE 1 /* DX11 Draw_Index_Indirect Patch Table Base */ +#define EG_PKT3_INDEX_BUFFER_SIZE 0x13 #define PKT3_INDIRECT_BUFFER_END 0x17 #define PKT3_SET_PREDICATION 0x20 #define PKT3_REG_RMW 0x21 #define PKT3_COND_EXEC 0x22 #define PKT3_PRED_EXEC 0x23 -#define PKT3_START_3D_CMDBUF 0x24 +#define PKT3_START_3D_CMDBUF 0x24 /* removed on evergreen */ +#define EG_PKT3_DRAW_INDIRECT 0x24 /* >= evergreen */ +#define EG_PKT3_DRAW_INDEX_INDIRECT 0x25 +#define EG_PKT3_INDEX_BASE 0x26 #define PKT3_DRAW_INDEX_2 0x27 #define PKT3_CONTEXT_CONTROL 0x28 #define PKT3_DRAW_INDEX_IMMD_BE 0x29 |