author     Kristian Høgsberg Kristensen <[email protected]>  2015-06-11 15:31:42 -0700
committer  Kristian Høgsberg Kristensen <[email protected]>  2015-06-11 15:31:42 -0700
commit     765175f5d195df727ed9d171720f4e843809744e (patch)
tree       31b0630784b57b05310423b57051624dca9af01b /src/vulkan
parent     7637b02aaa4df9efaf856ee855ba03b5fe81d821 (diff)
vk: Implement basic compute shader support
Diffstat (limited to 'src/vulkan')
-rw-r--r--  src/vulkan/compiler.cpp  |  93
-rw-r--r--  src/vulkan/device.c      | 291
-rw-r--r--  src/vulkan/pipeline.c    |  54
-rw-r--r--  src/vulkan/private.h     |   5
4 files changed, 381 insertions(+), 62 deletions(-)
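
The patch wires compute through the full path: pipeline creation, pipeline bind, and dispatch. A minimal, illustrative sketch of how the new entry points fit together follows; the VkComputePipelineCreateInfo fields (.sType, .cs.shader, .layout) appear in the patch itself, while the handle types and the surrounding setup (device, command buffer, shader, pipeline layout) are assumed to already exist in the caller and follow the provisional API used by this tree.

/* Illustrative use of the compute path added by this patch; not part of the
 * patch itself.  Handle types follow the provisional API of this tree. */
static void
run_compute_example(VkDevice device, VkCmdBuffer cmdBuffer,
                    VkShader cs_shader, VkPipelineLayout layout)
{
   VkComputePipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .cs = { .shader = cs_shader },
      .layout = layout,
   };

   /* anv_CreateComputePipeline compiles the shader and records
    * MEDIA_VFE_STATE into the pipeline batch. */
   VkPipeline pipeline;
   anv_CreateComputePipeline(device, &info, &pipeline);

   /* Binding at the compute bind point marks the compute pipeline dirty;
    * anv_CmdDispatch then flushes compute state and emits GPGPU_WALKER. */
   anv_CmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   anv_CmdDispatch(cmdBuffer, 8, 8, 1);
}
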
diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp
index 28c4874a438..0db7935f085 100644
--- a/src/vulkan/compiler.cpp
+++ b/src/vulkan/compiler.cpp
@@ -32,6 +32,7 @@
#include <brw_vs.h>
#include <brw_gs.h>
+#include <brw_cs.h>
#include <mesa/main/shaderobj.h>
#include <mesa/main/fbobject.h>
@@ -603,6 +604,68 @@ really_do_gs_prog(struct brw_context *brw,
return true;
}
+static bool
+brw_codegen_cs_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_compute_program *cp,
+ struct brw_cs_prog_key *key, struct anv_pipeline *pipeline)
+{
+ struct gl_context *ctx = &brw->ctx;
+ const GLuint *program;
+ void *mem_ctx = ralloc_context(NULL);
+ GLuint program_size;
+ struct brw_cs_prog_data prog_data;
+
+ struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+ assert (cs);
+
+ memset(&prog_data, 0, sizeof(prog_data));
+
+ /* Allocate the references to the uniforms that will end up in the
+ * prog_data associated with the compiled program, and which will be freed
+ * by the state cache.
+ */
+ int param_count = cs->num_uniform_components;
+
+ /* The backend also sometimes adds params for texture size. */
+ param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
+ prog_data.base.param =
+ rzalloc_array(NULL, const gl_constant_value *, param_count);
+ prog_data.base.pull_param =
+ rzalloc_array(NULL, const gl_constant_value *, param_count);
+ prog_data.base.nr_params = param_count;
+
+ program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
+ &cp->program, prog, &program_size);
+ if (program == NULL) {
+ ralloc_free(mem_ctx);
+ return false;
+ }
+
+ if (unlikely(INTEL_DEBUG & DEBUG_CS))
+ fprintf(stderr, "\n");
+
+ struct anv_state cs_state = anv_state_stream_alloc(&pipeline->program_stream,
+ program_size, 64);
+ memcpy(cs_state.map, program, program_size);
+
+ pipeline->cs_simd = cs_state.offset;
+
+ ralloc_free(mem_ctx);
+
+ return true;
+}
+
+static void
+brw_cs_populate_key(struct brw_context *brw,
+ struct brw_compute_program *bcp, struct brw_cs_prog_key *key)
+{
+ memset(key, 0, sizeof(*key));
+
+ /* The unique compute program ID */
+ key->program_string_id = bcp->id;
+}
+
static void
fail_on_compile_error(int status, const char *msg)
{
@@ -652,6 +715,22 @@ anv_compiler_create(struct anv_device *device)
compiler->brw->is_baytrail = devinfo->is_baytrail;
compiler->brw->is_haswell = devinfo->is_haswell;
compiler->brw->is_cherryview = devinfo->is_cherryview;
+
+ /* We need this at least for CS, which will check brw->max_cs_threads
+ * against the work group size. */
+ compiler->brw->max_vs_threads = devinfo->max_vs_threads;
+ compiler->brw->max_hs_threads = devinfo->max_hs_threads;
+ compiler->brw->max_ds_threads = devinfo->max_ds_threads;
+ compiler->brw->max_gs_threads = devinfo->max_gs_threads;
+ compiler->brw->max_wm_threads = devinfo->max_wm_threads;
+ compiler->brw->max_cs_threads = devinfo->max_cs_threads;
+ compiler->brw->urb.size = devinfo->urb.size;
+ compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
+ compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
+ compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
+ compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
+ compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
+
compiler->brw->intelScreen = compiler->screen;
compiler->screen->devinfo = &device->info;
@@ -992,6 +1071,20 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT;
}
+ if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) {
+ struct brw_cs_prog_key cs_key;
+ struct gl_compute_program *cp = (struct gl_compute_program *)
+ program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program;
+ struct brw_compute_program *bcp = brw_compute_program(cp);
+
+ brw_cs_populate_key(brw, bcp, &cs_key);
+
+ success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline);
+ fail_if(!success, "brw_codegen_cs_prog failed\n");
+ pipeline->prog_data[VK_SHADER_STAGE_COMPUTE] = &pipeline->cs_prog_data.base;
+ pipeline->active_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
+ }
+
brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program);
gen7_compute_urb_partition(pipeline);
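
The comment above about max_cs_threads points at the constraint the compute backend enforces at compile time: the local workgroup size, divided by the SIMD width the shader is compiled to, must fit within the hardware's compute thread count. A hedged sketch of that constraint is below; only max_cs_threads comes from this patch, the other names are illustrative and not copied from brw_cs_emit.

/* Sketch of the workgroup-size constraint hinted at above; illustrative only.
 * A workgroup must be runnable with at most max_cs_threads hardware threads
 * of simd_size channels each. */
static bool
cs_workgroup_fits(uint32_t local_x, uint32_t local_y, uint32_t local_z,
                  uint32_t simd_size, uint32_t max_cs_threads)
{
   const uint32_t group_size = local_x * local_y * local_z;
   const uint32_t threads = (group_size + simd_size - 1) / simd_size;

   return threads <= max_cs_threads;
}
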
diff --git a/src/vulkan/device.c b/src/vulkan/device.c
index e44fb2b1cd9..b27bd6d765b 100644
--- a/src/vulkan/device.c
+++ b/src/vulkan/device.c
@@ -2650,9 +2650,22 @@ void anv_CmdBindPipeline(
struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
struct anv_pipeline *pipeline = (struct anv_pipeline *) _pipeline;
- cmd_buffer->pipeline = pipeline;
- cmd_buffer->vb_dirty |= pipeline->vb_used;
- cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
+ switch (pipelineBindPoint) {
+ case VK_PIPELINE_BIND_POINT_COMPUTE:
+ cmd_buffer->compute_pipeline = pipeline;
+ cmd_buffer->compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
+ break;
+
+ case VK_PIPELINE_BIND_POINT_GRAPHICS:
+ cmd_buffer->pipeline = pipeline;
+ cmd_buffer->vb_dirty |= pipeline->vb_used;
+ cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
+ break;
+
+ default:
+ assert(!"invalid bind point");
+ break;
+ }
}
void anv_CmdBindDynamicStateObject(
@@ -2818,11 +2831,15 @@ void anv_CmdBindVertexBuffers(
static VkResult
cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
- unsigned stage)
+ unsigned stage, struct anv_state *bt_state)
{
- struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout;
+ struct anv_pipeline_layout *layout;
uint32_t color_attachments, bias, size;
- struct anv_state bt_state;
+
+ if (stage == VK_SHADER_STAGE_COMPUTE)
+ layout = cmd_buffer->compute_pipeline->layout;
+ else
+ layout = cmd_buffer->pipeline->layout;
if (stage == VK_SHADER_STAGE_FRAGMENT) {
bias = MAX_RTS;
@@ -2841,26 +2858,12 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
return VK_SUCCESS;
size = (bias + surface_count) * sizeof(uint32_t);
- bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
- uint32_t *bt_map = bt_state.map;
+ *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
+ uint32_t *bt_map = bt_state->map;
- if (bt_state.map == NULL)
+ if (bt_state->map == NULL)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
- static const uint32_t binding_table_opcodes[] = {
- [VK_SHADER_STAGE_VERTEX] = 38,
- [VK_SHADER_STAGE_TESS_CONTROL] = 39,
- [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
- [VK_SHADER_STAGE_GEOMETRY] = 41,
- [VK_SHADER_STAGE_FRAGMENT] = 42,
- [VK_SHADER_STAGE_COMPUTE] = 0,
- };
-
- anv_batch_emit(&cmd_buffer->batch,
- GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
- ._3DCommandSubOpcode = binding_table_opcodes[stage],
- .PointertoVSBindingTable = bt_state.offset);
-
for (uint32_t ca = 0; ca < color_attachments; ca++) {
const struct anv_surface_view *view =
cmd_buffer->framebuffer->color_attachments[ca];
@@ -2935,39 +2938,27 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
}
static VkResult
-cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage)
+cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
+ unsigned stage, struct anv_state *state)
{
- struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout;
- struct anv_state state;
+ struct anv_pipeline_layout *layout;
+ uint32_t sampler_count;
- if (!layout)
- return VK_SUCCESS;
-
- uint32_t sampler_count = layout->stage[stage].sampler_count;
+ if (stage == VK_SHADER_STAGE_COMPUTE)
+ layout = cmd_buffer->compute_pipeline->layout;
+ else
+ layout = cmd_buffer->pipeline->layout;
+ sampler_count = layout ? layout->stage[stage].sampler_count : 0;
if (sampler_count == 0)
return VK_SUCCESS;
uint32_t size = sampler_count * 16;
- state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32);
+ *state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32);
- if (state.map == NULL)
+ if (state->map == NULL)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
- static const uint32_t sampler_state_opcodes[] = {
- [VK_SHADER_STAGE_VERTEX] = 43,
- [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */
- [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
- [VK_SHADER_STAGE_GEOMETRY] = 46,
- [VK_SHADER_STAGE_FRAGMENT] = 47,
- [VK_SHADER_STAGE_COMPUTE] = 0,
- };
-
- anv_batch_emit(&cmd_buffer->batch,
- GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
- ._3DCommandSubOpcode = sampler_state_opcodes[stage],
- .PointertoVSSamplerState = state.offset);
-
for (uint32_t set = 0; set < layout->num_sets; set++) {
struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
@@ -2983,7 +2974,7 @@ cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage)
if (!sampler)
continue;
- memcpy(state.map + (start + b) * 16,
+ memcpy(state->map + (start + b) * 16,
sampler->state, sizeof(sampler->state));
}
}
@@ -2991,6 +2982,54 @@ cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage)
return VK_SUCCESS;
}
+static VkResult
+flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage)
+{
+ struct anv_state surfaces = { 0, }, samplers = { 0, };
+ VkResult result;
+
+ result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
+ if (result != VK_SUCCESS)
+ return result;
+ result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
+ if (result != VK_SUCCESS)
+ return result;
+
+ static const uint32_t sampler_state_opcodes[] = {
+ [VK_SHADER_STAGE_VERTEX] = 43,
+ [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */
+ [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
+ [VK_SHADER_STAGE_GEOMETRY] = 46,
+ [VK_SHADER_STAGE_FRAGMENT] = 47,
+ [VK_SHADER_STAGE_COMPUTE] = 0,
+ };
+
+ static const uint32_t binding_table_opcodes[] = {
+ [VK_SHADER_STAGE_VERTEX] = 38,
+ [VK_SHADER_STAGE_TESS_CONTROL] = 39,
+ [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
+ [VK_SHADER_STAGE_GEOMETRY] = 41,
+ [VK_SHADER_STAGE_FRAGMENT] = 42,
+ [VK_SHADER_STAGE_COMPUTE] = 0,
+ };
+
+ if (samplers.alloc_size > 0) {
+ anv_batch_emit(&cmd_buffer->batch,
+ GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
+ ._3DCommandSubOpcode = sampler_state_opcodes[stage],
+ .PointertoVSSamplerState = samplers.offset);
+ }
+
+ if (surfaces.alloc_size > 0) {
+ anv_batch_emit(&cmd_buffer->batch,
+ GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
+ ._3DCommandSubOpcode = binding_table_opcodes[stage],
+ .PointertoVSBindingTable = surfaces.offset);
+ }
+
+ return VK_SUCCESS;
+}
+
static void
flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
{
@@ -2999,11 +3038,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
VkResult result;
for_each_bit(s, dirty) {
- result = cmd_buffer_emit_binding_table(cmd_buffer, s);
- if (result != VK_SUCCESS)
- break;
-
- result = cmd_buffer_emit_samplers(cmd_buffer, s);
+ result = flush_descriptor_set(cmd_buffer, s);
if (result != VK_SUCCESS)
break;
}
@@ -3016,12 +3051,11 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
/* Re-emit all active binding tables */
for_each_bit(s, cmd_buffer->pipeline->active_stages) {
- result = cmd_buffer_emit_binding_table(cmd_buffer, s);
- result = cmd_buffer_emit_samplers(cmd_buffer, s);
- }
+ result = flush_descriptor_set(cmd_buffer, s);
- /* It had better succeed this time */
- assert(result == VK_SUCCESS);
+ /* It had better succeed this time */
+ assert(result == VK_SUCCESS);
+ }
}
cmd_buffer->descriptors_dirty &= ~cmd_buffer->pipeline->active_stages;
@@ -3061,6 +3095,78 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
return state;
}
+static VkResult
+flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
+{
+ struct anv_device *device = cmd_buffer->device;
+ struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
+ struct anv_state surfaces = { 0, }, samplers = { 0, };
+ VkResult result;
+
+ result = cmd_buffer_emit_samplers(cmd_buffer,
+ VK_SHADER_STAGE_COMPUTE, &samplers);
+ if (result != VK_SUCCESS)
+ return result;
+ result = cmd_buffer_emit_binding_table(cmd_buffer,
+ VK_SHADER_STAGE_COMPUTE, &surfaces);
+ if (result != VK_SUCCESS)
+ return result;
+
+ struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
+ .KernelStartPointer = pipeline->cs_simd,
+ .KernelStartPointerHigh = 0,
+ .BindingTablePointer = surfaces.offset,
+ .BindingTableEntryCount = 0,
+ .SamplerStatePointer = samplers.offset,
+ .SamplerCount = 0,
+ .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
+ };
+
+ uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
+ struct anv_state state =
+ anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
+
+ GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
+
+ anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
+ .InterfaceDescriptorTotalLength = size,
+ .InterfaceDescriptorDataStartAddress = state.offset);
+
+ return VK_SUCCESS;
+}
+
+static void
+anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
+{
+ struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
+ VkResult result;
+
+ assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
+
+ if (cmd_buffer->current_pipeline != GPGPU) {
+ anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
+ .PipelineSelection = GPGPU);
+ cmd_buffer->current_pipeline = GPGPU;
+ }
+
+ if (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
+ anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+
+ if ((cmd_buffer->descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
+ (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
+ result = flush_compute_descriptor_set(cmd_buffer);
+ if (result != VK_SUCCESS) {
+ result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
+ assert(result == VK_SUCCESS);
+ result = flush_compute_descriptor_set(cmd_buffer);
+ assert(result == VK_SUCCESS);
+ }
+ cmd_buffer->descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE;
+ }
+
+ cmd_buffer->compute_dirty = 0;
+}
+
static void
anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
{
@@ -3278,15 +3384,80 @@ void anv_CmdDispatch(
uint32_t y,
uint32_t z)
{
- stub();
+ struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
+ uint32_t size = SIMD8; /* FIXME */
+ uint32_t right_mask = 0; /* FIXME */
+ uint32_t thread_width_max = 0; /* FIXME */
+
+ anv_cmd_buffer_flush_compute_state(cmd_buffer);
+
+ anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
+
+ .InterfaceDescriptorOffset = 0,
+ .IndirectDataLength = 0,
+ .IndirectDataStartAddress = 0,
+
+ .SIMDSize = size,
+
+ .ThreadDepthCounterMaximum = 0,
+ .ThreadHeightCounterMaximum = 0,
+ .ThreadWidthCounterMaximum = thread_width_max,
+
+ .ThreadGroupIDStartingX = 0,
+ .ThreadGroupIDXDimension = x,
+ .ThreadGroupIDStartingY = 0,
+ .ThreadGroupIDYDimension = y,
+ .ThreadGroupIDStartingResumeZ = 0,
+ .ThreadGroupIDZDimension = z,
+ .RightExecutionMask = right_mask,
+ .BottomExecutionMask = 0xffffffff);
+
+ anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}
+#define GPGPU_DISPATCHDIMX 0x2500
+#define GPGPU_DISPATCHDIMY 0x2504
+#define GPGPU_DISPATCHDIMZ 0x2508
+
void anv_CmdDispatchIndirect(
VkCmdBuffer cmdBuffer,
- VkBuffer buffer,
+ VkBuffer _buffer,
VkDeviceSize offset)
{
- stub();
+ struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
+ struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
+ struct anv_bo *bo = buffer->bo;
+ uint32_t bo_offset = buffer->offset + offset;
+
+ anv_cmd_buffer_flush_compute_state(cmd_buffer);
+
+ anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
+ anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
+ anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
+
+ uint32_t size = SIMD8; /* FIXME */
+ uint32_t right_mask = 0; /* FIXME */
+ uint32_t thread_width_max = 0; /* FIXME */
+
+ /* FIXME: We can't compute thread_width_max for indirect, looks like it
+ * depends on DIMX. */
+
+ anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
+ .IndirectParameterEnable = true,
+ .InterfaceDescriptorOffset = 0,
+ .IndirectDataLength = 0,
+ .IndirectDataStartAddress = 0,
+
+ .SIMDSize = size,
+
+ .ThreadDepthCounterMaximum = 0,
+ .ThreadHeightCounterMaximum = 0,
+ .ThreadWidthCounterMaximum = thread_width_max,
+
+ .RightExecutionMask = right_mask,
+ .BottomExecutionMask = 0xffffffff);
+
+ anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}
void anv_CmdSetEvent(
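
Both dispatch paths above leave the SIMD size, thread-width maximum and right execution mask as FIXME placeholders. One plausible way to derive them from the compiled compute shader, following the scheme the classic i965 driver uses when programming GPGPU_WALKER, is sketched below; the prog_data field names (simd_size, local_size) are assumptions about the compiler output, not something this patch adds.

/* Sketch only: derive GPGPU_WALKER execution parameters from the compiled CS.
 * prog_data->simd_size and prog_data->local_size are assumed fields holding
 * the compiled SIMD width and the local workgroup size. */
static void
cs_walker_params(const struct brw_cs_prog_data *prog_data,
                 uint32_t *thread_width_max, uint32_t *right_mask)
{
   const uint32_t simd_size = prog_data->simd_size;        /* 8, 16 or 32 */
   const uint32_t group_size = prog_data->local_size[0] *
                               prog_data->local_size[1] *
                               prog_data->local_size[2];

   /* Each hardware thread executes simd_size invocations; round up. */
   *thread_width_max = (group_size + simd_size - 1) / simd_size;

   /* Execution mask for the rightmost (possibly partial) thread. */
   uint32_t mask = 0xffffffffu >> (32 - simd_size);
   const uint32_t remainder = group_size & (simd_size - 1);
   if (remainder)
      mask >>= simd_size - remainder;
   *right_mask = mask;
}
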
diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c
index 28818152ae0..cf7562ae496 100644
--- a/src/vulkan/pipeline.c
+++ b/src/vulkan/pipeline.c
@@ -718,11 +718,61 @@ VkResult anv_CreateGraphicsPipelineDerivative(
}
VkResult anv_CreateComputePipeline(
- VkDevice device,
+ VkDevice _device,
const VkComputePipelineCreateInfo* pCreateInfo,
VkPipeline* pPipeline)
{
- stub_return(VK_UNSUPPORTED);
+ struct anv_device *device = (struct anv_device *) _device;
+ struct anv_pipeline *pipeline;
+ VkResult result;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
+
+ pipeline = anv_device_alloc(device, sizeof(*pipeline), 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (pipeline == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ pipeline->base.destructor = anv_pipeline_destroy;
+ pipeline->device = device;
+ pipeline->layout = (struct anv_pipeline_layout *) pCreateInfo->layout;
+
+ result = anv_reloc_list_init(&pipeline->batch.relocs, device);
+ if (result != VK_SUCCESS) {
+ anv_device_free(device, pipeline);
+ return result;
+ }
+ pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
+ pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
+
+ anv_state_stream_init(&pipeline->program_stream,
+ &device->instruction_block_pool);
+
+ memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
+
+ pipeline->shaders[VK_SHADER_STAGE_COMPUTE] =
+ (struct anv_shader *) pCreateInfo->cs.shader;
+
+ pipeline->use_repclear = false;
+
+ anv_compiler_run(device->compiler, pipeline);
+
+ anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE,
+ .ScratchSpaceBasePointer = 0, /* FIXME: Scratch bo, this should be a reloc? */
+ .StackSize = 0,
+ .PerThreadScratchSpace = 0,
+ .ScratchSpaceBasePointerHigh = 0,
+
+ .MaximumNumberofThreads = device->info.max_cs_threads - 1,
+ .NumberofURBEntries = 2,
+ .ResetGatewayTimer = true,
+ .BypassGatewayControl = true,
+ .URBEntryAllocationSize = 2,
+ .CURBEAllocationSize = 0);
+
+ *pPipeline = (VkPipeline) pipeline;
+
+ return VK_SUCCESS;
}
VkResult anv_StorePipeline(
diff --git a/src/vulkan/private.h b/src/vulkan/private.h
index 8bc5fd10880..cf1cf4c1268 100644
--- a/src/vulkan/private.h
+++ b/src/vulkan/private.h
@@ -653,8 +653,10 @@ struct anv_cmd_buffer {
uint32_t current_pipeline;
uint32_t vb_dirty;
uint32_t dirty;
+ uint32_t compute_dirty;
uint32_t descriptors_dirty;
struct anv_pipeline * pipeline;
+ struct anv_pipeline * compute_pipeline;
struct anv_framebuffer * framebuffer;
struct anv_dynamic_rs_state * rs_state;
struct anv_dynamic_ds_state * ds_state;
@@ -692,6 +694,7 @@ struct anv_pipeline {
struct brw_vs_prog_data vs_prog_data;
struct brw_wm_prog_data wm_prog_data;
struct brw_gs_prog_data gs_prog_data;
+ struct brw_cs_prog_data cs_prog_data;
struct brw_stage_prog_data * prog_data[VK_NUM_SHADER_STAGE];
struct {
uint32_t vs_start;
@@ -705,6 +708,7 @@ struct anv_pipeline {
struct anv_bo vs_scratch_bo;
struct anv_bo ps_scratch_bo;
struct anv_bo gs_scratch_bo;
+ struct anv_bo cs_scratch_bo;
uint32_t active_stages;
struct anv_state_stream program_stream;
@@ -714,6 +718,7 @@ struct anv_pipeline {
uint32_t ps_simd16;
uint32_t gs_vec4;
uint32_t gs_vertex_count;
+ uint32_t cs_simd;
uint32_t vb_used;
uint32_t binding_stride[MAX_VBS];