diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/docs/source/context.rst | 39 | ||||
-rw-r--r-- | src/gallium/docs/source/screen.rst | 28 | ||||
-rw-r--r-- | src/gallium/include/pipe/p_context.h | 73 | ||||
-rw-r--r-- | src/gallium/include/pipe/p_defines.h | 19 | ||||
-rw-r--r-- | src/gallium/include/pipe/p_screen.h | 12 | ||||
-rw-r--r-- | src/gallium/include/pipe/p_shader_tokens.h | 9 | ||||
-rw-r--r-- | src/gallium/include/pipe/p_state.h | 7 |
7 files changed, 185 insertions, 2 deletions
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index b2872cd282f..cb9b8de7d5a 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -542,3 +542,42 @@ These flags control the behavior of a transfer object. ``PIPE_TRANSFER_FLUSH_EXPLICIT`` Written ranges will be notified later with :ref:`transfer_flush_region`. Cannot be used with ``PIPE_TRANSFER_READ``. + + +Compute kernel execution +^^^^^^^^^^^^^^^^^^^^^^^^ + +A compute program can be defined, bound or destroyed using +``create_compute_state``, ``bind_compute_state`` or +``destroy_compute_state`` respectively. + +Any of the subroutines contained within the compute program can be +executed on the device using the ``launch_grid`` method. This method +will execute as many instances of the program as elements in the +specified N-dimensional grid, hopefully in parallel. + +The compute program has access to four special resources: + +* ``GLOBAL`` represents a memory space shared among all the threads + running on the device. An arbitrary buffer created with the + ``PIPE_BIND_GLOBAL`` flag can be mapped into it using the + ``set_global_binding`` method. + +* ``LOCAL`` represents a memory space shared among all the threads + running in the same working group. The initial contents of this + resource are undefined. + +* ``PRIVATE`` represents a memory space local to a single thread. + The initial contents of this resource are undefined. + +* ``INPUT`` represents a read-only memory space that can be + initialized at ``launch_grid`` time. + +These resources use a byte-based addressing scheme, and they can be +accessed from the compute program by means of the LOAD/STORE TGSI +opcodes. + +In addition, normal texture sampling is allowed from the compute +program: ``bind_compute_sampler_states`` may be used to set up texture +samplers for the compute stage and ``set_compute_sampler_views`` may +be used to bind a number of sampler views to it. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 05f7e8fc539..5d8280a244b 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -110,7 +110,8 @@ The integer capabilities: * ``PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY``: This CAP describes a hw limitation. If true, pipe_vertex_element::src_offset must always be aligned to 4. If false, there are no restrictions on src_offset. - +* ``PIPE_CAP_COMPUTE``: Whether the implementation supports the + compute entry points defined in pipe_context and pipe_screen. .. _pipe_capf: @@ -186,6 +187,29 @@ to be 0. samplers. +.. _pipe_compute_cap: + +PIPE_COMPUTE_CAP_* +^^^^^^^^^^^^^^^^^^ + +Compute-specific capabilities. They can be queried using +pipe_screen::get_compute_param. + +* ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions + for grid and block coordinates. Value type: ``uint64_t``. +* ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block + units. Value type: ``uint64_t []``. +* ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread + units. Value type: ``uint64_t []``. +* ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL + resource. Value type: ``uint64_t``. +* ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL + resource. Value type: ``uint64_t``. +* ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE + resource. Value type: ``uint64_t``. +* ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT + resource. Value type: ``uint64_t``. + .. _pipe_bind: PIPE_BIND_* @@ -223,6 +247,8 @@ resources might be created and handled quite differently. * ``PIPE_BIND_SCANOUT``: A front color buffer or scanout buffer. * ``PIPE_BIND_SHARED``: A sharable buffer that can be given to another process. +* ``PIPE_BIND_GLOBAL``: A buffer that can be mapped into the global + address space of a compute program. .. _pipe_usage: diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 8b4a1588b86..3c0b89e9c2d 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -63,6 +63,7 @@ struct pipe_vertex_element; struct pipe_video_buffer; struct pipe_video_decoder; struct pipe_viewport_state; +struct pipe_compute_state; union pipe_color_union; union pipe_query_result; @@ -141,6 +142,10 @@ struct pipe_context { void (*bind_geometry_sampler_states)(struct pipe_context *, unsigned num_samplers, void **samplers); + void (*bind_compute_sampler_states)(struct pipe_context *, + unsigned start_slot, + unsigned num_samplers, + void **samplers); void (*delete_sampler_state)(struct pipe_context *, void *); void * (*create_rasterizer_state)(struct pipe_context *, @@ -220,6 +225,10 @@ struct pipe_context { unsigned num_views, struct pipe_sampler_view **); + void (*set_compute_sampler_views)(struct pipe_context *, + unsigned start_slot, unsigned num_views, + struct pipe_sampler_view **); + void (*set_vertex_buffers)( struct pipe_context *, unsigned num_buffers, const struct pipe_vertex_buffer * ); @@ -418,6 +427,70 @@ struct pipe_context { */ struct pipe_video_buffer *(*create_video_buffer)( struct pipe_context *context, const struct pipe_video_buffer *templat ); + + /** + * Compute kernel execution + */ + /*@{*/ + /** + * Define the compute program and parameters to be used by + * pipe_context::launch_grid. + */ + void *(*create_compute_state)(struct pipe_context *context, + const struct pipe_compute_state *); + void (*bind_compute_state)(struct pipe_context *, void *); + void (*delete_compute_state)(struct pipe_context *, void *); + + /** + * Bind an array of buffers to be mapped into the address space of + * the GLOBAL resource. Any buffers that were previously bound + * between [first, first + count - 1] are unbound after this call. + * + * \param first first buffer to map. + * \param count number of consecutive buffers to map. + * \param resources array of pointers to the buffers to map, it + * should contain at least \a count elements + * unless it's NULL, in which case no new + * resources will be bound. + * \param handles array of pointers to the memory locations that + * will be filled with the respective base + * addresses each buffer will be mapped to. It + * should contain at least \a count elements, + * unless \a resources is NULL in which case \a + * handles should be NULL as well. + * + * Note that the driver isn't required to make any guarantees about + * the contents of the \a handles array being valid anytime except + * during the subsequent calls to pipe_context::launch_grid. This + * means that the only sensible location handles[i] may point to is + * somewhere within the INPUT buffer itself. This is so to + * accommodate implementations that lack virtual memory but + * nevertheless migrate buffers on the fly, leading to resource + * base addresses that change on each kernel invocation or are + * unknown to the pipe driver. + */ + void (*set_global_binding)(struct pipe_context *context, + unsigned first, unsigned count, + struct pipe_resource **resources, + uint32_t **handles); + + /** + * Launch the compute kernel starting from instruction \a pc of the + * currently bound compute program. + * + * \a grid_layout and \a block_layout are arrays of size \a + * PIPE_COMPUTE_CAP_GRID_DIMENSION that determine the layout of the + * grid (in block units) and working block (in thread units) to be + * used, respectively. + * + * \a input will be used to initialize the INPUT resource, and it + * should point to a buffer of at least + * pipe_compute_state::req_input_mem bytes. + */ + void (*launch_grid)(struct pipe_context *context, + const uint *block_layout, const uint *grid_layout, + uint32_t pc, const void *input); + /*@}*/ }; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 8b6d00d4318..c4c217b911f 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -304,6 +304,7 @@ enum pipe_transfer_usage { #define PIPE_BIND_STREAM_OUTPUT (1 << 11) /* set_stream_output_buffers */ #define PIPE_BIND_CURSOR (1 << 16) /* mouse cursor */ #define PIPE_BIND_CUSTOM (1 << 17) /* state-tracker/winsys usages */ +#define PIPE_BIND_GLOBAL (1 << 18) /* set_global_binding */ /* The first two flags above were previously part of the amorphous * TEXTURE_USAGE, most of which are now descriptions of the ways a @@ -346,7 +347,8 @@ enum pipe_transfer_usage { #define PIPE_SHADER_VERTEX 0 #define PIPE_SHADER_FRAGMENT 1 #define PIPE_SHADER_GEOMETRY 2 -#define PIPE_SHADER_TYPES 3 +#define PIPE_SHADER_COMPUTE 3 +#define PIPE_SHADER_TYPES 4 /** @@ -477,6 +479,7 @@ enum pipe_cap { PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY = 65, PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY = 66, PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY = 67, + PIPE_CAP_COMPUTE = 68 }; /** @@ -522,6 +525,20 @@ enum pipe_shader_cap PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS = 18 }; +/** + * Compute-specific implementation capability. They can be queried + * using pipe_screen::get_compute_param. + */ +enum pipe_compute_cap +{ + PIPE_COMPUTE_CAP_GRID_DIMENSION, + PIPE_COMPUTE_CAP_MAX_GRID_SIZE, + PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, + PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, + PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE, + PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE, + PIPE_COMPUTE_CAP_MAX_INPUT_SIZE +}; /** * Composite query types diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 45c441b2fcf..7ae7c9a04e1 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -98,6 +98,18 @@ struct pipe_screen { enum pipe_video_profile profile, enum pipe_video_cap param ); + /** + * Query a compute-specific capability/parameter/limit. + * \param param one of PIPE_COMPUTE_CAP_x + * \param ret pointer to a preallocated buffer that will be + * initialized to the parameter value, or NULL. + * \return size in bytes of the parameter value that would be + * returned. + */ + int (*get_compute_param)(struct pipe_screen *, + enum pipe_compute_cap param, + void *ret); + struct pipe_context * (*context_create)( struct pipe_screen *, void *priv ); diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index df2dd5e618e..9d08fde9f03 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -166,6 +166,15 @@ struct tgsi_declaration_resource { unsigned ReturnTypeW : 6; /**< one of enum pipe_type */ }; +/* + * Special resources that don't need to be declared. They map to the + * GLOBAL/LOCAL/PRIVATE/INPUT compute memory spaces. + */ +#define TGSI_RESOURCE_GLOBAL 0x7fff +#define TGSI_RESOURCE_LOCAL 0x7ffe +#define TGSI_RESOURCE_PRIVATE 0x7ffd +#define TGSI_RESOURCE_INPUT 0x7ffc + #define TGSI_IMM_FLOAT32 0 #define TGSI_IMM_UINT32 1 #define TGSI_IMM_INT32 2 diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index a459a56a760..74f4ebd8f4c 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -580,6 +580,13 @@ struct pipe_resolve_info unsigned mask; /**< PIPE_MASK_RGBA, Z, S or ZS */ }; +struct pipe_compute_state +{ + const struct tgsi_token *tokens; /**< Compute program to be executed. */ + unsigned req_local_mem; /**< Required size of the LOCAL resource. */ + unsigned req_private_mem; /**< Required size of the PRIVATE resource. */ + unsigned req_input_mem; /**< Required size of the INPUT resource. */ +}; #ifdef __cplusplus } |