summary refs log tree commit diff stats
path: root/src/gallium/drivers/vc4
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r-- src/gallium/drivers/vc4/Makefile.am | 4
-rw-r--r-- src/gallium/drivers/vc4/Makefile.sources | 3
-rw-r--r-- src/gallium/drivers/vc4/kernel/vc4_drv.h | 24
-rw-r--r-- src/gallium/drivers/vc4/kernel/vc4_gem.c | 2
-rw-r--r-- src/gallium/drivers/vc4/kernel/vc4_packet.h | 17
-rw-r--r-- src/gallium/drivers/vc4/kernel/vc4_render_cl.c | 56
-rw-r--r-- src/gallium/drivers/vc4/kernel/vc4_validate.c | 315
-rw-r--r-- src/gallium/drivers/vc4/vc4_blit.c | 6
-rw-r--r-- src/gallium/drivers/vc4/vc4_bufmgr.c | 162
-rw-r--r-- src/gallium/drivers/vc4/vc4_bufmgr.h | 9
-rw-r--r-- src/gallium/drivers/vc4/vc4_cl.c | 20
-rw-r--r-- src/gallium/drivers/vc4/vc4_cl.h | 150
-rw-r--r-- src/gallium/drivers/vc4/vc4_cl_dump.c | 146
-rw-r--r-- src/gallium/drivers/vc4/vc4_context.c | 11
-rw-r--r-- src/gallium/drivers/vc4/vc4_context.h | 40
-rw-r--r-- src/gallium/drivers/vc4/vc4_draw.c | 227
-rw-r--r-- src/gallium/drivers/vc4/vc4_drm.h | 25
-rw-r--r-- src/gallium/drivers/vc4/vc4_emit.c | 65
-rw-r--r-- src/gallium/drivers/vc4/vc4_fence.c | 13
-rw-r--r-- src/gallium/drivers/vc4/vc4_formats.c | 2
-rw-r--r-- src/gallium/drivers/vc4/vc4_job.c | 16
-rw-r--r-- src/gallium/drivers/vc4/vc4_nir_lower_blend.c | 431
-rw-r--r-- src/gallium/drivers/vc4/vc4_nir_lower_io.c | 291
-rw-r--r-- src/gallium/drivers/vc4/vc4_opt_copy_propagation.c | 5
-rw-r--r-- src/gallium/drivers/vc4/vc4_opt_cse.c | 19
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c | 993
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.c | 18
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h | 95
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c | 6
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu.h | 30
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_disasm.c | 2
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_emit.c | 59
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_validate.c | 7
-rw-r--r-- src/gallium/drivers/vc4/vc4_register_allocate.c | 89
-rw-r--r-- src/gallium/drivers/vc4/vc4_resource.c | 8
-rw-r--r-- src/gallium/drivers/vc4/vc4_resource.h | 6
-rw-r--r-- src/gallium/drivers/vc4/vc4_screen.c | 10
-rw-r--r-- src/gallium/drivers/vc4/vc4_simulator.c | 16
-rw-r--r-- src/gallium/drivers/vc4/vc4_simulator_validate.h | 1
-rw-r--r-- src/gallium/drivers/vc4/vc4_state.c | 92
-rw-r--r-- src/gallium/drivers/vc4/vc4_tiling.c | 11
-rw-r--r-- src/gallium/drivers/vc4/vc4_tiling.h | 6
-rw-r--r-- src/gallium/drivers/vc4/vc4_uniforms.c | 344
43 files changed, 2324 insertions, 1528 deletions
diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am
index 3f62ce21a9f..f4a57ba3404 100644
--- a/src/gallium/drivers/vc4/Makefile.am
+++ b/src/gallium/drivers/vc4/Makefile.am
@@ -19,8 +19,6 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
@@ -30,10 +28,10 @@ SIM_LDFLAGS = -lsimpenrose
endif
AM_CFLAGS = \
+ -I$(top_builddir)/src/glsl/nir \
$(LIBDRM_CFLAGS) \
$(GALLIUM_DRIVER_CFLAGS) \
$(SIM_CFLAGS) \
- -I$(top_srcdir)/src/mesa/ \
$()
noinst_LTLIBRARIES = libvc4.la
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index 1eb029e67e7..6fb40c20562 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -19,6 +19,8 @@ C_SOURCES := \
vc4_fence.c \
vc4_formats.c \
vc4_job.c \
+ vc4_nir_lower_blend.c \
+ vc4_nir_lower_io.c \
vc4_opt_algebraic.c \
vc4_opt_constant_folding.c \
vc4_opt_copy_propagation.c \
@@ -49,4 +51,5 @@ C_SOURCES := \
vc4_state.c \
vc4_tiling.c \
vc4_tiling.h \
+ vc4_uniforms.c \
$()
diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h
index 1fd8aa9fb28..ffc973735ae 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_drv.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h
@@ -26,17 +26,6 @@
#include "vc4_simulator_validate.h"
-enum vc4_bo_mode {
- VC4_MODE_UNDECIDED,
- VC4_MODE_RENDER,
- VC4_MODE_SHADER,
-};
-
-struct vc4_bo_exec_state {
- struct drm_gem_cma_object *bo;
- enum vc4_bo_mode mode;
-};
-
struct vc4_exec_info {
/* Sequence number for this bin/render job. */
uint64_t seqno;
@@ -47,7 +36,7 @@ struct vc4_exec_info {
/* This is the array of BOs that were looked up at the start of exec.
* Command validation will use indices into this array.
*/
- struct vc4_bo_exec_state *bo;
+ struct drm_gem_cma_object **bo;
uint32_t bo_count;
/* List of other BOs used in the job that need to be released
@@ -72,7 +61,6 @@ struct vc4_exec_info {
* command lists.
*/
struct vc4_shader_state {
- uint8_t packet;
uint32_t addr;
/* Maximum vertex index referenced by any primitive using this
* shader state.
@@ -88,6 +76,7 @@ struct vc4_exec_info {
bool found_tile_binning_mode_config_packet;
bool found_start_tile_binning_packet;
bool found_increment_semaphore_packet;
+ bool found_flush;
uint8_t bin_tiles_x, bin_tiles_y;
struct drm_gem_cma_object *tile_bo;
uint32_t tile_alloc_offset;
@@ -99,6 +88,9 @@ struct vc4_exec_info {
uint32_t ct0ca, ct0ea;
uint32_t ct1ca, ct1ea;
+ /* Pointer to the unvalidated bin CL (if present). */
+ void *bin_u;
+
/* Pointers to the shader recs. These paddr gets incremented as CL
* packets are relocated in validate_gl_shader_state, and the vaddrs
* (u and v) get incremented and size decremented as the shader recs
@@ -168,10 +160,8 @@ vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
-bool vc4_use_bo(struct vc4_exec_info *exec,
- uint32_t hindex,
- enum vc4_bo_mode mode,
- struct drm_gem_cma_object **obj);
+struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec,
+ uint32_t hindex);
int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
diff --git a/src/gallium/drivers/vc4/kernel/vc4_gem.c b/src/gallium/drivers/vc4/kernel/vc4_gem.c
index e4b7fea5968..93f9ec7ed9b 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_gem.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_gem.c
@@ -112,6 +112,8 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
exec->ct0ca = exec->exec_bo->paddr + bin_offset;
+ exec->bin_u = bin;
+
exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
exec->shader_rec_size = args->shader_rec_size;
diff --git a/src/gallium/drivers/vc4/kernel/vc4_packet.h b/src/gallium/drivers/vc4/kernel/vc4_packet.h
index 88cfc0fa9f0..771e2b78761 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_packet.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_packet.h
@@ -88,16 +88,22 @@ enum vc4_packet {
#define VC4_PACKET_START_TILE_BINNING_SIZE 1
#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1
#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1
+#define VC4_PACKET_BRANCH_SIZE 5
#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5
#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1
#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1
+#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE 5
+#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE 5
#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7
#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7
#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14
#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10
+#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE 1
+#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE 1
#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2
#define VC4_PACKET_GL_SHADER_STATE_SIZE 5
#define VC4_PACKET_NV_SHADER_STATE_SIZE 5
+#define VC4_PACKET_VG_SHADER_STATE_SIZE 5
#define VC4_PACKET_CONFIGURATION_BITS_SIZE 4
#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5
#define VC4_PACKET_POINT_SIZE_SIZE 5
@@ -106,6 +112,7 @@ enum vc4_packet {
#define VC4_PACKET_DEPTH_OFFSET_SIZE 5
#define VC4_PACKET_CLIP_WINDOW_SIZE 9
#define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5
+#define VC4_PACKET_Z_CLIPPING_SIZE 9
#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9
#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9
#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16
@@ -136,6 +143,16 @@ enum vc4_packet {
/** @{
*
+ * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
+ * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
+ */
+#define VC4_LOADSTORE_FULL_RES_EOF (1 << 3)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL (1 << 2)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS (1 << 1)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR (1 << 0)
+
+/** @{
+ *
* byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
* VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
*/
diff --git a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
index e2d907ad91f..b827eb7e9e1 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
@@ -100,7 +100,8 @@ static void emit_tile(struct vc4_exec_info *exec,
struct vc4_rcl_setup *setup,
uint8_t x, uint8_t y, bool first, bool last)
{
- bool has_bin = exec->args->bin_cl_size != 0;
+ struct drm_vc4_submit_cl *args = exec->args;
+ bool has_bin = args->bin_cl_size != 0;
/* Note that the load doesn't actually occur until the
* tile coords packet is processed, and only one load
@@ -108,10 +109,9 @@ static void emit_tile(struct vc4_exec_info *exec,
*/
if (setup->color_read) {
rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
- rcl_u16(setup, exec->args->color_read.bits);
+ rcl_u16(setup, args->color_read.bits);
rcl_u32(setup,
- setup->color_read->paddr +
- exec->args->color_read.offset);
+ setup->color_read->paddr + args->color_read.offset);
}
if (setup->zs_read) {
@@ -122,9 +122,8 @@ static void emit_tile(struct vc4_exec_info *exec,
}
rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
- rcl_u16(setup, exec->args->zs_read.bits);
- rcl_u32(setup,
- setup->zs_read->paddr + exec->args->zs_read.offset);
+ rcl_u16(setup, args->zs_read.bits);
+ rcl_u32(setup, setup->zs_read->paddr + args->zs_read.offset);
}
/* Clipping depends on tile coordinates having been
@@ -147,11 +146,11 @@ static void emit_tile(struct vc4_exec_info *exec,
if (setup->zs_write) {
rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
- rcl_u16(setup, exec->args->zs_write.bits |
+ rcl_u16(setup, args->zs_write.bits |
(setup->color_ms_write ?
VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR : 0));
rcl_u32(setup,
- (setup->zs_write->paddr + exec->args->zs_write.offset) |
+ (setup->zs_write->paddr + args->zs_write.offset) |
((last && !setup->color_ms_write) ?
VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
}
@@ -172,11 +171,12 @@ static void emit_tile(struct vc4_exec_info *exec,
static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
struct vc4_rcl_setup *setup)
{
- bool has_bin = exec->args->bin_cl_size != 0;
- uint8_t min_x_tile = exec->args->min_x_tile;
- uint8_t min_y_tile = exec->args->min_y_tile;
- uint8_t max_x_tile = exec->args->max_x_tile;
- uint8_t max_y_tile = exec->args->max_y_tile;
+ struct drm_vc4_submit_cl *args = exec->args;
+ bool has_bin = args->bin_cl_size != 0;
+ uint8_t min_x_tile = args->min_x_tile;
+ uint8_t min_y_tile = args->min_y_tile;
+ uint8_t max_x_tile = args->max_x_tile;
+ uint8_t max_y_tile = args->max_y_tile;
uint8_t xtiles = max_x_tile - min_x_tile + 1;
uint8_t ytiles = max_y_tile - min_y_tile + 1;
uint8_t x, y;
@@ -185,7 +185,7 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
- if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+ if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
size += VC4_PACKET_CLEAR_COLORS_SIZE +
VC4_PACKET_TILE_COORDINATES_SIZE +
VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
@@ -208,7 +208,7 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
}
if (setup->zs_write)
- loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
+ loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
if (setup->color_ms_write) {
if (setup->zs_write)
loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
@@ -226,23 +226,23 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
rcl_u32(setup,
(setup->color_ms_write ?
(setup->color_ms_write->paddr +
- exec->args->color_ms_write.offset) :
+ args->color_ms_write.offset) :
0));
- rcl_u16(setup, exec->args->width);
- rcl_u16(setup, exec->args->height);
- rcl_u16(setup, exec->args->color_ms_write.bits);
+ rcl_u16(setup, args->width);
+ rcl_u16(setup, args->height);
+ rcl_u16(setup, args->color_ms_write.bits);
/* The tile buffer gets cleared when the previous tile is stored. If
* the clear values changed between frames, then the tile buffer has
* stale clear values in it, so we have to do a store in None mode (no
* writes) so that we trigger the tile buffer clear.
*/
- if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+ if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);
- rcl_u32(setup, exec->args->clear_color[0]);
- rcl_u32(setup, exec->args->clear_color[1]);
- rcl_u32(setup, exec->args->clear_z);
- rcl_u8(setup, exec->args->clear_s);
+ rcl_u32(setup, args->clear_color[0]);
+ rcl_u32(setup, args->clear_color[1]);
+ rcl_u32(setup, args->clear_z);
+ rcl_u8(setup, args->clear_s);
vc4_tile_coordinates(setup, 0, 0);
@@ -286,7 +286,8 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
if (surf->hindex == ~0)
return 0;
- if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj))
+ *obj = vc4_use_bo(exec, surf->hindex);
+ if (!*obj)
return -EINVAL;
if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK |
@@ -365,7 +366,8 @@ vc4_rcl_ms_surface_setup(struct vc4_exec_info *exec,
if (surf->hindex == ~0)
return 0;
- if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj))
+ *obj = vc4_use_bo(exec, surf->hindex);
+ if (!*obj)
return -EINVAL;
if (tiling > VC4_TILING_FORMAT_LT) {
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c
index a0b67a7e50b..b248831113c 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c
@@ -94,42 +94,42 @@ size_is_lt(uint32_t width, uint32_t height, int cpp)
height <= 4 * utile_height(cpp));
}
-bool
-vc4_use_bo(struct vc4_exec_info *exec,
- uint32_t hindex,
- enum vc4_bo_mode mode,
- struct drm_gem_cma_object **obj)
+struct drm_gem_cma_object *
+vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
{
- *obj = NULL;
+ struct drm_gem_cma_object *obj;
+ struct drm_vc4_bo *bo;
if (hindex >= exec->bo_count) {
DRM_ERROR("BO index %d greater than BO count %d\n",
hindex, exec->bo_count);
- return false;
+ return NULL;
}
+ obj = exec->bo[hindex];
+ bo = to_vc4_bo(&obj->base);
- if (exec->bo[hindex].mode != mode) {
- if (exec->bo[hindex].mode == VC4_MODE_UNDECIDED) {
- exec->bo[hindex].mode = mode;
- } else {
- DRM_ERROR("BO index %d reused with mode %d vs %d\n",
- hindex, exec->bo[hindex].mode, mode);
- return false;
- }
+ if (bo->validated_shader) {
+ DRM_ERROR("Trying to use shader BO as something other than "
+ "a shader\n");
+ return NULL;
}
- *obj = exec->bo[hindex].bo;
- return true;
+ return obj;
+}
+
+static struct drm_gem_cma_object *
+vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
+{
+ return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
}
static bool
-vc4_use_handle(struct vc4_exec_info *exec,
- uint32_t gem_handles_packet_index,
- enum vc4_bo_mode mode,
- struct drm_gem_cma_object **obj)
+validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
{
- return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index],
- mode, obj);
+ /* Note that the untrusted pointer passed to these functions is
+ * incremented past the packet byte.
+ */
+ return (untrusted - 1 == exec->bin_u + pos);
}
static uint32_t
@@ -201,14 +201,15 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
return true;
}
+
static int
-validate_flush_all(VALIDATE_ARGS)
+validate_flush(VALIDATE_ARGS)
{
- if (exec->found_increment_semaphore_packet) {
- DRM_ERROR("VC4_PACKET_FLUSH_ALL after "
- "VC4_PACKET_INCREMENT_SEMAPHORE\n");
+ if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
+ DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");
return -EINVAL;
}
+ exec->found_flush = true;
return 0;
}
@@ -233,17 +234,13 @@ validate_start_tile_binning(VALIDATE_ARGS)
static int
validate_increment_semaphore(VALIDATE_ARGS)
{
- if (exec->found_increment_semaphore_packet) {
- DRM_ERROR("Duplicate VC4_PACKET_INCREMENT_SEMAPHORE\n");
+ if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
+ DRM_ERROR("Bin CL must end with "
+ "VC4_PACKET_INCREMENT_SEMAPHORE\n");
return -EINVAL;
}
exec->found_increment_semaphore_packet = true;
- /* Once we've found the semaphore increment, there should be one FLUSH
- * then the end of the command list. The FLUSH actually triggers the
- * increment, so we only need to make sure there
- */
-
return 0;
}
@@ -257,11 +254,6 @@ validate_indexed_prim_list(VALIDATE_ARGS)
uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
struct vc4_shader_state *shader_state;
- if (exec->found_increment_semaphore_packet) {
- DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
- return -EINVAL;
- }
-
/* Check overflow condition */
if (exec->shader_state_count == 0) {
DRM_ERROR("shader state must precede primitives\n");
@@ -272,7 +264,8 @@ validate_indexed_prim_list(VALIDATE_ARGS)
if (max_index > shader_state->max_index)
shader_state->max_index = max_index;
- if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &ib))
+ ib = vc4_use_handle(exec, 0);
+ if (!ib)
return -EINVAL;
if (offset > ib->base.size ||
@@ -295,11 +288,6 @@ validate_gl_array_primitive(VALIDATE_ARGS)
uint32_t max_index;
struct vc4_shader_state *shader_state;
- if (exec->found_increment_semaphore_packet) {
- DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
- return -EINVAL;
- }
-
/* Check overflow condition */
if (exec->shader_state_count == 0) {
DRM_ERROR("shader state must precede primitives\n");
@@ -329,7 +317,6 @@ validate_gl_shader_state(VALIDATE_ARGS)
return -EINVAL;
}
- exec->shader_state[i].packet = VC4_PACKET_GL_SHADER_STATE;
exec->shader_state[i].addr = *(uint32_t *)untrusted;
exec->shader_state[i].max_index = 0;
@@ -348,31 +335,6 @@ validate_gl_shader_state(VALIDATE_ARGS)
}
static int
-validate_nv_shader_state(VALIDATE_ARGS)
-{
- uint32_t i = exec->shader_state_count++;
-
- if (i >= exec->shader_state_size) {
- DRM_ERROR("More requests for shader states than declared\n");
- return -EINVAL;
- }
-
- exec->shader_state[i].packet = VC4_PACKET_NV_SHADER_STATE;
- exec->shader_state[i].addr = *(uint32_t *)untrusted;
-
- if (exec->shader_state[i].addr & 15) {
- DRM_ERROR("NV shader state address 0x%08x misaligned\n",
- exec->shader_state[i].addr);
- return -EINVAL;
- }
-
- *(uint32_t *)validated = (exec->shader_state[i].addr +
- exec->shader_rec_p);
-
- return 0;
-}
-
-static int
validate_tile_binning_config(VALIDATE_ARGS)
{
struct drm_device *dev = exec->exec_bo->base.dev;
@@ -473,8 +435,8 @@ static const struct cmd_info {
} cmd_info[] = {
VC4_DEFINE_PACKET(VC4_PACKET_HALT, "halt", NULL),
VC4_DEFINE_PACKET(VC4_PACKET_NOP, "nop", NULL),
- VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", NULL),
- VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", validate_flush_all),
+ VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", validate_flush),
+ VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", NULL),
VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, "start tile binning", validate_start_tile_binning),
VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, "increment semaphore", validate_increment_semaphore),
@@ -488,7 +450,7 @@ static const struct cmd_info {
VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, "primitive list format", NULL),
VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, "GL Shader State", validate_gl_shader_state),
- VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, "NV Shader State", validate_nv_shader_state),
+ /* We don't support validating NV shader states. */
VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, "configuration bits", NULL),
VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, "flat shade flags", NULL),
@@ -525,7 +487,7 @@ vc4_validate_bin_cl(struct drm_device *dev,
u8 cmd = *(uint8_t *)src_pkt;
const struct cmd_info *info;
- if (cmd > ARRAY_SIZE(cmd_info)) {
+ if (cmd >= ARRAY_SIZE(cmd_info)) {
DRM_ERROR("0x%08x: packet %d out of bounds\n",
src_offset, cmd);
return -EINVAL;
@@ -580,8 +542,16 @@ vc4_validate_bin_cl(struct drm_device *dev,
return -EINVAL;
}
- if (!exec->found_increment_semaphore_packet) {
- DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE\n");
+ /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. The
+ * semaphore is used to trigger the render CL to start up, and the
+ * FLUSH is what caps the bin lists with
+ * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
+ * render CL when they get called to) and actually triggers the queued
+ * semaphore increment.
+ */
+ if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
+ DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
+ "VC4_PACKET_FLUSH\n");
return -EINVAL;
}
@@ -612,18 +582,19 @@ reloc_tex(struct vc4_exec_info *exec,
uint32_t cube_map_stride = 0;
enum vc4_texture_data_type type;
- if (!vc4_use_bo(exec, texture_handle_index, VC4_MODE_RENDER, &tex))
+ tex = vc4_use_bo(exec, texture_handle_index);
+ if (!tex)
return false;
if (sample->is_direct) {
uint32_t remaining_size = tex->base.size - p0;
if (p0 > tex->base.size - 4) {
DRM_ERROR("UBO offset greater than UBO size\n");
- return false;
+ goto fail;
}
if (p1 > remaining_size - 4) {
DRM_ERROR("UBO clamp would allow reads outside of UBO\n");
- return false;
+ goto fail;
}
*validated_p0 = tex->paddr + p0;
return true;
@@ -642,14 +613,14 @@ reloc_tex(struct vc4_exec_info *exec,
VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
if (cube_map_stride) {
DRM_ERROR("Cube map stride set twice\n");
- return false;
+ goto fail;
}
cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
}
if (!cube_map_stride) {
DRM_ERROR("Cube map stride not set\n");
- return false;
+ goto fail;
}
}
@@ -683,7 +654,7 @@ reloc_tex(struct vc4_exec_info *exec,
case VC4_TEXTURE_TYPE_YUV422R:
default:
DRM_ERROR("Texture format %d unsupported\n", type);
- return false;
+ goto fail;
}
utile_w = utile_width(cpp);
utile_h = utile_height(cpp);
@@ -699,7 +670,7 @@ reloc_tex(struct vc4_exec_info *exec,
if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
tiling_format, width, height, cpp)) {
- return false;
+ goto fail;
}
/* The mipmap levels are stored before the base of the texture. Make
@@ -740,7 +711,7 @@ reloc_tex(struct vc4_exec_info *exec,
i, level_width, level_height,
aligned_width, aligned_height,
level_size, offset);
- return false;
+ goto fail;
}
offset -= level_size;
@@ -749,54 +720,37 @@ reloc_tex(struct vc4_exec_info *exec,
*validated_p0 = tex->paddr + p0;
return true;
+ fail:
+ DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
+ DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
+ DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
+ DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
+ return false;
}
static int
-validate_shader_rec(struct drm_device *dev,
- struct vc4_exec_info *exec,
- struct vc4_shader_state *state)
+validate_gl_shader_rec(struct drm_device *dev,
+ struct vc4_exec_info *exec,
+ struct vc4_shader_state *state)
{
uint32_t *src_handles;
void *pkt_u, *pkt_v;
- enum shader_rec_reloc_type {
- RELOC_CODE,
- RELOC_VBO,
- };
- struct shader_rec_reloc {
- enum shader_rec_reloc_type type;
- uint32_t offset;
- };
- static const struct shader_rec_reloc gl_relocs[] = {
- { RELOC_CODE, 4 }, /* fs */
- { RELOC_CODE, 16 }, /* vs */
- { RELOC_CODE, 28 }, /* cs */
+ static const uint32_t shader_reloc_offsets[] = {
+ 4, /* fs */
+ 16, /* vs */
+ 28, /* cs */
};
- static const struct shader_rec_reloc nv_relocs[] = {
- { RELOC_CODE, 4 }, /* fs */
- { RELOC_VBO, 12 }
- };
- const struct shader_rec_reloc *relocs;
- struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_relocs) + 8];
- uint32_t nr_attributes = 0, nr_fixed_relocs, nr_relocs, packet_size;
+ uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
+ struct drm_gem_cma_object *bo[shader_reloc_count + 8];
+ uint32_t nr_attributes, nr_relocs, packet_size;
int i;
- struct vc4_validated_shader_info *validated_shader = NULL;
-
- if (state->packet == VC4_PACKET_NV_SHADER_STATE) {
- relocs = nv_relocs;
- nr_fixed_relocs = ARRAY_SIZE(nv_relocs);
- packet_size = 16;
- } else {
- relocs = gl_relocs;
- nr_fixed_relocs = ARRAY_SIZE(gl_relocs);
-
- nr_attributes = state->addr & 0x7;
- if (nr_attributes == 0)
- nr_attributes = 8;
- packet_size = gl_shader_rec_size(state->addr);
- }
- nr_relocs = nr_fixed_relocs + nr_attributes;
+ nr_attributes = state->addr & 0x7;
+ if (nr_attributes == 0)
+ nr_attributes = 8;
+ packet_size = gl_shader_rec_size(state->addr);
+ nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
if (nr_relocs * 4 > exec->shader_rec_size) {
DRM_ERROR("overflowed shader recs reading %d handles "
"from %d bytes left\n",
@@ -826,21 +780,30 @@ validate_shader_rec(struct drm_device *dev,
exec->shader_rec_v += roundup(packet_size, 16);
exec->shader_rec_size -= packet_size;
- for (i = 0; i < nr_relocs; i++) {
- enum vc4_bo_mode mode;
-
- if (i < nr_fixed_relocs && relocs[i].type == RELOC_CODE)
- mode = VC4_MODE_SHADER;
- else
- mode = VC4_MODE_RENDER;
+ if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
+ DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
+ return -EINVAL;
+ }
- if (!vc4_use_bo(exec, src_handles[i], mode, &bo[i])) {
- return false;
+	for (i = 0; i < shader_reloc_count; i++) {
+		/* exec->bo[] holds bo_count entries: valid indices are 0 .. bo_count - 1. */
+		if (src_handles[i] >= exec->bo_count) {
+			DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
+			return -EINVAL;
+		}
+		bo[i] = exec->bo[src_handles[i]];
+		if (!bo[i])
+			return -EINVAL;
+	}
+ for (i = shader_reloc_count; i < nr_relocs; i++) {
+ bo[i] = vc4_use_bo(exec, src_handles[i]);
+ if (!bo[i])
+ return -EINVAL;
}
- for (i = 0; i < nr_fixed_relocs; i++) {
- uint32_t o = relocs[i].offset;
+ for (i = 0; i < shader_reloc_count; i++) {
+ struct vc4_validated_shader_info *validated_shader;
+ uint32_t o = shader_reloc_offsets[i];
uint32_t src_offset = *(uint32_t *)(pkt_u + o);
uint32_t *texture_handles_u;
void *uniform_data_u;
@@ -848,58 +811,50 @@ validate_shader_rec(struct drm_device *dev,
*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
- switch (relocs[i].type) {
- case RELOC_CODE:
- if (src_offset != 0) {
- DRM_ERROR("Shaders must be at offset 0 of "
- "the BO.\n");
- goto fail;
- }
+ if (src_offset != 0) {
+ DRM_ERROR("Shaders must be at offset 0 of "
+ "the BO.\n");
+ return -EINVAL;
+ }
- kfree(validated_shader);
- validated_shader = vc4_validate_shader(bo[i]);
- if (!validated_shader)
- goto fail;
+ validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
+ if (!validated_shader)
+ return -EINVAL;
- if (validated_shader->uniforms_src_size >
- exec->uniforms_size) {
- DRM_ERROR("Uniforms src buffer overflow\n");
- goto fail;
- }
+ if (validated_shader->uniforms_src_size >
+ exec->uniforms_size) {
+ DRM_ERROR("Uniforms src buffer overflow\n");
+ return -EINVAL;
+ }
- texture_handles_u = exec->uniforms_u;
- uniform_data_u = (texture_handles_u +
- validated_shader->num_texture_samples);
-
- memcpy(exec->uniforms_v, uniform_data_u,
- validated_shader->uniforms_size);
-
- for (tex = 0;
- tex < validated_shader->num_texture_samples;
- tex++) {
- if (!reloc_tex(exec,
- uniform_data_u,
- &validated_shader->texture_samples[tex],
- texture_handles_u[tex])) {
- goto fail;
- }
- }
+ texture_handles_u = exec->uniforms_u;
+ uniform_data_u = (texture_handles_u +
+ validated_shader->num_texture_samples);
- *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
+ memcpy(exec->uniforms_v, uniform_data_u,
+ validated_shader->uniforms_size);
- exec->uniforms_u += validated_shader->uniforms_src_size;
- exec->uniforms_v += validated_shader->uniforms_size;
- exec->uniforms_p += validated_shader->uniforms_size;
+ for (tex = 0;
+ tex < validated_shader->num_texture_samples;
+ tex++) {
+ if (!reloc_tex(exec,
+ uniform_data_u,
+ &validated_shader->texture_samples[tex],
+ texture_handles_u[tex])) {
+ return -EINVAL;
+ }
+ }
- break;
+ *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
- case RELOC_VBO:
- break;
- }
+ exec->uniforms_u += validated_shader->uniforms_src_size;
+ exec->uniforms_v += validated_shader->uniforms_size;
+ exec->uniforms_p += validated_shader->uniforms_size;
}
for (i = 0; i < nr_attributes; i++) {
- struct drm_gem_cma_object *vbo = bo[nr_fixed_relocs + i];
+ struct drm_gem_cma_object *vbo =
+ bo[ARRAY_SIZE(shader_reloc_offsets) + i];
uint32_t o = 36 + i * 8;
uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
@@ -929,13 +884,7 @@ validate_shader_rec(struct drm_device *dev,
*(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
}
- kfree(validated_shader);
-
return 0;
-
-fail:
- kfree(validated_shader);
- return -EINVAL;
}
int
@@ -946,7 +895,7 @@ vc4_validate_shader_recs(struct drm_device *dev,
int ret = 0;
for (i = 0; i < exec->shader_state_count; i++) {
- ret = validate_shader_rec(dev, exec, &exec->shader_state[i]);
+ ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
if (ret)
return ret;
}
diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c
index d29e2c9c318..e52a1941730 100644
--- a/src/gallium/drivers/vc4/vc4_blit.c
+++ b/src/gallium/drivers/vc4/vc4_blit.c
@@ -94,7 +94,7 @@ vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
struct vc4_context *vc4 = vc4_context(ctx);
if (!util_blitter_is_blit_supported(vc4->blitter, info)) {
- fprintf(stderr, "blit unsupported %s -> %s",
+ fprintf(stderr, "blit unsupported %s -> %s\n",
util_format_short_name(info->src.resource->format),
util_format_short_name(info->dst.resource->format));
return false;
@@ -135,7 +135,7 @@ vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
info.dst.resource->nr_samples <= 1 &&
!util_format_is_depth_or_stencil(info.src.resource->format) &&
!util_format_is_pure_integer(info.src.resource->format)) {
- fprintf(stderr, "color resolve unimplemented");
+ fprintf(stderr, "color resolve unimplemented\n");
return;
}
@@ -147,7 +147,7 @@ vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
}
if (info.mask & PIPE_MASK_S) {
- fprintf(stderr, "cannot blit stencil, skipping");
+ fprintf(stderr, "cannot blit stencil, skipping\n");
info.mask &= ~PIPE_MASK_S;
}
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index cbdb9e89cf6..f7b41f5816d 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -1,5 +1,5 @@
/*
- * Copyright © 2014 Broadcom
+ * Copyright © 2014-2015 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -94,7 +94,7 @@ vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name)
* allocate something new instead, since we assume that the
* user will proceed to CPU map it and fill it with stuff.
*/
- if (!vc4_bo_wait(bo, 0)) {
+ if (!vc4_bo_wait(bo, 0, NULL)) {
pipe_mutex_unlock(cache->lock);
return NULL;
}
@@ -381,15 +381,57 @@ vc4_bo_get_dmabuf(struct vc4_bo *bo)
}
struct vc4_bo *
-vc4_bo_alloc_mem(struct vc4_screen *screen, const void *data, uint32_t size,
- const char *name)
+vc4_bo_alloc_shader(struct vc4_screen *screen, const void *data, uint32_t size)
{
- void *map;
struct vc4_bo *bo;
+ int ret;
+
+ bo = CALLOC_STRUCT(vc4_bo);
+ if (!bo)
+ return NULL;
+
+ pipe_reference_init(&bo->reference, 1);
+ bo->screen = screen;
+ bo->size = align(size, 4096);
+ bo->name = "code";
+ bo->private = false; /* Make sure it doesn't go back to the cache. */
+
+ if (!using_vc4_simulator) {
+ struct drm_vc4_create_shader_bo create = {
+ .size = size,
+ .data = (uintptr_t)data,
+ };
+
+ ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_CREATE_SHADER_BO,
+ &create);
+ bo->handle = create.handle;
+ } else {
+ struct drm_mode_create_dumb create;
+ memset(&create, 0, sizeof(create));
+
+ create.width = 128;
+ create.bpp = 8;
+ create.height = (size + 127) / 128;
+
+ ret = drmIoctl(screen->fd, DRM_IOCTL_MODE_CREATE_DUMB, &create);
+ bo->handle = create.handle;
+ assert(create.size >= size);
+
+ vc4_bo_map(bo);
+ memcpy(bo->map, data, size);
+ }
+ if (ret != 0) {
+ fprintf(stderr, "create shader ioctl failure\n");
+ abort();
+ }
+
+ screen->bo_count++;
+ screen->bo_size += bo->size;
+ if (dump_stats) {
+ fprintf(stderr, "Allocated shader %dkb:\n", size / 1024);
+ vc4_bo_dump_stats(screen);
+ }
- bo = vc4_bo_alloc(screen, size, name);
- map = vc4_bo_map(bo);
- memcpy(map, data, size);
return bo;
}
@@ -413,63 +455,91 @@ vc4_bo_flink(struct vc4_bo *bo, uint32_t *name)
return true;
}
+static int vc4_wait_seqno_ioctl(int fd, uint64_t seqno, uint64_t timeout_ns)
+{
+ if (using_vc4_simulator)
+ return 0;
+
+ struct drm_vc4_wait_seqno wait = {
+ .seqno = seqno,
+ .timeout_ns = timeout_ns,
+ };
+ int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
+ if (ret == -1)
+ return -errno;
+ else
+ return 0;
+
+}
+
bool
-vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns)
+vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+ const char *reason)
{
if (screen->finished_seqno >= seqno)
return true;
- struct drm_vc4_wait_seqno wait;
- memset(&wait, 0, sizeof(wait));
- wait.seqno = seqno;
- wait.timeout_ns = timeout_ns;
-
- int ret;
- if (!using_vc4_simulator)
- ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
- else {
- wait.seqno = screen->finished_seqno;
- ret = 0;
+ if (unlikely(vc4_debug & VC4_DEBUG_PERF) && timeout_ns && reason) {
+ if (vc4_wait_seqno_ioctl(screen->fd, seqno, 0) == -ETIME) {
+ fprintf(stderr, "Blocking on seqno %lld for %s\n",
+ (long long)seqno, reason);
+ }
}
- if (ret == 0) {
- screen->finished_seqno = wait.seqno;
- return true;
- }
+ int ret = vc4_wait_seqno_ioctl(screen->fd, seqno, timeout_ns);
+ if (ret) {
+ if (ret != -ETIME) {
+ fprintf(stderr, "wait failed: %d\n", ret);
+ abort();
+ }
- if (errno != ETIME) {
- fprintf(stderr, "wait failed: %d\n", ret);
- abort();
+ return false;
}
- return false;
+ screen->finished_seqno = seqno;
+ return true;
+}
+
+static int vc4_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns)
+{
+ if (using_vc4_simulator)
+ return 0;
+
+ struct drm_vc4_wait_bo wait = {
+ .handle = handle,
+ .timeout_ns = timeout_ns,
+ };
+ int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
+ if (ret == -1)
+ return -errno;
+ else
+ return 0;
+
}
bool
-vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns)
+vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns, const char *reason)
{
struct vc4_screen *screen = bo->screen;
- struct drm_vc4_wait_bo wait;
- memset(&wait, 0, sizeof(wait));
- wait.handle = bo->handle;
- wait.timeout_ns = timeout_ns;
-
- int ret;
- if (!using_vc4_simulator)
- ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
- else
- ret = 0;
+ if (unlikely(vc4_debug & VC4_DEBUG_PERF) && timeout_ns && reason) {
+ if (vc4_wait_bo_ioctl(screen->fd, bo->handle, 0) == -ETIME) {
+ fprintf(stderr, "Blocking on %s BO for %s\n",
+ bo->name, reason);
+ }
+ }
- if (ret == 0)
- return true;
+ int ret = vc4_wait_bo_ioctl(screen->fd, bo->handle, timeout_ns);
+ if (ret) {
+ if (ret != -ETIME) {
+ fprintf(stderr, "wait failed: %d\n", ret);
+ abort();
+ }
- if (errno != ETIME) {
- fprintf(stderr, "wait failed: %d\n", ret);
- abort();
+ return false;
}
- return false;
+ return true;
}
void *
@@ -515,7 +585,7 @@ vc4_bo_map(struct vc4_bo *bo)
{
void *map = vc4_bo_map_unsynchronized(bo);
- bool ok = vc4_bo_wait(bo, PIPE_TIMEOUT_INFINITE);
+ bool ok = vc4_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map");
if (!ok) {
fprintf(stderr, "BO wait for map failed\n");
abort();
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h b/src/gallium/drivers/vc4/vc4_bufmgr.h
index 7320695ca8e..b77506e242a 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.h
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.h
@@ -58,8 +58,8 @@ struct vc4_bo {
struct vc4_bo *vc4_bo_alloc(struct vc4_screen *screen, uint32_t size,
const char *name);
-struct vc4_bo *vc4_bo_alloc_mem(struct vc4_screen *screen, const void *data,
- uint32_t size, const char *name);
+struct vc4_bo *vc4_bo_alloc_shader(struct vc4_screen *screen, const void *data,
+ uint32_t size);
void vc4_bo_last_unreference(struct vc4_bo *bo);
void vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time);
struct vc4_bo *vc4_bo_open_name(struct vc4_screen *screen, uint32_t name,
@@ -113,10 +113,11 @@ void *
vc4_bo_map_unsynchronized(struct vc4_bo *bo);
bool
-vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns);
+vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns, const char *reason);
bool
-vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns);
+vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+ const char *reason);
void
vc4_bufmgr_destroy(struct pipe_screen *pscreen);
diff --git a/src/gallium/drivers/vc4/vc4_cl.c b/src/gallium/drivers/vc4/vc4_cl.c
index 0700e885cbf..ced4f2dfa86 100644
--- a/src/gallium/drivers/vc4/vc4_cl.c
+++ b/src/gallium/drivers/vc4/vc4_cl.c
@@ -36,11 +36,12 @@ vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl)
void
cl_ensure_space(struct vc4_cl *cl, uint32_t space)
{
- if ((cl->next - cl->base) + space <= cl->size)
+ uint32_t offset = cl_offset(cl);
+
+ if (offset + space <= cl->size)
return;
uint32_t size = MAX2(cl->size + space, cl->size * 2);
- uint32_t offset = cl->next -cl->base;
cl->base = reralloc(ralloc_parent(cl->base), cl->base, uint8_t, size);
cl->size = size;
@@ -60,15 +61,20 @@ vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo)
uint32_t hindex;
uint32_t *current_handles = vc4->bo_handles.base;
- for (hindex = 0;
- hindex < (vc4->bo_handles.next - vc4->bo_handles.base) / 4;
- hindex++) {
+ for (hindex = 0; hindex < cl_offset(&vc4->bo_handles) / 4; hindex++) {
if (current_handles[hindex] == bo->handle)
return hindex;
}
- cl_u32(&vc4->bo_handles, bo->handle);
- cl_ptr(&vc4->bo_pointers, vc4_bo_reference(bo));
+ struct vc4_cl_out *out;
+
+ out = cl_start(&vc4->bo_handles);
+ cl_u32(&out, bo->handle);
+ cl_end(&vc4->bo_handles, out);
+
+ out = cl_start(&vc4->bo_pointers);
+ cl_ptr(&out, vc4_bo_reference(bo));
+ cl_end(&vc4->bo_pointers, out);
return hindex;
}
diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h
index 4a50e790942..bf4be0efc29 100644
--- a/src/gallium/drivers/vc4/vc4_cl.h
+++ b/src/gallium/drivers/vc4/vc4_cl.h
@@ -33,12 +33,20 @@
struct vc4_bo;
+/**
+ * Undefined structure, used for typechecking that you're passing the pointers
+ * to these functions correctly.
+ */
+struct vc4_cl_out;
+
struct vc4_cl {
void *base;
- void *next;
+ struct vc4_cl_out *next;
+ struct vc4_cl_out *reloc_next;
uint32_t size;
- uint32_t reloc_next;
+#ifdef DEBUG
uint32_t reloc_count;
+#endif
};
void vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl);
@@ -49,135 +57,149 @@ uint32_t vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo);
struct PACKED unaligned_16 { uint16_t x; };
struct PACKED unaligned_32 { uint32_t x; };
-static inline void
-put_unaligned_32(void *ptr, uint32_t val)
+static inline uint32_t cl_offset(struct vc4_cl *cl)
{
- struct unaligned_32 *p = ptr;
- p->x = val;
+ return (char *)cl->next - (char *)cl->base;
}
static inline void
-put_unaligned_16(void *ptr, uint16_t val)
+cl_advance(struct vc4_cl_out **cl, uint32_t n)
{
- struct unaligned_16 *p = ptr;
- p->x = val;
+ (*cl) = (struct vc4_cl_out *)((char *)(*cl) + n);
}
-static inline void
-cl_u8(struct vc4_cl *cl, uint8_t n)
+static inline struct vc4_cl_out *
+cl_start(struct vc4_cl *cl)
{
- assert((cl->next - cl->base) + 1 <= cl->size);
-
- *(uint8_t *)cl->next = n;
- cl->next++;
+ return cl->next;
}
static inline void
-cl_u16(struct vc4_cl *cl, uint16_t n)
+cl_end(struct vc4_cl *cl, struct vc4_cl_out *next)
{
- assert((cl->next - cl->base) + 2 <= cl->size);
+ cl->next = next;
+ assert(cl_offset(cl) <= cl->size);
+}
- put_unaligned_16(cl->next, n);
- cl->next += 2;
+
+static inline void
+put_unaligned_32(struct vc4_cl_out *ptr, uint32_t val)
+{
+ struct unaligned_32 *p = (void *)ptr;
+ p->x = val;
}
static inline void
-cl_u32(struct vc4_cl *cl, uint32_t n)
+put_unaligned_16(struct vc4_cl_out *ptr, uint16_t val)
{
- assert((cl->next - cl->base) + 4 <= cl->size);
+ struct unaligned_16 *p = (void *)ptr;
+ p->x = val;
+}
- put_unaligned_32(cl->next, n);
- cl->next += 4;
+static inline void
+cl_u8(struct vc4_cl_out **cl, uint8_t n)
+{
+ *(uint8_t *)(*cl) = n;
+ cl_advance(cl, 1);
}
static inline void
-cl_aligned_u32(struct vc4_cl *cl, uint32_t n)
+cl_u16(struct vc4_cl_out **cl, uint16_t n)
{
- assert((cl->next - cl->base) + 4 <= cl->size);
+ put_unaligned_16(*cl, n);
+ cl_advance(cl, 2);
+}
- *(uint32_t *)cl->next = n;
- cl->next += 4;
+static inline void
+cl_u32(struct vc4_cl_out **cl, uint32_t n)
+{
+ put_unaligned_32(*cl, n);
+ cl_advance(cl, 4);
}
static inline void
-cl_ptr(struct vc4_cl *cl, void *ptr)
+cl_aligned_u32(struct vc4_cl_out **cl, uint32_t n)
{
- assert((cl->next - cl->base) + sizeof(void *) <= cl->size);
+ *(uint32_t *)(*cl) = n;
+ cl_advance(cl, 4);
+}
- *(void **)cl->next = ptr;
- cl->next += sizeof(void *);
+static inline void
+cl_ptr(struct vc4_cl_out **cl, void *ptr)
+{
+ *(struct vc4_cl_out **)(*cl) = ptr;
+ cl_advance(cl, sizeof(void *));
}
static inline void
-cl_f(struct vc4_cl *cl, float f)
+cl_f(struct vc4_cl_out **cl, float f)
{
cl_u32(cl, fui(f));
}
static inline void
-cl_aligned_f(struct vc4_cl *cl, float f)
+cl_aligned_f(struct vc4_cl_out **cl, float f)
{
cl_aligned_u32(cl, fui(f));
}
static inline void
-cl_start_reloc(struct vc4_cl *cl, uint32_t n)
+cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
{
assert(n == 1 || n == 2);
+#ifdef DEBUG
assert(cl->reloc_count == 0);
cl->reloc_count = n;
+#endif
- cl_u8(cl, VC4_PACKET_GEM_HANDLES);
- cl->reloc_next = cl->next - cl->base;
- cl_u32(cl, 0); /* Space where hindex will be written. */
- cl_u32(cl, 0); /* Space where hindex will be written. */
+ cl_u8(out, VC4_PACKET_GEM_HANDLES);
+ cl->reloc_next = *out;
+ cl_u32(out, 0); /* Space where hindex will be written. */
+ cl_u32(out, 0); /* Space where hindex will be written. */
}
-static inline void
+static inline struct vc4_cl_out *
cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
{
+#ifdef DEBUG
assert(cl->reloc_count == 0);
cl->reloc_count = n;
- cl->reloc_next = cl->next - cl->base;
+#endif
+ cl->reloc_next = cl->next;
+
+ /* Reserve the space where hindex will be written. */
+ cl_advance(&cl->next, n * 4);
- /* Space where hindex will be written. */
- cl->next += n * 4;
+ return cl->next;
}
static inline void
-cl_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset)
+cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
+ struct vc4_bo *bo, uint32_t offset)
{
- *(uint32_t *)(cl->base + cl->reloc_next) = hindex;
- cl->reloc_next += 4;
+ *(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
+ cl_advance(&cl->reloc_next, 4);
+#ifdef DEBUG
cl->reloc_count--;
+#endif
- cl_u32(cl, offset);
+ cl_u32(cl_out, offset);
}
static inline void
-cl_aligned_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset)
+cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
+ struct vc4_cl_out **cl_out,
+ struct vc4_bo *bo, uint32_t offset)
{
- *(uint32_t *)(cl->base + cl->reloc_next) = hindex;
- cl->reloc_next += 4;
+ *(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
+ cl_advance(&cl->reloc_next, 4);
+#ifdef DEBUG
cl->reloc_count--;
+#endif
- cl_aligned_u32(cl, offset);
-}
-
-static inline void
-cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
- struct vc4_bo *bo, uint32_t offset)
-{
- cl_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset);
-}
-
-static inline void
-cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
- struct vc4_bo *bo, uint32_t offset)
-{
- cl_aligned_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset);
+ cl_aligned_u32(cl_out, offset);
}
void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c
index 69055081daa..6d748010baf 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -34,7 +34,7 @@ dump_float(void *cl, uint32_t offset, uint32_t hw_offset)
void *f = cl + offset;
fprintf(stderr, "0x%08x 0x%08x: %f (0x%08x)\n",
- offset, hw_offset, *(float *)f, *(uint32_t *)f);
+ offset, hw_offset, uif(*(uint32_t *)f), *(uint32_t *)f);
}
static void
@@ -47,7 +47,33 @@ dump_VC4_PACKET_BRANCH_TO_SUB_LIST(void *cl, uint32_t offset, uint32_t hw_offset
}
static void
-dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+dump_loadstore_full(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ uint32_t bits = *(uint32_t *)(cl + offset);
+
+ fprintf(stderr, "0x%08x 0x%08x: addr 0x%08x%s%s%s%s\n",
+ offset, hw_offset,
+ bits & ~0xf,
+ (bits & VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL) ? "" : " clear",
+ (bits & VC4_LOADSTORE_FULL_RES_DISABLE_ZS) ? "" : " zs",
+ (bits & VC4_LOADSTORE_FULL_RES_DISABLE_COLOR) ? "" : " color",
+ (bits & VC4_LOADSTORE_FULL_RES_EOF) ? " eof" : "");
+}
+
+static void
+dump_VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_full(cl, offset, hw_offset);
+}
+
+static void
+dump_VC4_PACKET_STORE_FULL_RES_TILE_BUFFER(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_full(cl, offset, hw_offset);
+}
+
+static void
+dump_loadstore_general(void *cl, uint32_t offset, uint32_t hw_offset)
{
uint8_t *bytes = cl + offset;
uint32_t *addr = cl + offset + 2;
@@ -125,6 +151,18 @@ dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw
}
static void
+dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_general(cl, offset, hw_offset);
+}
+
+static void
+dump_VC4_PACKET_LOAD_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_general(cl, offset, hw_offset);
+}
+
+static void
dump_VC4_PACKET_FLAT_SHADE_FLAGS(void *cl, uint32_t offset, uint32_t hw_offset)
{
uint32_t *bits = cl + offset;
@@ -291,63 +329,63 @@ dump_VC4_PACKET_GEM_HANDLES(void *cl, uint32_t offset, uint32_t hw_offset)
offset, hw_offset, handles[0], handles[1]);
}
-#define PACKET_DUMP(name, size) [name] = { #name, size, dump_##name }
-#define PACKET(name, size) [name] = { #name, size, NULL }
+#define PACKET_DUMP(name) [name] = { #name, name ## _SIZE, dump_##name }
+#define PACKET(name) [name] = { #name, name ## _SIZE, NULL }
static const struct packet_info {
const char *name;
uint8_t size;
void (*dump_func)(void *cl, uint32_t offset, uint32_t hw_offset);
} packet_info[] = {
- PACKET(VC4_PACKET_HALT, 1),
- PACKET(VC4_PACKET_NOP, 1),
-
- PACKET(VC4_PACKET_FLUSH, 1),
- PACKET(VC4_PACKET_FLUSH_ALL, 1),
- PACKET(VC4_PACKET_START_TILE_BINNING, 1),
- PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, 1),
- PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE, 1),
-
- PACKET(VC4_PACKET_BRANCH, 5),
- PACKET_DUMP(VC4_PACKET_BRANCH_TO_SUB_LIST, 5),
-
- PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER, 1),
- PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF, 1),
- PACKET(VC4_PACKET_STORE_FULL_RES_TILE_BUFFER, 5),
- PACKET(VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER, 5),
- PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL, 7),
- PACKET(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL, 7),
-
- PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, 14),
- PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, 10),
-
- PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE, 48),
- PACKET(VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE, 49),
-
- PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, 2),
-
- PACKET(VC4_PACKET_GL_SHADER_STATE, 5),
- PACKET(VC4_PACKET_NV_SHADER_STATE, 5),
- PACKET(VC4_PACKET_VG_SHADER_STATE, 5),
-
- PACKET(VC4_PACKET_CONFIGURATION_BITS, 4),
- PACKET_DUMP(VC4_PACKET_FLAT_SHADE_FLAGS, 5),
- PACKET_DUMP(VC4_PACKET_POINT_SIZE, 5),
- PACKET_DUMP(VC4_PACKET_LINE_WIDTH, 5),
- PACKET(VC4_PACKET_RHT_X_BOUNDARY, 3),
- PACKET(VC4_PACKET_DEPTH_OFFSET, 5),
- PACKET(VC4_PACKET_CLIP_WINDOW, 9),
- PACKET_DUMP(VC4_PACKET_VIEWPORT_OFFSET, 5),
- PACKET(VC4_PACKET_Z_CLIPPING, 9),
- PACKET_DUMP(VC4_PACKET_CLIPPER_XY_SCALING, 9),
- PACKET_DUMP(VC4_PACKET_CLIPPER_Z_SCALING, 9),
-
- PACKET_DUMP(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 16),
- PACKET_DUMP(VC4_PACKET_TILE_RENDERING_MODE_CONFIG, 11),
- PACKET(VC4_PACKET_CLEAR_COLORS, 14),
- PACKET_DUMP(VC4_PACKET_TILE_COORDINATES, 3),
-
- PACKET_DUMP(VC4_PACKET_GEM_HANDLES, 9),
+ PACKET(VC4_PACKET_HALT),
+ PACKET(VC4_PACKET_NOP),
+
+ PACKET(VC4_PACKET_FLUSH),
+ PACKET(VC4_PACKET_FLUSH_ALL),
+ PACKET(VC4_PACKET_START_TILE_BINNING),
+ PACKET(VC4_PACKET_INCREMENT_SEMAPHORE),
+ PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE),
+
+ PACKET(VC4_PACKET_BRANCH),
+ PACKET_DUMP(VC4_PACKET_BRANCH_TO_SUB_LIST),
+
+ PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER),
+ PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF),
+ PACKET_DUMP(VC4_PACKET_STORE_FULL_RES_TILE_BUFFER),
+ PACKET_DUMP(VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER),
+ PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL),
+ PACKET_DUMP(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL),
+
+ PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE),
+ PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE),
+
+ PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE),
+ PACKET(VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE),
+
+ PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT),
+
+ PACKET(VC4_PACKET_GL_SHADER_STATE),
+ PACKET(VC4_PACKET_NV_SHADER_STATE),
+ PACKET(VC4_PACKET_VG_SHADER_STATE),
+
+ PACKET(VC4_PACKET_CONFIGURATION_BITS),
+ PACKET_DUMP(VC4_PACKET_FLAT_SHADE_FLAGS),
+ PACKET_DUMP(VC4_PACKET_POINT_SIZE),
+ PACKET_DUMP(VC4_PACKET_LINE_WIDTH),
+ PACKET(VC4_PACKET_RHT_X_BOUNDARY),
+ PACKET(VC4_PACKET_DEPTH_OFFSET),
+ PACKET(VC4_PACKET_CLIP_WINDOW),
+ PACKET_DUMP(VC4_PACKET_VIEWPORT_OFFSET),
+ PACKET(VC4_PACKET_Z_CLIPPING),
+ PACKET_DUMP(VC4_PACKET_CLIPPER_XY_SCALING),
+ PACKET_DUMP(VC4_PACKET_CLIPPER_Z_SCALING),
+
+ PACKET_DUMP(VC4_PACKET_TILE_BINNING_MODE_CONFIG),
+ PACKET_DUMP(VC4_PACKET_TILE_RENDERING_MODE_CONFIG),
+ PACKET(VC4_PACKET_CLEAR_COLORS),
+ PACKET_DUMP(VC4_PACKET_TILE_COORDINATES),
+
+ PACKET_DUMP(VC4_PACKET_GEM_HANDLES),
};
void
@@ -359,7 +397,7 @@ vc4_dump_cl(void *cl, uint32_t size, bool is_render)
while (offset < size) {
uint8_t header = cmds[offset];
- if (header > ARRAY_SIZE(packet_info) ||
+ if (header >= ARRAY_SIZE(packet_info) ||
!packet_info[header].name) {
fprintf(stderr, "0x%08x 0x%08x: Unknown packet 0x%02x (%d)!\n",
offset, hw_offset, header, header);
diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index 630f8e68896..fff63158c9d 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -61,9 +61,11 @@ vc4_flush(struct pipe_context *pctx)
* FLUSH completes.
*/
cl_ensure_space(&vc4->bcl, 8);
- cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
+ cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
/* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
- cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
+ cl_u8(&bcl, VC4_PACKET_FLUSH);
+ cl_end(&vc4->bcl, bcl);
if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) {
pipe_surface_reference(&vc4->color_write, cbuf);
@@ -103,8 +105,10 @@ vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
vc4_flush(pctx);
if (fence) {
+ struct pipe_screen *screen = pctx->screen;
struct vc4_fence *f = vc4_fence_create(vc4->screen,
vc4->last_emit_seqno);
+ screen->fence_reference(screen, fence, NULL);
*fence = (struct pipe_fence_handle *)f;
}
}
@@ -126,8 +130,7 @@ vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo)
* they match.
*/
struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
- for (int i = 0; i < (vc4->bo_handles.next -
- vc4->bo_handles.base) / 4; i++) {
+ for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
if (referenced_bos[i] == bo) {
return true;
}
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index d5d6be16f6e..654c46f3c0d 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -67,7 +67,20 @@
#define VC4_DIRTY_CLIP (1 << 20)
#define VC4_DIRTY_UNCOMPILED_VS (1 << 21)
#define VC4_DIRTY_UNCOMPILED_FS (1 << 22)
-#define VC4_DIRTY_COMPILED_FS (1 << 24)
+#define VC4_DIRTY_COMPILED_CS (1 << 23)
+#define VC4_DIRTY_COMPILED_VS (1 << 24)
+#define VC4_DIRTY_COMPILED_FS (1 << 25)
+
+struct vc4_sampler_view {
+ struct pipe_sampler_view base;
+ uint32_t texture_p0;
+ uint32_t texture_p1;
+};
+
+struct vc4_sampler_state {
+ struct pipe_sampler_state base;
+ uint32_t texture_p1;
+};
struct vc4_texture_stateobj {
struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
@@ -121,6 +134,12 @@ struct vc4_compiled_shader {
struct vc4_ubo_range *ubo_ranges;
uint32_t num_ubo_ranges;
uint32_t ubo_size;
+ /**
+ * VC4_DIRTY_* flags that, when set in vc4->dirty, mean that the
+ * uniforms have to be rewritten (and therefore the shader state
+ * reemitted).
+ */
+ uint32_t uniform_dirty_bits;
/** bitmask of which inputs are color inputs, for flat shade handling. */
uint32_t color_inputs;
@@ -238,6 +257,11 @@ struct vc4_context {
*/
bool draw_call_queued;
+ /** Maximum index buffer valid for the current shader_rec. */
+ uint32_t max_index;
+ /** Last index bias baked into the current shader_rec. */
+ uint32_t last_index_bias;
+
struct primconvert_context *primconvert;
struct hash_table *fs_cache, *vs_cache;
@@ -246,6 +270,7 @@ struct vc4_context {
struct ra_regs *regs;
unsigned int reg_class_any;
+ unsigned int reg_class_r4_or_a;
unsigned int reg_class_a;
uint8_t prim_mode;
@@ -326,6 +351,18 @@ vc4_context(struct pipe_context *pcontext)
return (struct vc4_context *)pcontext;
}
+static inline struct vc4_sampler_view *
+vc4_sampler_view(struct pipe_sampler_view *psview)
+{
+ return (struct vc4_sampler_view *)psview;
+}
+
+static inline struct vc4_sampler_state *
+vc4_sampler_state(struct pipe_sampler_state *psampler)
+{
+ return (struct vc4_sampler_state *)psampler;
+}
+
struct pipe_context *vc4_context_create(struct pipe_screen *pscreen,
void *priv);
void vc4_draw_init(struct pipe_context *pctx);
@@ -337,6 +374,7 @@ void vc4_simulator_init(struct vc4_screen *screen);
int vc4_simulator_flush(struct vc4_context *vc4,
struct drm_vc4_submit_cl *args);
+void vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader);
void vc4_write_uniforms(struct vc4_context *vc4,
struct vc4_compiled_shader *shader,
struct vc4_constbuf_stateobj *cb,
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 5e6d70d6f33..a4e5e092b1a 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -71,37 +71,40 @@ vc4_start_draw(struct vc4_context *vc4)
uint32_t height = vc4->framebuffer.height;
uint32_t tilew = align(width, 64) / 64;
uint32_t tileh = align(height, 64) / 64;
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
// Tile state data is 48 bytes per tile, I think it can be thrown away
// as soon as binning is finished.
- cl_u8(&vc4->bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
- cl_u32(&vc4->bcl, 0); /* tile alloc addr, filled by kernel */
- cl_u32(&vc4->bcl, 0); /* tile alloc size, filled by kernel */
- cl_u32(&vc4->bcl, 0); /* tile state addr, filled by kernel */
- cl_u8(&vc4->bcl, tilew);
- cl_u8(&vc4->bcl, tileh);
- cl_u8(&vc4->bcl, 0); /* flags, filled by kernel. */
+ cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
+ cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */
+ cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */
+ cl_u32(&bcl, 0); /* tile state addr, filled by kernel */
+ cl_u8(&bcl, tilew);
+ cl_u8(&bcl, tileh);
+ cl_u8(&bcl, 0); /* flags, filled by kernel. */
/* START_TILE_BINNING resets the statechange counters in the hardware,
* which are what is used when a primitive is binned to a tile to
* figure out what new state packets need to be written to that tile's
* command list.
*/
- cl_u8(&vc4->bcl, VC4_PACKET_START_TILE_BINNING);
+ cl_u8(&bcl, VC4_PACKET_START_TILE_BINNING);
/* Reset the current compressed primitives format. This gets modified
* by VC4_PACKET_GL_INDEXED_PRIMITIVE and
* VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
* of every tile.
*/
- cl_u8(&vc4->bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
- cl_u8(&vc4->bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
- VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
+ cl_u8(&bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
+ cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
+ VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
vc4->needs_flush = true;
vc4->draw_call_queued = true;
vc4->draw_width = width;
vc4->draw_height = height;
+
+ cl_end(&vc4->bcl, bcl);
}
static void
@@ -119,96 +122,67 @@ vc4_update_shadow_textures(struct pipe_context *pctx,
}
static void
-vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
+vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *info)
{
- struct vc4_context *vc4 = vc4_context(pctx);
-
- if (info->mode >= PIPE_PRIM_QUADS) {
- util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
- util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
- util_primconvert_draw_vbo(vc4->primconvert, info);
- perf_debug("Fallback conversion for %d %s vertices\n",
- info->count, u_prim_name(info->mode));
- return;
- }
-
- /* Before setting up the draw, do any fixup blits necessary. */
- vc4_update_shadow_textures(pctx, &vc4->verttex);
- vc4_update_shadow_textures(pctx, &vc4->fragtex);
-
- vc4_get_draw_cl_space(vc4);
-
+ /* VC4_DIRTY_VTXSTATE */
struct vc4_vertex_stateobj *vtx = vc4->vtx;
+ /* VC4_DIRTY_VTXBUF */
struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;
- if (vc4->prim_mode != info->mode) {
- vc4->prim_mode = info->mode;
- vc4->dirty |= VC4_DIRTY_PRIM_MODE;
- }
-
- vc4_start_draw(vc4);
- vc4_update_compiled_shaders(vc4, info->mode);
-
- vc4_emit_state(pctx);
- vc4->dirty = 0;
-
- vc4_write_uniforms(vc4, vc4->prog.fs,
- &vc4->constbuf[PIPE_SHADER_FRAGMENT],
- &vc4->fragtex);
- vc4_write_uniforms(vc4, vc4->prog.vs,
- &vc4->constbuf[PIPE_SHADER_VERTEX],
- &vc4->verttex);
- vc4_write_uniforms(vc4, vc4->prog.cs,
- &vc4->constbuf[PIPE_SHADER_VERTEX],
- &vc4->verttex);
-
/* The simulator throws a fit if VS or CS don't read an attribute, so
* we emit a dummy read.
*/
uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
/* Emit the shader record. */
- cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
- cl_u16(&vc4->shader_rec,
+ struct vc4_cl_out *shader_rec =
+ cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
+ /* VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER */
+ cl_u16(&shader_rec,
VC4_SHADER_FLAG_ENABLE_CLIPPING |
+ VC4_SHADER_FLAG_FS_SINGLE_THREAD |
((info->mode == PIPE_PRIM_POINTS &&
vc4->rasterizer->base.point_size_per_vertex) ?
VC4_SHADER_FLAG_VS_POINT_SIZE : 0));
- cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */
- cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs);
- cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0);
- cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
-
- cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */
- cl_u8(&vc4->shader_rec, vc4->prog.vs->vattrs_live);
- cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[8]);
- cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0);
- cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
-
- cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */
- cl_u8(&vc4->shader_rec, vc4->prog.cs->vattrs_live);
- cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[8]);
- cl_reloc(vc4, &vc4->shader_rec, vc4->prog.cs->bo, 0);
- cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
+
+ /* VC4_DIRTY_COMPILED_FS */
+ cl_u8(&shader_rec, 0); /* fs num uniforms (unused) */
+ cl_u8(&shader_rec, vc4->prog.fs->num_inputs);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.fs->bo, 0);
+ cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
+
+ /* VC4_DIRTY_COMPILED_VS */
+ cl_u16(&shader_rec, 0); /* vs num uniforms */
+ cl_u8(&shader_rec, vc4->prog.vs->vattrs_live);
+ cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[8]);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.vs->bo, 0);
+ cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
+
+ /* VC4_DIRTY_COMPILED_CS */
+ cl_u16(&shader_rec, 0); /* cs num uniforms */
+ cl_u8(&shader_rec, vc4->prog.cs->vattrs_live);
+ cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[8]);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.cs->bo, 0);
+ cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
uint32_t max_index = 0xffff;
- uint32_t vpm_offset = 0;
for (int i = 0; i < vtx->num_elements; i++) {
struct pipe_vertex_element *elem = &vtx->pipe[i];
struct pipe_vertex_buffer *vb =
&vertexbuf->vb[elem->vertex_buffer_index];
struct vc4_resource *rsc = vc4_resource(vb->buffer);
- uint32_t offset = vb->buffer_offset + elem->src_offset;
+ /* not vc4->dirty tracked: vc4->last_index_bias */
+ uint32_t offset = (vb->buffer_offset +
+ elem->src_offset +
+ vb->stride * info->index_bias);
uint32_t vb_size = rsc->bo->size - offset;
uint32_t elem_size =
util_format_get_blocksize(elem->src_format);
- cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset);
- cl_u8(&vc4->shader_rec, elem_size - 1);
- cl_u8(&vc4->shader_rec, vb->stride);
- cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[i]);
- cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[i]);
-
- vpm_offset += align(elem_size, 4);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, rsc->bo, offset);
+ cl_u8(&shader_rec, elem_size - 1);
+ cl_u8(&shader_rec, vb->stride);
+ cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[i]);
+ cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[i]);
if (vb->stride > 0) {
max_index = MIN2(max_index,
@@ -219,25 +193,89 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
if (vtx->num_elements == 0) {
assert(num_elements_emit == 1);
struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
- cl_reloc(vc4, &vc4->shader_rec, bo, 0);
- cl_u8(&vc4->shader_rec, 16 - 1); /* element size */
- cl_u8(&vc4->shader_rec, 0); /* stride */
- cl_u8(&vc4->shader_rec, 0); /* VS VPM offset */
- cl_u8(&vc4->shader_rec, 0); /* CS VPM offset */
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, bo, 0);
+ cl_u8(&shader_rec, 16 - 1); /* element size */
+ cl_u8(&shader_rec, 0); /* stride */
+ cl_u8(&shader_rec, 0); /* VS VPM offset */
+ cl_u8(&shader_rec, 0); /* CS VPM offset */
vc4_bo_unreference(&bo);
}
+ cl_end(&vc4->shader_rec, shader_rec);
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
/* the actual draw call. */
- cl_u8(&vc4->bcl, VC4_PACKET_GL_SHADER_STATE);
+ cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
assert(vtx->num_elements <= 8);
/* Note that number of attributes == 0 in the packet means 8
* attributes. This field also contains the offset into shader_rec.
*/
- cl_u32(&vc4->bcl, num_elements_emit & 0x7);
+ cl_u32(&bcl, num_elements_emit & 0x7);
+ cl_end(&vc4->bcl, bcl);
+
+ vc4_write_uniforms(vc4, vc4->prog.fs,
+ &vc4->constbuf[PIPE_SHADER_FRAGMENT],
+ &vc4->fragtex);
+ vc4_write_uniforms(vc4, vc4->prog.vs,
+ &vc4->constbuf[PIPE_SHADER_VERTEX],
+ &vc4->verttex);
+ vc4_write_uniforms(vc4, vc4->prog.cs,
+ &vc4->constbuf[PIPE_SHADER_VERTEX],
+ &vc4->verttex);
+
+ vc4->last_index_bias = info->index_bias;
+ vc4->max_index = max_index;
+}
+
+static void
+vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
+{
+ struct vc4_context *vc4 = vc4_context(pctx);
+
+ if (info->mode >= PIPE_PRIM_QUADS) {
+ util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
+ util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
+ util_primconvert_draw_vbo(vc4->primconvert, info);
+ perf_debug("Fallback conversion for %d %s vertices\n",
+ info->count, u_prim_name(info->mode));
+ return;
+ }
+
+ /* Before setting up the draw, do any fixup blits necessary. */
+ vc4_update_shadow_textures(pctx, &vc4->verttex);
+ vc4_update_shadow_textures(pctx, &vc4->fragtex);
+
+ vc4_get_draw_cl_space(vc4);
+
+ if (vc4->prim_mode != info->mode) {
+ vc4->prim_mode = info->mode;
+ vc4->dirty |= VC4_DIRTY_PRIM_MODE;
+ }
+
+ vc4_start_draw(vc4);
+ vc4_update_compiled_shaders(vc4, info->mode);
+
+ vc4_emit_state(pctx);
+
+ if ((vc4->dirty & (VC4_DIRTY_VTXBUF |
+ VC4_DIRTY_VTXSTATE |
+ VC4_DIRTY_PRIM_MODE |
+ VC4_DIRTY_RASTERIZER |
+ VC4_DIRTY_COMPILED_CS |
+ VC4_DIRTY_COMPILED_VS |
+ VC4_DIRTY_COMPILED_FS |
+ vc4->prog.cs->uniform_dirty_bits |
+ vc4->prog.vs->uniform_dirty_bits |
+ vc4->prog.fs->uniform_dirty_bits)) ||
+ vc4->last_index_bias != info->index_bias) {
+ vc4_emit_gl_shader_state(vc4, info);
+ }
+
+ vc4->dirty = 0;
/* Note that the primitive type fields match with OpenGL/gallium
* definitions, up to but not including QUADS.
*/
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
if (info->indexed) {
uint32_t offset = vc4->indexbuf.offset;
uint32_t index_size = vc4->indexbuf.index_size;
@@ -251,25 +289,26 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
}
struct vc4_resource *rsc = vc4_resource(prsc);
- cl_start_reloc(&vc4->bcl, 1);
- cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
- cl_u8(&vc4->bcl,
+ cl_start_reloc(&vc4->bcl, &bcl, 1);
+ cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
+ cl_u8(&bcl,
info->mode |
(index_size == 2 ?
VC4_INDEX_BUFFER_U16:
VC4_INDEX_BUFFER_U8));
- cl_u32(&vc4->bcl, info->count);
- cl_reloc(vc4, &vc4->bcl, rsc->bo, offset);
- cl_u32(&vc4->bcl, max_index);
+ cl_u32(&bcl, info->count);
+ cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset);
+ cl_u32(&bcl, vc4->max_index);
if (vc4->indexbuf.index_size == 4)
pipe_resource_reference(&prsc, NULL);
} else {
- cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
- cl_u8(&vc4->bcl, info->mode);
- cl_u32(&vc4->bcl, info->count);
- cl_u32(&vc4->bcl, info->start);
+ cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
+ cl_u8(&bcl, info->mode);
+ cl_u32(&bcl, info->count);
+ cl_u32(&bcl, info->start);
}
+ cl_end(&vc4->bcl, bcl);
if (vc4->zsa && vc4->zsa->base.depth.enabled) {
vc4->resolve |= PIPE_CLEAR_DEPTH;
diff --git a/src/gallium/drivers/vc4/vc4_drm.h b/src/gallium/drivers/vc4/vc4_drm.h
index 5f1ee4fa125..863ef8da8fb 100644
--- a/src/gallium/drivers/vc4/vc4_drm.h
+++ b/src/gallium/drivers/vc4/vc4_drm.h
@@ -31,12 +31,14 @@
#define DRM_VC4_WAIT_BO 0x02
#define DRM_VC4_CREATE_BO 0x03
#define DRM_VC4_MMAP_BO 0x04
+#define DRM_VC4_CREATE_SHADER_BO 0x05
#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
#define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo)
#define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
+#define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
struct drm_vc4_submit_rcl_surface {
uint32_t hindex; /* Handle index, or ~0 if not present. */
@@ -183,6 +185,29 @@ struct drm_vc4_create_bo {
};
/**
+ * struct drm_vc4_create_shader_bo - ioctl argument for creating VC4
+ * shader BOs.
+ *
+ * Since allowing a shader to be overwritten while it's also being
+ * executed from would allow privilege escalation, shaders must be
+ * created using this ioctl, and they can't be mmapped later.
+ */
+struct drm_vc4_create_shader_bo {
+ /* Size of the data argument. */
+ uint32_t size;
+ /* Flags, currently must be 0. */
+ uint32_t flags;
+
+ /* Pointer to the data. */
+ uint64_t data;
+
+ /** Returned GEM handle for the BO. */
+ uint32_t handle;
+ /* Pad, must be 0. */
+ uint32_t pad;
+};
+
+/**
* struct drm_vc4_mmap_bo - ioctl argument for mapping VC4 BOs.
*
* This doesn't actually perform an mmap. Instead, it returns the
diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c
index d2b54fccf91..ba064ff889b 100644
--- a/src/gallium/drivers/vc4/vc4_emit.c
+++ b/src/gallium/drivers/vc4/vc4_emit.c
@@ -28,23 +28,24 @@ vc4_emit_state(struct pipe_context *pctx)
{
struct vc4_context *vc4 = vc4_context(pctx);
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT)) {
float *vpscale = vc4->viewport.scale;
float *vptranslate = vc4->viewport.translate;
- float vp_minx = -fabs(vpscale[0]) + vptranslate[0];
- float vp_maxx = fabs(vpscale[0]) + vptranslate[0];
- float vp_miny = -fabs(vpscale[1]) + vptranslate[1];
- float vp_maxy = fabs(vpscale[1]) + vptranslate[1];
+ float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
+ float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
+ float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
+ float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
uint32_t minx = MAX2(vc4->scissor.minx, vp_minx);
uint32_t miny = MAX2(vc4->scissor.miny, vp_miny);
uint32_t maxx = MIN2(vc4->scissor.maxx, vp_maxx);
uint32_t maxy = MIN2(vc4->scissor.maxy, vp_maxy);
- cl_u8(&vc4->bcl, VC4_PACKET_CLIP_WINDOW);
- cl_u16(&vc4->bcl, minx);
- cl_u16(&vc4->bcl, miny);
- cl_u16(&vc4->bcl, maxx - minx);
- cl_u16(&vc4->bcl, maxy - miny);
+ cl_u8(&bcl, VC4_PACKET_CLIP_WINDOW);
+ cl_u16(&bcl, minx);
+ cl_u16(&bcl, miny);
+ cl_u16(&bcl, maxx - minx);
+ cl_u16(&bcl, maxy - miny);
vc4->draw_min_x = MIN2(vc4->draw_min_x, minx);
vc4->draw_min_y = MIN2(vc4->draw_min_y, miny);
@@ -53,47 +54,49 @@ vc4_emit_state(struct pipe_context *pctx)
}
if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) {
- cl_u8(&vc4->bcl, VC4_PACKET_CONFIGURATION_BITS);
- cl_u8(&vc4->bcl,
+ cl_u8(&bcl, VC4_PACKET_CONFIGURATION_BITS);
+ cl_u8(&bcl,
vc4->rasterizer->config_bits[0] |
vc4->zsa->config_bits[0]);
- cl_u8(&vc4->bcl,
+ cl_u8(&bcl,
vc4->rasterizer->config_bits[1] |
vc4->zsa->config_bits[1]);
- cl_u8(&vc4->bcl,
+ cl_u8(&bcl,
vc4->rasterizer->config_bits[2] |
vc4->zsa->config_bits[2]);
}
if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
- cl_u8(&vc4->bcl, VC4_PACKET_DEPTH_OFFSET);
- cl_u16(&vc4->bcl, vc4->rasterizer->offset_factor);
- cl_u16(&vc4->bcl, vc4->rasterizer->offset_units);
+ cl_u8(&bcl, VC4_PACKET_DEPTH_OFFSET);
+ cl_u16(&bcl, vc4->rasterizer->offset_factor);
+ cl_u16(&bcl, vc4->rasterizer->offset_units);
- cl_u8(&vc4->bcl, VC4_PACKET_POINT_SIZE);
- cl_f(&vc4->bcl, vc4->rasterizer->point_size);
+ cl_u8(&bcl, VC4_PACKET_POINT_SIZE);
+ cl_f(&bcl, vc4->rasterizer->point_size);
- cl_u8(&vc4->bcl, VC4_PACKET_LINE_WIDTH);
- cl_f(&vc4->bcl, vc4->rasterizer->base.line_width);
+ cl_u8(&bcl, VC4_PACKET_LINE_WIDTH);
+ cl_f(&bcl, vc4->rasterizer->base.line_width);
}
if (vc4->dirty & VC4_DIRTY_VIEWPORT) {
- cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_XY_SCALING);
- cl_f(&vc4->bcl, vc4->viewport.scale[0] * 16.0f);
- cl_f(&vc4->bcl, vc4->viewport.scale[1] * 16.0f);
+ cl_u8(&bcl, VC4_PACKET_CLIPPER_XY_SCALING);
+ cl_f(&bcl, vc4->viewport.scale[0] * 16.0f);
+ cl_f(&bcl, vc4->viewport.scale[1] * 16.0f);
- cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_Z_SCALING);
- cl_f(&vc4->bcl, vc4->viewport.translate[2]);
- cl_f(&vc4->bcl, vc4->viewport.scale[2]);
+ cl_u8(&bcl, VC4_PACKET_CLIPPER_Z_SCALING);
+ cl_f(&bcl, vc4->viewport.translate[2]);
+ cl_f(&bcl, vc4->viewport.scale[2]);
- cl_u8(&vc4->bcl, VC4_PACKET_VIEWPORT_OFFSET);
- cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[0]);
- cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[1]);
+ cl_u8(&bcl, VC4_PACKET_VIEWPORT_OFFSET);
+ cl_u16(&bcl, 16 * vc4->viewport.translate[0]);
+ cl_u16(&bcl, 16 * vc4->viewport.translate[1]);
}
if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) {
- cl_u8(&vc4->bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
- cl_u32(&vc4->bcl, vc4->rasterizer->base.flatshade ?
+ cl_u8(&bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
+ cl_u32(&bcl, vc4->rasterizer->base.flatshade ?
vc4->prog.fs->color_inputs : 0);
}
+
+ cl_end(&vc4->bcl, bcl);
}
diff --git a/src/gallium/drivers/vc4/vc4_fence.c b/src/gallium/drivers/vc4/vc4_fence.c
index f2ee91de61a..b6fb2a8a460 100644
--- a/src/gallium/drivers/vc4/vc4_fence.c
+++ b/src/gallium/drivers/vc4/vc4_fence.c
@@ -60,16 +60,6 @@ vc4_fence_reference(struct pipe_screen *pscreen,
}
static boolean
-vc4_fence_signalled(struct pipe_screen *pscreen,
- struct pipe_fence_handle *pf)
-{
- struct vc4_screen *screen = vc4_screen(pscreen);
- struct vc4_fence *f = (struct vc4_fence *)pf;
-
- return vc4_wait_seqno(screen, f->seqno, 0);
-}
-
-static boolean
vc4_fence_finish(struct pipe_screen *pscreen,
struct pipe_fence_handle *pf,
uint64_t timeout_ns)
@@ -77,7 +67,7 @@ vc4_fence_finish(struct pipe_screen *pscreen,
struct vc4_screen *screen = vc4_screen(pscreen);
struct vc4_fence *f = (struct vc4_fence *)pf;
- return vc4_wait_seqno(screen, f->seqno, timeout_ns);
+ return vc4_wait_seqno(screen, f->seqno, timeout_ns, "fence wait");
}
struct vc4_fence *
@@ -98,6 +88,5 @@ void
vc4_fence_init(struct vc4_screen *screen)
{
screen->base.fence_reference = vc4_fence_reference;
- screen->base.fence_signalled = vc4_fence_signalled;
screen->base.fence_finish = vc4_fence_finish;
}
diff --git a/src/gallium/drivers/vc4/vc4_formats.c b/src/gallium/drivers/vc4/vc4_formats.c
index 004bac70c67..ffce61237de 100644
--- a/src/gallium/drivers/vc4/vc4_formats.c
+++ b/src/gallium/drivers/vc4/vc4_formats.c
@@ -108,7 +108,7 @@ static const struct vc4_format vc4_format_table[] = {
static const struct vc4_format *
get_format(enum pipe_format f)
{
- if (f > ARRAY_SIZE(vc4_format_table) ||
+ if (f >= ARRAY_SIZE(vc4_format_table) ||
!vc4_format_table[f].present)
return NULL;
else
diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c
index dcade15443a..7ebd9f160eb 100644
--- a/src/gallium/drivers/vc4/vc4_job.c
+++ b/src/gallium/drivers/vc4/vc4_job.c
@@ -44,8 +44,7 @@ void
vc4_job_reset(struct vc4_context *vc4)
{
struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
- for (int i = 0; i < (vc4->bo_handles.next -
- vc4->bo_handles.base) / 4; i++) {
+ for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
vc4_bo_unreference(&referenced_bos[i]);
}
vc4_reset_cl(&vc4->bcl);
@@ -145,7 +144,7 @@ vc4_job_submit(struct vc4_context *vc4)
{
if (vc4_debug & VC4_DEBUG_CL) {
fprintf(stderr, "BCL:\n");
- vc4_dump_cl(vc4->bcl.base, vc4->bcl.next - vc4->bcl.base, false);
+ vc4_dump_cl(vc4->bcl.base, cl_offset(&vc4->bcl), false);
}
struct drm_vc4_submit_cl submit;
@@ -164,15 +163,14 @@ vc4_job_submit(struct vc4_context *vc4)
vc4->zs_write, true, true);
submit.bo_handles = (uintptr_t)vc4->bo_handles.base;
- submit.bo_handle_count = (vc4->bo_handles.next -
- vc4->bo_handles.base) / 4;
+ submit.bo_handle_count = cl_offset(&vc4->bo_handles) / 4;
submit.bin_cl = (uintptr_t)vc4->bcl.base;
- submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base;
+ submit.bin_cl_size = cl_offset(&vc4->bcl);
submit.shader_rec = (uintptr_t)vc4->shader_rec.base;
- submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base;
+ submit.shader_rec_size = cl_offset(&vc4->shader_rec);
submit.shader_rec_count = vc4->shader_rec_count;
submit.uniforms = (uintptr_t)vc4->uniforms.base;
- submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base;
+ submit.uniforms_size = cl_offset(&vc4->uniforms);
assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
submit.min_x_tile = vc4->draw_min_x / 64;
@@ -207,7 +205,7 @@ vc4_job_submit(struct vc4_context *vc4)
if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
- PIPE_TIMEOUT_INFINITE)) {
+ PIPE_TIMEOUT_INFINITE, "sync")) {
fprintf(stderr, "Wait failed.\n");
abort();
}
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
new file mode 100644
index 00000000000..a372a6c0cdc
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -0,0 +1,431 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * Implements most of the fixed function fragment pipeline in shader code.
+ *
+ * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
+ * or color mask. Instead, you read the current contents of the destination
+ * from the tile buffer after having waited for the scoreboard (which is
+ * handled by vc4_qpu_emit.c), then do math using your output color and that
+ * destination value, and update the output color appropriately.
+ */
+
+/**
+ * Lowers fixed-function blending to a load of the destination color and a
+ * series of ALU operations before the store of the output.
+ */
+#include "util/u_format.h"
+#include "vc4_qir.h"
+#include "glsl/nir/nir_builder.h"
+#include "vc4_context.h"
+
+/** Emits a load of the previous fragment color from the tile buffer. */
+static nir_ssa_def *
+vc4_nir_get_dst_color(nir_builder *b)
+{
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_load_input);
+ load->num_components = 1;
+ load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT;
+ nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
+ nir_builder_instr_insert(b, &load->instr);
+ return &load->dest.ssa;
+}
+
+static nir_ssa_def *
+vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb)
+{
+ nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045));
+ nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92));
+ nir_ssa_def *high = nir_fpow(b,
+ nir_fmul(b,
+ nir_fadd(b, srgb,
+ nir_imm_float(b, 0.055)),
+ nir_imm_float(b, 1.0 / 1.055)),
+ nir_imm_float(b, 2.4));
+
+ return nir_bcsel(b, is_low, low, high);
+}
+
+static nir_ssa_def *
+vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
+{
+ nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308));
+ nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92));
+ nir_ssa_def *high = nir_fsub(b,
+ nir_fmul(b,
+ nir_imm_float(b, 1.055),
+ nir_fpow(b,
+ linear,
+ nir_imm_float(b, 0.41666))),
+ nir_imm_float(b, 0.055));
+
+ return nir_bcsel(b, is_low, low, high);
+}
+
+static nir_ssa_def *
+vc4_blend_channel(nir_builder *b,
+ nir_ssa_def **src,
+ nir_ssa_def **dst,
+ unsigned factor,
+ int channel)
+{
+ switch(factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ return nir_imm_float(b, 1.0);
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return src[channel];
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return src[3];
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return dst[3];
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return dst[channel];
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ if (channel != 3) {
+ return nir_fmin(b,
+ src[3],
+ nir_fsub(b,
+ nir_imm_float(b, 1.0),
+ dst[3]));
+ } else {
+ return nir_imm_float(b, 1.0);
+ }
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel);
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W);
+ case PIPE_BLENDFACTOR_ZERO:
+ return nir_imm_float(b, 0.0);
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return nir_fsub(b, nir_imm_float(b, 1.0),
+ vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel));
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ return nir_fsub(b, nir_imm_float(b, 1.0),
+ vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W));
+
+ default:
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ /* Unsupported. */
+ fprintf(stderr, "Unknown blend factor %d\n", factor);
+ return nir_imm_float(b, 1.0);
+ }
+}
+
+static nir_ssa_def *
+vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
+ unsigned func)
+{
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ return nir_fadd(b, src, dst);
+ case PIPE_BLEND_SUBTRACT:
+ return nir_fsub(b, src, dst);
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return nir_fsub(b, dst, src);
+ case PIPE_BLEND_MIN:
+ return nir_fmin(b, src, dst);
+ case PIPE_BLEND_MAX:
+ return nir_fmax(b, src, dst);
+
+ default:
+ /* Unsupported. */
+ fprintf(stderr, "Unknown blend func %d\n", func);
+ return src;
+
+ }
+}
+
+static void
+vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
+ nir_ssa_def **src_color, nir_ssa_def **dst_color)
+{
+ struct pipe_rt_blend_state *blend = &c->fs_key->blend;
+
+ if (!blend->blend_enable) {
+ for (int i = 0; i < 4; i++)
+ result[i] = src_color[i];
+ return;
+ }
+
+ /* Clamp the src color to [0, 1]. Dest is already clamped. */
+ for (int i = 0; i < 4; i++)
+ src_color[i] = nir_fsat(b, src_color[i]);
+
+ nir_ssa_def *src_blend[4], *dst_blend[4];
+ for (int i = 0; i < 4; i++) {
+ int src_factor = ((i != 3) ? blend->rgb_src_factor :
+ blend->alpha_src_factor);
+ int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
+ blend->alpha_dst_factor);
+ src_blend[i] = nir_fmul(b, src_color[i],
+ vc4_blend_channel(b,
+ src_color, dst_color,
+ src_factor, i));
+ dst_blend[i] = nir_fmul(b, dst_color[i],
+ vc4_blend_channel(b,
+ src_color, dst_color,
+ dst_factor, i));
+ }
+
+ for (int i = 0; i < 4; i++) {
+ result[i] = vc4_blend_func(b, src_blend[i], dst_blend[i],
+ ((i != 3) ? blend->rgb_func :
+ blend->alpha_func));
+ }
+}
+
+static nir_ssa_def *
+vc4_logicop(nir_builder *b, int logicop_func,
+ nir_ssa_def *src, nir_ssa_def *dst)
+{
+ switch (logicop_func) {
+ case PIPE_LOGICOP_CLEAR:
+ return nir_imm_int(b, 0);
+ case PIPE_LOGICOP_NOR:
+ return nir_inot(b, nir_ior(b, src, dst));
+ case PIPE_LOGICOP_AND_INVERTED:
+ return nir_iand(b, nir_inot(b, src), dst);
+ case PIPE_LOGICOP_COPY_INVERTED:
+ return nir_inot(b, src);
+ case PIPE_LOGICOP_AND_REVERSE:
+ return nir_iand(b, src, nir_inot(b, dst));
+ case PIPE_LOGICOP_INVERT:
+ return nir_inot(b, dst);
+ case PIPE_LOGICOP_XOR:
+ return nir_ixor(b, src, dst);
+ case PIPE_LOGICOP_NAND:
+ return nir_inot(b, nir_iand(b, src, dst));
+ case PIPE_LOGICOP_AND:
+ return nir_iand(b, src, dst);
+ case PIPE_LOGICOP_EQUIV:
+ return nir_inot(b, nir_ixor(b, src, dst));
+ case PIPE_LOGICOP_NOOP:
+ return dst;
+ case PIPE_LOGICOP_OR_INVERTED:
+ return nir_ior(b, nir_inot(b, src), dst);
+ case PIPE_LOGICOP_OR_REVERSE:
+ return nir_ior(b, src, nir_inot(b, dst));
+ case PIPE_LOGICOP_OR:
+ return nir_ior(b, src, dst);
+ case PIPE_LOGICOP_SET:
+ return nir_imm_int(b, ~0);
+ default:
+ fprintf(stderr, "Unknown logic op %d\n", logicop_func);
+ /* FALLTHROUGH */
+ case PIPE_LOGICOP_COPY:
+ return src;
+ }
+}
+
+static nir_ssa_def *
+vc4_nir_pipe_compare_func(nir_builder *b, int func,
+ nir_ssa_def *src0, nir_ssa_def *src1)
+{
+ switch (func) {
+ default:
+ fprintf(stderr, "Unknown compare func %d\n", func);
+ /* FALLTHROUGH */
+ case PIPE_FUNC_NEVER:
+ return nir_imm_int(b, 0);
+ case PIPE_FUNC_ALWAYS:
+ return nir_imm_int(b, ~0);
+ case PIPE_FUNC_EQUAL:
+ return nir_feq(b, src0, src1);
+ case PIPE_FUNC_NOTEQUAL:
+ return nir_fne(b, src0, src1);
+ case PIPE_FUNC_GREATER:
+ return nir_flt(b, src1, src0);
+ case PIPE_FUNC_GEQUAL:
+ return nir_fge(b, src0, src1);
+ case PIPE_FUNC_LESS:
+ return nir_flt(b, src0, src1);
+ case PIPE_FUNC_LEQUAL:
+ return nir_fge(b, src1, src0);
+ }
+}
+
+static void
+vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
+ nir_ssa_def *alpha)
+{
+ if (!c->fs_key->alpha_test)
+ return;
+
+ nir_ssa_def *alpha_ref =
+ vc4_nir_get_state_uniform(b, QUNIFORM_ALPHA_REF);
+ nir_ssa_def *condition =
+ vc4_nir_pipe_compare_func(b, c->fs_key->alpha_test_func,
+ alpha, alpha_ref);
+
+ nir_intrinsic_instr *discard =
+ nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_discard_if);
+ discard->num_components = 1;
+ discard->src[0] = nir_src_for_ssa(nir_inot(b, condition));
+ nir_builder_instr_insert(b, &discard->instr);
+}
+
+static void
+vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{
+ enum pipe_format color_format = c->fs_key->color_format;
+ const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+
+ /* Pull out the float src/dst color components. */
+ nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b);
+ nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
+ nir_ssa_def *src_color[4], *unpacked_dst_color[4];
+ for (unsigned i = 0; i < 4; i++) {
+ src_color[i] = nir_swizzle(b, intr->src[0].ssa, &i, 1, false);
+ unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
+ }
+
+ /* Unswizzle the destination color. */
+ nir_ssa_def *dst_color[4];
+ for (unsigned i = 0; i < 4; i++) {
+ dst_color[i] = vc4_nir_get_swizzled_channel(b,
+ unpacked_dst_color,
+ format_swiz[i]);
+ }
+
+ vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
+
+ /* Turn dst color to linear. */
+ if (util_format_is_srgb(color_format)) {
+ for (int i = 0; i < 3; i++)
+ dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
+ }
+
+ nir_ssa_def *blend_color[4];
+ vc4_do_blending(c, b, blend_color, src_color, dst_color);
+
+ /* sRGB encode the output color */
+ if (util_format_is_srgb(color_format)) {
+ for (int i = 0; i < 3; i++)
+ blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
+ }
+
+ nir_ssa_def *swizzled_outputs[4];
+ for (int i = 0; i < 4; i++) {
+ swizzled_outputs[i] =
+ vc4_nir_get_swizzled_channel(b, blend_color,
+ format_swiz[i]);
+ }
+
+ nir_ssa_def *packed_color =
+ nir_pack_unorm_4x8(b,
+ nir_vec4(b,
+ swizzled_outputs[0],
+ swizzled_outputs[1],
+ swizzled_outputs[2],
+ swizzled_outputs[3]));
+
+ packed_color = vc4_logicop(b, c->fs_key->logicop_func,
+ packed_color, packed_dst_color);
+
+ /* If the bit isn't set in the color mask, then just return the
+ * original dst color, instead.
+ */
+ uint32_t colormask = 0xffffffff;
+ for (int i = 0; i < 4; i++) {
+ if (format_swiz[i] < 4 &&
+ !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
+ colormask &= ~(0xff << (i * 8));
+ }
+ }
+ packed_color = nir_ior(b,
+ nir_iand(b, packed_color,
+ nir_imm_int(b, colormask)),
+ nir_iand(b, packed_dst_color,
+ nir_imm_int(b, ~colormask)));
+
+ /* Turn the old vec4 output into a store of the packed color. */
+ nir_instr_rewrite_src(&intr->instr, &intr->src[0],
+ nir_src_for_ssa(packed_color));
+ intr->num_components = 1;
+}
+
+static bool
+vc4_nir_lower_blend_block(nir_block *block, void *state)
+{
+ struct vc4_compile *c = state;
+
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_store_output)
+ continue;
+
+ nir_variable *output_var = NULL;
+ foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
+ if (var->data.driver_location == intr->const_index[0]) {
+ output_var = var;
+ break;
+ }
+ }
+ assert(output_var);
+ unsigned semantic_name = output_var->data.location;
+
+ if (semantic_name != TGSI_SEMANTIC_COLOR)
+ continue;
+
+ nir_function_impl *impl =
+ nir_cf_node_get_function(&block->cf_node);
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ nir_builder_insert_before_instr(&b, &intr->instr);
+ vc4_nir_lower_blend_instr(c, &b, intr);
+ }
+ return true;
+}
+
+void
+vc4_nir_lower_blend(struct vc4_compile *c)
+{
+ nir_foreach_overload(c->s, overload) {
+ if (overload->impl) {
+ nir_foreach_block(overload->impl,
+ vc4_nir_lower_blend_block, c);
+
+ nir_metadata_preserve(overload->impl,
+ nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+ }
+}
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
new file mode 100644
index 00000000000..229d41147d8
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -0,0 +1,291 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "vc4_qir.h"
+#include "tgsi/tgsi_info.h"
+#include "glsl/nir/nir_builder.h"
+
+/**
+ * Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into
+ * something amenable to the VC4 architecture.
+ *
+ * Currently, it splits inputs, outputs, and uniforms into scalars, drops any
+ * non-position outputs in coordinate shaders, and fixes up the addressing on
+ * indirect uniform loads.
+ */
+
+static void
+replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr,
+ nir_ssa_def **comps)
+{
+
+ /* Batch things back together into a vec4. This will get split by the
+ * later ALU scalarization pass.
+ */
+ nir_ssa_def *vec = nir_vec4(b, comps[0], comps[1], comps[2], comps[3]);
+
+ /* Replace the old intrinsic with a reference to our reconstructed
+ * vec4.
+ */
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec),
+ ralloc_parent(b->impl));
+ nir_instr_remove(&intr->instr);
+}
+
+static void
+vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{
+ nir_builder_insert_before_instr(b, &intr->instr);
+
+ if (c->stage == QSTAGE_FRAG && intr->const_index[0] ==
+ VC4_NIR_TLB_COLOR_READ_INPUT) {
+ /* This doesn't need any lowering. */
+ return;
+ }
+
+ nir_variable *input_var = NULL;
+ foreach_list_typed(nir_variable, var, node, &c->s->inputs) {
+ if (var->data.driver_location == intr->const_index[0]) {
+ input_var = var;
+ break;
+ }
+ }
+ assert(input_var);
+ int semantic_name = input_var->data.location;
+ int semantic_index = input_var->data.index;
+
+ /* All TGSI-to-NIR inputs are vec4. */
+ assert(intr->num_components == 4);
+
+ /* Generate scalar loads equivalent to the original VEC4. */
+ nir_ssa_def *dests[4];
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ nir_intrinsic_instr *intr_comp =
+ nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input);
+ intr_comp->num_components = 1;
+ intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+ nir_builder_instr_insert(b, &intr_comp->instr);
+
+ dests[i] = &intr_comp->dest.ssa;
+ }
+
+ switch (c->stage) {
+ case QSTAGE_FRAG:
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_FACE:
+ dests[0] = nir_fsub(b,
+ nir_imm_float(b, 1.0),
+ nir_fmul(b,
+ nir_i2f(b, dests[0]),
+ nir_imm_float(b, 2.0)));
+ dests[1] = nir_imm_float(b, 0.0);
+ dests[2] = nir_imm_float(b, 0.0);
+ dests[3] = nir_imm_float(b, 1.0);
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ if (c->fs_key->point_sprite_mask &
+ (1 << semantic_index)) {
+ if (!c->fs_key->is_points) {
+ dests[0] = nir_imm_float(b, 0.0);
+ dests[1] = nir_imm_float(b, 0.0);
+ }
+ if (c->fs_key->point_coord_upper_left) {
+ dests[1] = nir_fsub(b,
+ nir_imm_float(b, 1.0),
+ dests[1]);
+ }
+ dests[2] = nir_imm_float(b, 0.0);
+ dests[3] = nir_imm_float(b, 1.0);
+ }
+ break;
+ }
+ break;
+ case QSTAGE_COORD:
+ case QSTAGE_VERT:
+ break;
+ }
+
+ replace_intrinsic_with_vec4(b, intr, dests);
+}
+
+static void
+vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{
+ nir_variable *output_var = NULL;
+ foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
+ if (var->data.driver_location == intr->const_index[0]) {
+ output_var = var;
+ break;
+ }
+ }
+ assert(output_var);
+ unsigned semantic_name = output_var->data.location;
+
+ if (c->stage == QSTAGE_COORD &&
+ (semantic_name != TGSI_SEMANTIC_POSITION &&
+ semantic_name != TGSI_SEMANTIC_PSIZE)) {
+ nir_instr_remove(&intr->instr);
+ return;
+ }
+
+ /* Color output is lowered by vc4_nir_lower_blend(). */
+ if (c->stage == QSTAGE_FRAG && semantic_name == TGSI_SEMANTIC_COLOR) {
+ intr->const_index[0] *= 4;
+ return;
+ }
+
+ /* All TGSI-to-NIR outputs are VEC4. */
+ assert(intr->num_components == 4);
+
+ nir_builder_insert_before_instr(b, &intr->instr);
+
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ nir_intrinsic_instr *intr_comp =
+ nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
+ intr_comp->num_components = 1;
+ intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
+
+ assert(intr->src[0].is_ssa);
+ intr_comp->src[0] = nir_src_for_ssa(nir_swizzle(b,
+ intr->src[0].ssa,
+ &i, 1, false));
+ nir_builder_instr_insert(b, &intr_comp->instr);
+ }
+
+ nir_instr_remove(&intr->instr);
+}
+
+static void
+vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{
+ /* All TGSI-to-NIR uniform loads are vec4, but we may create dword
+ * loads in our lowering passes.
+ */
+ if (intr->num_components == 1)
+ return;
+ assert(intr->num_components == 4);
+
+ nir_builder_insert_before_instr(b, &intr->instr);
+
+ /* Generate scalar loads equivalent to the original VEC4. */
+ nir_ssa_def *dests[4];
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ nir_intrinsic_instr *intr_comp =
+ nir_intrinsic_instr_create(c->s, intr->intrinsic);
+ intr_comp->num_components = 1;
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+
+ if (intr->intrinsic == nir_intrinsic_load_uniform_indirect) {
+ /* Convert the variable TGSI register index to a byte
+ * offset.
+ */
+ intr_comp->src[0] =
+ nir_src_for_ssa(nir_ishl(b,
+ intr->src[0].ssa,
+ nir_imm_int(b, 4)));
+
+ /* Convert the offset to be a byte index, too. */
+ intr_comp->const_index[0] = (intr->const_index[0] * 16 +
+ i * 4);
+ } else {
+ /* We want a dword index for non-indirect uniform
+ * loads.
+ */
+ intr_comp->const_index[0] = (intr->const_index[0] * 4 +
+ i);
+ }
+
+ dests[i] = &intr_comp->dest.ssa;
+
+ nir_builder_instr_insert(b, &intr_comp->instr);
+ }
+
+ replace_intrinsic_with_vec4(b, intr, dests);
+}
+
+static void
+vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
+ struct nir_instr *instr)
+{
+ if (instr->type != nir_instr_type_intrinsic)
+ return;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_input:
+ vc4_nir_lower_input(c, b, intr);
+ break;
+
+ case nir_intrinsic_store_output:
+ vc4_nir_lower_output(c, b, intr);
+ break;
+
+ case nir_intrinsic_load_uniform:
+ case nir_intrinsic_load_uniform_indirect:
+ vc4_nir_lower_uniform(c, b, intr);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static bool
+vc4_nir_lower_io_block(nir_block *block, void *arg)
+{
+ struct vc4_compile *c = arg;
+ nir_function_impl *impl =
+ nir_cf_node_get_function(&block->cf_node);
+
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_instr_safe(block, instr)
+ vc4_nir_lower_io_instr(c, &b, instr);
+
+ return true;
+}
+
+static bool
+vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl)
+{
+ nir_foreach_block(impl, vc4_nir_lower_io_block, c);
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return true;
+}
+
+void
+vc4_nir_lower_io(struct vc4_compile *c)
+{
+ nir_foreach_overload(c->s, overload) {
+ if (overload->impl)
+ vc4_nir_lower_io_impl(c, overload->impl);
+ }
+}
diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
index d6d2fbf257f..a755de9aa41 100644
--- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
+++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
@@ -67,10 +67,7 @@ qir_opt_copy_propagation(struct vc4_compile *c)
if (inst->op == QOP_MOV &&
inst->dst.file == QFILE_TEMP &&
- inst->src[0].file != QFILE_VPM &&
- !(inst->src[0].file == QFILE_TEMP &&
- (c->defs[inst->src[0].index]->op == QOP_TEX_RESULT ||
- c->defs[inst->src[0].index]->op == QOP_TLB_COLOR_READ))) {
+ inst->src[0].file != QFILE_VPM) {
movs[inst->dst.index] = inst->src[0];
}
}
diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c
index 92c8260eb59..0e5480ea781 100644
--- a/src/gallium/drivers/vc4/vc4_opt_cse.c
+++ b/src/gallium/drivers/vc4/vc4_opt_cse.c
@@ -46,8 +46,7 @@ struct inst_key {
struct qreg src[4];
/**
* If the instruction depends on the flags, how many SFs have been
- * seen before this instruction, or if it depends on r4, how many r4
- * writes have been seen.
+ * seen before this instruction.
*/
uint32_t implicit_arg_update_count;
};
@@ -63,8 +62,7 @@ inst_key_equals(const void *a, const void *b)
static struct qinst *
vc4_find_cse(struct vc4_compile *c, struct hash_table *ht,
- struct qinst *inst, uint32_t sf_count,
- uint32_t r4_count)
+ struct qinst *inst, uint32_t sf_count)
{
if (inst->dst.file != QFILE_TEMP ||
inst->op == QOP_MOV ||
@@ -79,8 +77,6 @@ vc4_find_cse(struct vc4_compile *c, struct hash_table *ht,
qir_get_op_nsrc(inst->op) * sizeof(key.src[0]));
if (qir_depends_on_flags(inst))
key.implicit_arg_update_count = sf_count;
- if (qir_reads_r4(inst))
- key.implicit_arg_update_count = r4_count;
uint32_t hash = _mesa_hash_data(&key, sizeof(key));
struct hash_entry *entry =
@@ -121,7 +117,7 @@ bool
qir_opt_cse(struct vc4_compile *c)
{
bool progress = false;
- uint32_t sf_count = 0, r4_count = 0;
+ uint32_t sf_count = 0;
struct hash_table *ht = _mesa_hash_table_create(NULL, NULL,
inst_key_equals);
@@ -130,15 +126,15 @@ qir_opt_cse(struct vc4_compile *c)
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
if (qir_has_side_effects(c, inst) ||
- qir_has_side_effect_reads(c, inst)) {
+ qir_has_side_effect_reads(c, inst) ||
+ inst->op == QOP_TLB_COLOR_READ) {
continue;
}
if (inst->sf) {
sf_count++;
} else {
- struct qinst *cse = vc4_find_cse(c, ht, inst,
- sf_count, r4_count);
+ struct qinst *cse = vc4_find_cse(c, ht, inst, sf_count);
if (cse) {
inst->src[0] = cse->dst;
for (int i = 1; i < qir_get_op_nsrc(inst->op);
@@ -154,9 +150,6 @@ qir_opt_cse(struct vc4_compile *c)
}
}
}
-
- if (qir_writes_r4(inst))
- r4_count++;
}
ralloc_free(ht);
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index ba47c51d9bd..13c472152d8 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -23,21 +23,19 @@
*/
#include <inttypes.h>
-#include "pipe/p_state.h"
#include "util/u_format.h"
#include "util/u_hash.h"
#include "util/u_math.h"
#include "util/u_memory.h"
-#include "util/u_pack_color.h"
-#include "util/format_srgb.h"
#include "util/ralloc.h"
#include "util/hash_table.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_parse.h"
+#include "glsl/nir/nir.h"
+#include "glsl/nir/nir_builder.h"
#include "nir/tgsi_to_nir.h"
-
#include "vc4_context.h"
#include "vc4_qpu.h"
#include "vc4_qir.h"
@@ -45,51 +43,8 @@
#include "simpenrose/simpenrose.h"
#endif
-struct vc4_key {
- struct vc4_uncompiled_shader *shader_state;
- struct {
- enum pipe_format format;
- unsigned compare_mode:1;
- unsigned compare_func:3;
- unsigned wrap_s:3;
- unsigned wrap_t:3;
- uint8_t swizzle[4];
- } tex[VC4_MAX_TEXTURE_SAMPLERS];
- uint8_t ucp_enables;
-};
-
-struct vc4_fs_key {
- struct vc4_key base;
- enum pipe_format color_format;
- bool depth_enabled;
- bool stencil_enabled;
- bool stencil_twoside;
- bool stencil_full_writemasks;
- bool is_points;
- bool is_lines;
- bool alpha_test;
- bool point_coord_upper_left;
- bool light_twoside;
- uint8_t alpha_test_func;
- uint8_t logicop_func;
- uint32_t point_sprite_mask;
-
- struct pipe_rt_blend_state blend;
-};
-
-struct vc4_vs_key {
- struct vc4_key base;
-
- /**
- * This is a proxy for the array of FS input semantics, which is
- * larger than we would want to put in the key.
- */
- uint64_t compiled_fs_id;
-
- enum pipe_format attr_formats[8];
- bool is_coord;
- bool per_vertex_point_size;
-};
+static struct qreg
+ntq_get_src(struct vc4_compile *c, nir_src src, int i);
static void
resize_qreg_array(struct vc4_compile *c,
@@ -113,10 +68,10 @@ resize_qreg_array(struct vc4_compile *c,
}
static struct qreg
-indirect_uniform_load(struct vc4_compile *c,
- struct qreg indirect_offset,
- unsigned offset)
+indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
{
+ struct qreg indirect_offset = ntq_get_src(c, intr->src[0], 0);
+ uint32_t offset = intr->const_index[0];
struct vc4_compiler_ubo_range *range = NULL;
unsigned i;
for (i = 0; i < c->num_uniform_ranges; i++) {
@@ -138,10 +93,6 @@ indirect_uniform_load(struct vc4_compile *c,
};
offset -= range->src_offset;
- /* Translate the user's TGSI register index from the TGSI register
- * base to a byte offset.
- */
- indirect_offset = qir_SHL(c, indirect_offset, qir_uniform_ui(c, 4));
/* Adjust for where we stored the TGSI register base. */
indirect_offset = qir_ADD(c, indirect_offset,
@@ -155,24 +106,70 @@ indirect_uniform_load(struct vc4_compile *c,
range->size - 4)));
qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0));
- struct qreg r4 = qir_TEX_RESULT(c);
c->num_texture_samples++;
- return qir_MOV(c, r4);
+ return qir_TEX_RESULT(c);
}
-static struct qreg *
-ntq_get_dest(struct vc4_compile *c, nir_dest dest)
+nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
+ enum quniform_contents contents)
{
- assert(!dest.is_ssa);
- nir_register *reg = dest.reg.reg;
- struct hash_entry *entry = _mesa_hash_table_search(c->def_ht, reg);
- assert(reg->num_array_elems == 0);
- assert(dest.reg.base_offset == 0);
+ nir_intrinsic_instr *intr = /* 1-component load_uniform for the state value */
+ nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_load_uniform);
+ intr->const_index[0] = VC4_NIR_STATE_UNIFORM_OFFSET + contents; /* decoded
+ * again by the load_uniform case of ntq_emit_intrinsic(), which
+ * subtracts the offset to recover the quniform_contents value */
+ intr->num_components = 1;
+ nir_ssa_dest_init(&intr->instr, &intr->dest, 1, NULL);
+ nir_builder_instr_insert(b, &intr->instr);
+ return &intr->dest.ssa; /* SSA value of the inserted load */
+}
- struct qreg *qregs = entry->data;
+nir_ssa_def *
+vc4_nir_get_swizzled_channel(nir_builder *b, nir_ssa_def **srcs, int swiz)
+{ /* Maps one swizzle selector to a value: X/Y/Z/W return srcs[swiz],
+ * SWIZZLE_0 / SWIZZLE_1 return the float immediates 0.0 / 1.0, and
+ * SWIZZLE_NONE or any unrecognized selector prints a warning to stderr
+ * and then returns 0.0 as well.
+ */
+ switch (swiz) {
+ default:
+ case UTIL_FORMAT_SWIZZLE_NONE:
+ fprintf(stderr, "warning: unknown swizzle\n");
+ /* FALLTHROUGH */
+ case UTIL_FORMAT_SWIZZLE_0:
+ return nir_imm_float(b, 0.0);
+ case UTIL_FORMAT_SWIZZLE_1:
+ return nir_imm_float(b, 1.0);
+ case UTIL_FORMAT_SWIZZLE_X:
+ case UTIL_FORMAT_SWIZZLE_Y:
+ case UTIL_FORMAT_SWIZZLE_Z:
+ case UTIL_FORMAT_SWIZZLE_W:
+ return srcs[swiz];
+ }
+}
+/**
+ * Allocates one qreg per component of an SSA def and records the array in
+ * c->def_ht, keyed by the def.
+ *
+ * The array is ralloc'ed with c->def_ht as its parent.  The entries are not
+ * assigned here; the callers visible in this file fill them in right after
+ * the call.
+ */
+static struct qreg *
+ntq_init_ssa_def(struct vc4_compile *c, nir_ssa_def *def)
+{
+ struct qreg *qregs = ralloc_array(c->def_ht, struct qreg,
+ def->num_components);
+ _mesa_hash_table_insert(c->def_ht, def, qregs);
return qregs;
}
+static struct qreg *
+ntq_get_dest(struct vc4_compile *c, nir_dest *dest)
+{ /* Returns the qreg array that values written to dest are stored in. */
+ if (dest->is_ssa) { /* SSA dest: register a fresh array for the def. */
+ struct qreg *qregs = ntq_init_ssa_def(c, &dest->ssa);
+ for (int i = 0; i < dest->ssa.num_components; i++)
+ qregs[i] = c->undef; /* placeholder until the caller stores a value */
+ return qregs;
+ } else { /* Register dest: only non-array registers at base offset 0. */
+ nir_register *reg = dest->reg.reg;
+ assert(dest->reg.base_offset == 0);
+ assert(reg->num_array_elems == 0);
+ struct hash_entry *entry =
+ _mesa_hash_table_search(c->def_ht, reg);
+ return entry->data; /* NOTE(review): entry is not NULL-checked; this
+ * assumes the register was already added to c->def_ht -- confirm */
+ }
+}
+
static struct qreg
ntq_get_src(struct vc4_compile *c, nir_src src, int i)
{
@@ -282,22 +279,6 @@ qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
}
static struct qreg
-qir_srgb_encode(struct vc4_compile *c, struct qreg linear)
-{
- struct qreg low = qir_FMUL(c, linear, qir_uniform_f(c, 12.92));
- struct qreg high = qir_FSUB(c,
- qir_FMUL(c,
- qir_uniform_f(c, 1.055),
- qir_POW(c,
- linear,
- qir_uniform_f(c, 0.41666))),
- qir_uniform_f(c, 0.055));
-
- qir_SF(c, qir_FSUB(c, linear, qir_uniform_f(c, 0.0031308)));
- return qir_SEL_X_Y_NS(c, low, high);
-}
-
-static struct qreg
ntq_umul(struct vc4_compile *c, struct qreg src0, struct qreg src1)
{
struct qreg src0_hi = qir_SHR(c, src0,
@@ -410,13 +391,13 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
qir_TEX_S(c, s, texture_u[next_texture_u++]);
c->num_texture_samples++;
- struct qreg r4 = qir_TEX_RESULT(c);
+ struct qreg tex = qir_TEX_RESULT(c);
enum pipe_format format = c->key->tex[unit].format;
struct qreg unpacked[4];
if (util_format_is_depth_or_stencil(format)) {
- struct qreg depthf = qir_ITOF(c, qir_SHR(c, r4,
+ struct qreg depthf = qir_ITOF(c, qir_SHR(c, tex,
qir_uniform_ui(c, 8)));
struct qreg normalized = qir_FMUL(c, depthf,
qir_uniform_f(c, 1.0f/0xffffff));
@@ -468,7 +449,7 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
unpacked[i] = depth_output;
} else {
for (int i = 0; i < 4; i++)
- unpacked[i] = qir_R4_UNPACK(c, r4, i);
+ unpacked[i] = qir_UNPACK_8_F(c, tex, i);
}
const uint8_t *format_swiz = vc4_get_format_swizzle(format);
@@ -484,7 +465,7 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
texture_output[i]);
}
- struct qreg *dest = ntq_get_dest(c, instr->dest);
+ struct qreg *dest = ntq_get_dest(c, &instr->dest);
for (int i = 0; i < 4; i++) {
dest[i] = get_swizzled_channel(c, texture_output,
c->key->tex[unit].swizzle[i]);
@@ -558,7 +539,7 @@ ntq_fsin(struct vc4_compile *c, struct qreg src)
struct qreg scaled_x =
qir_FMUL(c,
src,
- qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
+ qir_uniform_f(c, 1.0 / (M_PI * 2.0)));
struct qreg x = qir_FADD(c,
ntq_ffract(c, scaled_x),
@@ -756,26 +737,6 @@ emit_fragcoord_input(struct vc4_compile *c, int attr)
c->inputs[attr * 4 + 3] = qir_RCP(c, qir_FRAG_W(c));
}
-static void
-emit_point_coord_input(struct vc4_compile *c, int attr)
-{
- if (c->point_x.file == QFILE_NULL) {
- c->point_x = qir_uniform_f(c, 0.0);
- c->point_y = qir_uniform_f(c, 0.0);
- }
-
- c->inputs[attr * 4 + 0] = c->point_x;
- if (c->fs_key->point_coord_upper_left) {
- c->inputs[attr * 4 + 1] = qir_FSUB(c,
- qir_uniform_f(c, 1.0),
- c->point_y);
- } else {
- c->inputs[attr * 4 + 1] = c->point_y;
- }
- c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
- c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
-}
-
static struct qreg
emit_fragment_varying(struct vc4_compile *c, uint8_t semantic,
uint8_t index, uint8_t swizzle)
@@ -817,19 +778,6 @@ emit_fragment_input(struct vc4_compile *c, int attr,
}
static void
-emit_face_input(struct vc4_compile *c, int attr)
-{
- c->inputs[attr * 4 + 0] = qir_FSUB(c,
- qir_uniform_f(c, 1.0),
- qir_FMUL(c,
- qir_ITOF(c, qir_FRAG_REV_FLAG(c)),
- qir_uniform_f(c, 2.0)));
- c->inputs[attr * 4 + 1] = qir_uniform_f(c, 0.0);
- c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
- c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
-}
-
-static void
add_output(struct vc4_compile *c,
uint32_t decl_offset,
uint8_t semantic_name,
@@ -884,12 +832,38 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
srcs[i] = ntq_get_src(c, instr->src[i].src,
instr->src[i].swizzle[0]);
- struct qreg *dest = ntq_get_dest(c, instr->dest.dest);
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
dest[i] = srcs[i];
return;
}
+ if (instr->op == nir_op_pack_unorm_4x8) {
+ struct qreg result;
+ for (int i = 0; i < 4; i++) {
+ struct qreg src = ntq_get_src(c, instr->src[0].src,
+ instr->src[0].swizzle[i]);
+ if (i == 0)
+ result = qir_PACK_8888_F(c, src);
+ else
+ result = qir_PACK_8_F(c, result, src, i);
+ }
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
+ *dest = result;
+ return;
+ }
+
+ if (instr->op == nir_op_unpack_unorm_4x8) {
+ struct qreg src = ntq_get_src(c, instr->src[0].src,
+ instr->src[0].swizzle[0]);
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
+ for (int i = 0; i < 4; i++) {
+ if (instr->dest.write_mask & (1 << i))
+ dest[i] = qir_UNPACK_8_F(c, src, i);
+ }
+ return;
+ }
+
/* General case: We can just grab the one used channel per src. */
struct qreg src[nir_op_infos[instr->op].num_inputs];
for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
@@ -898,7 +872,7 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
/* Pick the channel to store the output in. */
assert(!instr->dest.saturate);
- struct qreg *dest = ntq_get_dest(c, instr->dest.dest);
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
assert(util_is_power_of_two(instr->dest.write_mask));
dest += ffs(instr->dest.write_mask) - 1;
@@ -1092,167 +1066,6 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
}
}
-static struct qreg
-vc4_blend_channel(struct vc4_compile *c,
- struct qreg *dst,
- struct qreg *src,
- struct qreg val,
- unsigned factor,
- int channel)
-{
- switch(factor) {
- case PIPE_BLENDFACTOR_ONE:
- return val;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- return qir_FMUL(c, val, src[channel]);
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- return qir_FMUL(c, val, src[3]);
- case PIPE_BLENDFACTOR_DST_ALPHA:
- return qir_FMUL(c, val, dst[3]);
- case PIPE_BLENDFACTOR_DST_COLOR:
- return qir_FMUL(c, val, dst[channel]);
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- if (channel != 3) {
- return qir_FMUL(c,
- val,
- qir_FMIN(c,
- src[3],
- qir_FSUB(c,
- qir_uniform_f(c, 1.0),
- dst[3])));
- } else {
- return val;
- }
- case PIPE_BLENDFACTOR_CONST_COLOR:
- return qir_FMUL(c, val,
- qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR,
- channel));
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- return qir_FMUL(c, val,
- qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR, 3));
- case PIPE_BLENDFACTOR_ZERO:
- return qir_uniform_f(c, 0.0);
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
- src[channel]));
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
- src[3]));
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
- dst[3]));
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
- dst[channel]));
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- return qir_FMUL(c, val,
- qir_FSUB(c, qir_uniform_f(c, 1.0),
- qir_uniform(c,
- QUNIFORM_BLEND_CONST_COLOR,
- channel)));
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- return qir_FMUL(c, val,
- qir_FSUB(c, qir_uniform_f(c, 1.0),
- qir_uniform(c,
- QUNIFORM_BLEND_CONST_COLOR,
- 3)));
-
- default:
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- /* Unsupported. */
- fprintf(stderr, "Unknown blend factor %d\n", factor);
- return val;
- }
-}
-
-static struct qreg
-vc4_blend_func(struct vc4_compile *c,
- struct qreg src, struct qreg dst,
- unsigned func)
-{
- switch (func) {
- case PIPE_BLEND_ADD:
- return qir_FADD(c, src, dst);
- case PIPE_BLEND_SUBTRACT:
- return qir_FSUB(c, src, dst);
- case PIPE_BLEND_REVERSE_SUBTRACT:
- return qir_FSUB(c, dst, src);
- case PIPE_BLEND_MIN:
- return qir_FMIN(c, src, dst);
- case PIPE_BLEND_MAX:
- return qir_FMAX(c, src, dst);
-
- default:
- /* Unsupported. */
- fprintf(stderr, "Unknown blend func %d\n", func);
- return src;
-
- }
-}
-
-/**
- * Implements fixed function blending in shader code.
- *
- * VC4 doesn't have any hardware support for blending. Instead, you read the
- * current contents of the destination from the tile buffer after having
- * waited for the scoreboard (which is handled by vc4_qpu_emit.c), then do
- * math using your output color and that destination value, and update the
- * output color appropriately.
- */
-static void
-vc4_blend(struct vc4_compile *c, struct qreg *result,
- struct qreg *dst_color, struct qreg *src_color)
-{
- struct pipe_rt_blend_state *blend = &c->fs_key->blend;
-
- if (!blend->blend_enable) {
- for (int i = 0; i < 4; i++)
- result[i] = src_color[i];
- return;
- }
-
- struct qreg clamped_src[4];
- struct qreg clamped_dst[4];
- for (int i = 0; i < 4; i++) {
- clamped_src[i] = qir_SAT(c, src_color[i]);
- clamped_dst[i] = qir_SAT(c, dst_color[i]);
- }
- src_color = clamped_src;
- dst_color = clamped_dst;
-
- struct qreg src_blend[4], dst_blend[4];
- for (int i = 0; i < 3; i++) {
- src_blend[i] = vc4_blend_channel(c,
- dst_color, src_color,
- src_color[i],
- blend->rgb_src_factor, i);
- dst_blend[i] = vc4_blend_channel(c,
- dst_color, src_color,
- dst_color[i],
- blend->rgb_dst_factor, i);
- }
- src_blend[3] = vc4_blend_channel(c,
- dst_color, src_color,
- src_color[3],
- blend->alpha_src_factor, 3);
- dst_blend[3] = vc4_blend_channel(c,
- dst_color, src_color,
- dst_color[3],
- blend->alpha_dst_factor, 3);
-
- for (int i = 0; i < 3; i++) {
- result[i] = vc4_blend_func(c,
- src_blend[i], dst_blend[i],
- blend->rgb_func);
- }
- result[3] = vc4_blend_func(c,
- src_blend[3], dst_blend[3],
- blend->alpha_func);
-}
-
static void
clip_distance_discard(struct vc4_compile *c)
{
@@ -1276,167 +1089,15 @@ clip_distance_discard(struct vc4_compile *c)
}
static void
-alpha_test_discard(struct vc4_compile *c)
-{
- struct qreg src_alpha;
- struct qreg alpha_ref = qir_uniform(c, QUNIFORM_ALPHA_REF, 0);
-
- if (!c->fs_key->alpha_test)
- return;
-
- if (c->output_color_index != -1)
- src_alpha = c->outputs[c->output_color_index + 3];
- else
- src_alpha = qir_uniform_f(c, 1.0);
-
- if (c->discard.file == QFILE_NULL)
- c->discard = qir_uniform_ui(c, 0);
-
- switch (c->fs_key->alpha_test_func) {
- case PIPE_FUNC_NEVER:
- c->discard = qir_uniform_ui(c, ~0);
- break;
- case PIPE_FUNC_ALWAYS:
- break;
- case PIPE_FUNC_EQUAL:
- qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
- c->discard = qir_SEL_X_Y_ZS(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_NOTEQUAL:
- qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
- c->discard = qir_SEL_X_Y_ZC(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_GREATER:
- qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
- c->discard = qir_SEL_X_Y_NC(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_GEQUAL:
- qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
- c->discard = qir_SEL_X_Y_NS(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_LESS:
- qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
- c->discard = qir_SEL_X_Y_NS(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_LEQUAL:
- qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
- c->discard = qir_SEL_X_Y_NC(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- }
-}
-
-static struct qreg
-vc4_logicop(struct vc4_compile *c, struct qreg src, struct qreg dst)
-{
- switch (c->fs_key->logicop_func) {
- case PIPE_LOGICOP_CLEAR:
- return qir_uniform_f(c, 0.0);
- case PIPE_LOGICOP_NOR:
- return qir_NOT(c, qir_OR(c, src, dst));
- case PIPE_LOGICOP_AND_INVERTED:
- return qir_AND(c, qir_NOT(c, src), dst);
- case PIPE_LOGICOP_COPY_INVERTED:
- return qir_NOT(c, src);
- case PIPE_LOGICOP_AND_REVERSE:
- return qir_AND(c, src, qir_NOT(c, dst));
- case PIPE_LOGICOP_INVERT:
- return qir_NOT(c, dst);
- case PIPE_LOGICOP_XOR:
- return qir_XOR(c, src, dst);
- case PIPE_LOGICOP_NAND:
- return qir_NOT(c, qir_AND(c, src, dst));
- case PIPE_LOGICOP_AND:
- return qir_AND(c, src, dst);
- case PIPE_LOGICOP_EQUIV:
- return qir_NOT(c, qir_XOR(c, src, dst));
- case PIPE_LOGICOP_NOOP:
- return dst;
- case PIPE_LOGICOP_OR_INVERTED:
- return qir_OR(c, qir_NOT(c, src), dst);
- case PIPE_LOGICOP_OR_REVERSE:
- return qir_OR(c, src, qir_NOT(c, dst));
- case PIPE_LOGICOP_OR:
- return qir_OR(c, src, dst);
- case PIPE_LOGICOP_SET:
- return qir_uniform_ui(c, ~0);
- case PIPE_LOGICOP_COPY:
- default:
- return src;
- }
-}
-
-static void
emit_frag_end(struct vc4_compile *c)
{
clip_distance_discard(c);
- alpha_test_discard(c);
-
- enum pipe_format color_format = c->fs_key->color_format;
- const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
- struct qreg tlb_read_color[4] = { c->undef, c->undef, c->undef, c->undef };
- struct qreg dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
- struct qreg linear_dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
- struct qreg packed_dst_color = c->undef;
-
- if (c->fs_key->blend.blend_enable ||
- c->fs_key->blend.colormask != 0xf ||
- c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
- struct qreg r4 = qir_TLB_COLOR_READ(c);
- for (int i = 0; i < 4; i++)
- tlb_read_color[i] = qir_R4_UNPACK(c, r4, i);
- for (int i = 0; i < 4; i++) {
- dst_color[i] = get_swizzled_channel(c,
- tlb_read_color,
- format_swiz[i]);
- if (util_format_is_srgb(color_format) && i != 3) {
- linear_dst_color[i] =
- qir_srgb_decode(c, dst_color[i]);
- } else {
- linear_dst_color[i] = dst_color[i];
- }
- }
- /* Save the packed value for logic ops. Can't reuse r4
- * because other things might smash it (like sRGB)
- */
- packed_dst_color = qir_MOV(c, r4);
- }
-
- struct qreg blend_color[4];
- struct qreg undef_array[4] = {
- c->undef, c->undef, c->undef, c->undef
- };
- vc4_blend(c, blend_color, linear_dst_color,
- (c->output_color_index != -1 ?
- c->outputs + c->output_color_index :
- undef_array));
-
- if (util_format_is_srgb(color_format)) {
- for (int i = 0; i < 3; i++)
- blend_color[i] = qir_srgb_encode(c, blend_color[i]);
- }
-
- /* Debug: Sometimes you're getting a black output and just want to see
- * if the FS is getting executed at all. Spam magenta into the color
- * output.
- */
- if (0) {
- blend_color[0] = qir_uniform_f(c, 1.0);
- blend_color[1] = qir_uniform_f(c, 0.0);
- blend_color[2] = qir_uniform_f(c, 1.0);
- blend_color[3] = qir_uniform_f(c, 0.5);
- }
-
- struct qreg swizzled_outputs[4];
- for (int i = 0; i < 4; i++) {
- swizzled_outputs[i] = get_swizzled_channel(c, blend_color,
- format_swiz[i]);
+ struct qreg color;
+ if (c->output_color_index != -1) {
+ color = c->outputs[c->output_color_index];
+ } else {
+ color = qir_uniform_ui(c, 0);
}
if (c->discard.file != QFILE_NULL)
@@ -1463,47 +1124,7 @@ emit_frag_end(struct vc4_compile *c)
qir_TLB_Z_WRITE(c, z);
}
- struct qreg packed_color = c->undef;
- for (int i = 0; i < 4; i++) {
- if (swizzled_outputs[i].file == QFILE_NULL)
- continue;
- if (packed_color.file == QFILE_NULL) {
- packed_color = qir_PACK_8888_F(c, swizzled_outputs[i]);
- } else {
- packed_color = qir_PACK_8_F(c,
- packed_color,
- swizzled_outputs[i],
- i);
- }
- }
-
- if (packed_color.file == QFILE_NULL)
- packed_color = qir_uniform_ui(c, 0);
-
- if (c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
- packed_color = vc4_logicop(c, packed_color, packed_dst_color);
- }
-
- /* If the bit isn't set in the color mask, then just return the
- * original dst color, instead.
- */
- uint32_t colormask = 0xffffffff;
- for (int i = 0; i < 4; i++) {
- if (format_swiz[i] < 4 &&
- !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
- colormask &= ~(0xff << (i * 8));
- }
- }
- if (colormask != 0xffffffff) {
- packed_color = qir_OR(c,
- qir_AND(c, packed_color,
- qir_uniform_ui(c, colormask)),
- qir_AND(c, packed_dst_color,
- qir_uniform_ui(c, ~colormask)));
- }
-
- qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
- packed_color, c->undef));
+ qir_TLB_COLOR_WRITE(c, color);
}
static void
@@ -1695,6 +1316,7 @@ vc4_optimize_nir(struct nir_shader *s)
progress = nir_opt_peephole_select(s) || progress;
progress = nir_opt_algebraic(s) || progress;
progress = nir_opt_constant_folding(s) || progress;
+ progress = nir_opt_undef(s) || progress;
} while (progress);
}
@@ -1736,6 +1358,7 @@ ntq_setup_inputs(struct vc4_compile *c)
unsigned loc = var->data.driver_location;
assert(array_len == 1);
+ (void)array_len;
resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
(loc + 1) * 4);
@@ -1743,11 +1366,12 @@ ntq_setup_inputs(struct vc4_compile *c)
if (semantic_name == TGSI_SEMANTIC_POSITION) {
emit_fragcoord_input(c, loc);
} else if (semantic_name == TGSI_SEMANTIC_FACE) {
- emit_face_input(c, loc);
+ c->inputs[loc * 4 + 0] = qir_FRAG_REV_FLAG(c);
} else if (semantic_name == TGSI_SEMANTIC_GENERIC &&
(c->fs_key->point_sprite_mask &
(1 << semantic_index))) {
- emit_point_coord_input(c, loc);
+ c->inputs[loc * 4 + 0] = c->point_x;
+ c->inputs[loc * 4 + 1] = c->point_y;
} else {
emit_fragment_input(c, loc,
semantic_name,
@@ -1770,6 +1394,13 @@ ntq_setup_outputs(struct vc4_compile *c)
unsigned loc = var->data.driver_location * 4;
assert(array_len == 1);
+ (void)array_len;
+
+ /* NIR hack to pass through
+ * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS */
+ if (semantic_name == TGSI_SEMANTIC_COLOR &&
+ semantic_index == -1)
+ semantic_index = 0;
for (int i = 0; i < 4; i++) {
add_output(c,
@@ -1834,8 +1465,7 @@ ntq_setup_registers(struct vc4_compile *c, struct exec_list *list)
static void
ntq_emit_load_const(struct vc4_compile *c, nir_load_const_instr *instr)
{
- struct qreg *qregs = ralloc_array(c->def_ht, struct qreg,
- instr->def.num_components);
+ struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
for (int i = 0; i < instr->def.num_components; i++)
qregs[i] = qir_uniform_ui(c, instr->value.u[i]);
@@ -1843,47 +1473,59 @@ ntq_emit_load_const(struct vc4_compile *c, nir_load_const_instr *instr)
}
static void
+ntq_emit_ssa_undef(struct vc4_compile *c, nir_ssa_undef_instr *instr)
+{
+ struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
+
+ /* The undef's SSA def has just been registered in c->def_ht; now give
+ * each of its components a value.  QIR needs there to be *some* value,
+ * so pick 0 (same as for ntq_setup_registers()).
+ */
+ for (int i = 0; i < instr->def.num_components; i++)
+ qregs[i] = qir_uniform_ui(c, 0);
+}
+
+static void
ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
{
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
struct qreg *dest = NULL;
if (info->has_dest) {
- dest = ntq_get_dest(c, instr->dest);
+ dest = ntq_get_dest(c, &instr->dest);
}
switch (instr->intrinsic) {
case nir_intrinsic_load_uniform:
- for (int i = 0; i < instr->num_components; i++) {
- dest[i] = qir_uniform(c, QUNIFORM_UNIFORM,
- instr->const_index[0] * 4 + i);
+ assert(instr->num_components == 1);
+ if (instr->const_index[0] < VC4_NIR_STATE_UNIFORM_OFFSET) {
+ *dest = qir_uniform(c, QUNIFORM_UNIFORM,
+ instr->const_index[0]);
+ } else {
+ *dest = qir_uniform(c, instr->const_index[0] -
+ VC4_NIR_STATE_UNIFORM_OFFSET,
+ 0);
}
break;
case nir_intrinsic_load_uniform_indirect:
- for (int i = 0; i < instr->num_components; i++) {
- dest[i] = indirect_uniform_load(c,
- ntq_get_src(c, instr->src[0], 0),
- (instr->const_index[0] *
- 4 + i) * sizeof(float));
- }
+ *dest = indirect_uniform_load(c, instr);
break;
case nir_intrinsic_load_input:
- for (int i = 0; i < instr->num_components; i++)
- dest[i] = c->inputs[instr->const_index[0] * 4 + i];
-
+ assert(instr->num_components == 1);
+ if (instr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
+ *dest = qir_TLB_COLOR_READ(c);
+ } else {
+ *dest = c->inputs[instr->const_index[0]];
+ }
break;
case nir_intrinsic_store_output:
- for (int i = 0; i < instr->num_components; i++) {
- c->outputs[instr->const_index[0] * 4 + i] =
- qir_MOV(c, ntq_get_src(c, instr->src[0], i));
- }
- c->num_outputs = MAX2(c->num_outputs,
- instr->const_index[0] * 4 +
- instr->num_components + 1);
+ assert(instr->num_components == 1);
+ c->outputs[instr->const_index[0]] =
+ qir_MOV(c, ntq_get_src(c, instr->src[0], 0));
+ c->num_outputs = MAX2(c->num_outputs, instr->const_index[0] + 1);
break;
case nir_intrinsic_discard:
@@ -1927,6 +1569,10 @@ ntq_emit_instr(struct vc4_compile *c, nir_instr *instr)
ntq_emit_load_const(c, nir_instr_as_load_const(instr));
break;
+ case nir_instr_type_ssa_undef:
+ ntq_emit_ssa_undef(c, nir_instr_as_ssa_undef(instr));
+ break;
+
case nir_instr_type_tex:
ntq_emit_tex(c, nir_instr_as_tex(instr));
break;
@@ -2084,13 +1730,17 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
c->s = tgsi_to_nir(tokens, &nir_options);
nir_opt_global_to_local(c->s);
nir_convert_to_ssa(c->s);
+ if (stage == QSTAGE_FRAG)
+ vc4_nir_lower_blend(c);
+ vc4_nir_lower_io(c);
nir_lower_idiv(c->s);
+ nir_lower_load_const_to_scalar(c->s);
vc4_optimize_nir(c->s);
nir_remove_dead_variables(c->s);
- nir_convert_from_ssa(c->s);
+ nir_convert_from_ssa(c->s, true);
if (vc4_debug & VC4_DEBUG_SHADERDB) {
fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d NIR instructions\n",
@@ -2187,6 +1837,8 @@ copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
memcpy(uinfo->contents, c->uniform_contents,
count * sizeof(*uinfo->contents));
uinfo->num_texture_samples = c->num_texture_samples;
+
+ vc4_set_shader_uniform_dirty_flags(shader);
}
static struct vc4_compiled_shader *
@@ -2259,9 +1911,8 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
}
copy_uniform_state_to_shader(shader, c);
- shader->bo = vc4_bo_alloc_mem(vc4->screen, c->qpu_insts,
- c->qpu_inst_count * sizeof(uint64_t),
- "code");
+ shader->bo = vc4_bo_alloc_shader(vc4->screen, c->qpu_insts,
+ c->qpu_inst_count * sizeof(uint64_t));
/* Copy the compiler UBO range state to the compiled shader, dropping
* out arrays that were never referenced by an indirect load.
@@ -2288,10 +1939,12 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
}
}
if (shader->ubo_size) {
- fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
- qir_get_stage_name(c->stage),
- c->program_id, c->variant_id,
- shader->ubo_size / 4);
+ if (vc4_debug & VC4_DEBUG_SHADERDB) {
+ fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
+ qir_get_stage_name(c->stage),
+ c->program_id, c->variant_id,
+ shader->ubo_size / 4);
+ }
}
qir_compile_destroy(c);
@@ -2421,9 +2074,20 @@ vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode)
(prim_mode == PIPE_PRIM_POINTS &&
vc4->rasterizer->base.point_size_per_vertex);
- vc4->prog.vs = vc4_get_compiled_shader(vc4, QSTAGE_VERT, &key->base);
+ struct vc4_compiled_shader *vs =
+ vc4_get_compiled_shader(vc4, QSTAGE_VERT, &key->base);
+ if (vs != vc4->prog.vs) {
+ vc4->prog.vs = vs;
+ vc4->dirty |= VC4_DIRTY_COMPILED_VS;
+ }
+
key->is_coord = true;
- vc4->prog.cs = vc4_get_compiled_shader(vc4, QSTAGE_COORD, &key->base);
+ struct vc4_compiled_shader *cs =
+ vc4_get_compiled_shader(vc4, QSTAGE_COORD, &key->base);
+ if (cs != vc4->prog.cs) {
+ vc4->prog.cs = cs;
+ vc4->dirty |= VC4_DIRTY_COMPILED_CS;
+ }
}
void
@@ -2490,305 +2154,6 @@ vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
free(so);
}
-static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
-{
- switch (p_wrap) {
- case PIPE_TEX_WRAP_REPEAT:
- return 0;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- return 1;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- return 2;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- return 3;
- case PIPE_TEX_WRAP_CLAMP:
- return (using_nearest ? 1 : 3);
- default:
- fprintf(stderr, "Unknown wrap mode %d\n", p_wrap);
- assert(!"not reached");
- return 0;
- }
-}
-
-static void
-write_texture_p0(struct vc4_context *vc4,
- struct vc4_texture_stateobj *texstate,
- uint32_t unit)
-{
- struct pipe_sampler_view *texture = texstate->textures[unit];
- struct vc4_resource *rsc = vc4_resource(texture->texture);
-
- cl_reloc(vc4, &vc4->uniforms, rsc->bo,
- VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
- VC4_SET_FIELD(texture->u.tex.last_level -
- texture->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
- VC4_SET_FIELD(texture->target == PIPE_TEXTURE_CUBE,
- VC4_TEX_P0_CMMODE) |
- VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE));
-}
-
-static void
-write_texture_p1(struct vc4_context *vc4,
- struct vc4_texture_stateobj *texstate,
- uint32_t unit)
-{
- struct pipe_sampler_view *texture = texstate->textures[unit];
- struct vc4_resource *rsc = vc4_resource(texture->texture);
- struct pipe_sampler_state *sampler = texstate->samplers[unit];
- static const uint8_t minfilter_map[6] = {
- VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR,
- VC4_TEX_P1_MINFILT_LIN_MIP_NEAR,
- VC4_TEX_P1_MINFILT_NEAR_MIP_LIN,
- VC4_TEX_P1_MINFILT_LIN_MIP_LIN,
- VC4_TEX_P1_MINFILT_NEAREST,
- VC4_TEX_P1_MINFILT_LINEAR,
- };
- static const uint32_t magfilter_map[] = {
- [PIPE_TEX_FILTER_NEAREST] = VC4_TEX_P1_MAGFILT_NEAREST,
- [PIPE_TEX_FILTER_LINEAR] = VC4_TEX_P1_MAGFILT_LINEAR,
- };
-
- bool either_nearest =
- (sampler->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
- sampler->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
-
- cl_aligned_u32(&vc4->uniforms,
- VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
- VC4_SET_FIELD(texture->texture->height0 & 2047,
- VC4_TEX_P1_HEIGHT) |
- VC4_SET_FIELD(texture->texture->width0 & 2047,
- VC4_TEX_P1_WIDTH) |
- VC4_SET_FIELD(magfilter_map[sampler->mag_img_filter],
- VC4_TEX_P1_MAGFILT) |
- VC4_SET_FIELD(minfilter_map[sampler->min_mip_filter * 2 +
- sampler->min_img_filter],
- VC4_TEX_P1_MINFILT) |
- VC4_SET_FIELD(translate_wrap(sampler->wrap_s, either_nearest),
- VC4_TEX_P1_WRAP_S) |
- VC4_SET_FIELD(translate_wrap(sampler->wrap_t, either_nearest),
- VC4_TEX_P1_WRAP_T));
-}
-
-static void
-write_texture_p2(struct vc4_context *vc4,
- struct vc4_texture_stateobj *texstate,
- uint32_t data)
-{
- uint32_t unit = data & 0xffff;
- struct pipe_sampler_view *texture = texstate->textures[unit];
- struct vc4_resource *rsc = vc4_resource(texture->texture);
-
- cl_aligned_u32(&vc4->uniforms,
- VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
- VC4_TEX_P2_PTYPE) |
- VC4_SET_FIELD(rsc->cube_map_stride >> 12, VC4_TEX_P2_CMST) |
- VC4_SET_FIELD((data >> 16) & 1, VC4_TEX_P2_BSLOD));
-}
-
-
-#define SWIZ(x,y,z,w) { \
- UTIL_FORMAT_SWIZZLE_##x, \
- UTIL_FORMAT_SWIZZLE_##y, \
- UTIL_FORMAT_SWIZZLE_##z, \
- UTIL_FORMAT_SWIZZLE_##w \
-}
-
-static void
-write_texture_border_color(struct vc4_context *vc4,
- struct vc4_texture_stateobj *texstate,
- uint32_t unit)
-{
- struct pipe_sampler_state *sampler = texstate->samplers[unit];
- struct pipe_sampler_view *texture = texstate->textures[unit];
- struct vc4_resource *rsc = vc4_resource(texture->texture);
- union util_color uc;
-
- const struct util_format_description *tex_format_desc =
- util_format_description(texture->format);
-
- float border_color[4];
- for (int i = 0; i < 4; i++)
- border_color[i] = sampler->border_color.f[i];
- if (util_format_is_srgb(texture->format)) {
- for (int i = 0; i < 3; i++)
- border_color[i] =
- util_format_linear_to_srgb_float(border_color[i]);
- }
-
- /* Turn the border color into the layout of channels that it would
- * have when stored as texture contents.
- */
- float storage_color[4];
- util_format_unswizzle_4f(storage_color,
- border_color,
- tex_format_desc->swizzle);
-
- /* Now, pack so that when the vc4_format-sampled texture contents are
- * replaced with our border color, the vc4_get_format_swizzle()
- * swizzling will get the right channels.
- */
- if (util_format_is_depth_or_stencil(texture->format)) {
- uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
- sampler->border_color.f[0]) << 8;
- } else {
- switch (rsc->vc4_format) {
- default:
- case VC4_TEXTURE_TYPE_RGBA8888:
- util_pack_color(storage_color,
- PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
- break;
- case VC4_TEXTURE_TYPE_RGBA4444:
- util_pack_color(storage_color,
- PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
- break;
- case VC4_TEXTURE_TYPE_RGB565:
- util_pack_color(storage_color,
- PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
- break;
- case VC4_TEXTURE_TYPE_ALPHA:
- uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
- break;
- case VC4_TEXTURE_TYPE_LUMALPHA:
- uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
- (float_to_ubyte(storage_color[0]) << 0));
- break;
- }
- }
-
- cl_aligned_u32(&vc4->uniforms, uc.ui[0]);
-}
-
-static uint32_t
-get_texrect_scale(struct vc4_texture_stateobj *texstate,
- enum quniform_contents contents,
- uint32_t data)
-{
- struct pipe_sampler_view *texture = texstate->textures[data];
- uint32_t dim;
-
- if (contents == QUNIFORM_TEXRECT_SCALE_X)
- dim = texture->texture->width0;
- else
- dim = texture->texture->height0;
-
- return fui(1.0f / dim);
-}
-
-static struct vc4_bo *
-vc4_upload_ubo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
- const uint32_t *gallium_uniforms)
-{
- if (!shader->ubo_size)
- return NULL;
-
- struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen, shader->ubo_size, "ubo");
- uint32_t *data = vc4_bo_map(ubo);
- for (uint32_t i = 0; i < shader->num_ubo_ranges; i++) {
- memcpy(data + shader->ubo_ranges[i].dst_offset,
- gallium_uniforms + shader->ubo_ranges[i].src_offset,
- shader->ubo_ranges[i].size);
- }
-
- return ubo;
-}
-
-void
-vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
- struct vc4_constbuf_stateobj *cb,
- struct vc4_texture_stateobj *texstate)
-{
- struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
- const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
- struct vc4_bo *ubo = vc4_upload_ubo(vc4, shader, gallium_uniforms);
-
- cl_ensure_space(&vc4->uniforms, (uinfo->count +
- uinfo->num_texture_samples) * 4);
-
- cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);
-
- for (int i = 0; i < uinfo->count; i++) {
-
- switch (uinfo->contents[i]) {
- case QUNIFORM_CONSTANT:
- cl_aligned_u32(&vc4->uniforms, uinfo->data[i]);
- break;
- case QUNIFORM_UNIFORM:
- cl_aligned_u32(&vc4->uniforms,
- gallium_uniforms[uinfo->data[i]]);
- break;
- case QUNIFORM_VIEWPORT_X_SCALE:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
- break;
- case QUNIFORM_VIEWPORT_Y_SCALE:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
- break;
-
- case QUNIFORM_VIEWPORT_Z_OFFSET:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.translate[2]);
- break;
- case QUNIFORM_VIEWPORT_Z_SCALE:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[2]);
- break;
-
- case QUNIFORM_USER_CLIP_PLANE:
- cl_aligned_f(&vc4->uniforms,
- vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
- break;
-
- case QUNIFORM_TEXTURE_CONFIG_P0:
- write_texture_p0(vc4, texstate, uinfo->data[i]);
- break;
-
- case QUNIFORM_TEXTURE_CONFIG_P1:
- write_texture_p1(vc4, texstate, uinfo->data[i]);
- break;
-
- case QUNIFORM_TEXTURE_CONFIG_P2:
- write_texture_p2(vc4, texstate, uinfo->data[i]);
- break;
-
- case QUNIFORM_UBO_ADDR:
- cl_aligned_reloc(vc4, &vc4->uniforms, ubo, 0);
- break;
-
- case QUNIFORM_TEXTURE_BORDER_COLOR:
- write_texture_border_color(vc4, texstate, uinfo->data[i]);
- break;
-
- case QUNIFORM_TEXRECT_SCALE_X:
- case QUNIFORM_TEXRECT_SCALE_Y:
- cl_aligned_u32(&vc4->uniforms,
- get_texrect_scale(texstate,
- uinfo->contents[i],
- uinfo->data[i]));
- break;
-
- case QUNIFORM_BLEND_CONST_COLOR:
- cl_aligned_f(&vc4->uniforms,
- CLAMP(vc4->blend_color.color[uinfo->data[i]], 0, 1));
- break;
-
- case QUNIFORM_STENCIL:
- cl_aligned_u32(&vc4->uniforms,
- vc4->zsa->stencil_uniforms[uinfo->data[i]] |
- (uinfo->data[i] <= 1 ?
- (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
- 0));
- break;
-
- case QUNIFORM_ALPHA_REF:
- cl_aligned_f(&vc4->uniforms,
- vc4->zsa->base.alpha.ref_value);
- break;
- }
-#if 0
- uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
- fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
- shader, i, written_val, uif(written_val));
-#endif
- }
-}
-
static void
vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
{
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 1c96ef4795f..254140a72f5 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -96,10 +96,6 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_TEX_B] = { "tex_b", 0, 2 },
[QOP_TEX_DIRECT] = { "tex_direct", 0, 2 },
[QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
- [QOP_R4_UNPACK_A] = { "r4_unpack_a", 1, 1 },
- [QOP_R4_UNPACK_B] = { "r4_unpack_b", 1, 1 },
- [QOP_R4_UNPACK_C] = { "r4_unpack_c", 1, 1 },
- [QOP_R4_UNPACK_D] = { "r4_unpack_d", 1, 1 },
[QOP_UNPACK_8A_F] = { "unpack_8a_f", 1, 1 },
[QOP_UNPACK_8B_F] = { "unpack_8b_f", 1, 1 },
[QOP_UNPACK_8C_F] = { "unpack_8c_f", 1, 1 },
@@ -234,20 +230,6 @@ qir_writes_r4(struct qinst *inst)
}
}
-bool
-qir_reads_r4(struct qinst *inst)
-{
- switch (inst->op) {
- case QOP_R4_UNPACK_A:
- case QOP_R4_UNPACK_B:
- case QOP_R4_UNPACK_C:
- case QOP_R4_UNPACK_D:
- return true;
- default:
- return false;
- }
-}
-
static void
qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
{
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 732cfd0b306..cade795c12a 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -36,6 +36,11 @@
#include "util/list.h"
#include "util/u_math.h"
+#include "vc4_screen.h"
+#include "pipe/p_state.h"
+
+struct nir_builder;
+
enum qfile {
QFILE_NULL,
QFILE_TEMP,
@@ -155,10 +160,6 @@ enum qop {
* the destination
*/
QOP_TEX_RESULT,
- QOP_R4_UNPACK_A,
- QOP_R4_UNPACK_B,
- QOP_R4_UNPACK_C,
- QOP_R4_UNPACK_D
};
struct queued_qpu_inst {
@@ -243,7 +244,11 @@ enum quniform_contents {
QUNIFORM_TEXTURE_BORDER_COLOR,
- QUNIFORM_BLEND_CONST_COLOR,
+ QUNIFORM_BLEND_CONST_COLOR_X,
+ QUNIFORM_BLEND_CONST_COLOR_Y,
+ QUNIFORM_BLEND_CONST_COLOR_Z,
+ QUNIFORM_BLEND_CONST_COLOR_W,
+
QUNIFORM_STENCIL,
QUNIFORM_ALPHA_REF,
@@ -280,6 +285,52 @@ struct vc4_compiler_ubo_range {
bool used;
};
+struct vc4_key {
+ struct vc4_uncompiled_shader *shader_state;
+ struct {
+ enum pipe_format format;
+ unsigned compare_mode:1;
+ unsigned compare_func:3;
+ unsigned wrap_s:3;
+ unsigned wrap_t:3;
+ uint8_t swizzle[4];
+ } tex[VC4_MAX_TEXTURE_SAMPLERS];
+ uint8_t ucp_enables;
+};
+
+struct vc4_fs_key {
+ struct vc4_key base;
+ enum pipe_format color_format;
+ bool depth_enabled;
+ bool stencil_enabled;
+ bool stencil_twoside;
+ bool stencil_full_writemasks;
+ bool is_points;
+ bool is_lines;
+ bool alpha_test;
+ bool point_coord_upper_left;
+ bool light_twoside;
+ uint8_t alpha_test_func;
+ uint8_t logicop_func;
+ uint32_t point_sprite_mask;
+
+ struct pipe_rt_blend_state blend;
+};
+
+struct vc4_vs_key {
+ struct vc4_key base;
+
+ /**
+ * This is a proxy for the array of FS input semantics, which is
+ * larger than we would want to put in the key.
+ */
+ uint64_t compiled_fs_id;
+
+ enum pipe_format attr_formats[8];
+ bool is_coord;
+ bool per_vertex_point_size;
+};
+
struct vc4_compile {
struct vc4_context *vc4;
nir_shader *s;
@@ -369,6 +420,16 @@ struct vc4_compile {
uint32_t variant_id;
};
+/* Special nir_load_input intrinsic index for loading the current TLB
+ * destination color.
+ */
+#define VC4_NIR_TLB_COLOR_READ_INPUT 2000000000
+
+/* Special offset for nir_load_uniform values to get a QUNIFORM_*
+ * state-dependent value.
+ */
+#define VC4_NIR_STATE_UNIFORM_OFFSET 2000000000
+
struct vc4_compile *qir_compile_init(void);
void qir_compile_destroy(struct vc4_compile *c);
struct qinst *qir_inst(enum qop op, struct qreg dst,
@@ -393,7 +454,6 @@ bool qir_is_multi_instruction(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
-bool qir_reads_r4(struct qinst *inst);
bool qir_src_needs_a_file(struct qinst *inst);
struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);
@@ -409,6 +469,12 @@ bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
bool qir_opt_small_immediates(struct vc4_compile *c);
bool qir_opt_vpm_writes(struct vc4_compile *c);
+void vc4_nir_lower_blend(struct vc4_compile *c);
+void vc4_nir_lower_io(struct vc4_compile *c);
+nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
+ enum quniform_contents contents);
+nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,
+ nir_ssa_def **srcs, int swiz);
void qir_lower_uniforms(struct vc4_compile *c);
void qpu_schedule_instructions(struct vc4_compile *c);
@@ -523,27 +589,12 @@ QIR_ALU0(FRAG_W)
QIR_ALU0(FRAG_REV_FLAG)
QIR_ALU0(TEX_RESULT)
QIR_ALU0(TLB_COLOR_READ)
+QIR_NODST_1(TLB_COLOR_WRITE)
QIR_NODST_1(TLB_Z_WRITE)
QIR_NODST_1(TLB_DISCARD_SETUP)
QIR_NODST_1(TLB_STENCIL_SETUP)
static inline struct qreg
-qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i)
-{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef));
- return t;
-}
-
-static inline struct qreg
-qir_SEL_X_0_COND(struct vc4_compile *c, int i)
-{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef));
- return t;
-}
-
-static inline struct qreg
qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
{
struct qreg t = qir_get_temp(c);
diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
index 910c89dca79..f087c3b81b5 100644
--- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
@@ -52,7 +52,7 @@ static void
add_uniform(struct hash_table *ht, struct qreg reg)
{
struct hash_entry *entry;
- void *key = (void *)(uintptr_t)reg.index;
+ void *key = (void *)(uintptr_t)(reg.index + 1);
entry = _mesa_hash_table_search(ht, key);
if (entry) {
@@ -66,7 +66,7 @@ static void
remove_uniform(struct hash_table *ht, struct qreg reg)
{
struct hash_entry *entry;
- void *key = (void *)(uintptr_t)reg.index;
+ void *key = (void *)(uintptr_t)(reg.index + 1);
entry = _mesa_hash_table_search(ht, key);
assert(entry);
@@ -122,7 +122,7 @@ qir_lower_uniforms(struct vc4_compile *c)
struct hash_entry *entry;
hash_table_foreach(ht, entry) {
uint32_t count = (uintptr_t)entry->data;
- uint32_t index = (uintptr_t)entry->key;
+ uint32_t index = (uintptr_t)entry->key - 1;
if (count > max_count) {
max_count = count;
max_index = index;
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h
index c9ab6344589..fbb90ba12a0 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -122,23 +122,23 @@ static inline struct qpu_reg qpu_r3(void) { return qpu_rn(3); }
static inline struct qpu_reg qpu_r4(void) { return qpu_rn(4); }
static inline struct qpu_reg qpu_r5(void) { return qpu_rn(5); }
-uint64_t qpu_NOP(void);
-uint64_t qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src);
-uint64_t qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src);
+uint64_t qpu_NOP(void) ATTRIBUTE_CONST;
+uint64_t qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src) ATTRIBUTE_CONST;
+uint64_t qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src) ATTRIBUTE_CONST;
uint64_t qpu_a_alu2(enum qpu_op_add op, struct qpu_reg dst,
- struct qpu_reg src0, struct qpu_reg src1);
+ struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST;
uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst,
- struct qpu_reg src0, struct qpu_reg src1);
-uint64_t qpu_merge_inst(uint64_t a, uint64_t b);
-uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val);
-uint64_t qpu_set_sig(uint64_t inst, uint32_t sig);
-uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond);
-uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond);
-uint32_t qpu_encode_small_immediate(uint32_t i);
-
-bool qpu_waddr_is_tlb(uint32_t waddr);
-bool qpu_inst_is_tlb(uint64_t inst);
-int qpu_num_sf_accesses(uint64_t inst);
+ struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST;
+uint64_t qpu_merge_inst(uint64_t a, uint64_t b) ATTRIBUTE_CONST;
+uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
+uint64_t qpu_set_sig(uint64_t inst, uint32_t sig) ATTRIBUTE_CONST;
+uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
+uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
+uint32_t qpu_encode_small_immediate(uint32_t i) ATTRIBUTE_CONST;
+
+bool qpu_waddr_is_tlb(uint32_t waddr) ATTRIBUTE_CONST;
+bool qpu_inst_is_tlb(uint64_t inst) ATTRIBUTE_CONST;
+int qpu_num_sf_accesses(uint64_t inst) ATTRIBUTE_CONST;
void qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst);
static inline uint64_t
diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
index 55e0e6139b5..00aeb300a9b 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
@@ -225,7 +225,7 @@ static const char *qpu_condflags[] = {
};
#define DESC(array, index) \
- ((index > ARRAY_SIZE(array) || !(array)[index]) ? \
+ ((index >= ARRAY_SIZE(array) || !(array)[index]) ? \
"???" : (array)[index])
static const char *
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 99afe4b8798..f324056258c 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -234,6 +234,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QFILE_VPM:
assert((int)qinst->src[i].index >=
last_vpm_read_index);
+ (void)last_vpm_read_index;
last_vpm_read_index = qinst->src[i].index;
src[i] = qpu_ra(QPU_R_VPM);
break;
@@ -319,7 +320,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
abort();
}
- queue(c, qpu_a_MOV(dst, qpu_r4()));
+ if (dst.mux != QPU_MUX_R4)
+ queue(c, qpu_a_MOV(dst, qpu_r4()));
break;
@@ -402,6 +404,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
*last_inst(c) = qpu_set_sig(*last_inst(c),
QPU_SIG_COLOR_LOAD);
+ if (dst.mux != QPU_MUX_R4)
+ queue(c, qpu_a_MOV(dst, qpu_r4()));
break;
case QOP_TLB_COLOR_WRITE:
@@ -451,21 +455,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_NOP());
*last_inst(c) = qpu_set_sig(*last_inst(c),
QPU_SIG_LOAD_TMU0);
-
- break;
-
- case QOP_R4_UNPACK_A:
- case QOP_R4_UNPACK_B:
- case QOP_R4_UNPACK_C:
- case QOP_R4_UNPACK_D:
- assert(src[0].mux == QPU_MUX_R4);
- queue(c, qpu_a_MOV(dst, src[0]));
- *last_inst(c) |= QPU_PM;
- *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
- (qinst->op -
- QOP_R4_UNPACK_A),
- QPU_UNPACK);
-
+ if (dst.mux != QPU_MUX_R4)
+ queue(c, qpu_a_MOV(dst, qpu_r4()));
break;
case QOP_UNPACK_8A_F:
@@ -474,20 +465,30 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_UNPACK_8D_F:
case QOP_UNPACK_16A_F:
case QOP_UNPACK_16B_F: {
- assert(src[0].mux == QPU_MUX_A);
-
- /* Since we're setting the pack bits, if the
- * destination is in A it would get re-packed.
- */
- queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ?
- qpu_rb(31) : dst),
- src[0], src[0]));
- *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op -
- QOP_UNPACK_8A_F],
- QPU_UNPACK);
+ if (src[0].mux == QPU_MUX_R4) {
+ queue(c, qpu_a_MOV(dst, src[0]));
+ *last_inst(c) |= QPU_PM;
+ *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
+ (qinst->op -
+ QOP_UNPACK_8A_F),
+ QPU_UNPACK);
+ } else {
+ assert(src[0].mux == QPU_MUX_A);
- if (dst.mux == QPU_MUX_A) {
- queue(c, qpu_a_MOV(dst, qpu_rb(31)));
+ /* Since we're setting the pack bits, if the
+ * destination is in A it would get re-packed.
+ */
+ queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ?
+ qpu_rb(31) : dst),
+ src[0], src[0]));
+ *last_inst(c) |=
+ QPU_SET_FIELD(unpack_map[qinst->op -
+ QOP_UNPACK_8A_F],
+ QPU_UNPACK);
+
+ if (dst.mux == QPU_MUX_A) {
+ queue(c, qpu_a_MOV(dst, qpu_rb(31)));
+ }
}
}
break;
diff --git a/src/gallium/drivers/vc4/vc4_qpu_validate.c b/src/gallium/drivers/vc4/vc4_qpu_validate.c
index 8471edbf62c..9cf6841f41c 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_validate.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_validate.c
@@ -23,6 +23,13 @@
#include "vc4_qpu.h"
+#ifdef NDEBUG
+/* Since most of our code is used in assert()s, don't warn about dead code. */
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
static bool
writes_reg(uint64_t inst, uint32_t w)
{
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
index 3b0b890b66a..a29db1f3abe 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -116,6 +116,8 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs));
vc4->reg_class_any = ra_alloc_reg_class(vc4->regs);
+ vc4->reg_class_r4_or_a = ra_alloc_reg_class(vc4->regs);
+ vc4->reg_class_a = ra_alloc_reg_class(vc4->regs);
for (uint32_t i = 0; i < ARRAY_SIZE(vc4_regs); i++) {
/* Reserve ra31/rb31 for spilling fixup_raddr_conflict() in
* vc4_qpu_emit.c
@@ -126,15 +128,18 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
/* R4 can't be written as a general purpose register. (it's
* TMU_NOSWAP as a write address).
*/
- if (vc4_regs[i].mux == QPU_MUX_R4)
+ if (vc4_regs[i].mux == QPU_MUX_R4) {
+ ra_class_add_reg(vc4->regs, vc4->reg_class_r4_or_a, i);
continue;
+ }
ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
}
- vc4->reg_class_a = ra_alloc_reg_class(vc4->regs);
- for (uint32_t i = AB_INDEX; i < AB_INDEX + 64; i += 2)
+ for (uint32_t i = AB_INDEX; i < AB_INDEX + 64; i += 2) {
ra_class_add_reg(vc4->regs, vc4->reg_class_a, i);
+ ra_class_add_reg(vc4->regs, vc4->reg_class_r4_or_a, i);
+ }
ra_set_finalize(vc4->regs, NULL);
}
@@ -153,6 +158,10 @@ node_to_temp_priority(const void *in_a, const void *in_b)
return a->priority - b->priority;
}
+#define CLASS_BIT_A (1 << 0)
+#define CLASS_BIT_B_OR_ACC (1 << 1)
+#define CLASS_BIT_R4 (1 << 2)
+
/**
* Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
*
@@ -165,6 +174,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
uint32_t temp_to_node[c->num_temps];
uint32_t def[c->num_temps];
uint32_t use[c->num_temps];
+ uint8_t class_bits[c->num_temps];
struct qpu_reg *temp_registers = calloc(c->num_temps,
sizeof(*temp_registers));
memset(def, 0, sizeof(def));
@@ -181,10 +191,6 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
struct ra_graph *g = ra_alloc_interference_graph(vc4->regs,
c->num_temps);
- for (uint32_t i = 0; i < c->num_temps; i++) {
- ra_set_node_class(g, i, vc4->reg_class_any);
- }
-
/* Compute the live ranges so we can figure out interference.
*/
uint32_t ip = 0;
@@ -223,8 +229,33 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
temp_to_node[map[i].temp] = i;
}
- /* Figure out our register classes and preallocated registers*/
+ /* Figure out our register classes and preallocated registers. We
+ * start with any temp being able to be in any file, then instructions
+ * incrementally remove bits that the temp definitely can't be in.
+ */
+ memset(class_bits,
+ CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4,
+ sizeof(class_bits));
+
+ ip = 0;
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
+ if (qir_writes_r4(inst)) {
+ /* This instruction writes r4 (and optionally moves
+ * its result to a temp), so nothing else can be
+ * stored in r4 across it.
+ */
+ for (int i = 0; i < c->num_temps; i++) {
+ if (def[i] < ip && use[i] > ip)
+ class_bits[i] &= ~CLASS_BIT_R4;
+ }
+ } else {
+ /* R4 can't be written as a general purpose
+ * register. (it's TMU_NOSWAP as a write address).
+ */
+ if (inst->dst.file == QFILE_TEMP)
+ class_bits[inst->dst.index] &= ~CLASS_BIT_R4;
+ }
+
switch (inst->op) {
case QOP_FRAG_Z:
ra_set_node_reg(g, temp_to_node[inst->dst.index],
@@ -236,17 +267,9 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2);
break;
- case QOP_TEX_RESULT:
- case QOP_TLB_COLOR_READ:
- assert(vc4_regs[ACC_INDEX + 4].mux == QPU_MUX_R4);
- ra_set_node_reg(g, temp_to_node[inst->dst.index],
- ACC_INDEX + 4);
- break;
-
case QOP_PACK_SCALED:
/* The pack flags require an A-file dst register. */
- ra_set_node_class(g, temp_to_node[inst->dst.index],
- vc4->reg_class_a);
+ class_bits[inst->dst.index] &= CLASS_BIT_A;
break;
default:
@@ -254,8 +277,30 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
}
if (qir_src_needs_a_file(inst)) {
- ra_set_node_class(g, temp_to_node[inst->src[0].index],
- vc4->reg_class_a);
+ class_bits[inst->src[0].index] &= CLASS_BIT_A;
+ }
+ ip++;
+ }
+
+ for (uint32_t i = 0; i < c->num_temps; i++) {
+ int node = temp_to_node[i];
+
+ switch (class_bits[i]) {
+ case CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4:
+ case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
+ ra_set_node_class(g, node, vc4->reg_class_any);
+ break;
+ case CLASS_BIT_A | CLASS_BIT_R4:
+ ra_set_node_class(g, node, vc4->reg_class_r4_or_a);
+ break;
+ case CLASS_BIT_A:
+ ra_set_node_class(g, node, vc4->reg_class_a);
+ break;
+ default:
+ fprintf(stderr, "temp %d: bad class bits: 0x%x\n",
+ i, class_bits[i]);
+ abort();
+ break;
}
}
@@ -270,7 +315,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
}
bool ok = ra_allocate(g);
- assert(ok);
+ if (!ok) {
+ fprintf(stderr, "Failed to register allocate:\n");
+ qir_dump(c);
+ abort();
+ }
for (uint32_t i = 0; i < c->num_temps; i++) {
temp_registers[i] = vc4_regs[ra_get_node_reg(g, temp_to_node[i])];
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index cab76406055..5d5166fd818 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -102,6 +102,12 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
vc4_resource_bo_alloc(rsc);
+
+ /* If it might be bound as one of our vertex buffers, make
+ * sure we re-emit vertex buffer state.
+ */
+ if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+ vc4->dirty |= VC4_DIRTY_VTXBUF;
} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
if (vc4_cl_references_bo(pctx, rsc->bo)) {
if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
@@ -110,6 +116,8 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
prsc->height0 == box->height &&
prsc->depth0 == box->depth) {
vc4_resource_bo_alloc(rsc);
+ if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+ vc4->dirty |= VC4_DIRTY_VTXBUF;
} else {
vc4_flush(pctx);
}
diff --git a/src/gallium/drivers/vc4/vc4_resource.h b/src/gallium/drivers/vc4/vc4_resource.h
index ab8f5d3cd55..87571b75e8b 100644
--- a/src/gallium/drivers/vc4/vc4_resource.h
+++ b/src/gallium/drivers/vc4/vc4_resource.h
@@ -82,19 +82,19 @@ struct vc4_resource {
struct pipe_resource *shadow_parent;
};
-static INLINE struct vc4_resource *
+static inline struct vc4_resource *
vc4_resource(struct pipe_resource *prsc)
{
return (struct vc4_resource *)prsc;
}
-static INLINE struct vc4_surface *
+static inline struct vc4_surface *
vc4_surface(struct pipe_surface *psurf)
{
return (struct vc4_surface *)psurf;
}
-static INLINE struct vc4_transfer *
+static inline struct vc4_transfer *
vc4_transfer(struct pipe_transfer *ptrans)
{
return (struct vc4_transfer *)ptrans;
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index f63bead0fbb..2dee1d40e5f 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -176,6 +176,10 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
/* Stream output. */
@@ -489,6 +493,12 @@ vc4_screen_bo_get_handle(struct pipe_screen *pscreen,
{
whandle->stride = stride;
+ /* If we're passing some reference to our BO out to some other part of
+ * the system, then we can't do any optimizations about only us being
+ * the ones seeing it (like BO caching or shadow update avoidance).
+ */
+ bo->private = false;
+
switch (whandle->type) {
case DRM_API_HANDLE_TYPE_SHARED:
return vc4_bo_flink(bo, &whandle->handle);
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index b58013dd2ee..7cfd236349d 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -74,11 +74,12 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_exec_info *exec)
struct vc4_bo **bos = vc4->bo_pointers.base;
exec->bo_count = args->bo_handle_count;
- exec->bo = calloc(exec->bo_count, sizeof(struct vc4_bo_exec_state));
+ exec->bo = calloc(exec->bo_count, sizeof(void *));
for (int i = 0; i < exec->bo_count; i++) {
struct vc4_bo *bo = bos[i];
struct drm_gem_cma_object *obj = vc4_wrap_bo_with_cma(dev, bo);
+ struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base);
#if 0
fprintf(stderr, "bo hindex %d: %s\n", i, bo->name);
#endif
@@ -86,7 +87,16 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_exec_info *exec)
vc4_bo_map(bo);
memcpy(obj->vaddr, bo->map, bo->size);
- exec->bo[i].bo = obj;
+ exec->bo[i] = obj;
+
+ /* The kernel does this validation at shader create ioctl
+ * time.
+ */
+ if (strcmp(bo->name, "code") == 0) {
+ drm_bo->validated_shader = vc4_validate_shader(obj);
+ if (!drm_bo->validated_shader)
+ abort();
+ }
}
return 0;
}
@@ -95,7 +105,7 @@ static int
vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
{
for (int i = 0; i < exec->bo_count; i++) {
- struct drm_gem_cma_object *obj = exec->bo[i].bo;
+ struct drm_gem_cma_object *obj = exec->bo[i];
struct vc4_bo *bo = to_vc4_bo(&obj->base)->bo;
memcpy(bo->map, obj->vaddr, bo->size);
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.h b/src/gallium/drivers/vc4/vc4_simulator_validate.h
index 2bb36b253bb..68ace0216aa 100644
--- a/src/gallium/drivers/vc4/vc4_simulator_validate.h
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate.h
@@ -78,6 +78,7 @@ struct drm_gem_cma_object {
struct drm_vc4_bo {
struct drm_gem_cma_object base;
struct vc4_bo *bo;
+ struct vc4_validated_shader_info *validated_shader;
struct list_head unref_head;
};
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index 4a1d4c3a4d6..8a759c2ca4c 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -107,7 +107,7 @@ vc4_create_rasterizer_state(struct pipe_context *pctx,
/* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
* BCM21553).
*/
- so->point_size = MAX2(cso->point_size, .125);
+ so->point_size = MAX2(cso->point_size, .125f);
if (cso->front_ccw)
so->config_bits[0] |= VC4_CONFIG_BITS_CW_PRIMITIVES;
@@ -461,11 +461,64 @@ vc4_get_stage_tex(struct vc4_context *vc4, unsigned shader)
}
}
+static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
+{
+ switch (p_wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return 0;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return 1;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return 2;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return 3;
+ case PIPE_TEX_WRAP_CLAMP:
+ return (using_nearest ? 1 : 3);
+ default:
+ fprintf(stderr, "Unknown wrap mode %d\n", p_wrap);
+ assert(!"not reached");
+ return 0;
+ }
+}
+
+/**
+ * Creates the driver sampler state for a gallium sampler CSO.
+ *
+ * The gallium state is copied to the start of a struct vc4_sampler_state
+ * allocated with CALLOC_STRUCT, and the mag/min filter and S/T wrap fields
+ * of texture config parameter 1 are precomputed into so->texture_p1.
+ *
+ * \return the new state object, or NULL if the allocation failed.
+ */
static void *
vc4_create_sampler_state(struct pipe_context *pctx,
const struct pipe_sampler_state *cso)
{
- return vc4_generic_cso_state_create(cso, sizeof(*cso));
+ /* Indexed by min_mip_filter * 2 + min_img_filter (see below).
+  * NOTE(review): relies on the numeric values of PIPE_TEX_MIPFILTER_* and
+  * PIPE_TEX_FILTER_* matching this table order -- confirm.
+  */
+ static const uint8_t minfilter_map[6] = {
+ VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR,
+ VC4_TEX_P1_MINFILT_LIN_MIP_NEAR,
+ VC4_TEX_P1_MINFILT_NEAR_MIP_LIN,
+ VC4_TEX_P1_MINFILT_LIN_MIP_LIN,
+ VC4_TEX_P1_MINFILT_NEAREST,
+ VC4_TEX_P1_MINFILT_LINEAR,
+ };
+ static const uint32_t magfilter_map[] = {
+ [PIPE_TEX_FILTER_NEAREST] = VC4_TEX_P1_MAGFILT_NEAREST,
+ [PIPE_TEX_FILTER_LINEAR] = VC4_TEX_P1_MAGFILT_LINEAR,
+ };
+ /* The image filters are PIPE_TEX_FILTER_* values, so compare against
+  * that enum (as magfilter_map does) rather than the mip-filter one.
+  */
+ bool either_nearest =
+ (cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST ||
+ cso->min_img_filter == PIPE_TEX_FILTER_NEAREST);
+ struct vc4_sampler_state *so = CALLOC_STRUCT(vc4_sampler_state);
+
+ if (!so)
+ return NULL;
+
+ /* NOTE(review): presumably struct vc4_sampler_state begins with a
+  * struct pipe_sampler_state, so this fills in that leading member --
+  * confirm against the struct definition.
+  */
+ memcpy(so, cso, sizeof(*cso));
+
+ so->texture_p1 =
+ (VC4_SET_FIELD(magfilter_map[cso->mag_img_filter],
+ VC4_TEX_P1_MAGFILT) |
+ VC4_SET_FIELD(minfilter_map[cso->min_mip_filter * 2 +
+ cso->min_img_filter],
+ VC4_TEX_P1_MINFILT) |
+ VC4_SET_FIELD(translate_wrap(cso->wrap_s, either_nearest),
+ VC4_TEX_P1_WRAP_S) |
+ VC4_SET_FIELD(translate_wrap(cso->wrap_t, either_nearest),
+ VC4_TEX_P1_WRAP_T));
+
+ return so;
}
static void
@@ -499,13 +552,13 @@ static struct pipe_sampler_view *
vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso)
{
- struct pipe_sampler_view *so = malloc(sizeof(*so));
+ struct vc4_sampler_view *so = malloc(sizeof(*so));
struct vc4_resource *rsc = vc4_resource(prsc);
if (!so)
return NULL;
- *so = *cso;
+ so->base = *cso;
pipe_reference(NULL, &prsc->reference);
@@ -516,18 +569,19 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
* Also, Raspberry Pi doesn't support sampling from raster textures,
* so we also have to copy to a temporary then.
*/
- if (so->u.tex.first_level ||
+ if (cso->u.tex.first_level ||
rsc->vc4_format == VC4_TEXTURE_TYPE_RGBA32R) {
struct vc4_resource *shadow_parent = vc4_resource(prsc);
struct pipe_resource tmpl = shadow_parent->base.b;
struct vc4_resource *clone;
tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
- tmpl.width0 = u_minify(tmpl.width0, so->u.tex.first_level);
- tmpl.height0 = u_minify(tmpl.height0, so->u.tex.first_level);
- tmpl.last_level = so->u.tex.last_level - so->u.tex.first_level;
+ tmpl.width0 = u_minify(tmpl.width0, cso->u.tex.first_level);
+ tmpl.height0 = u_minify(tmpl.height0, cso->u.tex.first_level);
+ tmpl.last_level = cso->u.tex.last_level - cso->u.tex.first_level;
prsc = vc4_resource_create(pctx->screen, &tmpl);
+ rsc = vc4_resource(prsc);
clone = vc4_resource(prsc);
clone->shadow_parent = &shadow_parent->base.b;
/* Flag it as needing update of the contents from the parent. */
@@ -535,11 +589,23 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
assert(clone->vc4_format != VC4_TEXTURE_TYPE_RGBA32R);
}
- so->texture = prsc;
- so->reference.count = 1;
- so->context = pctx;
-
- return so;
+ so->base.texture = prsc;
+ so->base.reference.count = 1;
+ so->base.context = pctx;
+
+ so->texture_p0 =
+ (VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
+ VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) |
+ VC4_SET_FIELD(cso->u.tex.last_level -
+ cso->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
+ VC4_SET_FIELD(cso->target == PIPE_TEXTURE_CUBE,
+ VC4_TEX_P0_CMMODE));
+ so->texture_p1 =
+ (VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
+ VC4_SET_FIELD(prsc->height0 & 2047, VC4_TEX_P1_HEIGHT) |
+ VC4_SET_FIELD(prsc->width0 & 2047, VC4_TEX_P1_WIDTH));
+
+ return &so->base;
}
static void
diff --git a/src/gallium/drivers/vc4/vc4_tiling.c b/src/gallium/drivers/vc4/vc4_tiling.c
index f9801c9cefd..cf86eb0fa31 100644
--- a/src/gallium/drivers/vc4/vc4_tiling.c
+++ b/src/gallium/drivers/vc4/vc4_tiling.c
@@ -127,13 +127,10 @@ vc4_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp)
+/* Asserts that box->x and box->width have no bits set below
+ * vc4_utile_width(cpp), and that box->y and box->height have none below
+ * vc4_utile_height(cpp).
+ * NOTE(review): the mask test equals a "multiple of" check only if the utile
+ * dimensions are powers of two -- confirm.
+ */
static void
check_box_utile_alignment(const struct pipe_box *box, int cpp)
{
- uint32_t utile_w = vc4_utile_width(cpp);
- uint32_t utile_h = vc4_utile_height(cpp);
-
- assert(!(box->x & (utile_w - 1)));
- assert(!(box->y & (utile_h - 1)));
- assert(!(box->width & (utile_w - 1)));
- assert(!(box->height & (utile_h - 1)));
+ assert(!(box->x & (vc4_utile_width(cpp) - 1)));
+ assert(!(box->y & (vc4_utile_height(cpp) - 1)));
+ assert(!(box->width & (vc4_utile_width(cpp) - 1)));
+ assert(!(box->height & (vc4_utile_height(cpp) - 1)));
}
static void
diff --git a/src/gallium/drivers/vc4/vc4_tiling.h b/src/gallium/drivers/vc4/vc4_tiling.h
index b5d10da3417..b90bba70200 100644
--- a/src/gallium/drivers/vc4/vc4_tiling.h
+++ b/src/gallium/drivers/vc4/vc4_tiling.h
@@ -24,9 +24,9 @@
#ifndef VC4_TILING_H
#define VC4_TILING_H
-uint32_t vc4_utile_width(int cpp);
-uint32_t vc4_utile_height(int cpp);
-bool vc4_size_is_lt(uint32_t width, uint32_t height, int cpp);
+uint32_t vc4_utile_width(int cpp) ATTRIBUTE_CONST;
+uint32_t vc4_utile_height(int cpp) ATTRIBUTE_CONST;
+bool vc4_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST;
void vc4_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp);
void vc4_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp);
void vc4_load_tiled_image(void *dst, uint32_t dst_stride,
diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c
new file mode 100644
index 00000000000..85d6998205e
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_uniforms.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_pack_color.h"
+#include "util/format_srgb.h"
+
+#include "vc4_context.h"
+#include "vc4_qir.h"
+
+/**
+ * Emits the texture config parameter 0 uniform for one texture unit.
+ *
+ * The value is the texture_p0 word precomputed on the sampler view; it is
+ * written through cl_reloc() against the BO of the view's texture resource.
+ */
+static void
+write_texture_p0(struct vc4_context *vc4,
+                 struct vc4_cl_out **uniforms,
+                 struct vc4_texture_stateobj *texstate,
+                 uint32_t unit)
+{
+        struct pipe_sampler_view *pview = texstate->textures[unit];
+        struct vc4_sampler_view *view = vc4_sampler_view(pview);
+
+        cl_reloc(vc4, &vc4->uniforms, uniforms,
+                 vc4_resource(view->base.texture)->bo, view->texture_p0);
+}
+
+/**
+ * Emits the texture config parameter 1 uniform for one texture unit.
+ *
+ * The word is the OR of the texture_p1 bits precomputed on the sampler view
+ * and the texture_p1 bits precomputed on the sampler state of the same unit.
+ */
+static void
+write_texture_p1(struct vc4_context *vc4,
+                 struct vc4_cl_out **uniforms,
+                 struct vc4_texture_stateobj *texstate,
+                 uint32_t unit)
+{
+        struct vc4_sampler_view *view =
+                vc4_sampler_view(texstate->textures[unit]);
+        struct vc4_sampler_state *state =
+                vc4_sampler_state(texstate->samplers[unit]);
+
+        cl_aligned_u32(uniforms, state->texture_p1 | view->texture_p1);
+}
+
+/**
+ * Emits the texture config parameter 2 uniform carrying the cube map stride.
+ *
+ * \param data  low 16 bits select the texture unit; bit 16 is stored in the
+ *              VC4_TEX_P2_BSLOD field.
+ */
+static void
+write_texture_p2(struct vc4_context *vc4,
+                 struct vc4_cl_out **uniforms,
+                 struct vc4_texture_stateobj *texstate,
+                 uint32_t data)
+{
+        uint32_t unit = data & 0xffff;
+        struct vc4_resource *rsc =
+                vc4_resource(texstate->textures[unit]->texture);
+
+        cl_aligned_u32(uniforms,
+                       VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
+                                     VC4_TEX_P2_PTYPE) |
+                       VC4_SET_FIELD(rsc->cube_map_stride >> 12,
+                                     VC4_TEX_P2_CMST) |
+                       VC4_SET_FIELD((data >> 16) & 1, VC4_TEX_P2_BSLOD));
+}
+
+
+/* Expands to a brace-enclosed list of four UTIL_FORMAT_SWIZZLE_* values
+ * built by token-pasting the arguments.
+ * NOTE(review): nothing in this file appears to use SWIZ -- confirm whether
+ * it can be dropped.
+ */
+#define SWIZ(x,y,z,w) { \
+ UTIL_FORMAT_SWIZZLE_##x, \
+ UTIL_FORMAT_SWIZZLE_##y, \
+ UTIL_FORMAT_SWIZZLE_##z, \
+ UTIL_FORMAT_SWIZZLE_##w \
+}
+
+/**
+ * Emits one uniform holding the sampler's border color for a texture unit,
+ * packed into a single 32-bit word chosen according to the texture's format.
+ */
+static void
+write_texture_border_color(struct vc4_context *vc4,
+ struct vc4_cl_out **uniforms,
+ struct vc4_texture_stateobj *texstate,
+ uint32_t unit)
+{
+ struct pipe_sampler_state *sampler = texstate->samplers[unit];
+ struct pipe_sampler_view *texture = texstate->textures[unit];
+ struct vc4_resource *rsc = vc4_resource(texture->texture);
+ union util_color uc;
+
+ const struct util_format_description *tex_format_desc =
+ util_format_description(texture->format);
+
+ /* Work on a copy; for sRGB formats only the first three channels are
+  * converted, the fourth is left as supplied.
+  */
+ float border_color[4];
+ for (int i = 0; i < 4; i++)
+ border_color[i] = sampler->border_color.f[i];
+ if (util_format_is_srgb(texture->format)) {
+ for (int i = 0; i < 3; i++)
+ border_color[i] =
+ util_format_linear_to_srgb_float(border_color[i]);
+ }
+
+ /* Turn the border color into the layout of channels that it would
+ * have when stored as texture contents.
+ */
+ float storage_color[4];
+ util_format_unswizzle_4f(storage_color,
+ border_color,
+ tex_format_desc->swizzle);
+
+ /* Now, pack so that when the vc4_format-sampled texture contents are
+ * replaced with our border color, the vc4_get_format_swizzle()
+ * swizzling will get the right channels.
+ */
+ if (util_format_is_depth_or_stencil(texture->format)) {
+ /* This path packs the sampler's first border channel directly
+  * and does not use storage_color.
+  */
+ uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
+ sampler->border_color.f[0]) << 8;
+ } else {
+ switch (rsc->vc4_format) {
+ /* Formats not listed below share the RGBA8888 packing. */
+ default:
+ case VC4_TEXTURE_TYPE_RGBA8888:
+ util_pack_color(storage_color,
+ PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
+ break;
+ case VC4_TEXTURE_TYPE_RGBA4444:
+ util_pack_color(storage_color,
+ PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
+ break;
+ case VC4_TEXTURE_TYPE_RGB565:
+ util_pack_color(storage_color,
+ PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+ break;
+ case VC4_TEXTURE_TYPE_ALPHA:
+ uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
+ break;
+ case VC4_TEXTURE_TYPE_LUMALPHA:
+ uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
+ (float_to_ubyte(storage_color[0]) << 0));
+ break;
+ }
+ }
+
+ cl_aligned_u32(uniforms, uc.ui[0]);
+}
+
+/**
+ * Returns the uniform value for a texture-rectangle scale factor.
+ *
+ * \param contents  QUNIFORM_TEXRECT_SCALE_X selects the texture's width0;
+ *                  any other value selects height0.
+ * \param data      index into texstate->textures.
+ * \return fui() of 1.0f divided by the selected dimension.
+ */
+static uint32_t
+get_texrect_scale(struct vc4_texture_stateobj *texstate,
+                  enum quniform_contents contents,
+                  uint32_t data)
+{
+        struct pipe_resource *res = texstate->textures[data]->texture;
+        uint32_t dim = (contents == QUNIFORM_TEXRECT_SCALE_X) ?
+                res->width0 : res->height0;
+
+        return fui(1.0f / dim);
+}
+
+/**
+ * Allocates a "ubo" BO of shader->ubo_size and fills it by copying each of
+ * the shader's ubo_ranges out of the gallium uniform storage.
+ *
+ * \return the new BO, or NULL when shader->ubo_size is zero.  The caller in
+ *         this file drops the reference with vc4_bo_unreference().
+ */
+static struct vc4_bo *
+vc4_upload_ubo(struct vc4_context *vc4,
+ struct vc4_compiled_shader *shader,
+ const uint32_t *gallium_uniforms)
+{
+ if (!shader->ubo_size)
+ return NULL;
+
+ struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen, shader->ubo_size, "ubo");
+ uint32_t *data = vc4_bo_map(ubo);
+ for (uint32_t i = 0; i < shader->num_ubo_ranges; i++) {
+ /* NOTE(review): both pointers are uint32_t *, so dst_offset and
+  * src_offset are applied in 4-byte units here, while size is
+  * handed to memcpy() as a byte count.  Confirm which units the
+  * ubo_ranges offsets are stored in.
+  */
+ memcpy(data + shader->ubo_ranges[i].dst_offset,
+ gallium_uniforms + shader->ubo_ranges[i].src_offset,
+ shader->ubo_ranges[i].size);
+ }
+
+ return ubo;
+}
+
+/**
+ * Writes the uniform stream for one compiled shader into vc4->uniforms.
+ *
+ * One value is emitted per entry of shader->uniforms, in order; the source
+ * of each value is chosen by contents[i], with data[i] as its argument.
+ *
+ * \param cb        constant buffers; only cb[0].user_buffer is read here.
+ * \param texstate  texture/sampler bindings used by the texture uniforms.
+ */
+void
+vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
+ struct vc4_constbuf_stateobj *cb,
+ struct vc4_texture_stateobj *texstate)
+{
+ struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
+ const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
+ /* NULL when the shader has no ubo_size; unreferenced at the end. */
+ struct vc4_bo *ubo = vc4_upload_ubo(vc4, shader, gallium_uniforms);
+
+ /* Space request is sized from the uniform count plus the number of
+  * texture samples.
+  */
+ cl_ensure_space(&vc4->uniforms, (uinfo->count +
+ uinfo->num_texture_samples) * 4);
+
+ struct vc4_cl_out *uniforms =
+ cl_start_shader_reloc(&vc4->uniforms,
+ uinfo->num_texture_samples);
+
+ for (int i = 0; i < uinfo->count; i++) {
+
+ /* NOTE(review): there is no default case, so a contents value
+  * not listed here emits nothing for its slot -- confirm that
+  * every enum quniform_contents value is handled.
+  */
+ switch (uinfo->contents[i]) {
+ case QUNIFORM_CONSTANT:
+ cl_aligned_u32(&uniforms, uinfo->data[i]);
+ break;
+ case QUNIFORM_UNIFORM:
+ cl_aligned_u32(&uniforms,
+ gallium_uniforms[uinfo->data[i]]);
+ break;
+ case QUNIFORM_VIEWPORT_X_SCALE:
+ cl_aligned_f(&uniforms, vc4->viewport.scale[0] * 16.0f);
+ break;
+ case QUNIFORM_VIEWPORT_Y_SCALE:
+ cl_aligned_f(&uniforms, vc4->viewport.scale[1] * 16.0f);
+ break;
+
+ case QUNIFORM_VIEWPORT_Z_OFFSET:
+ cl_aligned_f(&uniforms, vc4->viewport.translate[2]);
+ break;
+ case QUNIFORM_VIEWPORT_Z_SCALE:
+ cl_aligned_f(&uniforms, vc4->viewport.scale[2]);
+ break;
+
+ case QUNIFORM_USER_CLIP_PLANE:
+ /* data[i] is a flat index: plane = / 4, component = % 4. */
+ cl_aligned_f(&uniforms,
+ vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
+ break;
+
+ case QUNIFORM_TEXTURE_CONFIG_P0:
+ write_texture_p0(vc4, &uniforms, texstate,
+ uinfo->data[i]);
+ break;
+
+ case QUNIFORM_TEXTURE_CONFIG_P1:
+ write_texture_p1(vc4, &uniforms, texstate,
+ uinfo->data[i]);
+ break;
+
+ case QUNIFORM_TEXTURE_CONFIG_P2:
+ write_texture_p2(vc4, &uniforms, texstate,
+ uinfo->data[i]);
+ break;
+
+ case QUNIFORM_UBO_ADDR:
+ cl_aligned_reloc(vc4, &vc4->uniforms, &uniforms, ubo, 0);
+ break;
+
+ case QUNIFORM_TEXTURE_BORDER_COLOR:
+ write_texture_border_color(vc4, &uniforms,
+ texstate, uinfo->data[i]);
+ break;
+
+ case QUNIFORM_TEXRECT_SCALE_X:
+ case QUNIFORM_TEXRECT_SCALE_Y:
+ cl_aligned_u32(&uniforms,
+ get_texrect_scale(texstate,
+ uinfo->contents[i],
+ uinfo->data[i]));
+ break;
+
+ case QUNIFORM_BLEND_CONST_COLOR_X:
+ case QUNIFORM_BLEND_CONST_COLOR_Y:
+ case QUNIFORM_BLEND_CONST_COLOR_Z:
+ case QUNIFORM_BLEND_CONST_COLOR_W:
+ /* The channel index is the distance from the _X enum value;
+  * the color is clamped to [0, 1] before being written.
+  */
+ cl_aligned_f(&uniforms,
+ CLAMP(vc4->blend_color.color[uinfo->contents[i] -
+ QUNIFORM_BLEND_CONST_COLOR_X],
+ 0, 1));
+ break;
+
+ case QUNIFORM_STENCIL:
+ /* The stencil reference value is OR'd in (shifted left by 8)
+  * only for data[i] of 0 or 1.
+  */
+ cl_aligned_u32(&uniforms,
+ vc4->zsa->stencil_uniforms[uinfo->data[i]] |
+ (uinfo->data[i] <= 1 ?
+ (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
+ 0));
+ break;
+
+ case QUNIFORM_ALPHA_REF:
+ cl_aligned_f(&uniforms,
+ vc4->zsa->base.alpha.ref_value);
+ break;
+ }
+#if 0
+ uint32_t written_val = *((uint32_t *)uniforms - 1);
+ fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
+ shader, i, written_val, uif(written_val));
+#endif
+ }
+
+ cl_end(&vc4->uniforms, uniforms);
+
+ vc4_bo_unreference(&ubo);
+}
+
+/**
+ * Computes which VC4_DIRTY_* state bits the shader's uniforms depend on and
+ * stores the result in shader->uniform_dirty_bits.
+ *
+ * Each entry of shader->uniforms.contents contributes the dirty bit of the
+ * state it is sourced from; QUNIFORM_CONSTANT contributes nothing.
+ */
+void
+vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
+{
+        struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
+        uint32_t bits = 0;
+
+        for (int u = 0; u < uinfo->count; u++) {
+                switch (uinfo->contents[u]) {
+                case QUNIFORM_CONSTANT:
+                        /* Baked into the shader; no state dependency. */
+                        break;
+
+                /* Constant buffer contents. */
+                case QUNIFORM_UNIFORM:
+                case QUNIFORM_UBO_ADDR:
+                        bits |= VC4_DIRTY_CONSTBUF;
+                        break;
+
+                /* Viewport transform. */
+                case QUNIFORM_VIEWPORT_X_SCALE:
+                case QUNIFORM_VIEWPORT_Y_SCALE:
+                case QUNIFORM_VIEWPORT_Z_OFFSET:
+                case QUNIFORM_VIEWPORT_Z_SCALE:
+                        bits |= VC4_DIRTY_VIEWPORT;
+                        break;
+
+                case QUNIFORM_USER_CLIP_PLANE:
+                        bits |= VC4_DIRTY_CLIP;
+                        break;
+
+                /* Texture and sampler bindings. */
+                case QUNIFORM_TEXTURE_CONFIG_P0:
+                case QUNIFORM_TEXTURE_CONFIG_P1:
+                case QUNIFORM_TEXTURE_CONFIG_P2:
+                case QUNIFORM_TEXTURE_BORDER_COLOR:
+                case QUNIFORM_TEXRECT_SCALE_X:
+                case QUNIFORM_TEXRECT_SCALE_Y:
+                        bits |= VC4_DIRTY_TEXSTATE;
+                        break;
+
+                case QUNIFORM_BLEND_CONST_COLOR_X:
+                case QUNIFORM_BLEND_CONST_COLOR_Y:
+                case QUNIFORM_BLEND_CONST_COLOR_Z:
+                case QUNIFORM_BLEND_CONST_COLOR_W:
+                        bits |= VC4_DIRTY_BLEND_COLOR;
+                        break;
+
+                /* Depth/stencil/alpha state object. */
+                case QUNIFORM_STENCIL:
+                case QUNIFORM_ALPHA_REF:
+                        bits |= VC4_DIRTY_ZSA;
+                        break;
+                }
+        }
+
+        shader->uniform_dirty_bits = bits;
+}