summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Alyssa Rosenzweig <[email protected]> 2019-05-14 04:11:36 +0000
committer: Alyssa Rosenzweig <[email protected]> 2019-05-16 01:25:25 +0000
commit: 74ab80b92da9390a3ca7e718537e035bd5af0b22 (patch)
tree: c3dae1c37cdbcb9d3b0041558255aef3c6af2b7f /src
parent: f73c0b73ec28b2fd0d98b027491806336e4768eb (diff)
panfrost/midgard: Add load/store opcodes
This commit adds a bunch of new load/store opcodes, largely related to OpenCL, and adjusts the names of existing opcodes to be more uniform. The immediate effect is that compute shaders are now substantially easier to interpret. Signed-off-by: Alyssa Rosenzweig <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/panfrost/midgard/disassemble.c 10
-rw-r--r-- src/gallium/drivers/panfrost/midgard/helpers.h 6
-rw-r--r-- src/gallium/drivers/panfrost/midgard/midgard.h 129
-rw-r--r-- src/gallium/drivers/panfrost/midgard/midgard_compile.c 38
4 files changed, 131 insertions, 52 deletions
diff --git a/src/gallium/drivers/panfrost/midgard/disassemble.c b/src/gallium/drivers/panfrost/midgard/disassemble.c
index c893bc89a6c..a9e443fa67c 100644
--- a/src/gallium/drivers/panfrost/midgard/disassemble.c
+++ b/src/gallium/drivers/panfrost/midgard/disassemble.c
@@ -855,10 +855,10 @@ static bool
is_op_varying(unsigned op)
{
switch (op) {
- case midgard_op_store_vary_16:
- case midgard_op_store_vary_32:
- case midgard_op_load_vary_16:
- case midgard_op_load_vary_32:
+ case midgard_op_st_vary_16:
+ case midgard_op_st_vary_32:
+ case midgard_op_ld_vary_16:
+ case midgard_op_ld_vary_32:
return true;
}
@@ -881,7 +881,7 @@ print_load_store_instr(uint64_t data,
int address = word->address;
- if (word->op == midgard_op_load_uniform_32) {
+ if (word->op == midgard_op_ld_uniform_32) {
/* Uniforms use their own addressing scheme */
int lo = word->varying_parameters >> 7;
diff --git a/src/gallium/drivers/panfrost/midgard/helpers.h b/src/gallium/drivers/panfrost/midgard/helpers.h
index 441c7285887..f32a683233a 100644
--- a/src/gallium/drivers/panfrost/midgard/helpers.h
+++ b/src/gallium/drivers/panfrost/midgard/helpers.h
@@ -23,13 +23,13 @@
*/
#define OP_IS_STORE_VARY(op) (\
- op == midgard_op_store_vary_16 || \
- op == midgard_op_store_vary_32 \
+ op == midgard_op_st_vary_16 || \
+ op == midgard_op_st_vary_32 \
)
#define OP_IS_STORE(op) (\
OP_IS_STORE_VARY(op) || \
- op == midgard_op_store_cubemap_coords \
+ op == midgard_op_st_cubemap_coords \
)
#define OP_IS_MOVE(op) ( \
diff --git a/src/gallium/drivers/panfrost/midgard/midgard.h b/src/gallium/drivers/panfrost/midgard/midgard.h
index 91d1c075f96..4a4ec0e4542 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard.h
+++ b/src/gallium/drivers/panfrost/midgard/midgard.h
@@ -345,20 +345,63 @@ typedef enum {
/* Unclear why this is on the L/S unit, but (with an address of 0,
* appropriate swizzle, magic constant 0x24, and xy mask?) moves fp32 cube
* map coordinates in r27 to its cube map texture coordinate
- * destination (e.g r29). 0x4 magic for loading from fp16 instead */
-
- midgard_op_store_cubemap_coords = 0x0E,
-
- midgard_op_load_attr_16 = 0x95,
- midgard_op_load_attr_32 = 0x94,
- midgard_op_load_vary_16 = 0x99,
- midgard_op_load_vary_32 = 0x98,
- midgard_op_load_color_buffer_16 = 0x9D,
- midgard_op_load_color_buffer_8 = 0xBA,
- midgard_op_load_uniform_16 = 0xAC,
- midgard_op_load_uniform_32 = 0xB0,
- midgard_op_store_vary_16 = 0xD5,
- midgard_op_store_vary_32 = 0xD4
+ * destination (e.g r29). 0x4 magic for loading from fp16 instead */
+
+ midgard_op_st_cubemap_coords = 0x0E,
+
+ /* Used in OpenCL. Probably can load other things as well */
+ midgard_op_ld_global_id = 0x10,
+
+ /* val in r27.y, address embedded, outputs result to argument. Invert val for sub. Let val = +-1 for inc/dec. */
+ midgard_op_atomic_add = 0x40,
+ midgard_op_atomic_and = 0x44,
+ midgard_op_atomic_or = 0x48,
+ midgard_op_atomic_xor = 0x4C,
+
+ midgard_op_atomic_imin = 0x50,
+ midgard_op_atomic_umin = 0x54,
+ midgard_op_atomic_imax = 0x58,
+ midgard_op_atomic_umax = 0x5C,
+
+ midgard_op_atomic_xchg = 0x60,
+
+ /* Used for compute shader's __global arguments, __local variables (or
+ * for register spilling) */
+
+ midgard_op_ld_char = 0x81,
+ midgard_op_ld_char2 = 0x84,
+ midgard_op_ld_short = 0x85,
+ midgard_op_ld_char4 = 0x88, /* short2, int, float */
+ midgard_op_ld_short4 = 0x8C, /* int2, float2, long */
+ midgard_op_ld_int4 = 0x90, /* float4, long2 */
+
+ midgard_op_ld_attr_32 = 0x94,
+ midgard_op_ld_attr_16 = 0x95,
+ midgard_op_ld_attr_32i = 0x97,
+ midgard_op_ld_vary_32 = 0x98,
+ midgard_op_ld_vary_16 = 0x99,
+ midgard_op_ld_vary_32i = 0x9B,
+ midgard_op_ld_color_buffer_16 = 0x9D,
+
+ midgard_op_ld_uniform_16 = 0xAC,
+
+ midgard_op_ld_uniform_32 = 0xB0,
+ midgard_op_ld_color_buffer_8 = 0xBA,
+
+ midgard_op_st_char = 0xC0,
+ midgard_op_st_char2 = 0xC4, /* short */
+ midgard_op_st_char4 = 0xC8, /* short2, int, float */
+ midgard_op_st_short4 = 0xCC, /* int2, float2, long */
+ midgard_op_st_int4 = 0xD0, /* float4, long2 */
+
+ midgard_op_st_vary_32 = 0xD4,
+ midgard_op_st_vary_16 = 0xD5,
+ midgard_op_st_vary_32i = 0xD7,
+
+ /* Value to store in r27, location r26.w as short2 */
+ midgard_op_st_image_f = 0xD8,
+ midgard_op_st_image_ui = 0xDA,
+ midgard_op_st_image_i = 0xDB,
} midgard_load_store_op;
typedef enum {
@@ -494,17 +537,53 @@ __attribute__((__packed__))
midgard_texture_word;
static char *load_store_opcode_names[256] = {
- [midgard_op_store_cubemap_coords] = "st_cubemap_coords",
- [midgard_op_load_attr_16] = "ld_attr_16",
- [midgard_op_load_attr_32] = "ld_attr_32",
- [midgard_op_load_vary_16] = "ld_vary_16",
- [midgard_op_load_vary_32] = "ld_vary_32",
- [midgard_op_load_uniform_16] = "ld_uniform_16",
- [midgard_op_load_uniform_32] = "ld_uniform_32",
- [midgard_op_load_color_buffer_8] = "ld_color_buffer_8",
- [midgard_op_load_color_buffer_16] = "ld_color_buffer_16",
- [midgard_op_store_vary_16] = "st_vary_16",
- [midgard_op_store_vary_32] = "st_vary_32"
+ [midgard_op_st_cubemap_coords] = "st_cubemap_coords",
+ [midgard_op_ld_global_id] = "ld_global_id",
+
+ [midgard_op_atomic_add] = "atomic_add",
+ [midgard_op_atomic_and] = "atomic_and",
+ [midgard_op_atomic_or] = "atomic_or",
+ [midgard_op_atomic_xor] = "atomic_xor",
+ [midgard_op_atomic_imin] = "atomic_imin",
+ [midgard_op_atomic_umin] = "atomic_umin",
+ [midgard_op_atomic_imax] = "atomic_imax",
+ [midgard_op_atomic_umax] = "atomic_umax",
+ [midgard_op_atomic_xchg] = "atomic_xchg",
+
+ [midgard_op_ld_char] = "ld_char",
+ [midgard_op_ld_char2] = "ld_char2",
+ [midgard_op_ld_short] = "ld_short",
+ [midgard_op_ld_char4] = "ld_char4",
+ [midgard_op_ld_short4] = "ld_short4",
+ [midgard_op_ld_int4] = "ld_int4",
+
+ [midgard_op_ld_attr_32] = "ld_attr_32",
+ [midgard_op_ld_attr_16] = "ld_attr_16",
+ [midgard_op_ld_attr_32i] = "ld_attr_32i",
+
+ [midgard_op_ld_vary_32] = "ld_vary_32",
+ [midgard_op_ld_vary_16] = "ld_vary_16",
+ [midgard_op_ld_vary_32i] = "ld_vary_32i",
+
+ [midgard_op_ld_color_buffer_16] = "ld_color_buffer_16",
+
+ [midgard_op_ld_uniform_16] = "ld_uniform_16",
+ [midgard_op_ld_uniform_32] = "ld_uniform_32",
+ [midgard_op_ld_color_buffer_8] = "ld_color_buffer_8",
+
+ [midgard_op_st_char] = "st_char",
+ [midgard_op_st_char2] = "st_char2",
+ [midgard_op_st_char4] = "st_char4",
+ [midgard_op_st_short4] = "st_short4",
+ [midgard_op_st_int4] = "st_int4",
+
+ [midgard_op_st_vary_32] = "st_vary_32",
+ [midgard_op_st_vary_16] = "st_vary_16",
+ [midgard_op_st_vary_32i] = "st_vary_32i",
+
+ [midgard_op_st_image_f] = "st_image_f",
+ [midgard_op_st_image_ui] = "st_image_ui",
+ [midgard_op_st_image_i] = "st_image_i",
};
#endif
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
index 421a3343a71..61fa610e812 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
@@ -323,16 +323,16 @@ v_fmov(unsigned src, midgard_vector_alu_src mod, unsigned dest)
* don't support half-floats -- this requires changes in other parts of the
* compiler -- therefore the 16-bit versions are commented out. */
-//M_LOAD(load_attr_16);
-M_LOAD(load_attr_32);
-//M_LOAD(load_vary_16);
-M_LOAD(load_vary_32);
-//M_LOAD(load_uniform_16);
-M_LOAD(load_uniform_32);
-M_LOAD(load_color_buffer_8);
-//M_STORE(store_vary_16);
-M_STORE(store_vary_32);
-M_STORE(store_cubemap_coords);
+//M_LOAD(ld_attr_16);
+M_LOAD(ld_attr_32);
+//M_LOAD(ld_vary_16);
+M_LOAD(ld_vary_32);
+//M_LOAD(ld_uniform_16);
+M_LOAD(ld_uniform_32);
+M_LOAD(ld_color_buffer_8);
+//M_STORE(st_vary_16);
+M_STORE(st_vary_32);
+M_STORE(st_cubemap_coords);
static midgard_instruction
v_alu_br_compact_cond(midgard_jmp_writeout_op op, unsigned tag, signed offset, unsigned cond)
@@ -1487,7 +1487,7 @@ emit_uniform_read(compiler_context *ctx, unsigned dest, unsigned offset, nir_src
* higher-indexed uniforms, at a performance cost. More
* generally, we're emitting a UBO read instruction. */
- midgard_instruction ins = m_load_uniform_32(dest, offset);
+ midgard_instruction ins = m_ld_uniform_32(dest, offset);
/* TODO: Don't split */
ins.load_store.varying_parameters = (offset & 7) << 7;
@@ -1560,7 +1560,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
/* XXX: Half-floats? */
/* TODO: swizzle, mask */
- midgard_instruction ins = m_load_vary_32(reg, offset);
+ midgard_instruction ins = m_ld_vary_32(reg, offset);
midgard_varying_parameter p = {
.is_varying = 1,
@@ -1615,7 +1615,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
} else if (out->data.location == VARYING_SLOT_COL1) {
/* Destination color must be read from framebuffer */
- midgard_instruction ins = m_load_color_buffer_8(reg, 0);
+ midgard_instruction ins = m_ld_color_buffer_8(reg, 0);
ins.load_store.swizzle = 0; /* xxxx */
/* Read each component sequentially */
@@ -1682,7 +1682,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
assert(0);
}
} else if (ctx->stage == MESA_SHADER_VERTEX) {
- midgard_instruction ins = m_load_attr_32(reg, offset);
+ midgard_instruction ins = m_ld_attr_32(reg, offset);
ins.load_store.unknown = 0x1E1E; /* XXX: What is this? */
ins.load_store.mask = (1 << instr->num_components) - 1;
emit_mir_instruction(ctx, ins);
@@ -1745,7 +1745,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
attach_constants(ctx, &ins, constant_value, reg + 1);
emit_mir_instruction(ctx, ins);
- midgard_instruction st = m_store_vary_32(SSA_FIXED_REGISTER(0), offset);
+ midgard_instruction st = m_st_vary_32(SSA_FIXED_REGISTER(0), offset);
st.load_store.unknown = 0x1E9E; /* XXX: What is this? */
emit_mir_instruction(ctx, st);
} else {
@@ -1842,7 +1842,7 @@ emit_tex(compiler_context *ctx, nir_tex_instr *instr)
midgard_instruction move = v_fmov(index, alu_src, SSA_FIXED_REGISTER(27));
emit_mir_instruction(ctx, move);
- midgard_instruction st = m_store_cubemap_coords(reg, 0);
+ midgard_instruction st = m_st_cubemap_coords(reg, 0);
st.load_store.unknown = 0x24; /* XXX: What is this? */
st.load_store.mask = 0x3; /* xy? */
st.load_store.swizzle = alu_src.swizzle;
@@ -2126,7 +2126,7 @@ install_registers(compiler_context *ctx, struct ra_graph *g)
case TAG_LOAD_STORE_4: {
if (OP_IS_STORE_VARY(ins->load_store.op)) {
- /* TODO: use ssa_args for store_vary */
+ /* TODO: use ssa_args for st_vary */
ins->load_store.reg = 0;
} else {
bool has_dest = args.dest >= 0;
@@ -2239,7 +2239,7 @@ allocate_registers(compiler_context *ctx)
mir_foreach_instr_in_block(block, ins) {
if (ins->compact_branch) continue;
- /* Dest is < 0 for store_vary instructions, which break
+ /* Dest is < 0 for st_vary instructions, which break
* the usual SSA conventions. Liveness analysis doesn't
* make sense on these instructions, so skip them to
* avoid memory corruption */
@@ -3432,7 +3432,7 @@ midgard_emit_store(compiler_context *ctx, midgard_block *block) {
midgard_instruction mov = v_fmov(idx, blank_alu_src, SSA_FIXED_REGISTER(REGISTER_VARYING_BASE + high_varying_register));
- midgard_instruction st = m_store_vary_32(SSA_FIXED_REGISTER(high_varying_register), varying);
+ midgard_instruction st = m_st_vary_32(SSA_FIXED_REGISTER(high_varying_register), varying);
st.load_store.unknown = 0x1E9E; /* XXX: What is this? */
mir_insert_instruction_before(mir_next_op(ins), st);