summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2019-02-06 15:42:17 -0600
committerJason Ekstrand <[email protected]>2019-04-19 19:56:42 +0000
commit843286d324c833198f4f5bd6d548ab3612968169 (patch)
tree38c0c13517ff3a52783d02bf792fd7e6722dbd0e
parent2edf29b933564d4f1aae80b91f674f1175f91625 (diff)
intel/fs: Add support for bindless texture ops
We add two new texture sources for bindless surface and sampler handles. Bindless surface handles are expected to be pre-shifted so that the 20-bit surface state table index is in the top 20 bits of the 32-bit handle. This lets us avoid any extra shifts in the shader. Bindless sampler handles are 32-byte aligned byte offsets from general state base address. We use 32-byte aligned instead of 16-byte aligned to avoid having to use more indirect messages than needed. It means we can't tightly pack samplers but that's probably not a big deal. Reviewed-by: Lionel Landwerlin <[email protected]> Reviewed-by: Caio Marcelo de Oliveira Filho <[email protected]>
-rw-r--r--src/intel/compiler/brw_eu_defines.h5
-rw-r--r--src/intel/compiler/brw_fs.cpp67
-rw-r--r--src/intel/compiler/brw_fs.h3
-rw-r--r--src/intel/compiler/brw_fs_nir.cpp17
-rw-r--r--src/intel/compiler/brw_fs_visitor.cpp4
5 files changed, 86 insertions, 10 deletions
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index da723307b73..b14e33024b8 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -835,6 +835,10 @@ enum tex_logical_srcs {
TEX_LOGICAL_SRC_SURFACE,
/** Texture sampler index */
TEX_LOGICAL_SRC_SAMPLER,
+ /** Texture surface bindless handle */
+ TEX_LOGICAL_SRC_SURFACE_HANDLE,
+ /** Texture sampler bindless handle */
+ TEX_LOGICAL_SRC_SAMPLER_HANDLE,
/** Texel offset for gathers */
TEX_LOGICAL_SRC_TG4_OFFSET,
/** REQUIRED: Number of coordinate components (as UD immediate) */
@@ -1224,6 +1228,7 @@ enum brw_message_target {
*/
#define GEN8_BTI_STATELESS_IA_COHERENT 255
#define GEN8_BTI_STATELESS_NON_COHERENT 253
+#define GEN9_BTI_BINDLESS 252
/* Dataport atomic operations for Untyped Atomic Integer Operation message
* (and others).
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 9f82946f078..856e2ef815d 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -4685,6 +4685,8 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
const fs_reg &mcs,
const fs_reg &surface,
const fs_reg &sampler,
+ const fs_reg &surface_handle,
+ const fs_reg &sampler_handle,
const fs_reg &tg4_offset,
unsigned coord_components,
unsigned grad_components)
@@ -4697,9 +4699,14 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F);
+ /* We must have exactly one of surface/sampler and surface/sampler_handle */
+ assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
+ assert((sampler.file == BAD_FILE) != (sampler_handle.file == BAD_FILE));
+
if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET ||
inst->offset != 0 || inst->eot ||
op == SHADER_OPCODE_SAMPLEINFO ||
+ sampler_handle.file != BAD_FILE ||
is_high_sampler(devinfo, sampler)) {
/* For general texture offsets (no txf workaround), we need a header to
* put them in.
@@ -4739,7 +4746,21 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
ubld1.MOV(component(header, 2), brw_imm_ud(0));
}
- if (is_high_sampler(devinfo, sampler)) {
+ if (sampler_handle.file != BAD_FILE) {
+ /* Bindless sampler handles aren't relative to the sampler state
+ * pointer passed into the shader through SAMPLER_STATE_POINTERS_*.
+ * Instead, it's an absolute pointer relative to dynamic state base
+ * address.
+ *
+ * Sampler states are 16 bytes each and the pointer we give here has
+ * to be 32-byte aligned. In order to avoid more indirect messages
+ * than required, we assume that all bindless sampler states are
+ * 32-byte aligned. This sacrifices a bit of general state base
+ * address space but means we can do something more efficient in the
+ * shader.
+ */
+ ubld1.MOV(component(header, 3), sampler_handle);
+ } else if (is_high_sampler(devinfo, sampler)) {
if (sampler.file == BRW_IMMEDIATE_VALUE) {
assert(sampler.ud >= 16);
const int sampler_state_size = 16; /* 16 bytes */
@@ -4942,14 +4963,42 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
}
inst->sfid = BRW_SFID_SAMPLER;
- if (surface.file == IMM && sampler.file == IMM) {
+ if (surface.file == IMM &&
+ (sampler.file == IMM || sampler_handle.file != BAD_FILE)) {
inst->desc = brw_sampler_desc(devinfo,
surface.ud + base_binding_table_index,
- sampler.ud % 16,
+ sampler.file == IMM ? sampler.ud % 16 : 0,
msg_type,
simd_mode,
0 /* return_format unused on gen7+ */);
inst->src[0] = brw_imm_ud(0);
+ inst->src[1] = brw_imm_ud(0); /* ex_desc */
+ } else if (surface_handle.file != BAD_FILE) {
+ /* Bindless surface */
+ assert(devinfo->gen >= 9);
+ inst->desc = brw_sampler_desc(devinfo,
+ GEN9_BTI_BINDLESS,
+ sampler.file == IMM ? sampler.ud % 16 : 0,
+ msg_type,
+ simd_mode,
+ 0 /* return_format unused on gen7+ */);
+
+ /* For bindless samplers, the entire address is included in the message
+ * header so we can leave the portion in the message descriptor 0.
+ */
+ if (sampler_handle.file != BAD_FILE || sampler.file == IMM) {
+ inst->src[0] = brw_imm_ud(0);
+ } else {
+ const fs_builder ubld = bld.group(1, 0).exec_all();
+ fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+ ubld.SHL(desc, sampler, brw_imm_ud(8));
+ inst->src[0] = desc;
+ }
+
+ /* We assume that the driver provided the handle in the top 20 bits so
+ * we can use the surface handle directly as the extended descriptor.
+ */
+ inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
} else {
/* Immediate portion of the descriptor */
inst->desc = brw_sampler_desc(devinfo,
@@ -4964,7 +5013,9 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
/* This case is common in GL */
ubld.MUL(desc, surface, brw_imm_ud(0x101));
} else {
- if (sampler.file == IMM) {
+ if (sampler_handle.file != BAD_FILE) {
+ ubld.MOV(desc, surface);
+ } else if (sampler.file == IMM) {
ubld.OR(desc, surface, brw_imm_ud(sampler.ud << 8));
} else {
ubld.SHL(desc, sampler, brw_imm_ud(8));
@@ -4976,8 +5027,8 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
ubld.AND(desc, desc, brw_imm_ud(0xfff));
inst->src[0] = component(desc, 0);
+ inst->src[1] = brw_imm_ud(0); /* ex_desc */
}
- inst->src[1] = brw_imm_ud(0); /* ex_desc */
inst->src[2] = src_payload;
inst->resize_sources(3);
@@ -5009,6 +5060,8 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
const fs_reg &mcs = inst->src[TEX_LOGICAL_SRC_MCS];
const fs_reg &surface = inst->src[TEX_LOGICAL_SRC_SURFACE];
const fs_reg &sampler = inst->src[TEX_LOGICAL_SRC_SAMPLER];
+ const fs_reg &surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE];
+ const fs_reg &sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
const fs_reg &tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET];
assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
@@ -5019,7 +5072,9 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
lower_sampler_logical_send_gen7(bld, inst, op, coordinate,
shadow_c, lod, lod2, min_lod,
sample_index,
- mcs, surface, sampler, tg4_offset,
+ mcs, surface, sampler,
+ surface_handle, sampler_handle,
+ tg4_offset,
coord_components, grad_components);
} else if (devinfo->gen >= 5) {
lower_sampler_logical_send_gen5(bld, inst, op, coordinate,
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index b6536eb7158..e8af99e1705 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -183,7 +183,8 @@ public:
void emit_interpolation_setup_gen6();
void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
- const fs_reg &sampler);
+ const fs_reg &texture,
+ const fs_reg &texture_handle);
void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
fs_reg resolve_source_modifiers(const fs_reg &src);
void emit_discard_jump();
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 4bf85bfb7e8..28c6e0e209a 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -3201,7 +3201,7 @@ fs_visitor::emit_non_coherent_fb_read(const fs_builder &bld, const fs_reg &dst,
const fs_reg sample = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
const fs_reg mcs = wm_key->multisample_fbo ?
- emit_mcs_fetch(coords, 3, brw_imm_ud(surface)) : fs_reg();
+ emit_mcs_fetch(coords, 3, brw_imm_ud(surface), fs_reg()) : fs_reg();
/* Use either a normal or a CMS texel fetch message depending on whether
* the framebuffer is single or multisample. On SKL+ use the wide CMS
@@ -5237,6 +5237,18 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
break;
}
+ case nir_tex_src_texture_handle:
+ assert(nir_tex_instr_src_index(instr, nir_tex_src_texture_offset) == -1);
+ srcs[TEX_LOGICAL_SRC_SURFACE] = fs_reg();
+ srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = bld.emit_uniformize(src);
+ break;
+
+ case nir_tex_src_sampler_handle:
+ assert(nir_tex_instr_src_index(instr, nir_tex_src_sampler_offset) == -1);
+ srcs[TEX_LOGICAL_SRC_SAMPLER] = fs_reg();
+ srcs[TEX_LOGICAL_SRC_SAMPLER_HANDLE] = bld.emit_uniformize(src);
+ break;
+
case nir_tex_src_ms_mcs:
assert(instr->op == nir_texop_txf_ms);
srcs[TEX_LOGICAL_SRC_MCS] = retype(src, BRW_REGISTER_TYPE_D);
@@ -5266,7 +5278,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
srcs[TEX_LOGICAL_SRC_MCS] =
emit_mcs_fetch(srcs[TEX_LOGICAL_SRC_COORDINATE],
instr->coord_components,
- srcs[TEX_LOGICAL_SRC_SURFACE]);
+ srcs[TEX_LOGICAL_SRC_SURFACE],
+ srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE]);
} else {
srcs[TEX_LOGICAL_SRC_MCS] = brw_imm_ud(0u);
}
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index f8e8d36360e..232693cec78 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -35,7 +35,8 @@ using namespace brw;
/* Sample from the MCS surface attached to this multisample texture. */
fs_reg
fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
- const fs_reg &texture)
+ const fs_reg &texture,
+ const fs_reg &texture_handle)
{
const fs_reg dest = vgrf(glsl_type::uvec4_type);
@@ -43,6 +44,7 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
srcs[TEX_LOGICAL_SRC_COORDINATE] = coordinate;
srcs[TEX_LOGICAL_SRC_SURFACE] = texture;
srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0);
+ srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = texture_handle;
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(components);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0);