diff options
-rw-r--r-- | src/intel/compiler/brw_eu.h | 27 | ||||
-rw-r--r-- | src/intel/compiler/brw_eu_emit.c | 72 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 182 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_generator.cpp | 62 | ||||
-rw-r--r-- | src/intel/compiler/brw_schedule_instructions.cpp | 72 |
5 files changed, 201 insertions, 214 deletions
diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index e59c893d543..c9a8ea948ed 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -791,17 +791,6 @@ brw_untyped_atomic(struct brw_codegen *p, bool header_present); void -brw_untyped_atomic_float(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned atomic_op, - unsigned msg_length, - bool response_expected, - bool header_present); - - -void brw_untyped_surface_read(struct brw_codegen *p, struct brw_reg dst, struct brw_reg payload, @@ -845,22 +834,6 @@ brw_typed_surface_write(struct brw_codegen *p, bool header_present); void -brw_byte_scattered_read(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned bit_size); - -void -brw_byte_scattered_write(struct brw_codegen *p, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned bit_size, - bool header_present); - -void brw_memory_fence(struct brw_codegen *p, struct brw_reg dst, enum opcode send_op); diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index df63a6c42b7..4bd01a55cb7 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -2786,35 +2786,6 @@ brw_untyped_atomic(struct brw_codegen *p, } void -brw_untyped_atomic_float(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned atomic_op, - unsigned msg_length, - bool response_expected, - bool header_present) -{ - const struct gen_device_info *devinfo = p->devinfo; - - assert(devinfo->gen >= 9); - assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); - - const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1; - const unsigned exec_size = 1 << brw_get_default_exec_size(p); - const unsigned response_length = - brw_surface_payload_size(p, response_expected, exec_size); - const unsigned desc = - brw_message_desc(devinfo, msg_length, response_length, header_present) | - brw_dp_untyped_atomic_float_desc(devinfo, exec_size, atomic_op, - response_expected); - - brw_send_indirect_surface_message(p, sfid, - brw_writemask(dst, WRITEMASK_XYZW), - payload, surface, desc); -} - -void brw_untyped_surface_read(struct brw_codegen *p, struct brw_reg dst, struct brw_reg payload, @@ -2865,49 +2836,6 @@ brw_untyped_surface_write(struct brw_codegen *p, } void -brw_byte_scattered_read(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned bit_size) -{ - const struct gen_device_info *devinfo = p->devinfo; - assert(devinfo->gen > 7 || devinfo->is_haswell); - assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); - const unsigned exec_size = 1 << brw_get_default_exec_size(p); - const unsigned response_length = brw_surface_payload_size(p, 1, exec_size); - const unsigned desc = - brw_message_desc(devinfo, msg_length, response_length, false) | - brw_dp_byte_scattered_rw_desc(devinfo, exec_size, bit_size, false); - - brw_send_indirect_surface_message(p, GEN7_SFID_DATAPORT_DATA_CACHE, - dst, payload, surface, desc); -} - -void -brw_byte_scattered_write(struct brw_codegen *p, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned bit_size, - bool header_present) -{ - const struct gen_device_info *devinfo = p->devinfo; - assert(devinfo->gen > 7 || devinfo->is_haswell); - assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); - const unsigned exec_size = 1 << brw_get_default_exec_size(p); - const unsigned desc = - brw_message_desc(devinfo, msg_length, 0, header_present) | - brw_dp_byte_scattered_rw_desc(devinfo, exec_size, bit_size, true); - - brw_send_indirect_surface_message(p, GEN7_SFID_DATAPORT_DATA_CACHE, - brw_writemask(brw_null_reg(), - WRITEMASK_XYZW), - payload, surface, desc); -} - -void brw_typed_atomic(struct brw_codegen *p, struct brw_reg dst, struct brw_reg payload, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index b4e07be14bd..f6525fe467c 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -4851,8 +4851,7 @@ emit_surface_header(const fs_builder &bld, const fs_reg &sample_mask) } static void -lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op, - const fs_reg &sample_mask) +lower_surface_logical_send(const fs_builder &bld, fs_inst *inst) { const gen_device_info *devinfo = bld.shader->devinfo; @@ -4862,10 +4861,17 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &surface = inst->src[2]; const UNUSED fs_reg &dims = inst->src[3]; const fs_reg &arg = inst->src[4]; + assert(arg.file == IMM); /* Calculate the total number of components of the payload. */ const unsigned addr_sz = inst->components_read(0); const unsigned src_sz = inst->components_read(1); + + const bool is_typed_access = + inst->opcode == SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL || + inst->opcode == SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL || + inst->opcode == SHADER_OPCODE_TYPED_ATOMIC_LOGICAL; + /* From the BDW PRM Volume 7, page 147: * * "For the Data Cache Data Port*, the header must be present for the @@ -4876,10 +4882,7 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op, * messages prior to Gen9, since we have to provide a header anyway. On * Gen11+ the header has been removed so we can only use predication. */ - const unsigned header_sz = devinfo->gen < 9 && - (op == SHADER_OPCODE_TYPED_SURFACE_READ || - op == SHADER_OPCODE_TYPED_SURFACE_WRITE || - op == SHADER_OPCODE_TYPED_ATOMIC) ? 1 : 0; + const unsigned header_sz = devinfo->gen < 9 && is_typed_access ? 1 : 0; const unsigned sz = header_sz + addr_sz + src_sz; /* Allocate space for the payload. */ @@ -4887,6 +4890,10 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz); unsigned n = 0; + const bool has_side_effects = inst->has_side_effects(); + fs_reg sample_mask = has_side_effects ? bld.sample_mask_reg() : + fs_reg(brw_imm_d(0xffff)); + /* Construct the payload. */ if (header_sz) components[n++] = emit_surface_header(bld, sample_mask); @@ -4925,14 +4932,125 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op, } } + uint32_t sfid; + switch (inst->opcode) { + case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: + case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: + /* Byte scattered opcodes go through the normal data cache */ + sfid = GEN7_SFID_DATAPORT_DATA_CACHE; + break; + + case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: + /* Untyped Surface messages go through the data cache but the SFID value + * changed on Haswell. + */ + sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? + HSW_SFID_DATAPORT_DATA_CACHE_1 : + GEN7_SFID_DATAPORT_DATA_CACHE); + break; + + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: + /* Typed surface messages go through the render cache on IVB and the + * data cache on HSW+. + */ + sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? + HSW_SFID_DATAPORT_DATA_CACHE_1 : + GEN6_SFID_DATAPORT_RENDER_CACHE); + break; + + default: + unreachable("Unsupported surface opcode"); + } + + uint32_t desc; + switch (inst->opcode) { + case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size, + arg.ud, /* num_channels */ + false /* write */); + break; + + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: + desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size, + arg.ud, /* num_channels */ + true /* write */); + break; + + case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: + desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size, + arg.ud, /* bit_size */ + false /* write */); + break; + + case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: + desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size, + arg.ud, /* bit_size */ + true /* write */); + break; + + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + desc = brw_dp_untyped_atomic_desc(devinfo, inst->exec_size, + arg.ud, /* atomic_op */ + !inst->dst.is_null()); + break; + + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: + desc = brw_dp_untyped_atomic_float_desc(devinfo, inst->exec_size, + arg.ud, /* atomic_op */ + !inst->dst.is_null()); + break; + + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, inst->group, + arg.ud, /* num_channels */ + false /* write */); + break; + + case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: + desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, inst->group, + arg.ud, /* num_channels */ + true /* write */); + break; + + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: + desc = brw_dp_typed_atomic_desc(devinfo, inst->exec_size, inst->group, + arg.ud, /* atomic_op */ + !inst->dst.is_null()); + break; + + default: + unreachable("Unknown surface logical instruction"); + } + /* Update the original instruction. */ - inst->opcode = op; + inst->opcode = SHADER_OPCODE_SEND; inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8; inst->header_size = header_sz; + inst->send_has_side_effects = has_side_effects; + inst->send_is_volatile = !has_side_effects; + + /* Set up SFID and descriptors */ + inst->sfid = sfid; + inst->desc = desc; + if (surface.file == IMM) { + inst->desc |= surface.ud & 0xff; + inst->src[0] = brw_imm_ud(0); + } else { + const fs_builder ubld = bld.exec_all().group(1, 0); + fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD); + ubld.AND(tmp, surface, brw_imm_ud(0xff)); + inst->src[0] = component(tmp, 0); + } + inst->src[1] = brw_imm_ud(0); /* ex_desc */ + + /* Finally, the payload */ + inst->src[2] = payload; - inst->src[0] = payload; - inst->src[1] = surface; - inst->src[2] = arg; inst->resize_sources(3); delete[] components; @@ -5076,57 +5194,15 @@ fs_visitor::lower_logical_sends() break; case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_UNTYPED_SURFACE_READ, - fs_reg()); - break; - case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_UNTYPED_SURFACE_WRITE, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_BYTE_SCATTERED_READ, - fs_reg()); - break; - case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_BYTE_SCATTERED_WRITE, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_UNTYPED_ATOMIC, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_TYPED_SURFACE_READ, - brw_imm_d(0xffff)); - break; - case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_TYPED_SURFACE_WRITE, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_TYPED_ATOMIC, - ibld.sample_mask_reg()); + lower_surface_logical_send(ibld, inst); break; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 17578fe5ff6..0c9feb63a8c 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2264,68 +2264,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_shader_time_add(inst, src[0], src[1], src[2]); break; - case SHADER_OPCODE_UNTYPED_ATOMIC: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, - inst->mlen, !inst->dst.is_null(), - inst->header_size); - break; - - case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_untyped_atomic_float(p, dst, src[0], src[1], src[2].ud, - inst->mlen, !inst->dst.is_null(), - inst->header_size); - break; - - case SHADER_OPCODE_UNTYPED_SURFACE_READ: - assert(!inst->header_size); - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_untyped_surface_read(p, dst, src[0], src[1], - inst->mlen, src[2].ud); - break; - - case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_untyped_surface_write(p, src[0], src[1], - inst->mlen, src[2].ud, - inst->header_size); - break; - - case SHADER_OPCODE_BYTE_SCATTERED_READ: - assert(!inst->header_size); - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_byte_scattered_read(p, dst, src[0], src[1], - inst->mlen, src[2].ud); - break; - - case SHADER_OPCODE_BYTE_SCATTERED_WRITE: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_byte_scattered_write(p, src[0], src[1], - inst->mlen, src[2].ud, - inst->header_size); - break; - - case SHADER_OPCODE_TYPED_ATOMIC: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_typed_atomic(p, dst, src[0], src[1], - src[2].ud, inst->mlen, !inst->dst.is_null(), - inst->header_size); - break; - - case SHADER_OPCODE_TYPED_SURFACE_READ: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_typed_surface_read(p, dst, src[0], src[1], - inst->mlen, src[2].ud, - inst->header_size); - break; - - case SHADER_OPCODE_TYPED_SURFACE_WRITE: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud, - inst->header_size); - break; - case SHADER_OPCODE_MEMORY_FENCE: brw_memory_fence(p, dst, BRW_OPCODE_SEND); break; diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index bc4c2dc5cdb..f453bb42574 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -416,6 +416,78 @@ schedule_node::set_latency_gen7(bool is_haswell) case SHADER_OPCODE_SEND: switch (inst->sfid) { + case GEN6_SFID_DATAPORT_RENDER_CACHE: + switch ((inst->desc >> 14) & 0x1f) { + case GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE: + case GEN7_DATAPORT_RC_TYPED_SURFACE_READ: + /* See also SHADER_OPCODE_TYPED_SURFACE_READ */ + assert(!is_haswell); + latency = 600; + break; + + case GEN7_DATAPORT_RC_TYPED_ATOMIC_OP: + /* See also SHADER_OPCODE_TYPED_ATOMIC */ + assert(!is_haswell); + latency = 14000; + break; + + default: + unreachable("Unknown render cache message"); + } + break; + + case GEN7_SFID_DATAPORT_DATA_CACHE: + switch ((inst->desc >> 14) & 0x1f) { + case HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ: + case HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE: + /* We have no data for this but assume it's roughly the same as + * untyped surface read/write. + */ + latency = 300; + break; + + case GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ: + case GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE: + /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */ + assert(!is_haswell); + latency = 600; + break; + + case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP: + /* See also SHADER_OPCODE_UNTYPED_ATOMIC */ + assert(!is_haswell); + latency = 14000; + break; + + default: + unreachable("Unknown data cache message"); + } + break; + + case HSW_SFID_DATAPORT_DATA_CACHE_1: + switch ((inst->desc >> 14) & 0x1f) { + case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ: + case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE: + case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ: + case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE: + /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */ + latency = 300; + break; + + case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP: + case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2: + case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2: + case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP: + case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP: + /* See also SHADER_OPCODE_UNTYPED_ATOMIC */ + latency = 14000; + break; + + default: + unreachable("Unknown data cache message"); + } + break; + default: unreachable("Unknown SFID"); } |