diff options
author | Jason Ekstrand <[email protected]> | 2018-11-12 18:48:10 -0600 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2018-11-15 19:59:49 -0600 |
commit | 6339aba775ecdcaf74136479d02e3622bc1d4c0a (patch) | |
tree | bf3d0178ade3cccab3b89a19564a7418cc0dc070 /src/intel/compiler/brw_vec4_nir.cpp | |
parent | d34fd81e7668b14158d63ade844a0e260b6f9152 (diff) |
intel/compiler: Lower SSBO and shared loads/stores in NIR
We have a bunch of code to do this in the back-end compiler but it's
fairly specific to typed surface messages and the way we emit them.
This breaks it out into NIR where it's easier to do things a bit more
generally. It also means we can easily share the code between the vec4
and FS back-ends if we wish.
Reviewed-by: Samuel Iglesias Gonsálvez <[email protected]>
Diffstat (limited to 'src/intel/compiler/brw_vec4_nir.cpp')
-rw-r--r-- | src/intel/compiler/brw_vec4_nir.cpp | 126 |
1 files changed, 14 insertions, 112 deletions
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index 564be7e5eee..26ca2ddd8dc 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -500,6 +500,11 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_store_ssbo: { assert(devinfo->gen >= 7); + /* brw_nir_lower_mem_access_bit_sizes takes care of this */ + assert(nir_src_bit_size(instr->src[0]) == 32); + assert(nir_intrinsic_write_mask(instr) == + (1 << instr->num_components) - 1); + src_reg surf_index = get_nir_ssbo_intrinsic_index(instr); src_reg offset_reg = retype(get_nir_src_imm(instr->src[2]), BRW_REGISTER_TYPE_UD); @@ -507,9 +512,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) /* Value */ src_reg val_reg = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, 4); - /* Writemask */ - unsigned write_mask = instr->const_index[0]; - /* IvyBridge does not have a native SIMD4x2 untyped write message so untyped * writes will use SIMD8 mode. In order to hide this and keep symmetry across * typed and untyped messages and across hardware platforms, the @@ -551,92 +553,18 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) const vec4_builder bld = vec4_builder(this).at_end() .annotate(current_annotation, base_ir); - unsigned type_slots = nir_src_bit_size(instr->src[0]) / 32; - if (type_slots == 2) { - dst_reg tmp = dst_reg(this, glsl_type::dvec4_type); - shuffle_64bit_data(tmp, retype(val_reg, tmp.type), true); - val_reg = src_reg(retype(tmp, BRW_REGISTER_TYPE_F)); - } - - uint8_t swizzle[4] = { 0, 0, 0, 0}; - int num_channels = 0; - unsigned skipped_channels = 0; - int num_components = instr->num_components; - for (int i = 0; i < num_components; i++) { - /* Read components Z/W of a dvec from the appropriate place. 
We will - * also have to adjust the swizzle (we do that with the '% 4' below) - */ - if (i == 2 && type_slots == 2) - val_reg = byte_offset(val_reg, REG_SIZE); - - /* Check if this channel needs to be written. If so, record the - * channel we need to take the data from in the swizzle array - */ - int component_mask = 1 << i; - int write_test = write_mask & component_mask; - if (write_test) { - /* If we are writing doubles we have to write 2 channels worth of - * of data (64 bits) for each double component. - */ - swizzle[num_channels++] = (i * type_slots) % 4; - if (type_slots == 2) - swizzle[num_channels++] = (i * type_slots + 1) % 4; - } - - /* If we don't have to write this channel it means we have a gap in the - * vector, so write the channels we accumulated until now, if any. Do - * the same if this was the last component in the vector, if we have - * enough channels for a full vec4 write or if we have processed - * components XY of a dvec (since components ZW are not in the same - * SIMD register) - */ - if (!write_test || i == num_components - 1 || num_channels == 4 || - (i == 1 && type_slots == 2)) { - if (num_channels > 0) { - /* We have channels to write, so update the offset we need to - * write at to skip the channels we skipped, if any. - */ - if (skipped_channels > 0) { - if (offset_reg.file == IMM) { - offset_reg.ud += 4 * skipped_channels; - } else { - emit(ADD(dst_reg(offset_reg), offset_reg, - brw_imm_ud(4 * skipped_channels))); - } - } - - /* Swizzle the data register so we take the data from the channels - * we need to write and send the write message. This will write - * num_channels consecutive dwords starting at offset. 
- */ - val_reg.swizzle = - BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); - emit_untyped_write(bld, surf_index, offset_reg, val_reg, - 1 /* dims */, num_channels /* size */, - BRW_PREDICATE_NONE); - - /* If we have to do a second write we will have to update the - * offset so that we jump over the channels we have just written - * now. - */ - skipped_channels = num_channels; - - /* Restart the count for the next write message */ - num_channels = 0; - } - - /* If we didn't write the channel, increase skipped count */ - if (!write_test) - skipped_channels += type_slots; - } - } - + emit_untyped_write(bld, surf_index, offset_reg, val_reg, + 1 /* dims */, instr->num_components /* size */, + BRW_PREDICATE_NONE); break; } case nir_intrinsic_load_ssbo: { assert(devinfo->gen >= 7); + /* brw_nir_lower_mem_access_bit_sizes takes care of this */ + assert(nir_dest_bit_size(instr->dest) == 32); + src_reg surf_index = get_nir_ssbo_intrinsic_index(instr); src_reg offset_reg = retype(get_nir_src_imm(instr->src[1]), BRW_REGISTER_TYPE_UD); @@ -645,36 +573,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) const vec4_builder bld = vec4_builder(this).at_end() .annotate(current_annotation, base_ir); - src_reg read_result; + src_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, + 1 /* dims */, 4 /* size*/, + BRW_PREDICATE_NONE); dst_reg dest = get_nir_dest(instr->dest); - if (type_sz(dest.type) < 8) { - read_result = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, 4 /* size*/, - BRW_PREDICATE_NONE); - } else { - src_reg shuffled = src_reg(this, glsl_type::dvec4_type); - - src_reg temp; - temp = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, 4 /* size*/, - BRW_PREDICATE_NONE); - emit(MOV(dst_reg(retype(shuffled, temp.type)), temp)); - - if (offset_reg.file == IMM) - offset_reg.ud += 16; - else - emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(16))); - - temp = emit_untyped_read(bld, surf_index, offset_reg, 
- 1 /* dims */, 4 /* size*/, - BRW_PREDICATE_NONE); - emit(MOV(dst_reg(retype(byte_offset(shuffled, REG_SIZE), temp.type)), - temp)); - - read_result = src_reg(this, glsl_type::dvec4_type); - shuffle_64bit_data(dst_reg(read_result), shuffled, false); - } - read_result.type = dest.type; read_result.swizzle = brw_swizzle_for_size(instr->num_components); emit(MOV(dest, read_result)); |