i965/vec4: make offset() work in terms of a simd width and scalar components

This gives offset() the same semantics as the scalar backend implementation.

The helper will now take a SIMD width (which is always 8 in vec4 mode) and step as many scalar components as specified by that width, respecting the size of the scalar channels.

v2 (Curro):
- Remove the assertion in offset(); byte_offset() has the same checks.
- Use byte_offset() directly instead of add_byte_offset().
- Make things clearer by explicitly including the vertical stride in the byte offset expression.

Reviewed-by: Francisco Jerez <[email protected]>
author: Iago Toral Quiroga <[email protected]> 2016-10-03 13:33:12 +0200
committer: Iago Toral Quiroga <[email protected]> 2016-10-27 10:59:31 +0200
commit: 66fcfa6894ab61a8cb70955f4a4113729e4a8099 (patch)
tree: 849fced6c4ce84a3dc8bffe4bd9179350ac6f57f
parent: ba63db1f2ecf78ab894fe411b4f27d8523cf952d (diff)
3 files changed, 16 insertions, 18 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 1633c4ce0d5..5dfdfce17ab 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -104,12 +104,11 @@ byte_offset(src_reg reg, unsigned bytes)
 }
 
 static inline src_reg
-offset(src_reg reg, unsigned delta)
+offset(src_reg reg, unsigned width, unsigned delta)
 {
-   assert(delta == 0 ||
-          (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
-   reg.offset += delta * (reg.file == UNIFORM ? 16 : REG_SIZE);
-   return reg;
+   const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
+   const unsigned num_components = MAX2(width / 4 * stride, 4);
+   return byte_offset(reg, num_components * type_sz(reg.type) * delta);
 }
 
 /**
@@ -180,12 +179,11 @@ byte_offset(dst_reg reg, unsigned bytes)
 }
 
 static inline dst_reg
-offset(dst_reg reg, unsigned delta)
+offset(dst_reg reg, unsigned width, unsigned delta)
 {
-   assert(delta == 0 ||
-          (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
-   reg.offset += delta * (reg.file == UNIFORM ? 16 : REG_SIZE);
-   return reg;
+   const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
+   const unsigned num_components = MAX2(width / 4 * stride, 4);
+   return byte_offset(reg, num_components * type_sz(reg.type) * delta);
 }
 
 static inline dst_reg
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index aabf082dd30..fc4eb3ac1d7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -256,7 +256,7 @@ dst_reg_for_nir_reg(vec4_visitor *v, nir_register *nir_reg,
    dst_reg reg;
 
    reg = v->nir_locals[nir_reg->index];
-   reg = offset(reg, base_offset);
+   reg = offset(reg, 8, base_offset);
    if (indirect) {
       reg.reladdr =
          new(v->mem_ctx) src_reg(v->get_nir_src(*indirect,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
index 19c685feea5..00c94fedca2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
@@ -42,9 +42,9 @@ namespace {
                                          DIV_ROUND_UP(size * dst_stride, 4));
 
             for (unsigned i = 0; i < size; ++i)
-               bld.MOV(writemask(offset(dst, i * dst_stride / 4),
+               bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
                                  1 << (i * dst_stride % 4)),
-                       swizzle(offset(src, i * src_stride / 4),
+                       swizzle(offset(src, 8, i * src_stride / 4),
                                brw_swizzle_for_mask(1 << (i * src_stride % 4))));
 
             return src_reg(dst);
@@ -124,16 +124,16 @@ namespace brw {
             unsigned n = 0;
 
             if (header_sz)
-               bld.exec_all().MOV(offset(payload, n++),
+               bld.exec_all().MOV(offset(payload, 8, n++),
                                   retype(header, BRW_REGISTER_TYPE_UD));
 
             for (unsigned i = 0; i < addr_sz; i++)
-               bld.MOV(offset(payload, n++),
-                       offset(retype(addr, BRW_REGISTER_TYPE_UD), i));
+               bld.MOV(offset(payload, 8, n++),
+                       offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));
 
             for (unsigned i = 0; i < src_sz; i++)
-               bld.MOV(offset(payload, n++),
-                       offset(retype(src, BRW_REGISTER_TYPE_UD), i));
+               bld.MOV(offset(payload, 8, n++),
+                       offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));
 
             /* Reduce the dynamically uniform surface index to a single
              * scalar.
author	Iago Toral Quiroga <[email protected]>	2016-10-03 13:33:12 +0200
committer	Iago Toral Quiroga <[email protected]>	2016-10-27 10:59:31 +0200
commit	66fcfa6894ab61a8cb70955f4a4113729e4a8099 (patch)
tree	849fced6c4ce84a3dc8bffe4bd9179350ac6f57f
parent	ba63db1f2ecf78ab894fe411b4f27d8523cf952d (diff)