diff options
author | Jason Ekstrand <[email protected]> | 2014-12-03 17:03:19 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2015-01-15 07:19:03 -0800 |
commit | 27663dbe8edfb7583d9d8fc3704a04a5c837fe05 (patch) | |
tree | 86a274c5438b6d78c3454919fa56c754cd6617ae /src/glsl/nir/nir_intrinsics.h | |
parent | d1d12efb36074abd34d6d6d3aa4db9190f6c0de3 (diff) |
nir: Vectorize intrinsics
We used to have the number of components built into the intrinsic. This
meant that all of our load/store intrinsics had vec1, vec2, vec3, and vec4
variants. This led to piles of switch statements to generate the correct
intrinsic names, and introspection to figure out the number of components.
We can make things much nicer by allowing "vectorized" intrinsics.
Reviewed-by: Connor Abbott <[email protected]>
Diffstat (limited to 'src/glsl/nir/nir_intrinsics.h')
-rw-r--r-- | src/glsl/nir/nir_intrinsics.h | 79 |
1 files changed, 23 insertions, 56 deletions
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index e4ad8cdc02a..75bd12f6a0a 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -42,19 +42,9 @@ #define ARR(...) { __VA_ARGS__ } -INTRINSIC(load_var_vec1, 0, ARR(), true, 1, 1, 0, - NIR_INTRINSIC_CAN_ELIMINATE) -INTRINSIC(load_var_vec2, 0, ARR(), true, 2, 1, 0, - NIR_INTRINSIC_CAN_ELIMINATE) -INTRINSIC(load_var_vec3, 0, ARR(), true, 3, 1, 0, - NIR_INTRINSIC_CAN_ELIMINATE) -INTRINSIC(load_var_vec4, 0, ARR(), true, 4, 1, 0, - NIR_INTRINSIC_CAN_ELIMINATE) -INTRINSIC(store_var_vec1, 1, ARR(1), false, 0, 1, 0, 0) -INTRINSIC(store_var_vec2, 1, ARR(2), false, 0, 1, 0, 0) -INTRINSIC(store_var_vec3, 1, ARR(3), false, 0, 1, 0, 0) -INTRINSIC(store_var_vec4, 1, ARR(4), false, 0, 1, 0, 0) -INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0) +INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE) +INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0) +INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0) /* * a barrier is an intrinsic with no inputs/outputs but which can't be moved @@ -94,27 +84,6 @@ SYSTEM_VALUE(sample_pos, 2) SYSTEM_VALUE(sample_mask_in, 1) SYSTEM_VALUE(invocation_id, 1) -#define LOAD_OR_INTERP(name, num_srcs, src_comps, num_indices, flags) \ - INTRINSIC(name##_vec1, num_srcs, ARR(src_comps), true, 1, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ - INTRINSIC(name##_vec2, num_srcs, ARR(src_comps), true, 2, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ - INTRINSIC(name##_vec3, num_srcs, ARR(src_comps), true, 3, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ - INTRINSIC(name##_vec4, num_srcs, ARR(src_comps), true, 4, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ - INTRINSIC(name##_vec1_indirect, 1 + num_srcs, ARR(1, src_comps), true, 1, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ - INTRINSIC(name##_vec2_indirect, 1 + num_srcs, ARR(1, src_comps), true, 2, \ - 0, num_indices, 
NIR_INTRINSIC_CAN_ELIMINATE | flags) \ - INTRINSIC(name##_vec3_indirect, 1 + num_srcs, ARR(1, src_comps), true, 3, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ - INTRINSIC(name##_vec4_indirect, 1 + num_srcs, ARR(1, src_comps), true, 4, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) - -#define LOAD(name, num_indices, flags) \ - LOAD_OR_INTERP(load_##name, 0, 0, num_indices, flags) - /* * The first index is the address to load from, and the second index is the * number of array elements to load. For UBO's (and SSBO's), the first index @@ -129,6 +98,12 @@ SYSTEM_VALUE(invocation_id, 1) * elements begin immediately after the previous array element. */ +#define LOAD(name, num_indices, flags) \ + INTRINSIC(load_##name, 0, ARR(), true, 0, 0, num_indices, \ + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \ + INTRINSIC(load_##name##_indirect, 1, ARR(1), true, 0, 0, num_indices, \ + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \ + LOAD(uniform, 2, NIR_INTRINSIC_CAN_REORDER) LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER) LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER) @@ -140,29 +115,16 @@ LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER) * interp_at_offset* intrinsics take a second source that is either a * sample id or a vec2 position offset. 
*/ -#define INTERP(name, flags) \ - LOAD_OR_INTERP(interp_##name, 0, 0, 2, flags) - -#define INTERP_WITH_ARG(name, src_comps, flags) \ - LOAD_OR_INTERP(interp_##name, 1, src_comps, 2, flags) -INTERP(at_centroid, NIR_INTRINSIC_CAN_REORDER) -INTERP_WITH_ARG(at_sample, 1, NIR_INTRINSIC_CAN_REORDER) -INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER) +#define INTERP(name, num_srcs, src_comps) \ + INTRINSIC(interp_##name, num_srcs, ARR(src_comps), true, \ + 0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \ + INTRINSIC(interp_##name##_indirect, 1 + num_srcs, ARR(1, src_comps), true, \ + 0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -#define STORE(name, num_indices, flags) \ - INTRINSIC(store_##name##_vec1, 1, ARR(1), false, 0, 0, num_indices, flags) \ - INTRINSIC(store_##name##_vec2, 1, ARR(2), false, 0, 0, num_indices, flags) \ - INTRINSIC(store_##name##_vec3, 1, ARR(3), false, 0, 0, num_indices, flags) \ - INTRINSIC(store_##name##_vec4, 1, ARR(4), false, 0, 0, num_indices, flags) \ - INTRINSIC(store_##name##_vec1_indirect, 2, ARR(1, 1), false, 0, 0, \ - num_indices, flags) \ - INTRINSIC(store_##name##_vec2_indirect, 2, ARR(2, 1), false, 0, 0, \ - num_indices, flags) \ - INTRINSIC(store_##name##_vec3_indirect, 2, ARR(3, 1), false, 0, 0, \ - num_indices, flags) \ - INTRINSIC(store_##name##_vec4_indirect, 2, ARR(4, 1), false, 0, 0, \ - num_indices, flags) \ +INTERP(at_centroid, 0, 0) +INTERP(at_sample, 1, 1) +INTERP(at_offset, 1, 1) /* * Stores work the same way as loads, except now the first register input is @@ -170,7 +132,12 @@ INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER) * offset. */ +#define STORE(name, num_indices, flags) \ + INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \ + INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \ + num_indices, flags) \ + STORE(output, 2, 0) /* STORE(ssbo, 3, 0) */ -LAST_INTRINSIC(store_output_vec4_indirect) +LAST_INTRINSIC(store_output_indirect) |