From 27663dbe8edfb7583d9d8fc3704a04a5c837fe05 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 3 Dec 2014 17:03:19 -0800 Subject: nir: Vectorize intrinsics We used to have the number of components built into the intrinsic. This meant that all of our load/store intrinsics had vec1, vec2, vec3, and vec4 variants. This led to piles of switch statements to generate the correct intrinsic names, and introspection to figure out the number of components. We can make things much nicer by allowing "vectorized" intrinsics. Reviewed-by: Connor Abbott --- src/glsl/nir/nir_intrinsics.h | 79 +++++++++++++------------------------------ 1 file changed, 23 insertions(+), 56 deletions(-) (limited to 'src/glsl/nir/nir_intrinsics.h') diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index e4ad8cdc02a..75bd12f6a0a 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -42,19 +42,9 @@ #define ARR(...) { __VA_ARGS__ } -INTRINSIC(load_var_vec1, 0, ARR(), true, 1, 1, 0, - NIR_INTRINSIC_CAN_ELIMINATE) -INTRINSIC(load_var_vec2, 0, ARR(), true, 2, 1, 0, - NIR_INTRINSIC_CAN_ELIMINATE) -INTRINSIC(load_var_vec3, 0, ARR(), true, 3, 1, 0, - NIR_INTRINSIC_CAN_ELIMINATE) -INTRINSIC(load_var_vec4, 0, ARR(), true, 4, 1, 0, - NIR_INTRINSIC_CAN_ELIMINATE) -INTRINSIC(store_var_vec1, 1, ARR(1), false, 0, 1, 0, 0) -INTRINSIC(store_var_vec2, 1, ARR(2), false, 0, 1, 0, 0) -INTRINSIC(store_var_vec3, 1, ARR(3), false, 0, 1, 0, 0) -INTRINSIC(store_var_vec4, 1, ARR(4), false, 0, 1, 0, 0) -INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0) +INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE) +INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0) +INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0) /* * a barrier is an intrinsic with no inputs/outputs but which can't be moved @@ -94,27 +84,6 @@ SYSTEM_VALUE(sample_pos, 2) SYSTEM_VALUE(sample_mask_in, 1) SYSTEM_VALUE(invocation_id, 1) -#define LOAD_OR_INTERP(name, num_srcs, src_comps, 
num_indices, flags) \ - INTRINSIC(name##_vec1, num_srcs, ARR(src_comps), true, 1, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ - INTRINSIC(name##_vec2, num_srcs, ARR(src_comps), true, 2, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ - INTRINSIC(name##_vec3, num_srcs, ARR(src_comps), true, 3, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ - INTRINSIC(name##_vec4, num_srcs, ARR(src_comps), true, 4, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ - INTRINSIC(name##_vec1_indirect, 1 + num_srcs, ARR(1, src_comps), true, 1, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ - INTRINSIC(name##_vec2_indirect, 1 + num_srcs, ARR(1, src_comps), true, 2, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ - INTRINSIC(name##_vec3_indirect, 1 + num_srcs, ARR(1, src_comps), true, 3, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ - INTRINSIC(name##_vec4_indirect, 1 + num_srcs, ARR(1, src_comps), true, 4, \ - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) - -#define LOAD(name, num_indices, flags) \ - LOAD_OR_INTERP(load_##name, 0, 0, num_indices, flags) - /* * The first index is the address to load from, and the second index is the * number of array elements to load. For UBO's (and SSBO's), the first index @@ -129,6 +98,12 @@ SYSTEM_VALUE(invocation_id, 1) * elements begin immediately after the previous array element. 
*/ +#define LOAD(name, num_indices, flags) \ + INTRINSIC(load_##name, 0, ARR(), true, 0, 0, num_indices, \ + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \ + INTRINSIC(load_##name##_indirect, 1, ARR(1), true, 0, 0, num_indices, \ + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \ + LOAD(uniform, 2, NIR_INTRINSIC_CAN_REORDER) LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER) LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER) @@ -140,29 +115,16 @@ LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER) * interp_at_offset* intrinsics take a second source that is either a * sample id or a vec2 position offset. */ -#define INTERP(name, flags) \ - LOAD_OR_INTERP(interp_##name, 0, 0, 2, flags) - -#define INTERP_WITH_ARG(name, src_comps, flags) \ - LOAD_OR_INTERP(interp_##name, 1, src_comps, 2, flags) -INTERP(at_centroid, NIR_INTRINSIC_CAN_REORDER) -INTERP_WITH_ARG(at_sample, 1, NIR_INTRINSIC_CAN_REORDER) -INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER) +#define INTERP(name, num_srcs, src_comps) \ + INTRINSIC(interp_##name, num_srcs, ARR(src_comps), true, \ + 0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \ + INTRINSIC(interp_##name##_indirect, 1 + num_srcs, ARR(1, src_comps), true, \ + 0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -#define STORE(name, num_indices, flags) \ - INTRINSIC(store_##name##_vec1, 1, ARR(1), false, 0, 0, num_indices, flags) \ - INTRINSIC(store_##name##_vec2, 1, ARR(2), false, 0, 0, num_indices, flags) \ - INTRINSIC(store_##name##_vec3, 1, ARR(3), false, 0, 0, num_indices, flags) \ - INTRINSIC(store_##name##_vec4, 1, ARR(4), false, 0, 0, num_indices, flags) \ - INTRINSIC(store_##name##_vec1_indirect, 2, ARR(1, 1), false, 0, 0, \ - num_indices, flags) \ - INTRINSIC(store_##name##_vec2_indirect, 2, ARR(2, 1), false, 0, 0, \ - num_indices, flags) \ - INTRINSIC(store_##name##_vec3_indirect, 2, ARR(3, 1), false, 0, 0, \ - num_indices, flags) \ - INTRINSIC(store_##name##_vec4_indirect, 2, ARR(4, 1), false, 0, 0, \ - 
num_indices, flags) \ +INTERP(at_centroid, 0, 0) +INTERP(at_sample, 1, 1) +INTERP(at_offset, 1, 1) /* * Stores work the same way as loads, except now the first register input is @@ -170,7 +132,12 @@ INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER) * offset. */ +#define STORE(name, num_indices, flags) \ + INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \ + INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \ + num_indices, flags) \ + STORE(output, 2, 0) /* STORE(ssbo, 3, 0) */ -LAST_INTRINSIC(store_output_vec4_indirect) +LAST_INTRINSIC(store_output_indirect) -- cgit v1.2.3