summaryrefslogtreecommitdiffstats
path: root/src/compiler/nir
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2016-04-13 20:25:39 -0700
committerJason Ekstrand <[email protected]>2016-04-13 20:25:39 -0700
commit12f88ba32a14ea79134f4e995a55149f078a2f27 (patch)
tree9070861dced23d0ad7dbec598bfd96b686eb7bf1 /src/compiler/nir
parent79fbec30fc16399ede9385ef52cb62cefbb388f4 (diff)
parent171a570f388b2895d14f6d5418c99573cffd6369 (diff)
Merge remote-tracking branch 'public/master' into vulkan
Diffstat (limited to 'src/compiler/nir')
-rw-r--r--src/compiler/nir/Makefile.sources87
-rw-r--r--src/compiler/nir/glsl_to_nir.cpp112
-rw-r--r--src/compiler/nir/nir.c18
-rw-r--r--src/compiler/nir/nir.h48
-rw-r--r--src/compiler/nir/nir_algebraic.py1
-rw-r--r--src/compiler/nir/nir_builder.h4
-rw-r--r--src/compiler/nir/nir_clone.c7
-rw-r--r--src/compiler/nir/nir_control_flow.c6
-rw-r--r--src/compiler/nir/nir_from_ssa.c2
-rw-r--r--src/compiler/nir/nir_instr_set.c22
-rw-r--r--src/compiler/nir/nir_lower_alu_to_scalar.c3
-rw-r--r--src/compiler/nir/nir_lower_atomics.c5
-rw-r--r--src/compiler/nir/nir_lower_double_packing.c95
-rw-r--r--src/compiler/nir/nir_lower_indirect_derefs.c12
-rw-r--r--src/compiler/nir/nir_lower_io.c12
-rw-r--r--src/compiler/nir/nir_lower_load_const_to_scalar.c9
-rw-r--r--src/compiler/nir/nir_lower_locals_to_regs.c3
-rw-r--r--src/compiler/nir/nir_lower_to_source_mods.c5
-rw-r--r--src/compiler/nir/nir_lower_vars_to_ssa.c4
-rw-r--r--src/compiler/nir/nir_lower_vec_to_movs.c1
-rw-r--r--src/compiler/nir/nir_opcodes.py73
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py45
-rw-r--r--src/compiler/nir/nir_opt_constant_folding.c4
-rw-r--r--src/compiler/nir/nir_opt_dce.c2
-rw-r--r--src/compiler/nir/nir_phi_builder.c3
-rw-r--r--src/compiler/nir/nir_print.c19
-rw-r--r--src/compiler/nir/nir_remove_dead_variables.c14
-rw-r--r--src/compiler/nir/nir_search.c5
-rw-r--r--src/compiler/nir/nir_split_var_copies.c2
-rw-r--r--src/compiler/nir/nir_to_ssa.c3
-rw-r--r--src/compiler/nir/nir_validate.c3
-rw-r--r--src/compiler/nir/spirv/spirv_to_nir.c4
32 files changed, 414 insertions, 219 deletions
diff --git a/src/compiler/nir/Makefile.sources b/src/compiler/nir/Makefile.sources
deleted file mode 100644
index e6367d9c282..00000000000
--- a/src/compiler/nir/Makefile.sources
+++ /dev/null
@@ -1,87 +0,0 @@
-NIR_GENERATED_FILES = \
- nir_builder_opcodes.h \
- nir_constant_expressions.c \
- nir_opcodes.c \
- nir_opcodes.h \
- nir_opt_algebraic.c
-
-NIR_FILES = \
- glsl_to_nir.cpp \
- glsl_to_nir.h \
- nir.c \
- nir.h \
- nir_array.h \
- nir_builder.h \
- nir_clone.c \
- nir_constant_expressions.h \
- nir_control_flow.c \
- nir_control_flow.h \
- nir_control_flow_private.h \
- nir_dominance.c \
- nir_from_ssa.c \
- nir_gather_info.c \
- nir_gs_count_vertices.c \
- nir_inline_functions.c \
- nir_instr_set.c \
- nir_instr_set.h \
- nir_intrinsics.c \
- nir_intrinsics.h \
- nir_liveness.c \
- nir_lower_alu_to_scalar.c \
- nir_lower_atomics.c \
- nir_lower_clip.c \
- nir_lower_global_vars_to_local.c \
- nir_lower_gs_intrinsics.c \
- nir_lower_load_const_to_scalar.c \
- nir_lower_locals_to_regs.c \
- nir_lower_idiv.c \
- nir_lower_indirect_derefs.c \
- nir_lower_io.c \
- nir_lower_outputs_to_temporaries.c \
- nir_lower_phis_to_scalar.c \
- nir_lower_returns.c \
- nir_lower_samplers.c \
- nir_lower_system_values.c \
- nir_lower_tex.c \
- nir_lower_to_source_mods.c \
- nir_lower_two_sided_color.c \
- nir_lower_vars_to_ssa.c \
- nir_lower_var_copies.c \
- nir_lower_vec_to_movs.c \
- nir_metadata.c \
- nir_move_vec_src_uses_to_dest.c \
- nir_normalize_cubemap_coords.c \
- nir_opt_constant_folding.c \
- nir_opt_copy_propagate.c \
- nir_opt_cse.c \
- nir_opt_dce.c \
- nir_opt_dead_cf.c \
- nir_opt_gcm.c \
- nir_opt_global_to_local.c \
- nir_opt_peephole_select.c \
- nir_opt_remove_phis.c \
- nir_opt_undef.c \
- nir_phi_builder.c \
- nir_phi_builder.h \
- nir_print.c \
- nir_remove_dead_variables.c \
- nir_repair_ssa.c \
- nir_search.c \
- nir_search.h \
- nir_split_var_copies.c \
- nir_sweep.c \
- nir_to_ssa.c \
- nir_validate.c \
- nir_vla.h \
- nir_worklist.c \
- nir_worklist.h
-
-SPIRV_FILES = \
- spirv/nir_spirv.h \
- spirv/spirv_to_nir.c \
- spirv/vtn_alu.c \
- spirv/vtn_cfg.c \
- spirv/vtn_glsl450.c \
- spirv/vtn_private.h \
- spirv/vtn_variables.c
-
diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp
index 14affeee8ac..d4c58a9ba2e 100644
--- a/src/compiler/nir/glsl_to_nir.cpp
+++ b/src/compiler/nir/glsl_to_nir.cpp
@@ -73,7 +73,7 @@ public:
void create_function(ir_function_signature *ir);
private:
- void add_instr(nir_instr *instr, unsigned num_components);
+ void add_instr(nir_instr *instr, unsigned num_components, unsigned bit_size);
nir_ssa_def *evaluate_rvalue(ir_rvalue *ir);
nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs);
@@ -257,6 +257,11 @@ constant_copy(ir_constant *ir, void *mem_ctx)
ret->value.f[i] = ir->value.f[i];
break;
+ case GLSL_TYPE_DOUBLE:
+ for (i = 0; i < total_elems; i++)
+ ret->value.d[i] = ir->value.d[i];
+ break;
+
case GLSL_TYPE_BOOL:
for (i = 0; i < total_elems; i++)
ret->value.b[i] = ir->value.b[i];
@@ -736,7 +741,7 @@ nir_visitor::visit(ir_call *ir)
case nir_intrinsic_image_samples:
case nir_intrinsic_image_size: {
nir_ssa_undef_instr *instr_undef =
- nir_ssa_undef_instr_create(shader, 1);
+ nir_ssa_undef_instr_create(shader, 1, 32);
nir_builder_instr_insert(&b, &instr_undef->instr);
/* Set the image variable dereference. */
@@ -854,8 +859,9 @@ nir_visitor::visit(ir_call *ir)
instr->num_components = type->vector_elements;
/* Setup destination register */
+ unsigned bit_size = glsl_get_bit_size(type->base_type);
nir_ssa_dest_init(&instr->instr, &instr->dest,
- type->vector_elements, 32, NULL);
+ type->vector_elements, bit_size, NULL);
/* Insert the created nir instruction now since in the case of boolean
* result we will need to emit another instruction after it
@@ -878,7 +884,7 @@ nir_visitor::visit(ir_call *ir)
load_ssbo_compare->src[1].swizzle[i] = 0;
nir_ssa_dest_init(&load_ssbo_compare->instr,
&load_ssbo_compare->dest.dest,
- type->vector_elements, 32, NULL);
+ type->vector_elements, bit_size, NULL);
load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
nir_builder_instr_insert(&b, &load_ssbo_compare->instr);
dest = &load_ssbo_compare->dest.dest;
@@ -1152,12 +1158,13 @@ get_instr_dest(nir_instr *instr)
}
void
-nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
+nir_visitor::add_instr(nir_instr *instr, unsigned num_components,
+ unsigned bit_size)
{
nir_dest *dest = get_instr_dest(instr);
if (dest)
- nir_ssa_dest_init(instr, dest, num_components, 32, NULL);
+ nir_ssa_dest_init(instr, dest, num_components, bit_size, NULL);
nir_builder_instr_insert(&b, instr);
@@ -1182,12 +1189,19 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
load_instr->num_components = ir->type->vector_elements;
load_instr->variables[0] = this->deref_head;
ralloc_steal(load_instr, load_instr->variables[0]);
- add_instr(&load_instr->instr, ir->type->vector_elements);
+ unsigned bit_size = glsl_get_bit_size(ir->type->base_type);
+ add_instr(&load_instr->instr, ir->type->vector_elements, bit_size);
}
return this->result;
}
+static bool
+type_is_float(glsl_base_type type)
+{
+ return type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_DOUBLE;
+}
+
void
nir_visitor::visit(ir_expression *ir)
{
@@ -1196,11 +1210,11 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_ubo_load: {
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo);
+ unsigned bit_size = glsl_get_bit_size(ir->type->base_type);
load->num_components = ir->type->vector_elements;
- load->dest.ssa.bit_size = glsl_get_bit_size(ir->type->base_type);
load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
- add_instr(&load->instr, ir->type->vector_elements);
+ add_instr(&load->instr, ir->type->vector_elements, bit_size);
/*
* In UBO's, a true boolean value is any non-zero value, but we consider
@@ -1265,7 +1279,8 @@ nir_visitor::visit(ir_expression *ir)
intrin->intrinsic == nir_intrinsic_interp_var_at_sample)
intrin->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
- add_instr(&intrin->instr, deref->type->vector_elements);
+ unsigned bit_size = glsl_get_bit_size(deref->type->base_type);
+ add_instr(&intrin->instr, deref->type->vector_elements, bit_size);
if (swizzle) {
unsigned swiz[4] = {
@@ -1306,20 +1321,20 @@ nir_visitor::visit(ir_expression *ir)
result = supports_ints ? nir_inot(&b, srcs[0]) : nir_fnot(&b, srcs[0]);
break;
case ir_unop_neg:
- result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fneg(&b, srcs[0])
- : nir_ineg(&b, srcs[0]);
+ result = type_is_float(types[0]) ? nir_fneg(&b, srcs[0])
+ : nir_ineg(&b, srcs[0]);
break;
case ir_unop_abs:
- result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fabs(&b, srcs[0])
- : nir_iabs(&b, srcs[0]);
+ result = type_is_float(types[0]) ? nir_fabs(&b, srcs[0])
+ : nir_iabs(&b, srcs[0]);
break;
case ir_unop_saturate:
- assert(types[0] == GLSL_TYPE_FLOAT);
+ assert(type_is_float(types[0]));
result = nir_fsat(&b, srcs[0]);
break;
case ir_unop_sign:
- result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fsign(&b, srcs[0])
- : nir_isign(&b, srcs[0]);
+ result = type_is_float(types[0]) ? nir_fsign(&b, srcs[0])
+ : nir_isign(&b, srcs[0]);
break;
case ir_unop_rcp: result = nir_frcp(&b, srcs[0]); break;
case ir_unop_rsq: result = nir_frsq(&b, srcs[0]); break;
@@ -1342,6 +1357,19 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_f2b: result = nir_f2b(&b, srcs[0]); break;
case ir_unop_i2b: result = nir_i2b(&b, srcs[0]); break;
case ir_unop_b2i: result = nir_b2i(&b, srcs[0]); break;
+ case ir_unop_d2f: result = nir_d2f(&b, srcs[0]); break;
+ case ir_unop_f2d: result = nir_f2d(&b, srcs[0]); break;
+ case ir_unop_d2i: result = nir_d2i(&b, srcs[0]); break;
+ case ir_unop_d2u: result = nir_d2u(&b, srcs[0]); break;
+ case ir_unop_d2b: result = nir_d2b(&b, srcs[0]); break;
+ case ir_unop_i2d:
+ assert(supports_ints);
+ result = nir_i2d(&b, srcs[0]);
+ break;
+ case ir_unop_u2d:
+ assert(supports_ints);
+ result = nir_u2d(&b, srcs[0]);
+ break;
case ir_unop_i2u:
case ir_unop_u2i:
case ir_unop_bitcast_i2f:
@@ -1395,6 +1423,12 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_unpack_half_2x16:
result = nir_unpack_half_2x16(&b, srcs[0]);
break;
+ case ir_unop_pack_double_2x32:
+ result = nir_pack_double_2x32(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_double_2x32:
+ result = nir_unpack_double_2x32(&b, srcs[0]);
+ break;
case ir_unop_bitfield_reverse:
result = nir_bitfield_reverse(&b, srcs[0]);
break;
@@ -1465,24 +1499,25 @@ nir_visitor::visit(ir_expression *ir)
nir_intrinsic_get_buffer_size);
load->num_components = ir->type->vector_elements;
load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
- add_instr(&load->instr, ir->type->vector_elements);
+ unsigned bit_size = glsl_get_bit_size(ir->type->base_type);
+ add_instr(&load->instr, ir->type->vector_elements, bit_size);
return;
}
case ir_binop_add:
- result = (out_type == GLSL_TYPE_FLOAT) ? nir_fadd(&b, srcs[0], srcs[1])
- : nir_iadd(&b, srcs[0], srcs[1]);
+ result = type_is_float(out_type) ? nir_fadd(&b, srcs[0], srcs[1])
+ : nir_iadd(&b, srcs[0], srcs[1]);
break;
case ir_binop_sub:
- result = (out_type == GLSL_TYPE_FLOAT) ? nir_fsub(&b, srcs[0], srcs[1])
- : nir_isub(&b, srcs[0], srcs[1]);
+ result = type_is_float(out_type) ? nir_fsub(&b, srcs[0], srcs[1])
+ : nir_isub(&b, srcs[0], srcs[1]);
break;
case ir_binop_mul:
- result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmul(&b, srcs[0], srcs[1])
- : nir_imul(&b, srcs[0], srcs[1]);
+ result = type_is_float(out_type) ? nir_fmul(&b, srcs[0], srcs[1])
+ : nir_imul(&b, srcs[0], srcs[1]);
break;
case ir_binop_div:
- if (out_type == GLSL_TYPE_FLOAT)
+ if (type_is_float(out_type))
result = nir_fdiv(&b, srcs[0], srcs[1]);
else if (out_type == GLSL_TYPE_INT)
result = nir_idiv(&b, srcs[0], srcs[1]);
@@ -1490,11 +1525,11 @@ nir_visitor::visit(ir_expression *ir)
result = nir_udiv(&b, srcs[0], srcs[1]);
break;
case ir_binop_mod:
- result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmod(&b, srcs[0], srcs[1])
- : nir_umod(&b, srcs[0], srcs[1]);
+ result = type_is_float(out_type) ? nir_fmod(&b, srcs[0], srcs[1])
+ : nir_umod(&b, srcs[0], srcs[1]);
break;
case ir_binop_min:
- if (out_type == GLSL_TYPE_FLOAT)
+ if (type_is_float(out_type))
result = nir_fmin(&b, srcs[0], srcs[1]);
else if (out_type == GLSL_TYPE_INT)
result = nir_imin(&b, srcs[0], srcs[1]);
@@ -1502,7 +1537,7 @@ nir_visitor::visit(ir_expression *ir)
result = nir_umin(&b, srcs[0], srcs[1]);
break;
case ir_binop_max:
- if (out_type == GLSL_TYPE_FLOAT)
+ if (type_is_float(out_type))
result = nir_fmax(&b, srcs[0], srcs[1]);
else if (out_type == GLSL_TYPE_INT)
result = nir_imax(&b, srcs[0], srcs[1]);
@@ -1538,7 +1573,7 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break;
case ir_binop_less:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT)
+ if (type_is_float(types[0]))
result = nir_flt(&b, srcs[0], srcs[1]);
else if (types[0] == GLSL_TYPE_INT)
result = nir_ilt(&b, srcs[0], srcs[1]);
@@ -1550,7 +1585,7 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_greater:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT)
+ if (type_is_float(types[0]))
result = nir_flt(&b, srcs[1], srcs[0]);
else if (types[0] == GLSL_TYPE_INT)
result = nir_ilt(&b, srcs[1], srcs[0]);
@@ -1562,7 +1597,7 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_lequal:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT)
+ if (type_is_float(types[0]))
result = nir_fge(&b, srcs[1], srcs[0]);
else if (types[0] == GLSL_TYPE_INT)
result = nir_ige(&b, srcs[1], srcs[0]);
@@ -1574,7 +1609,7 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_gequal:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT)
+ if (type_is_float(types[0]))
result = nir_fge(&b, srcs[0], srcs[1]);
else if (types[0] == GLSL_TYPE_INT)
result = nir_ige(&b, srcs[0], srcs[1]);
@@ -1586,7 +1621,7 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_equal:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT)
+ if (type_is_float(types[0]))
result = nir_feq(&b, srcs[0], srcs[1]);
else
result = nir_ieq(&b, srcs[0], srcs[1]);
@@ -1596,7 +1631,7 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_nequal:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT)
+ if (type_is_float(types[0]))
result = nir_fne(&b, srcs[0], srcs[1]);
else
result = nir_ine(&b, srcs[0], srcs[1]);
@@ -1606,7 +1641,7 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_all_equal:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT) {
+ if (type_is_float(types[0])) {
switch (ir->operands[0]->type->vector_elements) {
case 1: result = nir_feq(&b, srcs[0], srcs[1]); break;
case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break;
@@ -1638,7 +1673,7 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_any_nequal:
if (supports_ints) {
- if (types[0] == GLSL_TYPE_FLOAT) {
+ if (type_is_float(types[0])) {
switch (ir->operands[0]->type->vector_elements) {
case 1: result = nir_fne(&b, srcs[0], srcs[1]); break;
case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break;
@@ -1902,7 +1937,8 @@ nir_visitor::visit(ir_texture *ir)
assert(src_number == num_srcs);
- add_instr(&instr->instr, nir_tex_instr_dest_size(instr));
+ unsigned bit_size = glsl_get_bit_size(ir->type->base_type);
+ add_instr(&instr->instr, nir_tex_instr_dest_size(instr), bit_size);
}
void
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index b67916dc86b..8d38d3384d8 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -469,12 +469,13 @@ nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
}
nir_load_const_instr *
-nir_load_const_instr_create(nir_shader *shader, unsigned num_components)
+nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
+ unsigned bit_size)
{
nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr);
instr_init(&instr->instr, nir_instr_type_load_const);
- nir_ssa_def_init(&instr->instr, &instr->def, num_components, 32, NULL);
+ nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);
return instr;
}
@@ -558,12 +559,14 @@ nir_parallel_copy_instr_create(nir_shader *shader)
}
nir_ssa_undef_instr *
-nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components)
+nir_ssa_undef_instr_create(nir_shader *shader,
+ unsigned num_components,
+ unsigned bit_size)
{
nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
instr_init(&instr->instr, nir_instr_type_ssa_undef);
- nir_ssa_def_init(&instr->instr, &instr->def, num_components, 32, NULL);
+ nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);
return instr;
}
@@ -691,8 +694,10 @@ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref)
tail = tail->child;
}
+ unsigned bit_size = glsl_get_bit_size(glsl_get_base_type(tail->type));
nir_load_const_instr *load =
- nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type));
+ nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type),
+ bit_size);
matrix_offset *= load->def.num_components;
for (unsigned i = 0; i < load->def.num_components; i++) {
@@ -702,6 +707,9 @@ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref)
case GLSL_TYPE_UINT:
load->value.u32[i] = constant->value.u[matrix_offset + i];
break;
+ case GLSL_TYPE_DOUBLE:
+ load->value.f64[i] = constant->value.d[matrix_offset + i];
+ break;
case GLSL_TYPE_BOOL:
load->value.u32[i] = constant->value.b[matrix_offset + i] ?
NIR_TRUE : NIR_FALSE;
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 8e45cba5a16..c3a33431239 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -81,16 +81,16 @@ typedef struct {
} nir_state_slot;
typedef enum {
- nir_var_all = -1,
- nir_var_shader_in,
- nir_var_shader_out,
- nir_var_global,
- nir_var_local,
- nir_var_uniform,
- nir_var_shader_storage,
- nir_var_system_value,
- nir_var_param,
- nir_var_shared,
+ nir_var_shader_in = (1 << 0),
+ nir_var_shader_out = (1 << 1),
+ nir_var_global = (1 << 2),
+ nir_var_local = (1 << 3),
+ nir_var_uniform = (1 << 4),
+ nir_var_shader_storage = (1 << 5),
+ nir_var_system_value = (1 << 6),
+ nir_var_param = (1 << 7),
+ nir_var_shared = (1 << 8),
+ nir_var_all = ~0,
} nir_variable_mode;
/**
@@ -156,6 +156,12 @@ typedef struct nir_variable {
char *name;
struct nir_variable_data {
+ /**
+ * Storage class of the variable.
+ *
+ * \sa nir_variable_mode
+ */
+ nir_variable_mode mode;
/**
* Is the variable read-only?
@@ -170,13 +176,6 @@ typedef struct nir_variable {
unsigned invariant:1;
/**
- * Storage class of the variable.
- *
- * \sa nir_variable_mode
- */
- nir_variable_mode mode:5;
-
- /**
* Interpolation mode for shader inputs / outputs
*
* \sa glsl_interp_qualifier
@@ -1857,7 +1856,8 @@ nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
- unsigned num_components);
+ unsigned num_components,
+ unsigned bit_size);
nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
nir_intrinsic_op op);
@@ -1872,7 +1872,8 @@ nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
- unsigned num_components);
+ unsigned num_components,
+ unsigned bit_size);
nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var);
nir_deref_array *nir_deref_array_create(void *mem_ctx);
@@ -2208,12 +2209,13 @@ void nir_lower_var_copies(nir_shader *shader);
bool nir_lower_global_vars_to_local(nir_shader *shader);
-bool nir_lower_indirect_derefs(nir_shader *shader, uint32_t mode_mask);
+bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes);
bool nir_lower_locals_to_regs(nir_shader *shader);
void nir_lower_outputs_to_temporaries(nir_shader *shader,
nir_function *entrypoint);
+void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
@@ -2222,14 +2224,14 @@ void nir_assign_var_locations(struct exec_list *var_list,
int (*type_size)(const struct glsl_type *));
void nir_lower_io(nir_shader *shader,
- nir_variable_mode mode,
+ nir_variable_mode modes,
int (*type_size)(const struct glsl_type *));
nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
void nir_lower_vars_to_ssa(nir_shader *shader);
-bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode mode);
+bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes);
void nir_move_vec_src_uses_to_dest(nir_shader *shader);
bool nir_lower_vec_to_movs(nir_shader *shader);
@@ -2305,6 +2307,8 @@ void nir_lower_to_source_mods(nir_shader *shader);
bool nir_lower_gs_intrinsics(nir_shader *shader);
+void nir_lower_double_pack(nir_shader *shader);
+
bool nir_normalize_cubemap_coords(nir_shader *shader);
void nir_live_ssa_defs_impl(nir_function_impl *impl);
diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py
index d05564f779c..53a79073a44 100644
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@@ -291,6 +291,7 @@ ${pass_name}(nir_shader *shader)
bool progress = false;
bool condition_flags[${len(condition_list)}];
const nir_shader_compiler_options *options = shader->options;
+ (void) options;
% for index, condition in enumerate(condition_list):
condition_flags[${index}] = ${condition};
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 3dc7c25ec28..29b13fb222f 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -78,7 +78,7 @@ static inline nir_ssa_def *
nir_ssa_undef(nir_builder *build, unsigned num_components, unsigned bit_size)
{
nir_ssa_undef_instr *undef =
- nir_ssa_undef_instr_create(build->shader, num_components);
+ nir_ssa_undef_instr_create(build->shader, num_components, bit_size);
undef->def.bit_size = bit_size;
if (!undef)
return NULL;
@@ -92,7 +92,7 @@ static inline nir_ssa_def *
nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value)
{
nir_load_const_instr *load_const =
- nir_load_const_instr_create(build->shader, num_components);
+ nir_load_const_instr_create(build->shader, num_components, 32);
if (!load_const)
return NULL;
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index 7d2e3835258..e231387c889 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -179,6 +179,7 @@ clone_register(clone_state *state, const nir_register *reg)
add_remap(state, nreg, reg);
nreg->num_components = reg->num_components;
+ nreg->bit_size = reg->bit_size;
nreg->num_array_elems = reg->num_array_elems;
nreg->index = reg->index;
nreg->name = ralloc_strdup(nreg, reg->name);
@@ -359,7 +360,8 @@ static nir_load_const_instr *
clone_load_const(clone_state *state, const nir_load_const_instr *lc)
{
nir_load_const_instr *nlc =
- nir_load_const_instr_create(state->ns, lc->def.num_components);
+ nir_load_const_instr_create(state->ns, lc->def.num_components,
+ lc->def.bit_size);
memcpy(&nlc->value, &lc->value, sizeof(nlc->value));
@@ -372,7 +374,8 @@ static nir_ssa_undef_instr *
clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa)
{
nir_ssa_undef_instr *nsa =
- nir_ssa_undef_instr_create(state->ns, sa->def.num_components);
+ nir_ssa_undef_instr_create(state->ns, sa->def.num_components,
+ sa->def.bit_size);
add_remap(state, &nsa->def, &sa->def);
diff --git a/src/compiler/nir/nir_control_flow.c b/src/compiler/nir/nir_control_flow.c
index 33b06d0cc84..ea5741288ce 100644
--- a/src/compiler/nir/nir_control_flow.c
+++ b/src/compiler/nir/nir_control_flow.c
@@ -281,7 +281,8 @@ insert_phi_undef(nir_block *block, nir_block *pred)
nir_phi_instr *phi = nir_instr_as_phi(instr);
nir_ssa_undef_instr *undef =
nir_ssa_undef_instr_create(ralloc_parent(phi),
- phi->dest.ssa.num_components);
+ phi->dest.ssa.num_components,
+ phi->dest.ssa.bit_size);
nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
nir_phi_src *src = ralloc(phi, nir_phi_src);
src->pred = pred;
@@ -691,7 +692,8 @@ replace_ssa_def_uses(nir_ssa_def *def, void *void_impl)
void *mem_ctx = ralloc_parent(impl);
nir_ssa_undef_instr *undef =
- nir_ssa_undef_instr_create(mem_ctx, def->num_components);
+ nir_ssa_undef_instr_create(mem_ctx, def->num_components,
+ def->bit_size);
nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def));
return true;
diff --git a/src/compiler/nir/nir_from_ssa.c b/src/compiler/nir/nir_from_ssa.c
index 82317c21b62..7bbc2c0f299 100644
--- a/src/compiler/nir/nir_from_ssa.c
+++ b/src/compiler/nir/nir_from_ssa.c
@@ -474,6 +474,7 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
node->set->reg = nir_local_reg_create(state->impl);
node->set->reg->name = def->name;
node->set->reg->num_components = def->num_components;
+ node->set->reg->bit_size = def->bit_size;
node->set->reg->num_array_elems = 0;
}
@@ -491,6 +492,7 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
reg = nir_local_reg_create(state->impl);
reg->name = def->name;
reg->num_components = def->num_components;
+ reg->bit_size = def->bit_size;
reg->num_array_elems = 0;
}
diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c
index e244122e466..c6161433516 100644
--- a/src/compiler/nir/nir_instr_set.c
+++ b/src/compiler/nir/nir_instr_set.c
@@ -52,6 +52,7 @@ hash_alu(uint32_t hash, const nir_alu_instr *instr)
{
hash = HASH(hash, instr->op);
hash = HASH(hash, instr->dest.dest.ssa.num_components);
+ hash = HASH(hash, instr->dest.dest.ssa.bit_size);
/* We explicitly don't hash instr->dest.dest.exact */
if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
@@ -82,9 +83,8 @@ hash_load_const(uint32_t hash, const nir_load_const_instr *instr)
{
hash = HASH(hash, instr->def.num_components);
- hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f32,
- instr->def.num_components
- * sizeof(instr->value.f32[0]));
+ unsigned size = instr->def.num_components * (instr->def.bit_size / 8);
+ hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f32, size);
return hash;
}
@@ -126,8 +126,10 @@ hash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr)
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
hash = HASH(hash, instr->intrinsic);
- if (info->has_dest)
+ if (info->has_dest) {
hash = HASH(hash, instr->dest.ssa.num_components);
+ hash = HASH(hash, instr->dest.ssa.bit_size);
+ }
assert(info->num_variables == 0);
@@ -268,6 +270,9 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
return false;
+ if (alu1->dest.dest.ssa.bit_size != alu2->dest.dest.ssa.bit_size)
+ return false;
+
/* We explicitly don't hash instr->dest.dest.exact */
if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
@@ -325,8 +330,11 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
if (load1->def.num_components != load2->def.num_components)
return false;
+ if (load1->def.bit_size != load2->def.bit_size)
+ return false;
+
return memcmp(load1->value.f32, load2->value.f32,
- load1->def.num_components * sizeof(*load2->value.f32)) == 0;
+ load1->def.num_components * (load1->def.bit_size / 8)) == 0;
}
case nir_instr_type_phi: {
nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
@@ -362,6 +370,10 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
intrinsic2->dest.ssa.num_components)
return false;
+ if (info->has_dest && intrinsic1->dest.ssa.bit_size !=
+ intrinsic2->dest.ssa.bit_size)
+ return false;
+
for (unsigned i = 0; i < info->num_srcs; i++) {
if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i]))
return false;
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index e8ba640fe0b..1548abbd558 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -187,6 +187,9 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
return;
}
+ case nir_op_unpack_double_2x32:
+ return;
+
LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c
index 70381a7968a..b2ea31888f8 100644
--- a/src/compiler/nir/nir_lower_atomics.c
+++ b/src/compiler/nir/nir_lower_atomics.c
@@ -74,7 +74,8 @@ lower_instr(nir_intrinsic_instr *instr,
nir_intrinsic_set_base(new_instr,
state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index);
- nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
+ nir_load_const_instr *offset_const =
+ nir_load_const_instr_create(mem_ctx, 1, 32);
offset_const->value.u32[0] = instr->variables[0]->var->data.offset;
nir_instr_insert_before(&instr->instr, &offset_const->instr);
@@ -95,7 +96,7 @@ lower_instr(nir_intrinsic_instr *instr,
if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
nir_load_const_instr *atomic_counter_size =
- nir_load_const_instr_create(mem_ctx, 1);
+ nir_load_const_instr_create(mem_ctx, 1, 32);
atomic_counter_size->value.u32[0] = child_array_elements * ATOMIC_COUNTER_SIZE;
nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);
diff --git a/src/compiler/nir/nir_lower_double_packing.c b/src/compiler/nir/nir_lower_double_packing.c
new file mode 100644
index 00000000000..d43683d2007
--- /dev/null
+++ b/src/compiler/nir/nir_lower_double_packing.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/*
+ * lowers:
+ *
+ * packDouble2x32(foo) -> packDouble2x32Split(foo.x, foo.y)
+ * unpackDouble2x32(foo) -> vec2(unpackDouble2x32_x(foo), unpackDouble2x32_y(foo))
+ */
+
+static nir_ssa_def *
+lower_pack_double(nir_builder *b, nir_ssa_def *src)
+{
+ return nir_pack_double_2x32_split(b, nir_channel(b, src, 0),
+ nir_channel(b, src, 1));
+}
+
+static nir_ssa_def *
+lower_unpack_double(nir_builder *b, nir_ssa_def *src)
+{
+ return nir_vec2(b, nir_unpack_double_2x32_split_x(b, src),
+ nir_unpack_double_2x32_split_y(b, src));
+}
+
+static bool
+lower_double_pack_block(nir_block *block, void *ctx)
+{
+ nir_builder *b = (nir_builder *) ctx;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_alu)
+ continue;
+
+ nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
+
+ if (alu_instr->op != nir_op_pack_double_2x32 &&
+ alu_instr->op != nir_op_unpack_double_2x32)
+ continue;
+
+ b->cursor = nir_before_instr(&alu_instr->instr);
+
+ nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
+ nir_ssa_def *dest =
+ alu_instr->op == nir_op_pack_double_2x32 ?
+ lower_pack_double(b, src) :
+ lower_unpack_double(b, src);
+
+ nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, nir_src_for_ssa(dest));
+ nir_instr_remove(&alu_instr->instr);
+ }
+
+ return true;
+}
+
+static void
+lower_double_pack_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ nir_foreach_block(impl, lower_double_pack_block, &b);
+}
+
+void
+nir_lower_double_pack(nir_shader *shader)
+{
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ lower_double_pack_impl(function->impl);
+ }
+}
+
diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c b/src/compiler/nir/nir_lower_indirect_derefs.c
index 62b8c84a956..a69dd612565 100644
--- a/src/compiler/nir/nir_lower_indirect_derefs.c
+++ b/src/compiler/nir/nir_lower_indirect_derefs.c
@@ -161,7 +161,7 @@ deref_has_indirect(nir_deref_var *deref)
struct lower_indirect_state {
nir_builder builder;
- uint32_t mode_mask;
+ nir_variable_mode modes;
bool progress;
};
@@ -183,7 +183,7 @@ lower_indirect_block(nir_block *block, void *void_state)
continue;
/* Only lower variables whose mode is in the mask */
- if (!(state->mode_mask & (1 << intrin->variables[0]->var->data.mode)))
+ if (!(state->modes & intrin->variables[0]->var->data.mode))
continue;
state->builder.cursor = nir_before_instr(&intrin->instr);
@@ -206,12 +206,12 @@ lower_indirect_block(nir_block *block, void *void_state)
}
static bool
-lower_indirects_impl(nir_function_impl *impl, uint32_t mode_mask)
+lower_indirects_impl(nir_function_impl *impl, nir_variable_mode modes)
{
struct lower_indirect_state state;
state.progress = false;
- state.mode_mask = mode_mask;
+ state.modes = modes;
nir_builder_init(&state.builder, impl);
nir_foreach_block(impl, lower_indirect_block, &state);
@@ -228,13 +228,13 @@ lower_indirects_impl(nir_function_impl *impl, uint32_t mode_mask)
* that does a binary search on the array index.
*/
bool
-nir_lower_indirect_derefs(nir_shader *shader, uint32_t mode_mask)
+nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes)
{
bool progress = false;
nir_foreach_function(shader, function) {
if (function->impl)
- progress = lower_indirects_impl(function->impl, mode_mask) || progress;
+ progress = lower_indirects_impl(function->impl, modes) || progress;
}
return progress;
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index a30061d3bf0..369a8ee537e 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -38,7 +38,7 @@ struct lower_io_state {
nir_builder builder;
void *mem_ctx;
int (*type_size)(const struct glsl_type *type);
- nir_variable_mode mode;
+ nir_variable_mode modes;
};
void
@@ -245,7 +245,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
nir_variable_mode mode = intrin->variables[0]->var->data.mode;
- if (state->mode != nir_var_all && state->mode != mode)
+ if ((state->modes & mode) == 0)
continue;
if (mode != nir_var_shader_in &&
@@ -393,14 +393,14 @@ nir_lower_io_block(nir_block *block, void *void_state)
static void
nir_lower_io_impl(nir_function_impl *impl,
- nir_variable_mode mode,
+ nir_variable_mode modes,
int (*type_size)(const struct glsl_type *))
{
struct lower_io_state state;
nir_builder_init(&state.builder, impl);
state.mem_ctx = ralloc_parent(impl);
- state.mode = mode;
+ state.modes = modes;
state.type_size = type_size;
nir_foreach_block(impl, nir_lower_io_block, &state);
@@ -410,12 +410,12 @@ nir_lower_io_impl(nir_function_impl *impl,
}
void
-nir_lower_io(nir_shader *shader, nir_variable_mode mode,
+nir_lower_io(nir_shader *shader, nir_variable_mode modes,
int (*type_size)(const struct glsl_type *))
{
nir_foreach_function(shader, function) {
if (function->impl)
- nir_lower_io_impl(function->impl, mode, type_size);
+ nir_lower_io_impl(function->impl, modes, type_size);
}
}
diff --git a/src/compiler/nir/nir_lower_load_const_to_scalar.c b/src/compiler/nir/nir_lower_load_const_to_scalar.c
index b5df46413f1..db5865fb0c0 100644
--- a/src/compiler/nir/nir_lower_load_const_to_scalar.c
+++ b/src/compiler/nir/nir_lower_load_const_to_scalar.c
@@ -48,8 +48,13 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower)
/* Emit the individual loads. */
nir_ssa_def *loads[4];
for (unsigned i = 0; i < lower->def.num_components; i++) {
- nir_load_const_instr *load_comp = nir_load_const_instr_create(b.shader, 1);
- load_comp->value.u32[0] = lower->value.u32[i];
+ nir_load_const_instr *load_comp =
+ nir_load_const_instr_create(b.shader, 1, lower->def.bit_size);
+ if (lower->def.bit_size == 64)
+ load_comp->value.f64[0] = lower->value.f64[i];
+ else
+ load_comp->value.u32[0] = lower->value.u32[i];
+ assert(lower->def.bit_size == 64 || lower->def.bit_size == 32);
nir_builder_instr_insert(&b, &load_comp->instr);
loads[i] = &load_comp->def;
}
diff --git a/src/compiler/nir/nir_lower_locals_to_regs.c b/src/compiler/nir/nir_lower_locals_to_regs.c
index 0438802d3b2..111bfdd2e33 100644
--- a/src/compiler/nir/nir_lower_locals_to_regs.c
+++ b/src/compiler/nir/nir_lower_locals_to_regs.c
@@ -119,6 +119,7 @@ get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state)
nir_register *reg = nir_local_reg_create(state->impl);
reg->num_components = glsl_get_vector_elements(tail->type);
reg->num_array_elems = array_size > 1 ? array_size : 0;
+ reg->bit_size = glsl_get_bit_size(glsl_get_base_type(tail->type));
_mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg);
nir_array_add(&state->derefs_array, nir_deref_var *, deref);
@@ -160,7 +161,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
if (src.reg.indirect) {
nir_load_const_instr *load_const =
- nir_load_const_instr_create(state->shader, 1);
+ nir_load_const_instr_create(state->shader, 1, 32);
load_const->value.u32[0] = glsl_get_length(parent_type);
nir_instr_insert_before(instr, &load_const->instr);
diff --git a/src/compiler/nir/nir_lower_to_source_mods.c b/src/compiler/nir/nir_lower_to_source_mods.c
index 6c4e1f0d3f3..1e8c3c2a130 100644
--- a/src/compiler/nir/nir_lower_to_source_mods.c
+++ b/src/compiler/nir/nir_lower_to_source_mods.c
@@ -54,7 +54,7 @@ nir_lower_to_source_mods_block(nir_block *block, void *state)
if (parent->dest.saturate)
continue;
- switch (nir_op_infos[alu->op].input_types[i]) {
+ switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[i])) {
case nir_type_float:
if (parent->op != nir_op_fmov)
continue;
@@ -128,7 +128,8 @@ nir_lower_to_source_mods_block(nir_block *block, void *state)
continue;
/* We can only saturate float destinations */
- if (nir_op_infos[alu->op].output_type != nir_type_float)
+ if (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) !=
+ nir_type_float)
continue;
if (!list_empty(&alu->dest.dest.ssa.if_uses))
diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c
index 9f9e454c198..249c3892335 100644
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -504,8 +504,8 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
*/
nir_ssa_undef_instr *undef =
nir_ssa_undef_instr_create(state->shader,
- intrin->num_components);
- undef->def.bit_size = intrin->dest.ssa.bit_size;
+ intrin->num_components,
+ intrin->dest.ssa.bit_size);
nir_instr_insert_before(&intrin->instr, &undef->instr);
nir_instr_remove(&intrin->instr);
diff --git a/src/compiler/nir/nir_lower_vec_to_movs.c b/src/compiler/nir/nir_lower_vec_to_movs.c
index f51cede3920..9e40b84e6e3 100644
--- a/src/compiler/nir/nir_lower_vec_to_movs.c
+++ b/src/compiler/nir/nir_lower_vec_to_movs.c
@@ -240,6 +240,7 @@ lower_vec_to_movs_block(nir_block *block, void *void_state)
/* Since we insert multiple MOVs, we have a register destination. */
nir_register *reg = nir_local_reg_create(impl);
reg->num_components = vec->dest.dest.ssa.num_components;
+ reg->bit_size = vec->dest.dest.ssa.bit_size;
nir_ssa_def_rewrite_uses(&vec->dest.dest.ssa, nir_src_for_reg(reg));
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index d6b658dbfc8..e75ca28cf0e 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -95,6 +95,7 @@ tuint = "uint"
tfloat32 = "float32"
tint32 = "int32"
tuint32 = "uint32"
+tuint64 = "uint64"
tfloat64 = "float64"
commutative = "commutative "
@@ -161,15 +162,23 @@ unop("fexp2", tfloat, "exp2f(src0)")
unop("flog2", tfloat, "log2f(src0)")
unop_convert("f2i", tint32, tfloat32, "src0") # Float-to-integer conversion.
unop_convert("f2u", tuint32, tfloat32, "src0") # Float-to-unsigned conversion
+unop_convert("d2i", tint32, tfloat64, "src0") # Double-to-integer conversion.
+unop_convert("d2u", tuint32, tfloat64, "src0") # Double-to-unsigned conversion.
unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion.
+unop_convert("i2d", tfloat64, tint32, "src0") # Integer-to-double conversion.
# Float-to-boolean conversion
unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f")
+unop_convert("d2b", tbool, tfloat64, "src0 != 0.0")
# Boolean-to-float conversion
unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion
unop_convert("i2b", tbool, tint32, "src0 != 0")
unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion.
+unop_convert("u2d", tfloat64, tuint32, "src0") # Unsigned-to-double conversion.
+# double-to-float conversion
+unop_convert("d2f", tfloat32, tfloat64, "src0") # Double to single precision
+unop_convert("f2d", tfloat64, tfloat32, "src0") # Single to double precision
# Unary floating-point rounding operations.
@@ -253,6 +262,34 @@ dst.x = (src0.x << 0) |
(src0.w << 24);
""")
+unop_horiz("pack_double_2x32", 1, tuint64, 2, tuint32, """
+union {
+ uint64_t u64;
+ struct {
+ uint32_t i1;
+ uint32_t i2;
+ };
+} di;
+
+di.i1 = src0.x;
+di.i2 = src0.y;
+dst.x = di.u64;
+""")
+
+unop_horiz("unpack_double_2x32", 2, tuint32, 1, tuint64, """
+union {
+ uint64_t u64;
+ struct {
+ uint32_t i1;
+ uint32_t i2;
+ };
+} di;
+
+di.u64 = src0.x;
+dst.x = di.i1;
+dst.y = di.i2;
+""")
+
# Lowered floating point unpacking operations.
@@ -261,6 +298,29 @@ unop_horiz("unpack_half_2x16_split_x", 1, tfloat32, 1, tuint32,
unop_horiz("unpack_half_2x16_split_y", 1, tfloat32, 1, tuint32,
"unpack_half_1x16((uint16_t)(src0.x >> 16))")
+unop_convert("unpack_double_2x32_split_x", tuint32, tuint64, """
+union {
+ uint64_t u64;
+ struct {
+ uint32_t x;
+ uint32_t y;
+ };
+} di;
+di.u64 = src0;
+dst = di.x;
+""")
+
+unop_convert("unpack_double_2x32_split_y", tuint32, tuint64, """
+union {
+ uint64_t u64;
+ struct {
+ uint32_t x;
+ uint32_t y;
+ };
+} di;
+di.u64 = src0;
+dst = di.y;
+""")
# Bit operations, part of ARB_gpu_shader5.
@@ -540,6 +600,19 @@ binop("fpow", tfloat, "", "bit_size == 64 ? powf(src0, src1) : pow(src0, src1)")
binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32,
"pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
+binop_convert("pack_double_2x32_split", tuint64, tuint32, "", """
+union {
+ uint64_t u64;
+ struct {
+ uint32_t x;
+ uint32_t y;
+ };
+} di;
+di.x = src0;
+di.y = src1;
+dst = di.u64;
+""")
+
# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
# and that of the "bfi1" i965 instruction. That is, it has undefined behavior
# if either of its arguments are 32.
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index ddfe94d9e73..dd41931b345 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -138,7 +138,10 @@ optimizations = [
(('~fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
(('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
(('fsat', ('fsat', a)), ('fsat', a)),
- (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
+ (('fmin', ('fmax', ('fmin', ('fmax', a, b), c), b), c), ('fmin', ('fmax', a, b), c)),
+ (('imin', ('imax', ('imin', ('imax', a, b), c), b), c), ('imin', ('imax', a, b), c)),
+ (('umin', ('umax', ('umin', ('umax', a, b), c), b), c), ('umin', ('umax', a, b), c)),
+ (('extract_u8', ('imin', ('imax', a, 0), 0xff), 0), ('imin', ('imax', a, 0), 0xff)),
(('~ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
(('~ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
(('~ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
@@ -275,6 +278,14 @@ optimizations = [
(('fmul', ('fneg', a), b), ('fneg', ('fmul', a, b))),
(('imul', ('ineg', a), b), ('ineg', ('imul', a, b))),
+ # Reassociate constants in add/mul chains so they can be folded together.
+ # For now, we only handle cases where the constants are separated by
+ # a single non-constant. We could do better eventually.
+ (('~fmul', '#a', ('fmul', b, '#c')), ('fmul', ('fmul', a, c), b)),
+ (('imul', '#a', ('imul', b, '#c')), ('imul', ('imul', a, c), b)),
+ (('~fadd', '#a', ('fadd', b, '#c')), ('fadd', ('fadd', a, c), b)),
+ (('iadd', '#a', ('iadd', b, '#c')), ('iadd', ('iadd', a, c), b)),
+
# Misc. lowering
(('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'),
(('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod'),
@@ -362,26 +373,30 @@ optimizations = [
]
def fexp2i(exp):
- # We assume that exp is already in range.
+ # We assume that exp is already in the range [-126, 127].
return ('ishl', ('iadd', exp, 127), 23)
def ldexp32(f, exp):
- # First, we clamp exp to a reasonable range. The maximum range that we
- # need is the largest range for an exponent, ([-127, 128] if you include
- # inf and 0) plus the number of mantissa bits in either direction to
- # account for denormals. This means that we need at least a range of
- # [-150, 151]. For our implementation, however, what we really care
- # about is that neither exp/2 nor exp-exp/2 go out of the regular range
- # for floating-point exponents.
+ # First, we clamp exp to a reasonable range. The maximum possible range
+ # for a normal exponent is [-126, 127] and, throwing in denormals, you get
+ # a maximum range of [-149, 127]. This means that we can potentially have
+ # a swing of +-276. If you start with FLT_MAX, you actually have to do
+ # ldexp(FLT_MAX, -278) to get it to flush all the way to zero. The GLSL
+ # spec, on the other hand, only requires that we handle an exponent value
+ # in the range [-126, 128]. This implementation is *mostly* correct; it
+ # handles a range on exp of [-252, 254] which allows you to create any
+ # value (including denorms if the hardware supports it) and to adjust the
+ # exponent of any normal value to anything you want.
exp = ('imin', ('imax', exp, -252), 254)
# Now we compute two powers of 2, one for exp/2 and one for exp-exp/2.
- # While the spec technically defines ldexp as f * 2.0^exp, simply
- # multiplying once doesn't work when denormals are involved because
- # 2.0^exp may not be representable even though ldexp(f, exp) is (see
- # comments above about range). Instead, we create two powers of two and
- # multiply by them each in turn. That way the effective range of our
- # exponent is doubled.
+ # (We use ishr which isn't the same for -1, but the -1 case still works
+ # since we use exp-exp/2 as the second exponent.) While the spec
+ # technically defines ldexp as f * 2.0^exp, simply multiplying once doesn't
+ # work with denormals and doesn't allow for the full swing in exponents
+ # that you can get with normalized values. Instead, we create two powers
+ # of two and multiply by them each in turn. That way the effective range
+ # of our exponent is doubled.
pow2_1 = fexp2i(('ishr', exp, 1))
pow2_2 = fexp2i(('isub', exp, ('ishr', exp, 1)))
return ('fmul', ('fmul', f, pow2_1), pow2_2)
diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c
index e64ca369bbc..caa4231b188 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -98,9 +98,9 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
nir_load_const_instr *new_instr =
nir_load_const_instr_create(mem_ctx,
- instr->dest.dest.ssa.num_components);
+ instr->dest.dest.ssa.num_components,
+ instr->dest.dest.ssa.bit_size);
- new_instr->def.bit_size = instr->dest.dest.ssa.bit_size;
new_instr->value = dest;
nir_instr_insert_before(&instr->instr, &new_instr->instr);
diff --git a/src/compiler/nir/nir_opt_dce.c b/src/compiler/nir/nir_opt_dce.c
index 32436c18b60..cab09dfffc3 100644
--- a/src/compiler/nir/nir_opt_dce.c
+++ b/src/compiler/nir/nir_opt_dce.c
@@ -71,7 +71,7 @@ init_instr(nir_instr *instr, struct exec_list *worklist)
nir_tex_instr *tex_instr;
/* We use the pass_flags to store the live/dead information. In DCE, we
- * just treat it as a zero/non-zerl boolean for whether or not the
+ * just treat it as a zero/non-zero boolean for whether or not the
* instruction is live.
*/
instr->pass_flags = 0;
diff --git a/src/compiler/nir/nir_phi_builder.c b/src/compiler/nir/nir_phi_builder.c
index a39e3606fd5..1f1388a73dd 100644
--- a/src/compiler/nir/nir_phi_builder.c
+++ b/src/compiler/nir/nir_phi_builder.c
@@ -195,7 +195,8 @@ nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
*/
nir_ssa_undef_instr *undef =
nir_ssa_undef_instr_create(val->builder->shader,
- val->num_components);
+ val->num_components,
+ val->bit_size);
nir_instr_insert(nir_before_cf_list(&val->builder->impl->body),
&undef->instr);
val->defs[block->index] = &undef->def;
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 17ae3681e21..2793020953e 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -29,6 +29,7 @@
#include "compiler/shader_enums.h"
#include <stdio.h>
#include <stdlib.h>
+#include <inttypes.h> /* for PRIx64 macro */
static void
print_tabs(unsigned num_tabs, FILE *fp)
@@ -68,7 +69,7 @@ static void
print_register_decl(nir_register *reg, print_state *state)
{
FILE *fp = state->fp;
- fprintf(fp, "decl_reg %s ", sizes[reg->num_components]);
+ fprintf(fp, "decl_reg %s %u ", sizes[reg->num_components], reg->bit_size);
if (reg->is_packed)
fprintf(fp, "(packed) ");
print_register(reg, state);
@@ -83,7 +84,8 @@ print_ssa_def(nir_ssa_def *def, print_state *state)
FILE *fp = state->fp;
if (def->name != NULL)
fprintf(fp, "/* %s */ ", def->name);
- fprintf(fp, "%s ssa_%u", sizes[def->num_components], def->index);
+ fprintf(fp, "%s %u ssa_%u", sizes[def->num_components], def->bit_size,
+ def->index);
}
static void
@@ -279,6 +281,13 @@ print_constant(nir_constant *c, const struct glsl_type *type, print_state *state
}
break;
+ case GLSL_TYPE_DOUBLE:
+ for (i = 0; i < total_elems; i++) {
+ if (i > 0) fprintf(fp, ", ");
+ fprintf(fp, "%f", c->value.d[i]);
+ }
+ break;
+
case GLSL_TYPE_STRUCT:
for (i = 0; i < c->num_elements; i++) {
if (i > 0) fprintf(fp, ", ");
@@ -716,7 +725,11 @@ print_load_const_instr(nir_load_const_instr *instr, print_state *state)
* and then print the float in a comment for readability.
*/
- fprintf(fp, "0x%08x /* %f */", instr->value.u32[i], instr->value.f32[i]);
+ if (instr->def.bit_size == 64)
+ fprintf(fp, "0x%16" PRIx64 " /* %f */", instr->value.u64[i],
+ instr->value.f64[i]);
+ else
+ fprintf(fp, "0x%08x /* %f */", instr->value.u32[i], instr->value.f32[i]);
}
fprintf(fp, ")");
diff --git a/src/compiler/nir/nir_remove_dead_variables.c b/src/compiler/nir/nir_remove_dead_variables.c
index ad69de85b97..7395805d7a2 100644
--- a/src/compiler/nir/nir_remove_dead_variables.c
+++ b/src/compiler/nir/nir_remove_dead_variables.c
@@ -120,7 +120,7 @@ remove_dead_vars(struct exec_list *var_list, struct set *live)
}
bool
-nir_remove_dead_variables(nir_shader *shader, nir_variable_mode mode)
+nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes)
{
bool progress = false;
struct set *live =
@@ -128,22 +128,22 @@ nir_remove_dead_variables(nir_shader *shader, nir_variable_mode mode)
add_var_use_shader(shader, live);
- if (mode == nir_var_uniform || mode == nir_var_all)
+ if (modes & nir_var_uniform)
progress = remove_dead_vars(&shader->uniforms, live) || progress;
- if (mode == nir_var_shader_in || mode == nir_var_all)
+ if (modes & nir_var_shader_in)
progress = remove_dead_vars(&shader->inputs, live) || progress;
- if (mode == nir_var_shader_out || mode == nir_var_all)
+ if (modes & nir_var_shader_out)
progress = remove_dead_vars(&shader->outputs, live) || progress;
- if (mode == nir_var_global || mode == nir_var_all)
+ if (modes & nir_var_global)
progress = remove_dead_vars(&shader->globals, live) || progress;
- if (mode == nir_var_system_value || mode == nir_var_all)
+ if (modes & nir_var_system_value)
progress = remove_dead_vars(&shader->system_values, live) || progress;
- if (mode == nir_var_local || mode == nir_var_all) {
+ if (modes & nir_var_local) {
nir_foreach_function(shader, function) {
if (function->impl) {
if (remove_dead_vars(&function->impl->locals, live)) {
diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
index 3a65ab18928..dc53a9063c4 100644
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -477,7 +477,8 @@ construct_value(const nir_search_value *value,
case nir_search_value_constant: {
const nir_search_constant *c = nir_search_value_as_constant(value);
- nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1);
+ nir_load_const_instr *load =
+ nir_load_const_instr_create(mem_ctx, 1, bitsize->dest_size);
switch (c->type) {
case nir_type_float:
@@ -528,8 +529,6 @@ construct_value(const nir_search_value *value,
unreachable("Invalid alu source type");
}
- load->def.bit_size = bitsize->dest_size;
-
nir_instr_insert_before(instr, &load->instr);
nir_alu_src val;
diff --git a/src/compiler/nir/nir_split_var_copies.c b/src/compiler/nir/nir_split_var_copies.c
index 6fdaefa32c8..2b011077a7c 100644
--- a/src/compiler/nir/nir_split_var_copies.c
+++ b/src/compiler/nir/nir_split_var_copies.c
@@ -149,6 +149,7 @@ split_var_copy_instr(nir_intrinsic_instr *old_copy,
case GLSL_TYPE_UINT:
case GLSL_TYPE_INT:
case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_BOOL:
if (glsl_type_is_matrix(src_tail->type)) {
nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
@@ -231,6 +232,7 @@ split_var_copies_block(nir_block *block, void *void_state)
ralloc_steal(state->dead_ctx, instr);
break;
case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
case GLSL_TYPE_BOOL:
diff --git a/src/compiler/nir/nir_to_ssa.c b/src/compiler/nir/nir_to_ssa.c
index d588d7d2df3..23d709a218a 100644
--- a/src/compiler/nir/nir_to_ssa.c
+++ b/src/compiler/nir/nir_to_ssa.c
@@ -160,7 +160,8 @@ static nir_ssa_def *get_ssa_src(nir_register *reg, rewrite_state *state)
* to preserve the information that this source is undefined
*/
nir_ssa_undef_instr *instr =
- nir_ssa_undef_instr_create(state->mem_ctx, reg->num_components);
+ nir_ssa_undef_instr_create(state->mem_ctx, reg->num_components,
+ reg->bit_size);
/*
* We could just insert the undefined instruction before the instruction
diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
index 9f18d1c33e4..3c3306c75fb 100644
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -903,6 +903,9 @@ validate_var_decl(nir_variable *var, bool is_global, validate_state *state)
{
assert(is_global == nir_variable_is_global(var));
+ /* Must have exactly one mode set */
+ assert(util_bitcount(var->data.mode) == 1);
+
/*
* TODO validate some things ir_validate.cpp does (requires more GLSL type
* support)
diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c
index 948454494fa..99514b49650 100644
--- a/src/compiler/nir/spirv/spirv_to_nir.c
+++ b/src/compiler/nir/spirv/spirv_to_nir.c
@@ -86,7 +86,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
if (glsl_type_is_vector_or_scalar(type)) {
unsigned num_components = glsl_get_vector_elements(val->type);
nir_load_const_instr *load =
- nir_load_const_instr_create(b->shader, num_components);
+ nir_load_const_instr_create(b->shader, num_components, 32);
for (unsigned i = 0; i < num_components; i++)
load->value.u32[i] = constant->value.u[i];
@@ -103,7 +103,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value);
col_val->type = glsl_get_column_type(val->type);
nir_load_const_instr *load =
- nir_load_const_instr_create(b->shader, rows);
+ nir_load_const_instr_create(b->shader, rows, 32);
for (unsigned j = 0; j < rows; j++)
load->value.u32[j] = constant->value.u[rows * i + j];