diff options
author | Nicolai Hähnle <[email protected]> | 2016-04-25 18:20:50 -0500 |
---|---|---|
committer | Nicolai Hähnle <[email protected]> | 2016-04-29 11:52:59 -0500 |
commit | 98c348d26b28a662d093543ecb7ca839e7883e8e (patch) | |
tree | bd482743d87e110f06f7d21eb65e7cb01def16b6 /src | |
parent | 59af21c3e991d13ffaf79494ea608a67b7d3e7f0 (diff) |
st/glsl_to_tgsi: reduce stack explosion in recursive expression visitor
In optimized builds, gcc inlines so much code into visit(ir_expression *) that
the function ends up with a roughly 32KB stack frame. This is a problem given
that the function is called recursively. In non-optimized builds, the stack
frame is much smaller, hence one gets crashes that happen only in optimized
builds.
Arguably there is a compiler bug or at least severe misfeature here. In any
case, the easy thing to do for now seems to be moving the bulk of the
non-recursive code into a separate function. This is sufficient to convince my
version of gcc not to blow up the stack frame of the recursive part. Just to be
sure, add the gcc-specific noinline attribute to prevent this bug from
reoccurring if inliner heuristics change.
v2: put ATTRIBUTE_NOINLINE into macros.h
Cc: "11.1 11.2" <[email protected]>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95133
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95026
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92850
Reviewed-by: Ilia Mirkin <[email protected]>
Reviewed-by: Rob Clark <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 20 | ||||
-rw-r--r-- | src/util/macros.h | 6 |
2 files changed, 22 insertions, 4 deletions
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index ad818a8240b..3c4c91b0e2f 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -450,6 +450,8 @@ public: virtual void visit(ir_barrier *); /*@}*/ + void visit_expression(ir_expression *, st_src_reg *) ATTRIBUTE_NOINLINE; + void visit_atomic_counter_intrinsic(ir_call *); void visit_ssbo_intrinsic(ir_call *); void visit_membar_intrinsic(ir_call *); @@ -1535,10 +1537,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, void glsl_to_tgsi_visitor::visit(ir_expression *ir) { - unsigned int operand; st_src_reg op[ARRAY_SIZE(ir->operands)]; - st_src_reg result_src; - st_dst_reg result_dst; /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) */ @@ -1561,7 +1560,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (ir->operation == ir_quadop_vector) assert(!"ir_quadop_vector should have been lowered"); - for (operand = 0; operand < ir->get_num_operands(); operand++) { + for (unsigned int operand = 0; operand < ir->get_num_operands(); operand++) { this->result.file = PROGRAM_UNDEFINED; ir->operands[operand]->accept(this); if (this->result.file == PROGRAM_UNDEFINED) { @@ -1578,6 +1577,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) assert(!ir->operands[operand]->type->is_matrix()); } + visit_expression(ir, op); +} + +/* The non-recursive part of the expression visitor lives in a separate + * function and should be prevented from being inlined, to avoid a stack + * explosion when deeply nested expressions are visited. 
+ */ +void +glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) +{ + st_src_reg result_src; + st_dst_reg result_dst; + int vector_elements = ir->operands[0]->type->vector_elements; if (ir->operands[1]) { vector_elements = MAX2(vector_elements, diff --git a/src/util/macros.h b/src/util/macros.h index 773e12ffdeb..c0bfb15a47a 100644 --- a/src/util/macros.h +++ b/src/util/macros.h @@ -214,6 +214,12 @@ do { \ #define MUST_CHECK #endif +#if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) +#define ATTRIBUTE_NOINLINE __attribute__((noinline)) +#else +#define ATTRIBUTE_NOINLINE +#endif + /** Compute ceiling of integer quotient of A divided by B. */ #define DIV_ROUND_UP( A, B ) ( (A) % (B) == 0 ? (A)/(B) : (A)/(B)+1 ) |