diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 101 |
2 files changed, 42 insertions, 60 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 392b6120f67..a14c2ee7326 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -390,6 +390,7 @@ public: unsigned output_components[BRW_MAX_DRAW_BUFFERS]; fs_reg dual_src_output; int first_non_payload_grf; + /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */ int max_grf; int urb_setup[FRAG_ATTRIB_MAX]; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 5fb3db94ecc..747c5fc6293 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -72,13 +72,29 @@ fs_visitor::assign_regs_trivial() } static void -brw_alloc_reg_set_for_classes(struct brw_context *brw, - int *class_sizes, - int class_count, - int reg_width, - int base_reg_count) +brw_alloc_reg_set(struct brw_context *brw, int reg_width, int base_reg_count) { struct intel_context *intel = &brw->intel; + /* The registers used to make up almost all values handled in the compiler + * are a scalar value occupying a single register (or 2 registers in the + * case of 16-wide, which is handled by dividing base_reg_count by 2 and + * multiplying allocated register numbers by 2). Things that were + * aggregates of scalar values at the GLSL level were split to scalar + * values by split_virtual_grfs(). + * + * However, texture SEND messages return a series of contiguous registers. + * We currently always ask for 4 registers, but we may convert that to use + * less some day. + * + * Additionally, on gen5 we need aligned pairs of registers for the PLN + * instruction. + * + * So we have a need for classes for 1, 2, and 4 registers currently, and + * we add in '3' to make indexing the array easier (since we'll probably + * want it for texturing later). + */ + const int class_sizes[4] = {1, 2, 3, 4}; + const int class_count = 4; /* Compute the total number of registers across all classes. */ int ra_reg_count = 0; @@ -139,7 +155,6 @@ brw_alloc_reg_set_for_classes(struct brw_context *brw, pairs_base_reg + i); } } - class_count++; } ra_set_finalize(brw->wm.regs, NULL); @@ -158,70 +173,36 @@ fs_visitor::assign_regs() int hw_reg_mapping[this->virtual_grf_count]; int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); int base_reg_count = (max_grf - first_assigned_grf) / reg_width; - int class_sizes[base_reg_count]; - int class_count = 0; calculate_live_intervals(); - /* Set up the register classes. - * - * The base registers store a scalar value. For texture samples, - * we get virtual GRFs composed of 4 contiguous hw register. For - * structures and arrays, we store them as contiguous larger things - * than that, though we should be able to do better most of the - * time. - */ - class_sizes[class_count++] = 1; - if (brw->has_pln && intel->gen < 6) { - /* Always set up the (unaligned) pairs for gen5, so we can find - * them for making the aligned pair class. - */ - class_sizes[class_count++] = 2; - } - for (int r = 0; r < this->virtual_grf_count; r++) { - int i; - - for (i = 0; i < class_count; i++) { - if (class_sizes[i] == this->virtual_grf_sizes[r]) - break; - } - if (i == class_count) { - if (this->virtual_grf_sizes[r] >= base_reg_count) { - fail("Object too large to register allocate.\n"); - } - - class_sizes[class_count++] = this->virtual_grf_sizes[r]; - } - } - - brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, - reg_width, base_reg_count); + brw_alloc_reg_set(brw, reg_width, base_reg_count); struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs, this->virtual_grf_count); for (int i = 0; i < this->virtual_grf_count; i++) { - for (int c = 0; c < class_count; c++) { - if (class_sizes[c] == this->virtual_grf_sizes[i]) { - /* Special case: on pre-GEN6 hardware that supports PLN, the - * second operand of a PLN instruction needs to be an - * even-numbered register, so we have a special register class - * wm_aligned_pairs_class to handle this case. pre-GEN6 always - * uses this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] as the - * second operand of a PLN instruction (since it doesn't support - * any other interpolation modes). So all we need to do is find - * that register and set it to the appropriate class. - */ - if (brw->wm.aligned_pairs_class >= 0 && - this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) { - ra_set_node_class(g, i, brw->wm.aligned_pairs_class); - } else { - ra_set_node_class(g, i, brw->wm.classes[c]); - } - break; - } + assert(this->virtual_grf_sizes[i] >= 1 && + this->virtual_grf_sizes[i] <= 4 && + "Register allocation relies on split_virtual_grfs()"); + int c = brw->wm.classes[this->virtual_grf_sizes[i] - 1]; + + /* Special case: on pre-GEN6 hardware that supports PLN, the + * second operand of a PLN instruction needs to be an + * even-numbered register, so we have a special register class + * wm_aligned_pairs_class to handle this case. pre-GEN6 always + * uses this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] as the + * second operand of a PLN instruction (since it doesn't support + * any other interpolation modes). So all we need to do is find + * that register and set it to the appropriate class. + */ + if (brw->wm.aligned_pairs_class >= 0 && + this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) { + c = brw->wm.aligned_pairs_class; } + ra_set_node_class(g, i, c); + for (int j = 0; j < i; j++) { if (virtual_grf_interferes(i, j)) { ra_add_node_interference(g, i, j); |