summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2011-05-04 12:50:16 -0700
committerEric Anholt <[email protected]>2011-08-16 13:04:41 -0700
commita070d5f363e99b0f846d555e9ca3a74ec807fdc0 (patch)
treea2dd5e72abec48b8c721a3cfd4ebf068dfcc46bc
parentaf3c9803d818fd33139f1247a387d64b967b8992 (diff)
i965/vs: Start adding support for uniforms
There's no clever packing here, no pull constants, and no array support.
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h22
-rw-r--r--src/mesa/drivers/dri/i965/brw_curbe.c27
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_emit.cpp73
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp150
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_vs_state.c21
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen7_wm_state.c2
9 files changed, 256 insertions, 48 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 7b6b64c1a5c..4a1abd6252e 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -248,6 +248,7 @@ enum param_conversion {
PARAM_CONVERT_F2I,
PARAM_CONVERT_F2U,
PARAM_CONVERT_F2B,
+ PARAM_CONVERT_ZERO,
};
/* Data about a particular attempt to compile a program. Note that
@@ -317,6 +318,13 @@ struct brw_vs_prog_data {
/* Used for calculating urb partitions:
*/
GLuint urb_entry_size;
+
+ const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
+ enum param_conversion param_convert[MAX_UNIFORMS * 4];
+ const float *pull_param[MAX_UNIFORMS * 4];
+ enum param_conversion pull_param_convert[MAX_UNIFORMS * 4];
+
+ bool uses_new_param_layout;
};
@@ -898,7 +906,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p)
}
static inline
-float convert_param(enum param_conversion conversion, float param)
+float convert_param(enum param_conversion conversion, const float *param)
{
union {
float f;
@@ -908,21 +916,23 @@ float convert_param(enum param_conversion conversion, float param)
switch (conversion) {
case PARAM_NO_CONVERT:
- return param;
+ return *param;
case PARAM_CONVERT_F2I:
- fi.i = param;
+ fi.i = *param;
return fi.f;
case PARAM_CONVERT_F2U:
- fi.u = param;
+ fi.u = *param;
return fi.f;
case PARAM_CONVERT_F2B:
- if (param != 0.0)
+ if (*param != 0.0)
fi.i = 1;
else
fi.i = 0;
return fi.f;
+ case PARAM_CONVERT_ZERO:
+ return 0.0;
default:
- return param;
+ return *param;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index ae11c487a2c..960be10006e 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -203,7 +203,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
/* copy float constants */
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i],
- *brw->wm.prog_data->param[i]);
+ brw->wm.prog_data->param[i]);
}
}
@@ -244,15 +244,22 @@ static void prepare_constant_buffer(struct brw_context *brw)
GLuint offset = brw->curbe.vs_start * 16;
GLuint nr = brw->vs.prog_data->nr_params / 4;
- /* Load the subset of push constants that will get used when
- * we also have a pull constant buffer.
- */
- for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
- if (brw->vs.constant_map[i] != -1) {
- assert(brw->vs.constant_map[i] <= nr);
- memcpy(buf + offset + brw->vs.constant_map[i] * 4,
- vp->program.Base.Parameters->ParameterValues[i],
- 4 * sizeof(float));
+ if (brw->vs.prog_data->uses_new_param_layout) {
+ for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
+ buf[offset + i] = convert_param(brw->vs.prog_data->param_convert[i],
+ brw->vs.prog_data->param[i]);
+ }
+ } else {
+ /* Load the subset of push constants that will get used when
+ * we also have a pull constant buffer.
+ */
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ assert(brw->vs.constant_map[i] <= nr);
+ memcpy(buf + offset + brw->vs.constant_map[i] * 4,
+ vp->program.Base.Parameters->ParameterValues[i],
+ 4 * sizeof(float));
+ }
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 10168fc1cb0..01058243f04 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -356,6 +356,8 @@ public:
* for the ir->location's used.
*/
dst_reg output_reg[VERT_RESULT_MAX];
+ int uniform_size[MAX_UNIFORMS];
+ int uniforms;
struct hash_table *variable_ht;
@@ -363,7 +365,10 @@ public:
void fail(const char *msg, ...);
int virtual_grf_alloc(int size);
+ int setup_uniform_values(int loc, const glsl_type *type);
+ void setup_builtin_uniform_values(ir_variable *ir);
int setup_attributes(int payload_reg);
+ int setup_uniforms(int payload_reg);
void setup_payload();
void reg_allocate_trivial();
void reg_allocate();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index bdc7a79d83d..1f2853e1118 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -67,20 +67,12 @@ vec4_visitor::setup_attributes(int payload_reg)
prog_data->urb_read_length = (nr_attributes + 1) / 2;
- return nr_attributes;
+ return payload_reg + nr_attributes;
}
-void
-vec4_visitor::setup_payload(void)
+int
+vec4_visitor::setup_uniforms(int reg)
{
- int reg = 0;
-
- /* r0 is always reserved, as it contains the payload with the URB
- * handles that are passed on to the URB write at the end of the
- * thread.
- */
- reg++;
-
/* User clip planes from curbe:
*/
if (c->key.nr_userclip) {
@@ -99,14 +91,49 @@ vec4_visitor::setup_payload(void)
}
}
- /* FINISHME: push constants */
+ /* The pre-gen6 VS requires that some push constants get loaded no
+ * matter what, or the GPU would hang.
+ */
+ if (this->uniforms == 0) {
+ this->uniform_size[this->uniforms] = 1;
+
+ for (unsigned int i = 0; i < 4; i++) {
+ unsigned int slot = this->uniforms * 4 + i;
+
+ c->prog_data.param[slot] = NULL;
+ c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO;
+ }
+
+ this->uniforms++;
+ } else {
+ reg += ALIGN(uniforms, 2) / 2;
+ }
+
+ /* for now, we are not doing any elimination of unused slots, nor
+ * are we packing our uniforms.
+ */
+ c->prog_data.nr_params = this->uniforms * 4;
+
c->prog_data.curb_read_length = reg - 1;
- c->prog_data.nr_params = 0;
- /* XXX 0 causes a bug elsewhere... */
- if (intel->gen < 6 && c->prog_data.nr_params == 0)
- c->prog_data.nr_params = 4;
+ c->prog_data.uses_new_param_layout = true;
+
+ return reg;
+}
+
+void
+vec4_visitor::setup_payload(void)
+{
+ int reg = 0;
+
+ /* The payload always contains important data in g0, which contains
+ * the URB handles that are passed on to the URB write at the end
+ * of the thread. So, we always start push constants at g1.
+ */
+ reg++;
- reg += setup_attributes(reg);
+ reg = setup_uniforms(reg);
+
+ reg = setup_attributes(reg);
this->first_non_payload_grf = reg;
}
@@ -174,6 +201,18 @@ vec4_instruction::get_src(int i)
}
break;
+ case UNIFORM:
+ brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
+ ((src[i].reg + src[i].reg_offset) % 2) * 4),
+ 0, 4, 1);
+ brw_reg = retype(brw_reg, src[i].type);
+ brw_reg.dw1.bits.swizzle = src[i].swizzle;
+ if (src[i].abs)
+ brw_reg = brw_abs(brw_reg);
+ if (src[i].negate)
+ brw_reg = negate(brw_reg);
+ break;
+
case HW_REG:
brw_reg = src[i].fixed_hw_reg;
break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index bba1d810f19..91abd40faad 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -22,7 +22,10 @@
*/
#include "brw_vec4.h"
+extern "C" {
#include "main/macros.h"
+#include "program/prog_parameter.h"
+}
namespace brw {
@@ -306,6 +309,130 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
this->type = brw_type_for_base_type(type);
}
+/* Our support for uniforms is piggy-backed on the struct
+ * gl_fragment_program, because that's where the values actually
+ * get stored, rather than in some global gl_shader_program uniform
+ * store.
+ */
+int
+vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
+{
+ unsigned int offset = 0;
+ float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
+
+ if (type->is_matrix()) {
+ const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+ type->vector_elements,
+ 1);
+
+ for (unsigned int i = 0; i < type->matrix_columns; i++) {
+ offset += setup_uniform_values(loc + offset, column);
+ }
+
+ return offset;
+ }
+
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_BOOL:
+ for (unsigned int i = 0; i < type->vector_elements; i++) {
+ int slot = this->uniforms * 4 + i;
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
+ break;
+ case GLSL_TYPE_UINT:
+ c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
+ break;
+ case GLSL_TYPE_INT:
+ c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
+ break;
+ case GLSL_TYPE_BOOL:
+ c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
+ break;
+ default:
+ assert(!"not reached");
+ c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
+ break;
+ }
+ c->prog_data.param[slot] = &values[i];
+ }
+
+ for (unsigned int i = type->vector_elements; i < 4; i++) {
+ c->prog_data.param_convert[this->uniforms * 4 + i] =
+ PARAM_CONVERT_ZERO;
+ c->prog_data.param[this->uniforms * 4 + i] = NULL;
+ }
+
+ this->uniform_size[this->uniforms] = type->vector_elements;
+ this->uniforms++;
+
+ return 1;
+
+ case GLSL_TYPE_STRUCT:
+ for (unsigned int i = 0; i < type->length; i++) {
+ offset += setup_uniform_values(loc + offset,
+ type->fields.structure[i].type);
+ }
+ return offset;
+
+ case GLSL_TYPE_ARRAY:
+ for (unsigned int i = 0; i < type->length; i++) {
+ offset += setup_uniform_values(loc + offset, type->fields.array);
+ }
+ return offset;
+
+ case GLSL_TYPE_SAMPLER:
+ /* The sampler takes up a slot, but we don't use any values from it. */
+ return 1;
+
+ default:
+ assert(!"not reached");
+ return 0;
+ }
+}
+
+/* Our support for builtin uniforms is even scarier than non-builtin.
+ * It sits on top of the PROG_STATE_VAR parameters that are
+ * automatically updated from GL context state.
+ */
+void
+vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
+{
+ const ir_state_slot *const slots = ir->state_slots;
+ assert(ir->state_slots != NULL);
+
+ for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+ /* This state reference has already been setup by ir_to_mesa,
+ * but we'll get the same index back here. We can reference
+ * ParameterValues directly, since unlike brw_fs.cpp, we never
+ * add new state references during compile.
+ */
+ int index = _mesa_add_state_reference(this->vp->Base.Parameters,
+ (gl_state_index *)slots[i].tokens);
+ float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
+
+ this->uniform_size[this->uniforms] = 0;
+ /* Add each of the unique swizzled channels of the element.
+ * This will end up matching the size of the glsl_type of this field.
+ */
+ int last_swiz = -1;
+ for (unsigned int j = 0; j < 4; j++) {
+ int swiz = GET_SWZ(slots[i].swizzle, j);
+ if (swiz == last_swiz)
+ break;
+ last_swiz = swiz;
+
+ c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
+ c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
+ this->uniform_size[this->uniforms]++;
+ }
+ this->uniforms++;
+ }
+}
+
dst_reg *
vec4_visitor::variable_storage(ir_variable *var)
{
@@ -496,13 +623,10 @@ vec4_visitor::visit(ir_variable *ir)
switch (ir->mode) {
case ir_var_in:
reg = new(mem_ctx) dst_reg(ATTR, ir->location);
- reg->type = brw_type_for_base_type(ir->type);
- hash_table_insert(this->variable_ht, reg, ir);
break;
case ir_var_out:
reg = new(mem_ctx) dst_reg(this, ir->type);
- hash_table_insert(this->variable_ht, reg, ir);
for (int i = 0; i < type_size(ir->type); i++) {
output_reg[ir->location + i] = *reg;
@@ -512,14 +636,21 @@ vec4_visitor::visit(ir_variable *ir)
case ir_var_temporary:
reg = new(mem_ctx) dst_reg(this, ir->type);
- hash_table_insert(this->variable_ht, reg, ir);
-
break;
case ir_var_uniform:
- /* FINISHME: uniforms */
+ reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
+
+ if (!strncmp(ir->name, "gl_", 3)) {
+ setup_builtin_uniform_values(ir);
+ } else {
+ setup_uniform_values(ir->location, ir->type);
+ }
break;
}
+
+ reg->type = brw_type_for_base_type(ir->type);
+ hash_table_insert(this->variable_ht, reg, ir);
}
void
@@ -1606,6 +1737,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
this->current_annotation = NULL;
this->c = c;
+ this->vp = brw->vertex_program; /* FINISHME: change for precompile */
this->prog_data = &c->prog_data;
this->variable_ht = hash_table_ctor(0,
@@ -1615,6 +1747,12 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
this->virtual_grf_sizes = NULL;
this->virtual_grf_count = 0;
this->virtual_grf_array_size = 0;
+
+ this->uniforms = 0;
+
+ this->variable_ht = hash_table_ctor(0,
+ hash_table_pointer_hash,
+ hash_table_pointer_compare);
}
vec4_visitor::~vec4_visitor()
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index fb4fb146f8d..ad909789d82 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -342,7 +342,7 @@ prepare_wm_pull_constants(struct brw_context *brw)
constants = brw->wm.const_bo->virtual;
for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i],
- *brw->wm.prog_data->pull_param[i]);
+ brw->wm.prog_data->pull_param[i]);
}
drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index e70454416bf..affa72c7324 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -81,12 +81,21 @@ gen6_prepare_vs_push_constants(struct brw_context *brw)
params_uploaded++;
}
- for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
- if (brw->vs.constant_map[i] != -1) {
- memcpy(param + brw->vs.constant_map[i] * 4,
- vp->program.Base.Parameters->ParameterValues[i],
- 4 * sizeof(float));
- params_uploaded++;
+ if (brw->vs.prog_data->uses_new_param_layout) {
+ for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
+ *param = convert_param(brw->vs.prog_data->param_convert[i],
+ brw->vs.prog_data->param[i]);
+ param++;
+ }
+ params_uploaded += brw->vs.prog_data->nr_params / 4;
+ } else {
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ memcpy(param + brw->vs.constant_map[i] * 4,
+ vp->program.Base.Parameters->ParameterValues[i],
+ 4 * sizeof(float));
+ params_uploaded++;
+ }
}
}
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 3d525248f25..07e9995f53b 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -61,7 +61,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
- *brw->wm.prog_data->param[i]);
+ brw->wm.prog_data->param[i]);
}
if (0) {
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index a102ca772b3..1d80e96778e 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -58,7 +58,7 @@ gen7_prepare_wm_constants(struct brw_context *brw)
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
- *brw->wm.prog_data->param[i]);
+ brw->wm.prog_data->param[i]);
}
if (0) {