summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/mesa/shader/slang/slang_assemble.c 200
-rw-r--r-- src/mesa/shader/slang/slang_assemble.h 3
-rw-r--r-- src/mesa/shader/slang/slang_assemble_assignment.c 139
-rw-r--r-- src/mesa/shader/slang/slang_assemble_constructor.c 46
-rw-r--r-- src/mesa/shader/slang/slang_execute_x86.c 197
-rw-r--r-- src/mesa/shader/slang/slang_storage.c 119
-rw-r--r-- src/mesa/shader/slang/slang_storage.h 22
-rw-r--r-- src/mesa/x86/rtasm/x86sse.c 14
-rw-r--r-- src/mesa/x86/rtasm/x86sse.h 2
9 files changed, 428 insertions, 314 deletions
diff --git a/src/mesa/shader/slang/slang_assemble.c b/src/mesa/shader/slang/slang_assemble.c
index 9e5851383c9..36fb2305f68 100644
--- a/src/mesa/shader/slang/slang_assemble.c
+++ b/src/mesa/shader/slang/slang_assemble.c
@@ -398,74 +398,95 @@ GLboolean _slang_cleanup_stack (slang_assemble_ctx *A, slang_operation *op)
/* _slang_assemble_operation() */
-static GLboolean dereference_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg,
- GLuint *size, slang_swizzle *swz, GLboolean is_swizzled)
+static GLboolean
+dereference_basic (slang_assemble_ctx *A, slang_storage_type type, GLuint *size, slang_swizzle *swz,
+ GLboolean is_swizzled)
{
- GLuint i;
-
- for (i = agg->count; i > 0; i--)
- {
- const slang_storage_array *arr = &agg->arrays[i - 1];
- GLuint j;
-
- for (j = arr->length; j > 0; j--)
- {
- if (arr->type == slang_stor_aggregate)
- {
- if (!dereference_aggregate (A, arr->aggregate, size, swz, is_swizzled))
- return GL_FALSE;
- }
- else
- {
- GLuint src_offset;
- slang_assembly_type ty;
+ GLuint src_offset;
+ slang_assembly_type ty;
+
+ *size -= _slang_sizeof_type (type);
+
+ /* If swizzling is taking place, we are forced to use scalar operations, even if we have
+ * vec4 instructions enabled (this should actually be done with special vec4 shuffle
+ * instructions).
+ * Adjust the size and calculate the offset within the source variable to read.
+ */
+ if (is_swizzled)
+ src_offset = swz->swizzle[*size / 4] * 4;
+ else
+ src_offset = *size;
+
+ /* dereference data slot of a basic type */
+ if (!PLAB2 (A->file, slang_asm_local_addr, A->local.addr_tmp, 4))
+ return GL_FALSE;
+ if (!PUSH (A->file, slang_asm_addr_deref))
+ return GL_FALSE;
+ if (src_offset != 0) {
+ if (!PLAB (A->file, slang_asm_addr_push, src_offset))
+ return GL_FALSE;
+ if (!PUSH (A->file, slang_asm_addr_add))
+ return GL_FALSE;
+ }
+
+ switch (type) {
+ case slang_stor_bool:
+ ty = slang_asm_bool_deref;
+ break;
+ case slang_stor_int:
+ ty = slang_asm_int_deref;
+ break;
+ case slang_stor_float:
+ ty = slang_asm_float_deref;
+ break;
+#if defined(USE_X86_ASM) || defined(SLANG_X86)
+ case slang_stor_vec4:
+ ty = slang_asm_vec4_deref;
+ break;
+#endif
+ default:
+ _mesa_problem(NULL, "Unexpected arr->type in dereference_basic");
+ ty = slang_asm_none;
+ }
- *size -= 4;
+ return PUSH (A->file, ty);
+}
- /* calculate the offset within source variable to read */
- if (is_swizzled)
- {
- /* swizzle the index to get the actual offset */
- src_offset = swz->swizzle[*size / 4] * 4;
- }
- else
- {
- /* no swizzling - read sequentially */
- src_offset = *size;
- }
+static GLboolean
+dereference_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, GLuint *size,
+ slang_swizzle *swz, GLboolean is_swizzled)
+{
+ GLuint i;
- /* dereference data slot of a basic type */
- if (!PLAB2 (A->file, slang_asm_local_addr, A->local.addr_tmp, 4))
- return GL_FALSE;
- if (!PUSH (A->file, slang_asm_addr_deref))
- return GL_FALSE;
- if (!PLAB (A->file, slang_asm_addr_push, src_offset))
- return GL_FALSE;
- if (!PUSH (A->file, slang_asm_addr_add))
- return GL_FALSE;
+ for (i = agg->count; i > 0; i--) {
+ const slang_storage_array *arr = &agg->arrays[i - 1];
+ GLuint j;
- switch (arr->type)
- {
- case slang_stor_bool:
- ty = slang_asm_bool_deref;
- break;
- case slang_stor_int:
- ty = slang_asm_int_deref;
- break;
- case slang_stor_float:
- ty = slang_asm_float_deref;
- break;
- default:
- _mesa_problem(NULL, "Unexpected arr->type in dereference_aggregate");
- ty = slang_asm_none;
- }
- if (!PUSH (A->file, ty))
- return GL_FALSE;
- }
- }
- }
+ for (j = arr->length; j > 0; j--) {
+ if (arr->type == slang_stor_aggregate) {
+ if (!dereference_aggregate (A, arr->aggregate, size, swz, is_swizzled))
+ return GL_FALSE;
+ }
+ else {
+ if (is_swizzled && arr->type == slang_stor_vec4) {
+ if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled))
+ return GL_FALSE;
+ if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled))
+ return GL_FALSE;
+ if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled))
+ return GL_FALSE;
+ if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled))
+ return GL_FALSE;
+ }
+ else {
+ if (!dereference_basic (A, arr->type, size, swz, is_swizzled))
+ return GL_FALSE;
+ }
+ }
+ }
+ }
- return GL_TRUE;
+ return GL_TRUE;
}
GLboolean _slang_dereference (slang_assemble_ctx *A, slang_operation *op)
@@ -694,35 +715,40 @@ static GLboolean call_asm_instruction (slang_assemble_ctx *A, slang_atom a_name)
return GL_TRUE;
}
-static GLboolean equality_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg,
- GLuint *index, GLuint size, GLuint z_label)
+static GLboolean
+equality_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, GLuint *index,
+ GLuint size, GLuint z_label)
{
- GLuint i;
+ GLuint i;
- for (i = 0; i < agg->count; i++)
- {
- const slang_storage_array *arr = &agg->arrays[i];
- GLuint j;
+ for (i = 0; i < agg->count; i++) {
+ const slang_storage_array *arr = &agg->arrays[i];
+ GLuint j;
- for (j = 0; j < arr->length; j++)
- {
- if (arr->type == slang_stor_aggregate)
- {
- if (!equality_aggregate (A, arr->aggregate, index, size, z_label))
- return GL_FALSE;
- }
- else
- {
- if (!PLAB2 (A->file, slang_asm_float_equal_int, size + *index, *index))
- return GL_FALSE;
- *index += 4;
- if (!PLAB (A->file, slang_asm_jump_if_zero, z_label))
- return GL_FALSE;
- }
- }
- }
+ for (j = 0; j < arr->length; j++) {
+ if (arr->type == slang_stor_aggregate) {
+ if (!equality_aggregate (A, arr->aggregate, index, size, z_label))
+ return GL_FALSE;
+ }
+ else {
+#if defined(USE_X86_ASM) || defined(SLANG_X86)
+ if (arr->type == slang_stor_vec4) {
+ if (!PLAB2 (A->file, slang_asm_vec4_equal_int, size + *index, *index))
+ return GL_FALSE;
+ }
+ else
+#endif
+ if (!PLAB2 (A->file, slang_asm_float_equal_int, size + *index, *index))
+ return GL_FALSE;
- return GL_TRUE;
+ *index += _slang_sizeof_type (arr->type);
+ if (!PLAB (A->file, slang_asm_jump_if_zero, z_label))
+ return GL_FALSE;
+ }
+ }
+ }
+
+ return GL_TRUE;
}
static GLboolean equality (slang_assemble_ctx *A, slang_operation *op, GLboolean equal)
diff --git a/src/mesa/shader/slang/slang_assemble.h b/src/mesa/shader/slang/slang_assemble.h
index fbf88bd6d18..95e4fa263a3 100644
--- a/src/mesa/shader/slang/slang_assemble.h
+++ b/src/mesa/shader/slang/slang_assemble.h
@@ -105,6 +105,9 @@ typedef enum slang_assembly_type_
slang_asm_vec4_divide,
slang_asm_vec4_negate,
slang_asm_vec4_dot,
+ slang_asm_vec4_copy,
+ slang_asm_vec4_deref,
+ slang_asm_vec4_equal_int,
/* not a real assembly instruction */
slang_asm__last
} slang_assembly_type;
diff --git a/src/mesa/shader/slang/slang_assemble_assignment.c b/src/mesa/shader/slang/slang_assemble_assignment.c
index 2f4cb1423f3..d894a8db18c 100644
--- a/src/mesa/shader/slang/slang_assemble_assignment.c
+++ b/src/mesa/shader/slang/slang_assemble_assignment.c
@@ -53,66 +53,89 @@
* +------------------+
*/
-static GLboolean assign_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg,
- GLuint *index, GLuint size)
+static GLboolean
+assign_basic (slang_assemble_ctx *A, slang_storage_type type, GLuint *index, GLuint size)
{
- GLuint i;
-
- for (i = 0; i < agg->count; i++)
- {
- const slang_storage_array *arr = &agg->arrays[i];
- GLuint j;
-
- for (j = 0; j < arr->length; j++)
- {
- if (arr->type == slang_stor_aggregate)
- {
- if (!assign_aggregate (A, arr->aggregate, index, size))
- return GL_FALSE;
- }
- else
- {
- GLuint dst_addr_loc, dst_offset;
- slang_assembly_type ty;
-
- /* calculate the distance from top of the stack to the destination address */
- dst_addr_loc = size - *index;
-
- /* calculate the offset within destination variable to write */
- if (A->swz.num_components != 0)
- {
- /* swizzle the index to get the actual offset */
- dst_offset = A->swz.swizzle[*index / 4] * 4;
- }
- else
- {
- /* no swizzling - write sequentially */
- dst_offset = *index;
- }
-
- switch (arr->type)
- {
- case slang_stor_bool:
- ty = slang_asm_bool_copy;
- break;
- case slang_stor_int:
- ty = slang_asm_int_copy;
- break;
- case slang_stor_float:
- ty = slang_asm_float_copy;
- break;
- default:
- break;
- }
- if (!slang_assembly_file_push_label2 (A->file, ty, dst_addr_loc, dst_offset))
- return GL_FALSE;
-
- *index += 4;
- }
- }
- }
+ GLuint dst_offset, dst_addr_loc;
+ slang_assembly_type ty;
+
+ /* Calculate the offset within destination variable to write. */
+ if (A->swz.num_components != 0)
+ dst_offset = A->swz.swizzle[*index / 4] * 4;
+ else
+ dst_offset = *index;
+
+ switch (type) {
+ case slang_stor_bool:
+ ty = slang_asm_bool_copy;
+ break;
+ case slang_stor_int:
+ ty = slang_asm_int_copy;
+ break;
+ case slang_stor_float:
+ ty = slang_asm_float_copy;
+ break;
+#if defined(USE_X86_ASM) || defined(SLANG_X86)
+ case slang_stor_vec4:
+ ty = slang_asm_vec4_copy;
+ break;
+#endif
+ default:
+ _mesa_problem(NULL, "Unexpected arr->type in assign_basic");
+ ty = slang_asm_none;
+ }
+
+ /* Calculate the distance from the top of the stack to the destination address. As the
+ * copy operation progresses, components of the source are successively popped off the
+ * stack, so this distance shrinks by the same amount by which *index has grown.
+ */
+ dst_addr_loc = size - *index;
+
+ if (!slang_assembly_file_push_label2 (A->file, ty, dst_addr_loc, dst_offset))
+ return GL_FALSE;
+ *index += _slang_sizeof_type (type);
+
+ return GL_TRUE;
+}
- return GL_TRUE;
+static GLboolean
+assign_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, GLuint *index,
+ GLuint size)
+{
+ GLuint i;
+
+ for (i = 0; i < agg->count; i++) {
+ const slang_storage_array *arr = &agg->arrays[i];
+ GLuint j;
+
+ for (j = 0; j < arr->length; j++) {
+ if (arr->type == slang_stor_aggregate) {
+ if (!assign_aggregate (A, arr->aggregate, index, size))
+ return GL_FALSE;
+ }
+ else {
+ /* When the destination is swizzled, we are forced to use float_copy for each
+ * component, even if the vec4 extension (with its vec4_copy operation) is enabled.
+ */
+ if (A->swz.num_components != 0 && arr->type == slang_stor_vec4) {
+ if (!assign_basic (A, slang_stor_float, index, size))
+ return GL_FALSE;
+ if (!assign_basic (A, slang_stor_float, index, size))
+ return GL_FALSE;
+ if (!assign_basic (A, slang_stor_float, index, size))
+ return GL_FALSE;
+ if (!assign_basic (A, slang_stor_float, index, size))
+ return GL_FALSE;
+ }
+ else {
+ if (!assign_basic (A, arr->type, index, size))
+ return GL_FALSE;
+ }
+ }
+ }
+ }
+
+ return GL_TRUE;
}
GLboolean _slang_assemble_assignment (slang_assemble_ctx *A, slang_operation *op)
diff --git a/src/mesa/shader/slang/slang_assemble_constructor.c b/src/mesa/shader/slang/slang_assemble_constructor.c
index 6f02772bcd7..9d1aa707182 100644
--- a/src/mesa/shader/slang/slang_assemble_constructor.c
+++ b/src/mesa/shader/slang/slang_assemble_constructor.c
@@ -143,38 +143,32 @@ GLvoid _slang_multiply_swizzles (slang_swizzle *dst, const slang_swizzle *left,
/* _slang_assemble_constructor() */
-static GLboolean sizeof_argument (slang_assemble_ctx *A, GLuint *size, slang_operation *op)
+static GLboolean
+sizeof_argument (slang_assemble_ctx *A, GLuint *size, slang_operation *op)
{
- slang_assembly_typeinfo ti;
- GLboolean result = GL_FALSE;
- slang_storage_aggregate agg, flat_agg;
+ slang_assembly_typeinfo ti;
+ GLboolean result = GL_FALSE;
+ slang_storage_aggregate agg;
- if (!slang_assembly_typeinfo_construct (&ti))
- return GL_FALSE;
- if (!_slang_typeof_operation (A, op, &ti))
- goto end1;
+ if (!slang_assembly_typeinfo_construct (&ti))
+ return GL_FALSE;
+ if (!_slang_typeof_operation (A, op, &ti))
+ goto end1;
- if (!slang_storage_aggregate_construct (&agg))
- goto end1;
- if (!_slang_aggregate_variable (&agg, &ti.spec, 0, A->space.funcs, A->space.structs,
- A->space.vars, A->mach, A->file, A->atoms))
- goto end2;
+ if (!slang_storage_aggregate_construct (&agg))
+ goto end1;
+ if (!_slang_aggregate_variable (&agg, &ti.spec, 0, A->space.funcs, A->space.structs,
+ A->space.vars, A->mach, A->file, A->atoms))
+ goto end;
- if (!slang_storage_aggregate_construct (&flat_agg))
- goto end2;
- if (!_slang_flatten_aggregate (&flat_agg, &agg))
- goto end;
-
- *size = flat_agg.count * 4;
+ *size = _slang_sizeof_aggregate (&agg);
+ result = GL_TRUE;
- result = GL_TRUE;
end:
- slang_storage_aggregate_destruct (&flat_agg);
-end2:
- slang_storage_aggregate_destruct (&agg);
+ slang_storage_aggregate_destruct (&agg);
end1:
- slang_assembly_typeinfo_destruct (&ti);
- return result;
+ slang_assembly_typeinfo_destruct (&ti);
+ return result;
}
static GLboolean constructor_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *flat,
@@ -270,7 +264,7 @@ GLboolean _slang_assemble_constructor (slang_assemble_ctx *A, slang_operation *o
arg_sums[1] = 0; /* will hold all argument's size sum */
for (i = 0; i < op->num_children; i++)
{
- GLuint arg_size;
+ GLuint arg_size = 0;
if (!sizeof_argument (A, &arg_size, &op->children[i]))
goto end;
diff --git a/src/mesa/shader/slang/slang_execute_x86.c b/src/mesa/shader/slang/slang_execute_x86.c
index 59c3aadcaac..468984dce6b 100644
--- a/src/mesa/shader/slang/slang_execute_x86.c
+++ b/src/mesa/shader/slang/slang_execute_x86.c
@@ -50,6 +50,7 @@ typedef struct
struct x86_reg r_eax;
struct x86_reg r_ecx;
struct x86_reg r_edx;
+ struct x86_reg r_ebx;
struct x86_reg r_esp;
struct x86_reg r_ebp;
struct x86_reg r_st0;
@@ -183,7 +184,7 @@ static GLvoid do_print_bool (slang_info_log **infolog, GLfloat x)
static GLvoid codegen_assem (codegen_ctx *G, slang_assembly *a, slang_info_log **infolog)
{
- GLint disp;
+ GLint disp, i;
switch (a->type)
{
@@ -517,128 +518,133 @@ static GLvoid codegen_assem (codegen_ctx *G, slang_assembly *a, slang_info_log *
break;
case slang_asm_vec4_add:
/* [vec4] | vec4 */
- x87_fld (&G->f, x86_deref (G->r_esp));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
+ for (i = 0; i < 4; i++)
+ x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
- x87_fld (&G->f, x86_deref (G->r_eax));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
- x87_faddp (&G->f, G->r_st4);
- x87_faddp (&G->f, G->r_st4);
- x87_faddp (&G->f, G->r_st4);
- x87_faddp (&G->f, G->r_st4);
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
- x87_fstp (&G->f, x86_deref (G->r_eax));
+ for (i = 0; i < 4; i++)
+ x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
+ for (i = 0; i < 4; i++)
+ x87_faddp (&G->f, G->r_st4);
+ for (i = 0; i < 4; i++)
+ x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
break;
case slang_asm_vec4_subtract:
/* [vec4] | vec4 */
x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, 16));
- x87_fld (&G->f, x86_deref (G->r_eax));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
- x87_fld (&G->f, x86_deref (G->r_esp));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
+ for (i = 0; i < 4; i++)
+ x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
+ for (i = 0; i < 4; i++)
+ x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
- x87_fsubp (&G->f, G->r_st4);
- x87_fsubp (&G->f, G->r_st4);
- x87_fsubp (&G->f, G->r_st4);
- x87_fsubp (&G->f, G->r_st4);
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
- x87_fstp (&G->f, x86_deref (G->r_eax));
+ for (i = 0; i < 4; i++)
+ x87_fsubp (&G->f, G->r_st4);
+ for (i = 0; i < 4; i++)
+ x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
break;
case slang_asm_vec4_multiply:
/* [vec4] | vec4 */
- x87_fld (&G->f, x86_deref (G->r_esp));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
+ for (i = 0; i < 4; i++)
+ x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
- x87_fld (&G->f, x86_deref (G->r_eax));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
- x87_fmulp (&G->f, G->r_st4);
- x87_fmulp (&G->f, G->r_st4);
- x87_fmulp (&G->f, G->r_st4);
- x87_fmulp (&G->f, G->r_st4);
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
- x87_fstp (&G->f, x86_deref (G->r_eax));
+ for (i = 0; i < 4; i++)
+ x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
+ for (i = 0; i < 4; i++)
+ x87_fmulp (&G->f, G->r_st4);
+ for (i = 0; i < 4; i++)
+ x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
break;
case slang_asm_vec4_divide:
/* [vec4] | vec4 */
x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, 16));
- x87_fld (&G->f, x86_deref (G->r_eax));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
- x87_fld (&G->f, x86_deref (G->r_esp));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
+ for (i = 0; i < 4; i++)
+ x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
+ for (i = 0; i < 4; i++)
+ x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
- x87_fdivp (&G->f, G->r_st4);
- x87_fdivp (&G->f, G->r_st4);
- x87_fdivp (&G->f, G->r_st4);
- x87_fdivp (&G->f, G->r_st4);
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
- x87_fstp (&G->f, x86_deref (G->r_eax));
+ for (i = 0; i < 4; i++)
+ x87_fdivp (&G->f, G->r_st4);
+ for (i = 0; i < 4; i++)
+ x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
break;
case slang_asm_vec4_negate:
/* [vec4] */
x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
- x87_fld (&G->f, x86_deref (G->r_eax));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
- x87_fchs (&G->f);
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 12));
- x87_fchs (&G->f);
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 8));
- x87_fchs (&G->f);
- x87_fstp (&G->f, x86_make_disp (G->r_eax, 4));
- x87_fchs (&G->f);
- x87_fstp (&G->f, x86_deref (G->r_eax));
+ for (i = 0; i < 4; i++)
+ x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
+ for (i = 0; i < 4; i++) {
+ x87_fchs (&G->f);
+ x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4));
+ }
break;
case slang_asm_vec4_dot:
/* [vec4] | vec4 */
- x87_fld (&G->f, x86_deref (G->r_esp));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 4));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 8));
- x87_fld (&G->f, x86_make_disp (G->r_esp, 12));
+ for (i = 0; i < 4; i++)
+ x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4));
x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16));
x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
- x87_fld (&G->f, x86_deref (G->r_eax));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 4));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 8));
- x87_fld (&G->f, x86_make_disp (G->r_eax, 12));
- x87_fmulp (&G->f, G->r_st4);
- x87_fmulp (&G->f, G->r_st4);
- x87_fmulp (&G->f, G->r_st4);
- x87_fmulp (&G->f, G->r_st4);
- x87_faddp (&G->f, G->r_st1);
- x87_faddp (&G->f, G->r_st1);
- x87_faddp (&G->f, G->r_st1);
+ for (i = 0; i < 4; i++)
+ x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4));
+ for (i = 0; i < 4; i++)
+ x87_fmulp (&G->f, G->r_st4);
+ for (i = 0; i < 3; i++)
+ x87_faddp (&G->f, G->r_st1);
x87_fstp (&G->f, x86_deref (G->r_eax));
break;
- default:
- assert (0);
- }
+ case slang_asm_vec4_copy:
+ /* [vec4] | vec4 */
+ x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, a->param[0]));
+ x86_pop (&G->f, G->r_ecx);
+ x86_pop (&G->f, G->r_edx);
+ x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1]), G->r_ecx);
+ x86_pop (&G->f, G->r_ebx);
+ x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 4), G->r_edx);
+ x86_pop (&G->f, G->r_ecx);
+ x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 8), G->r_ebx);
+ x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 12), G->r_ecx);
+ break;
+ case slang_asm_vec4_deref:
+ /* [vec4] */
+ x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp));
+ x86_mov (&G->f, G->r_ecx, x86_make_disp (G->r_eax, 12));
+ x86_mov (&G->f, G->r_edx, x86_make_disp (G->r_eax, 8));
+ x86_mov (&G->f, x86_deref (G->r_esp), G->r_ecx);
+ x86_mov (&G->f, G->r_ebx, x86_make_disp (G->r_eax, 4));
+ x86_push (&G->f, G->r_edx);
+ x86_mov (&G->f, G->r_ecx, x86_deref (G->r_eax));
+ x86_push (&G->f, G->r_ebx);
+ x86_push (&G->f, G->r_ecx);
+ break;
+ case slang_asm_vec4_equal_int:
+ x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, -4));
+ x86_mov_reg_imm (&G->f, G->r_edx, 0x4000);
+ for (i = 0; i < 4; i++) {
+ x87_fld (&G->f, x86_make_disp (G->r_esp, a->param[0] + 4 + i * 4));
+ x87_fcomp (&G->f, x86_make_disp (G->r_esp, a->param[1] + 4 + i * 4));
+ x87_fnstsw (&G->f, G->r_eax);
+ x86_and (&G->f, G->r_edx, G->r_eax);
+ }
+ /* TODO: use test r8,imm8 */
+ x86_mov_reg_imm (&G->f, G->r_ecx, 0x4000);
+ x86_test (&G->f, G->r_edx, G->r_ecx);
+ {
+ GLubyte *lab0, *lab1;
+
+ /* TODO: use jcc rel8 */
+ lab0 = x86_jcc_forward (&G->f, cc_E);
+ x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ONE);
+ /* TODO: use jmp rel8 */
+ lab1 = x86_jmp_forward (&G->f);
+ x86_fixup_fwd_jump (&G->f, lab0);
+ x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ZERO);
+ x86_fixup_fwd_jump (&G->f, lab1);
+ x86_mov (&G->f, x86_deref (G->r_esp), G->r_ecx);
+ }
+ break;
+ default:
+ assert (0);
+ }
}
GLboolean _slang_x86_codegen (slang_machine *mach, slang_assembly_file *file, GLuint start)
@@ -664,6 +670,7 @@ GLboolean _slang_x86_codegen (slang_machine *mach, slang_assembly_file *file, GL
G.r_eax = x86_make_reg (file_REG32, reg_AX);
G.r_ecx = x86_make_reg (file_REG32, reg_CX);
G.r_edx = x86_make_reg (file_REG32, reg_DX);
+ G.r_ebx = x86_make_reg (file_REG32, reg_BX);
G.r_esp = x86_make_reg (file_REG32, reg_SP);
G.r_ebp = x86_make_reg (file_REG32, reg_BP);
G.r_st0 = x86_make_reg (file_x87, 0);
diff --git a/src/mesa/shader/slang/slang_storage.c b/src/mesa/shader/slang/slang_storage.c
index 34c62081f05..6220b7c5bf6 100644
--- a/src/mesa/shader/slang/slang_storage.c
+++ b/src/mesa/shader/slang/slang_storage.c
@@ -211,14 +211,22 @@ GLboolean _slang_aggregate_variable (slang_storage_aggregate *agg, slang_type_sp
return aggregate_vector (agg, slang_stor_float, 2);
case slang_spec_vec3:
return aggregate_vector (agg, slang_stor_float, 3);
- case slang_spec_vec4:
- return aggregate_vector (agg, slang_stor_float, 4);
+ case slang_spec_vec4:
+#if defined(USE_X86_ASM) || defined(SLANG_X86)
+ return aggregate_vector (agg, slang_stor_vec4, 1);
+#else
+ return aggregate_vector (agg, slang_stor_float, 4);
+#endif
case slang_spec_mat2:
return aggregate_matrix (agg, slang_stor_float, 2);
case slang_spec_mat3:
return aggregate_matrix (agg, slang_stor_float, 3);
- case slang_spec_mat4:
- return aggregate_matrix (agg, slang_stor_float, 4);
+ case slang_spec_mat4:
+#if defined(USE_X86_ASM) || defined(SLANG_X86)
+ return aggregate_vector (agg, slang_stor_vec4, 4);
+#else
+ return aggregate_matrix (agg, slang_stor_float, 4);
+#endif
case slang_spec_sampler1D:
case slang_spec_sampler2D:
case slang_spec_sampler3D:
@@ -258,54 +266,77 @@ GLboolean _slang_aggregate_variable (slang_storage_aggregate *agg, slang_type_sp
}
}
-/* _slang_sizeof_aggregate() */
+/* _slang_sizeof_type() */
-GLuint _slang_sizeof_aggregate (const slang_storage_aggregate *agg)
+GLuint
+_slang_sizeof_type (slang_storage_type type)
{
- GLuint i, size = 0;
+ if (type == slang_stor_aggregate)
+ return 0;
+ if (type == slang_stor_vec4)
+ return 4 * sizeof (GLfloat);
+ return sizeof (GLfloat);
+}
- for (i = 0; i < agg->count; i++)
- {
- GLuint element_size;
+/* _slang_sizeof_aggregate() */
- if (agg->arrays[i].type == slang_stor_aggregate)
- element_size = _slang_sizeof_aggregate (agg->arrays[i].aggregate);
- else
- element_size = sizeof (GLfloat);
- size += element_size * agg->arrays[i].length;
- }
- return size;
+GLuint _slang_sizeof_aggregate (const slang_storage_aggregate *agg)
+{
+ GLuint i, size = 0;
+
+ for (i = 0; i < agg->count; i++) {
+ slang_storage_array *arr = &agg->arrays[i];
+ GLuint element_size;
+
+ if (arr->type == slang_stor_aggregate)
+ element_size = _slang_sizeof_aggregate (arr->aggregate);
+ else
+ element_size = _slang_sizeof_type (arr->type);
+ size += element_size * arr->length;
+ }
+ return size;
}
/* _slang_flatten_aggregate () */
-GLboolean _slang_flatten_aggregate (slang_storage_aggregate *flat, const slang_storage_aggregate *agg)
+GLboolean
+_slang_flatten_aggregate (slang_storage_aggregate *flat, const slang_storage_aggregate *agg)
{
- GLuint i;
-
- for (i = 0; i < agg->count; i++)
- {
- GLuint j;
-
- for (j = 0; j < agg->arrays[i].length; j++)
- {
- if (agg->arrays[i].type == slang_stor_aggregate)
- {
- if (!_slang_flatten_aggregate (flat, agg->arrays[i].aggregate))
- return GL_FALSE;
- }
- else
- {
- slang_storage_array *arr;
-
- arr = slang_storage_aggregate_push_new (flat);
- if (arr == NULL)
- return GL_FALSE;
- arr->type = agg->arrays[i].type;
- arr->length = 1;
- }
- }
- }
- return GL_TRUE;
+ GLuint i;
+
+ for (i = 0; i < agg->count; i++) {
+ GLuint j;
+
+ for (j = 0; j < agg->arrays[i].length; j++) {
+ if (agg->arrays[i].type == slang_stor_aggregate) {
+ if (!_slang_flatten_aggregate (flat, agg->arrays[i].aggregate))
+ return GL_FALSE;
+ }
+ else {
+ GLuint k, count;
+ slang_storage_type type;
+
+ if (agg->arrays[i].type == slang_stor_vec4) {
+ count = 4;
+ type = slang_stor_float;
+ }
+ else {
+ count = 1;
+ type = agg->arrays[i].type;
+ }
+
+ for (k = 0; k < count; k++) {
+ slang_storage_array *arr;
+
+ arr = slang_storage_aggregate_push_new (flat);
+ if (arr == NULL)
+ return GL_FALSE;
+ arr->type = type;
+ arr->length = 1;
+ }
+ }
+ }
+ }
+ return GL_TRUE;
}
diff --git a/src/mesa/shader/slang/slang_storage.h b/src/mesa/shader/slang/slang_storage.h
index 532ea638ee7..209f8674d97 100644
--- a/src/mesa/shader/slang/slang_storage.h
+++ b/src/mesa/shader/slang/slang_storage.h
@@ -41,13 +41,19 @@ extern "C" {
*
* For now, only the three basic types are supported, that is bool, int and float. Other built-in
* types like vector or matrix can easily be decomposed into a series of basic types.
+ *
+ * If the vec4 module is enabled, 4-component vectors of floats are used when possible. 4x4 matrices
+ * are constructed of 4 vec4 slots.
*/
typedef enum slang_storage_type_
{
- slang_stor_aggregate,
- slang_stor_bool,
- slang_stor_int,
- slang_stor_float
+ /* core */
+ slang_stor_aggregate,
+ slang_stor_bool,
+ slang_stor_int,
+ slang_stor_float,
+ /* vec4 */
+ slang_stor_vec4
} slang_storage_type;
/*
@@ -106,6 +112,14 @@ _slang_evaluate_int(slang_assembly_file *file,
slang_atom_pool *atoms);
/*
+ * Returns the size (in machine units) of the given storage type.
+ * It is an error to pass in slang_stor_aggregate.
+ * Returns 0 on error.
+ */
+extern GLuint
+_slang_sizeof_type (slang_storage_type);
+
+/*
* Returns total size (in machine units) of the given aggregate.
* Returns 0 on error.
*/
diff --git a/src/mesa/x86/rtasm/x86sse.c b/src/mesa/x86/rtasm/x86sse.c
index 6137aef8ece..3ea37bb5e76 100644
--- a/src/mesa/x86/rtasm/x86sse.c
+++ b/src/mesa/x86/rtasm/x86sse.c
@@ -367,6 +367,20 @@ void x86_sub( struct x86_function *p,
emit_op_modrm(p, 0x2b, 0x29, dst, src );
}
+void x86_or( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ emit_op_modrm( p, 0x0b, 0x09, dst, src );
+}
+
+void x86_and( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ emit_op_modrm( p, 0x23, 0x21, dst, src );
+}
+
/***********************************************************************
diff --git a/src/mesa/x86/rtasm/x86sse.h b/src/mesa/x86/rtasm/x86sse.h
index 5ec54894311..66fb852ac98 100644
--- a/src/mesa/x86/rtasm/x86sse.h
+++ b/src/mesa/x86/rtasm/x86sse.h
@@ -172,12 +172,14 @@ void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf );
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_dec( struct x86_function *p, struct x86_reg reg );
void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mul( struct x86_function *p, struct x86_reg src );
+void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_pop( struct x86_function *p, struct x86_reg reg );
void x86_push( struct x86_function *p, struct x86_reg reg );
void x86_ret( struct x86_function *p );