gallivm: fix F2U opcode

Previously, we were really doing F2I. Also move it to the generic section.

(Note that for llvmpipe the generated code is definitely bad, due to the lack of unsigned conversions with sse. I think, though, that what llvm does (using scalar conversions to 64bit signed, either with the x87 fpu (32bit) or sse (64bit), including lots of domain changes) is quite suboptimal; it could do something like

   is_large = arg >= 2^31
   half_arg = 0.5 * arg
   small_c = fptoint(arg)
   large_c = fptoint(half_arg) << 1
   res = select(is_large, large_c, small_c)

which should be far fewer instructions, but that's something llvm should do itself.)

This fixes piglit fs/vs-float-uint-conversion.shader_test (maybe more; it needs a GL 3.0 version override to run).

Reviewed-by: Jose Fonseca <[email protected]>
Reviewed-by: Zack Rusin <[email protected]>
author: Roland Scheidegger <[email protected]> 2014-02-04 19:53:53 +0100
committer: Roland Scheidegger <[email protected]> 2014-02-05 17:45:31 +0100
commit: 4a7da3bec5961bd82c520e1f5e363cbd2fa8fb77 (patch)
tree: 56072b856a0ecf3d5fdaf415c6fa46f98825e820
parent: 5c975966dcaaa4e781f3baba0fc1e3b7ad4a18a6 (diff)
1 files changed, 22 insertions, 20 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index caaeb01561f..b9546dbc661 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -720,10 +720,23 @@ sub_emit(
    struct lp_build_tgsi_context * bld_base,
    struct lp_build_emit_data * emit_data)
 {
-	emit_data->output[emit_data->chan] = LLVMBuildFSub(
-				bld_base->base.gallivm->builder,
-				emit_data->args[0],
-				emit_data->args[1], "");
+   emit_data->output[emit_data->chan] =
+      LLVMBuildFSub(bld_base->base.gallivm->builder,
+                    emit_data->args[0],
+                    emit_data->args[1], "");
+}
+
+/* TGSI_OPCODE_F2U */
+static void
+f2u_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] =
+      LLVMBuildFPToUI(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                      bld_base->base.int_vec_type, "");
 }
 
 /* TGSI_OPCODE_U2F */
@@ -733,9 +746,10 @@ u2f_emit(
    struct lp_build_tgsi_context * bld_base,
    struct lp_build_emit_data * emit_data)
 {
-   emit_data->output[emit_data->chan] = LLVMBuildUIToFP(bld_base->base.gallivm->builder,
-							emit_data->args[0],
-							bld_base->base.vec_type, "");
+   emit_data->output[emit_data->chan] =
+      LLVMBuildUIToFP(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                      bld_base->base.vec_type, "");
 }
 
 static void
@@ -949,6 +963,7 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
    bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit;
 
    bld_base->op_actions[TGSI_OPCODE_UARL].emit = mov_emit;
+   bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit;
    bld_base->op_actions[TGSI_OPCODE_U2F].emit = u2f_emit;
    bld_base->op_actions[TGSI_OPCODE_UMAD].emit = umad_emit;
    bld_base->op_actions[TGSI_OPCODE_UMUL].emit = umul_emit;
@@ -1128,18 +1143,6 @@ f2i_emit_cpu(
                                                         emit_data->args[0]);
 }
 
-/* TGSI_OPCODE_F2U (CPU Only) */
-static void
-f2u_emit_cpu(
-   const struct lp_build_tgsi_action * action,
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   /* FIXME: implement and use lp_build_utrunc() */
-   emit_data->output[emit_data->chan] = lp_build_itrunc(&bld_base->base,
-                                                        emit_data->args[0]);
-}
-
 /* TGSI_OPCODE_FSET Helper (CPU Only) */
 static void
 fset_emit_cpu(
@@ -1832,7 +1835,6 @@ lp_set_default_actions_cpu(
    bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu;
-   bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = fseq_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_FSGE].emit = fsge_emit_cpu;
author	Roland Scheidegger <[email protected]>	2014-02-04 19:53:53 +0100
committer	Roland Scheidegger <[email protected]>	2014-02-05 17:45:31 +0100
commit	4a7da3bec5961bd82c520e1f5e363cbd2fa8fb77 (patch)
tree	56072b856a0ecf3d5fdaf415c6fa46f98825e820
parent	5c975966dcaaa4e781f3baba0fc1e3b7ad4a18a6 (diff)