diff options
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 56 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_half.h | 7 |
2 files changed, 60 insertions, 3 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index f67c16200a9..d898fd66f48 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -58,6 +58,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi_exec.h"
+#include "util/u_half.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
 
@@ -3058,6 +3059,57 @@ exec_dp2(struct tgsi_exec_machine *mach,
 }
 
+/**
+ * PK2H: convert src.x and src.y to half floats and pack them into one
+ * 32-bit word (x in bits 0..15, y in bits 16..31).  The packed word is
+ * written to every destination channel enabled in the write mask.
+ */
 static void
+exec_pk2h(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   unsigned chan;
+   union tgsi_exec_channel arg[2], dst;
+
+   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
+   fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+   for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
+      /* Widen before shifting: a 16-bit half (presumably uint16_t) promotes
+       * to int, and shifting its sign bit into bit 31 of an int is undefined. */
+      dst.u[chan] = util_float_to_half(arg[0].f[chan]) |
+         ((uint32_t)util_float_to_half(arg[1].f[chan]) << 16);
+   }
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT);
+      }
+   }
+}
+
+/**
+ * UP2H: unpack the two half floats held in src.x (low 16 bits and high
+ * 16 bits) to 32-bit floats.  Even-numbered destination channels receive
+ * the low half, odd-numbered channels the high half.
+ */
+static void
+exec_up2h(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   unsigned chan;
+   union tgsi_exec_channel arg, dst[2];
+
+   fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
+   for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
+      dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff);
+      dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16);
+   }
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+      }
+   }
+}
+
+static void
 exec_scs(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst)
 {
@@ -4339,7 +4391,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_PK2H:
-      assert (0);
+      exec_pk2h(mach, inst);
       break;
 
    case TGSI_OPCODE_PK2US:
@@ -4425,7 +4477,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_UP2H:
-      assert (0);
+      exec_up2h(mach, inst);
       break;
 
    case TGSI_OPCODE_UP2US:
diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h
index d28fae3c77d..966d213bdd5 100644
--- a/src/gallium/auxiliary/util/u_half.h
+++ b/src/gallium/auxiliary/util/u_half.h
@@ -74,7 +74,11 @@ util_float_to_half(float f)
       f32.ui &= round_mask;
       f32.f *= magic.f;
       f32.ui -= round_mask;
-
+      /*
+       * XXX: The magic mul relies on denorms being available, otherwise
+       * all f16 denorms get flushed to zero - hence when this is used
+       * for tgsi_exec in softpipe we won't get f16 denorms.
+       */
       /*
        * Clamp to max finite value if overflowed.
        * OpenGL has completely undefined rounding behavior for float to
@@ -112,6 +116,7 @@ util_half_to_float(uint16_t f16)
 
    /* Adjust */
    f32.f *= magic.f;
+   /* XXX: The magic mul relies on denorms being available */
 
    /* Inf / NaN */
    if (f32.f >= infnan.f)