diff options
-rw-r--r-- | src/gallium/drivers/panfrost/midgard/assemble.py | 643 |
1 files changed, 0 insertions, 643 deletions
"""
Copyright (C) 2018 Alyssa Rosenzweig
Copyright (c) 2013 Connor Abbott ([email protected])

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""

# Textual assembler for Midgard (t6xx) shader programs.
#
# Usage: assemble.py <input assembly> <output binary>
#
# The input is read one instruction per line ("mnemonic arg, arg, ...").
# Assembly happens in three passes:
#   1. encode_instructions: each line becomes a tagged tuple holding its
#      encoded integer word(s);
#   2. pack_bundles: tuples are grouped into little-endian byte bundles
#      (load/store pairs, texture words, runs of ALU words);
#   3. link_bundles: the low nibble of the following bundle is written into
#      the high nibble of each bundle's first byte and everything is
#      concatenated.

import sys
import pprint
import struct

# Definitions from cwabbott's tools

t6xx_alu_ops = {
    "fadd": 0x10,
    "fmul": 0x14,
    "fmin": 0x28,
    "fmax": 0x2C,
    "fmov": 0x30,
    "ffloor": 0x36,
    "fceil": 0x37,
    "fdot3": 0x3C,
    "fdot3r": 0x3D,
    "fdot4": 0x3E,
    "freduce": 0x3F,
    "iadd": 0x40,
    "isub": 0x46,
    "imul": 0x58,
    "imov": 0x7B,
    "feq": 0x80,
    "fne": 0x81,
    "flt": 0x82,
    "fle": 0x83,
    "f2i": 0x99,
    "f2u8": 0x9C,
    "u2f": 0xBC,
    "ieq": 0xA0,
    "ine": 0xA1,
    "ilt": 0xA4,
    "ile": 0xA5,
    "iand": 0x70,
    "ior": 0x71,
    "inot": 0x72,
    "iandnot": 0x74,
    "ixor": 0x76,
    "ball": 0xA9,
    "bany": 0xB1,
    "i2f": 0xB8,
    "csel": 0xC5,
    "fatan_pt2": 0xE8,
    "frcp": 0xF0,
    "frsqrt": 0xF2,
    "fsqrt": 0xF3,
    "fexp2": 0xF4,
    "flog2": 0xF5,
    "fsin": 0xF6,
    "fcos": 0xF7,
    "fatan2_pt1": 0xF9,
}

# Bit set in the 32-bit ALU bundle tag for each unit. pack_bundles also uses
# these values as the ordering key: a bundle only grows while they ascend.
t6xx_alu_bits = {
    "vmul": 17,
    "sadd": 19,
    "vadd": 21,
    "smul": 23,
    "lut": 25,
    "br": 26,
    "branch": 27,
    "constants": 32
}

# Width in bits of the instruction word emitted for each unit
t6xx_alu_size_bits = {
    "vmul": 48,
    "sadd": 32,
    "vadd": 48,
    "smul": 32,
    "lut": 48,
    "br": 16,
    "branch": 48
}

t6xx_outmod = {
    "none": 0,
    "pos": 1,
    "int": 2,
    "sat": 3
}

t6xx_reg_mode = {
    "quarter": 0,
    "half": 1,
    "full": 2,
    "double": 3
}

t6xx_dest_override = {
    "lower": 0,
    "upper": 1,
    "none": 2
}

t6xx_load_store_ops = {
    "ld_st_noop": 0x03,
    "ld_attr_16": 0x95,
    "ld_attr_32": 0x94,
    "ld_vary_16": 0x99,
    "ld_vary_32": 0x98,
    "ld_uniform_16": 0xAC,
    "ld_uniform_32": 0xB0,
    "st_vary_16": 0xD5,
    "st_vary_32": 0xD4,
    "ld_color_buffer_8": 0xBA
}

t6xx_tag = {
    "texture": 0x3,
    "load_store": 0x5,
    "alu4": 0x8,
    "alu8": 0x9,
    "alu12": 0xA,
    "alu16": 0xB,
}


def is_tag_alu(tag):
    """Return True when `tag` lies in the alu4..alu16 range of t6xx_tag."""
    return (tag >= t6xx_tag["alu4"]) and (tag <= t6xx_tag["alu16"])


# Just an enum

ALU = 0
LDST = 1
TEXTURE = 2

# Constant types supported, mapping the constant prefix to the Python format
# string and the coercion function

constant_types = {
    "f": ("f", float),
    "h": ("e", float),
    "i": ("i", int),
    "s": ("h", int)
}

compact_branch_op = {
    "jump": 1,
    "branch": 2,
    "discard": 4,
    "write": 7
}

branch_condition = {
    "false": 1,
    "true": 2,
    "always": 3,
}

# TODO: What else?

texture_op = {
    "normal": 0x11,
    "texelfetch": 0x14
}

texture_fmt = {
    "2d": 0x02,
    "3d": 0x03
}

swizzle_component = {
    "x": 0,
    "y": 1,
    "z": 2,
    "w": 3
}

# Texture pipeline registers in r28-r29
TEXTURE_BASE = 28

# Size in bytes of the constant block appended to an ALU bundle
CONSTANT_BYTES = 4 * 4


def parse_program(lines):
    """Split assembly text into (mnemonic, [arguments]) tuples.

    `lines` is any iterable of strings (an open text file works). The first
    space-separated field of each stripped line is the mnemonic; the rest of
    the line is split on commas and each argument is stripped. Blank lines
    are not filtered: they yield ("", [""]).
    """
    program = []

    for ln in lines:
        fields = ln.strip().split(" ")

        instruction = fields[0]
        rest = " ".join(fields[1:])

        arguments = [s.strip() for s in rest.split(",")]
        program.append((instruction, arguments))

    return program


def decode_reg_name(reg_name):
    """Decode a bare register name into (index, is_half, upper).

    "rN" is full register N. A name starting with "h" is a half register
    whose number follows the first two characters (e.g. "hr5"); it maps to
    full register N >> 1 with `upper` = N & 1. Anything else is parsed as a
    plain integer.
    """
    ireg = 0
    upper = False
    half = False

    if reg_name[0] == 'r':
        ireg = int(reg_name[1:])
    elif reg_name[0] == 'h':
        rreg = int(reg_name[2:])

        # Decode half-register into its full register's half
        ireg = rreg >> 1
        upper = rreg & 1
        half = True
    else:
        # Special case for load/store addresses
        ireg = int(reg_name)

    return (ireg, half, upper)


def standard_swizzle_from_parts(swizzle_parts):
    """Pack a swizzle string into an integer, two bits per component.

    `swizzle_parts` is the operand split on "."; element 1 is the swizzle
    and defaults to "xyzw" when absent. Component i lands in bits 2i..2i+1.
    """
    swizzle_s = swizzle_parts[1] if len(swizzle_parts) > 1 else "xyzw"

    swizzle = 0
    for (i, c) in enumerate(swizzle_s):
        swizzle |= swizzle_component[c] << (2 * i)

    return swizzle


def mask_from_parts(mask_parts, large_mask):
    """Build a write mask from a split operand; return (mask, mask_string).

    With `large_mask` each named component contributes two bits (3 << 2c);
    otherwise one bit (1 << c). A missing mask means "xyzw".
    """
    mask_s = mask_parts[1] if len(mask_parts) > 1 else "xyzw"

    if large_mask:
        mask = sum((3 << (2 * swizzle_component[c]) if c in mask_s else 0) for c in "xyzw")
    else:
        mask = sum((1 << swizzle_component[c] if c in mask_s else 0) for c in "xyzw")

    return (mask, mask_s)


def decode_reg(reg):
    """Decode a source operand.

    Returns (immediate, value, swizzle, half, upper, modifiers). For a
    "#<float>" operand `immediate` is True, `value` is the 16-bit pattern of
    the number packed with struct format "e", and the remaining fields are
    0. Otherwise `value` is the register index and `modifiers` has bit 1 set
    for a leading "-" and bit 0 set for an "abs(" wrapper.
    """
    if reg[0] == "#":
        # Not actually a register, instead an immediate float
        return (True, struct.unpack("H", struct.pack("e", float(reg[1:])))[0], 0, 0, 0, 0)

    # Function call syntax used in abs() modifier
    if reg[-1] == ')':
        reg = reg[:-1]

    swizzle_parts = reg.split(".")

    reg_name = swizzle_parts[0]

    modifiers = 0

    if reg_name[0] == '-':
        modifiers |= 2
        reg_name = reg_name[1:]

    if reg_name[0] == 'a':
        modifiers |= 1
        reg_name = reg_name[len("abs("):]

    (ireg, half, upper) = decode_reg_name(reg_name)

    return (False, ireg, standard_swizzle_from_parts(swizzle_parts), half, upper, modifiers)


def decode_masked_reg(reg, large_mask):
    """Decode a destination operand into (index, mask, component, half, upper).

    `component` is the highest component index named in the mask (0 for an
    empty mask).
    """
    mask_parts = reg.split(".")

    reg_name = mask_parts[0]
    (ireg, half, upper) = decode_reg_name(reg_name)
    (mask, mask_s) = mask_from_parts(mask_parts, large_mask)

    component = max([0] + [swizzle_component[c] for c in "xyzw" if c in mask_s])

    return (ireg, mask, component, half, upper)


# TODO: Fill these in XXX

def decode_texture_reg_number(reg):
    """Decode a texture register into (full, select, upper).

    `select` is the register index relative to TEXTURE_BASE. For a half
    register (any name not starting with "r") the number after the first two
    characters is halved and its low bit becomes `upper`.
    """
    r = reg.split(".")[0]

    if r[0] == "r":
        return (True, int(r[1:]) - TEXTURE_BASE, 0)
    else:
        no = int(r[2:])
        return (False, (no >> 1) - TEXTURE_BASE, no & 1)


def decode_texture_reg(reg):
    """Decode a texture input register.

    Returns (full, select, upper, swizzleR, swizzleL), where swizzleL and
    swizzleR come from the first and second swizzle characters.
    """
    (full, select, upper) = decode_texture_reg_number(reg)

    # Swizzle mandatory for texture registers, afaict
    swizzle = reg.split(".")[1]
    swizzleL = swizzle_component[swizzle[0]]
    swizzleR = swizzle_component[swizzle[1]]

    return (full, select, upper, swizzleR, swizzleL)


def decode_texture_out_reg(reg):
    """Decode a texture output register into (full, select, upper, mask)."""
    (full, select, upper) = decode_texture_reg_number(reg)
    (mask, _) = mask_from_parts(reg.split("."), False)

    return (full, select, upper, mask)


def _encode_load_store(ins, arguments):
    """Encode a load/store line as (LDST, word).

    Arguments are: masked register, address operand, hexadecimal literal.
    """
    op = t6xx_load_store_ops[ins]
    (reg, mask, _, _, _) = decode_masked_reg(arguments[0], False)
    (_, address, swizzle, _, _, _) = decode_reg(arguments[1])
    unknown = int(arguments[2], 16)

    word = (op << 0) | (reg << 8) | (mask << 13) | (swizzle << 17) | (unknown << 25) | (address << 51)
    return (LDST, word)


def _encode_alu(ins, arguments):
    """Encode an ALU line as (ALU, unit, register_word, instruction_word).

    The mnemonic has the form "<unit>.<op>[.<outmod>[.<special>]]" and the
    arguments are destination, first source, second source. The second
    source may be an inline "#<float>" constant.
    """
    mods = ins.split(".")
    ins_mod = mods[0]
    op = t6xx_alu_ops[mods[1]]

    # Missing or unrecognised output modifiers encode as 0
    ins_outmod = mods[2] if len(mods) > 2 else ""
    out_mod = t6xx_outmod.get(ins_outmod, 0)

    (reg_out, mask, out_component, half0, _) = decode_masked_reg(arguments[0], True)
    (_, reg_in1, swizzle1, half1, upper1, mod1) = decode_reg(arguments[1])
    (immediate, reg_in2, swizzle2, half2, upper2, mod2) = decode_reg(arguments[2])

    if immediate:
        # The bits above the low 11 of the constant take the place of the
        # second register index; bit 15 flags the inline constant
        register_word = (reg_in1 << 0) | ((reg_in2 >> 11) << 5) | (reg_out << 10) | (1 << 15)
    else:
        register_word = (reg_in1 << 0) | (reg_in2 << 5) | (reg_out << 10)

    if ins_mod in ["vadd", "vmul", "lut"]:
        io_mode = t6xx_reg_mode["half" if half0 else "full"]
        i1half = half1
        i2block = 0
        output_override = 2  # NORMAL, TODO

        if ins_outmod == "quarter":
            io_mode = t6xx_reg_mode["quarter"]

        if half0:
            # TODO: half actually
            repsel = 2 * upper1

            # Rare case... a half destination uses the one-bit-per-component mask
            (_, wr_mask, _, _, _) = decode_masked_reg(arguments[0], False)
        else:
            repsel = upper1
            wr_mask = mask

        if immediate:
            # Inline constant: lower 11 bits

            i2block = ((reg_in2 & 0xFF) << 3) | ((reg_in2 >> 8) & 0x7)
        else:
            if half0:
                # TODO: replicate input 2 if half
                pass
            else:
                # TODO: half selection
                i2block = upper2 | (half2 << 2)

            # NOTE(review): nesting reconstructed from a whitespace-collapsed
            # copy of this file. Applying the swizzle to both the half0 and
            # non-half0 register paths is assumed -- confirm. (Immediates
            # decode with swizzle 0, so they are unaffected either way.)
            i2block |= swizzle2 << 3

        # Extra modifier for some special cased stuff
        special = mods[3] if len(mods) > 3 else None

        if special == "low":
            output_override = 0  # low
        elif special == "fulllow":
            # TODO: Not really a special case, just a bug?
            io_mode = t6xx_reg_mode["full"]
            output_override = 0  # low
            wr_mask = 0xFF

        instruction_word = (
            (op << 0) | (io_mode << 8) | (mod1 << 10) | (repsel << 12) |
            (i1half << 14) | (swizzle1 << 15) | (mod2 << 23) |
            (i2block << 25) | (output_override << 36) | (out_mod << 38) |
            (wr_mask << 40)
        )
    elif ins_mod in ["sadd", "smul"]:
        # TODO: What are these?
        unknown2 = 0
        unknown3 = 0

        if half1:
            i1comp_block = swizzle1 | (upper1 << 2)
        else:
            i1comp_block = swizzle1 << 1

        if immediate:
            # Inline constant is splattered in a... bizarre way

            i2block = (
                (((reg_in2 >> 9) & 3) << 0) | (((reg_in2 >> 8) & 1) << 2) |
                (((reg_in2 >> 5) & 7) << 3) | (((reg_in2 >> 0) & 15) << 6)
            )
        else:
            # TODO: half register
            swizzle2 = (swizzle2 << 1) & 0x1F
            i2block = (mod2 << 0) | ((not half2) << 2) | (swizzle2 << 3) | (unknown2 << 5)

        # TODO: half register
        outcomp_block = out_component << 1

        instruction_word = (
            (op << 0) | (mod1 << 8) | ((not half1) << 10) |
            (i1comp_block << 11) | (i2block << 14) | (unknown3 << 25) |
            (out_mod << 26) | ((not half0) << 28) | (outcomp_block << 29)
        )
    else:
        instruction_word = op

    return (ALU, ins_mod, register_word, instruction_word)


def _encode_texture(ins, arguments):
    """Encode a texture line as (TEXTURE, word).

    The mnemonic is "texture.<op>.<format>[.<modifier>...]". The arguments
    are: output register, "texture<N>", "sampler<N>[.swizzle]", input
    register, then one value per argument-taking modifier.
    """
    # Texture ops use long series of modifiers to describe their needed
    # capabilities, separated by dots. Decode them here
    parts = ins.split(".")

    # First few modifiers are fixed, like an instruction name
    tex_op = parts[1]
    tex_fmt = parts[2]

    # The remaining are variable, but strictly ordered
    parts = parts[3:]

    op = texture_op[tex_op]

    # Some bits are defined directly in the modifier list
    shadow = "shadow" in parts
    cont = "cont" in parts
    last = "last" in parts
    has_filter = "raw" not in parts

    # The remaining need order preserved since they have their own arguments
    argument_parts = [part for part in parts if part not in ["shadow", "cont", "last", "raw"]]

    bias_lod = 0

    for argument, part in zip(argument_parts, arguments[4:]):
        if argument == "bias":
            bias_lod = int(float(part) * 256)
        else:
            print("Unknown argument: " + str(argument))

    fmt = texture_fmt[tex_fmt]
    has_offset = 0

    magic1 = 1  # IDEK
    magic2 = 2  # Where did this even come from?!

    texture_handle = int(arguments[1][len("texture"):])

    sampler_parts = arguments[2].split(".")
    sampler_handle = int(sampler_parts[0][len("sampler"):])
    swizzle0 = standard_swizzle_from_parts(sampler_parts)

    (full0, select0, upper0, mask0) = decode_texture_out_reg(arguments[0])
    (_, select1, upper1, swizzleR1, swizzleL1) = decode_texture_reg(arguments[3])

    tex = (
        (op << 0) | (shadow << 6) | (cont << 8) | (last << 9) | (fmt << 10) |
        (has_offset << 15) | (has_filter << 16) | (select1 << 17) |
        (upper1 << 18) | (swizzleL1 << 19) | (swizzleR1 << 21) | (0 << 23) |
        (magic2 << 25) | (full0 << 29) | (magic1 << 30) | (select0 << 32) |
        (upper0 << 33) | (mask0 << 34) | (swizzle0 << 40) | (bias_lod << 72) |
        (texture_handle << 88) | (sampler_handle << 104)
    )

    return (TEXTURE, tex)


def _encode_branch(ins, arguments):
    """Encode a "br.<op>.<condition>" line as (ALU, "br", None, word).

    The single argument has the form "<offset>-><destination tag>".
    """
    mods = ins.split(".")
    condition = branch_condition[mods[2]]
    bop = compact_branch_op[mods[1]]

    (offset_s, dest_tag_s) = arguments[0].split("->")[:2]
    offset = int(offset_s)

    # 2's complement and chill
    if offset < 0:
        offset = (1 << 7) - abs(offset)

    # Find where we're going
    dest_tag = int(dest_tag_s)

    br = (bop << 0) | (dest_tag << 3) | (offset << 7) | (condition << 14)

    # TODO: Unconditional branch encoding

    return (ALU, "br", None, br)


def _encode_constants(ins, arguments):
    """Encode a "<prefix>constants" line as (ALU, "constants", bytearray).

    Each argument is coerced and packed according to constant_types; the
    result is zero-padded to CONSTANT_BYTES.

    Raises ValueError when the packed values exceed CONSTANT_BYTES.
    """
    (fmt, cast) = constant_types[ins[0]]

    consts = bytearray()
    for value in arguments:
        consts += struct.pack(fmt, cast(value))

    if len(consts) > CONSTANT_BYTES:
        raise ValueError("constants occupy " + str(len(consts)) + " bytes, at most " +
                         str(CONSTANT_BYTES) + " fit in a bundle")

    # consts must be exactly 4 quadwords, so pad with zeroes if necessary
    consts += bytes(CONSTANT_BYTES - len(consts))

    return (ALU, "constants", consts)


def encode_instructions(program):
    """Translate parsed (mnemonic, arguments) tuples into an instruction stream.

    Returns a list of tuples tagged LDST, TEXTURE or ALU. Lines whose
    mnemonic matches no known family (including blank lines) are skipped.
    An unknown constant prefix prints a diagnostic and stops encoding; the
    instructions encoded so far are still returned.
    """
    instruction_stream = []

    for (ins, arguments) in program:
        family = ins.split(".")[0]
        ins_op = (ins + ".").split(".")[1]

        if ins in t6xx_load_store_ops:
            instruction_stream.append(_encode_load_store(ins, arguments))
        elif ins_op in t6xx_alu_ops:
            instruction_stream.append(_encode_alu(ins, arguments))
        elif family == "texture":
            instruction_stream.append(_encode_texture(ins, arguments))
        elif family == "br":
            instruction_stream.append(_encode_branch(ins, arguments))
        elif ins[1:] == "constants":
            if ins[0] not in constant_types:
                print("Unknown constant type " + str(ins[0]))
                break

            instruction_stream.append(_encode_constants(ins, arguments))

    return instruction_stream


def _pack_alu_bundle(instruction_stream, index):
    """Pack the run of ALU entries starting at `index` into one bundle.

    Entries are consumed while they are ALU entries whose t6xx_alu_bits
    value is strictly greater than the previous one. Returns
    (bundle_bytes, index_of_first_unconsumed_entry).
    """
    # TODO: Combining ALU ops

    emit_size = 4  # 32-bit tag always emitted

    tag = 0
    register_words = bytearray()
    body_words = bytearray()
    constant_words = None

    last_alu_bit = 0

    # Iterate through while there are ALU tags in strictly ascending order
    while index < len(instruction_stream):
        ins = instruction_stream[index]

        if ins[0] != ALU:
            break

        bit = t6xx_alu_bits[ins[1]]

        if bit <= last_alu_bit:
            break

        last_alu_bit = bit

        if ins[1] == "constants":
            constant_words = ins[2]
        else:
            # Flag for the used part of the GPU
            tag |= 1 << bit

            # 16-bit register word, if present
            if ins[2] is not None:
                register_words += ins[2].to_bytes(2, "little")
                emit_size += 2

            size = t6xx_alu_size_bits[ins[1]] // 8
            body_words += ins[3].to_bytes(size, "little")
            emit_size += size

        index += 1

    # Pad to nearest multiple of 4 words
    padding = (16 - (emit_size & 15)) if (emit_size & 15) else 0
    emit_size += padding

    # emit_size includes constants
    if constant_words:
        emit_size += len(constant_words)

    # Calculate tag given size
    words = emit_size >> 2
    tag |= t6xx_tag["alu" + str(words)]

    # Actually emit, now that we can
    bundle = bytearray()
    bundle += tag.to_bytes(4, "little")
    bundle += register_words
    bundle += body_words
    bundle += bytes(padding)

    if constant_words:
        bundle += constant_words

    return (bundle, index)


def pack_bundles(instruction_stream):
    """Group an instruction stream into a list of bytearray bundles.

    Two consecutive LDST entries share one 16-byte bundle; a lone LDST entry
    is paired with ld_st_noop. Each TEXTURE entry is one 16-byte bundle.
    Consecutive ALU entries are merged by _pack_alu_bundle. The low nibble
    of every bundle's first byte is its t6xx_tag value.
    """
    bundles = []
    index = 0
    count = len(instruction_stream)

    while index < count:
        ins = instruction_stream[index]
        kind = ins[0]

        if kind == LDST:
            following = instruction_stream[index + 1] if index + 1 < count else None

            if following is not None and following[0] == LDST:
                parta = ins[1]
                partb = following[1]
                index += 2
            else:
                # Lone load/store: the noop opcode fills the first slot
                parta = t6xx_load_store_ops["ld_st_noop"]
                partb = ins[1]
                index += 1

            word = (partb << 68) | (parta << 8) | t6xx_tag["load_store"]
            bundles.append(bytearray(word.to_bytes(16, "little")))
        elif kind == TEXTURE:
            word = (ins[1] << 8) | t6xx_tag["texture"]
            bundles.append(bytearray(word.to_bytes(16, "little")))
            index += 1
        elif kind == ALU:
            (bundle, index) = _pack_alu_bundle(instruction_stream, index)
            bundles.append(bundle)
        else:
            raise ValueError("Unknown instruction stream entry: " + str(kind))

    return bundles


def link_bundles(bundles):
    """Fill in the next-bundle tag of each bundle and concatenate them.

    The high nibble of each bundle's first byte receives the low nibble of
    the following bundle's first byte, or 1 for the final bundle. When the
    following bundle is the final one and is an ALU bundle, 1 is written
    instead of its tag. The bundles are modified in place.
    """
    # Assembly over; just emit tags at this point
    binary = bytearray()

    for (idx, bundle) in enumerate(bundles):
        # Instruction prefetch
        if idx + 1 < len(bundles):
            tag = bundles[idx + 1][0] & 0xF

            # Check for ALU special case
            if is_tag_alu(tag) and idx + 2 == len(bundles):
                tag = 1
        else:
            # Instruction stream over
            tag = 1

        bundle[0] |= tag << 4

        binary += bundle

    return binary


def assemble(program):
    """Assemble parsed (mnemonic, arguments) tuples into the output binary."""
    return link_bundles(pack_bundles(encode_instructions(program)))


def main(argv=None):
    """Assemble the file argv[1] into argv[2]; return a process exit code.

    The parsed program is pretty-printed to stdout after assembly succeeds
    and before the output file is written.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 3:
        print("usage: " + argv[0] + " <input assembly> <output binary>", file=sys.stderr)
        return 1

    with open(argv[1], "r") as f:
        program = parse_program(f)

    binary = assemble(program)

    pprint.pprint(program)

    with open(argv[2], "wb") as f:
        f.write(binary)

    return 0


if __name__ == "__main__":
    sys.exit(main())