Path: blob/master/tools/asm_processor/asm-processor.py
7857 views
#!/usr/bin/env python3
import argparse
import tempfile
import struct
import copy
import sys
import re
import os
from collections import namedtuple
from io import StringIO

MAX_FN_SIZE = 100
SLOW_CHECKS = False

EI_NIDENT = 16
EI_CLASS = 4
EI_DATA = 5
EI_VERSION = 6
EI_OSABI = 7
EI_ABIVERSION = 8
STN_UNDEF = 0

SHN_UNDEF = 0
SHN_ABS = 0xfff1
SHN_COMMON = 0xfff2
SHN_XINDEX = 0xffff
SHN_LORESERVE = 0xff00

STT_NOTYPE = 0
STT_OBJECT = 1
STT_FUNC = 2
STT_SECTION = 3
STT_FILE = 4
STT_COMMON = 5
STT_TLS = 6

STB_LOCAL = 0
STB_GLOBAL = 1
STB_WEAK = 2

STV_DEFAULT = 0
STV_INTERNAL = 1
STV_HIDDEN = 2
STV_PROTECTED = 3

SHT_NULL = 0
SHT_PROGBITS = 1
SHT_SYMTAB = 2
SHT_STRTAB = 3
SHT_RELA = 4
SHT_HASH = 5
SHT_DYNAMIC = 6
SHT_NOTE = 7
SHT_NOBITS = 8
SHT_REL = 9
SHT_SHLIB = 10
SHT_DYNSYM = 11
SHT_INIT_ARRAY = 14
SHT_FINI_ARRAY = 15
SHT_PREINIT_ARRAY = 16
SHT_GROUP = 17
SHT_SYMTAB_SHNDX = 18
SHT_MIPS_GPTAB = 0x70000003
SHT_MIPS_DEBUG = 0x70000005
SHT_MIPS_REGINFO = 0x70000006
SHT_MIPS_OPTIONS = 0x7000000d

SHF_WRITE = 0x1
SHF_ALLOC = 0x2
SHF_EXECINSTR = 0x4
SHF_MERGE = 0x10
SHF_STRINGS = 0x20
SHF_INFO_LINK = 0x40
SHF_LINK_ORDER = 0x80
SHF_OS_NONCONFORMING = 0x100
SHF_GROUP = 0x200
SHF_TLS = 0x400

R_MIPS_32 = 2
R_MIPS_26 = 4
R_MIPS_HI16 = 5
R_MIPS_LO16 = 6


class ElfHeader:
    """
    Parsed ELF file header; only 32-bit big-endian relocatable MIPS objects
    without program headers are accepted (enforced by the asserts below).

    typedef struct {
        unsigned char   e_ident[EI_NIDENT];
        Elf32_Half      e_type;
        Elf32_Half      e_machine;
        Elf32_Word      e_version;
        Elf32_Addr      e_entry;
        Elf32_Off       e_phoff;
        Elf32_Off       e_shoff;
        Elf32_Word      e_flags;
        Elf32_Half      e_ehsize;
        Elf32_Half      e_phentsize;
        Elf32_Half      e_phnum;
        Elf32_Half      e_shentsize;
        Elf32_Half      e_shnum;
        Elf32_Half      e_shstrndx;
    } Elf32_Ehdr;
    """

    def __init__(self, data):
        """Unpack the header from `data` (EI_NIDENT ident bytes + packed fields)."""
        self.e_ident = data[:EI_NIDENT]
        (self.e_type, self.e_machine, self.e_version, self.e_entry,
         self.e_phoff, self.e_shoff, self.e_flags, self.e_ehsize,
         self.e_phentsize, self.e_phnum, self.e_shentsize, self.e_shnum,
         self.e_shstrndx) = struct.unpack('>HHIIIIIHHHHHH', data[EI_NIDENT:])
        assert self.e_ident[EI_CLASS] == 1 # 32-bit
        assert self.e_ident[EI_DATA] == 2 # big-endian
        assert self.e_type == 1 # relocatable
        assert self.e_machine == 8 # MIPS I Architecture
        assert self.e_phoff == 0 # no program header
        assert self.e_shoff != 0 # section header
        assert self.e_shstrndx != SHN_UNDEF

    def to_bin(self):
        """Serialize the header back to bytes in the layout `__init__` reads."""
        return self.e_ident + struct.pack('>HHIIIIIHHHHHH', self.e_type,
                self.e_machine, self.e_version, self.e_entry, self.e_phoff,
                self.e_shoff, self.e_flags, self.e_ehsize, self.e_phentsize,
                self.e_phnum, self.e_shentsize, self.e_shnum, self.e_shstrndx)


class Symbol:
    """
    One symbol table entry, with its name resolved through a string table.

    typedef struct {
        Elf32_Word      st_name;
        Elf32_Addr      st_value;
        Elf32_Word      st_size;
        unsigned char   st_info;
        unsigned char   st_other;
        Elf32_Half      st_shndx;
    } Elf32_Sym;
    """

    def __init__(self, data, strtab):
        """Unpack a 16-byte entry; `strtab` must provide `lookup_str(index)`."""
        self.st_name, self.st_value, self.st_size, st_info, self.st_other, self.st_shndx = struct.unpack('>IIIBBH', data)
        assert self.st_shndx != SHN_XINDEX, "too many sections (SHN_XINDEX not supported)"
        # st_info packs the binding in the high nibble and the type in the low one.
        self.bind = st_info >> 4
        self.type = st_info & 15
        self.name = strtab.lookup_str(self.st_name)
        self.visibility = self.st_other & 3

    def to_bin(self):
        """Serialize the entry, rebuilding st_info from `bind` and `type`."""
        st_info = (self.bind << 4) | self.type
        return struct.pack('>IIIBBH', self.st_name, self.st_value, self.st_size, st_info, self.st_other, self.st_shndx)


class Relocation:
    """One relocation entry of an SHT_REL (no addend) or SHT_RELA section."""

    def __init__(self, data, sh_type):
        """Unpack an entry; `sh_type` selects the two- or three-word layout."""
        self.sh_type = sh_type
        if sh_type == SHT_REL:
            self.r_offset, self.r_info = struct.unpack('>II', data)
        else:
            self.r_offset, self.r_info, self.r_addend = struct.unpack('>III', data)
        # r_info packs the symbol index above the 8-bit relocation type.
        self.sym_index = self.r_info >> 8
        self.rel_type = self.r_info & 0xff

    def to_bin(self):
        """Serialize the entry, rebuilding r_info from `sym_index`/`rel_type`."""
        self.r_info = (self.sym_index << 8) | self.rel_type
        if self.sh_type == SHT_REL:
            return struct.pack('>II', self.r_offset, self.r_info)
        else:
            return struct.pack('>III', self.r_offset, self.r_info, self.r_addend)


class Section:
    """
    One section: its header fields, its data bytes and its index in the file.

    typedef struct {
        Elf32_Word   sh_name;
        Elf32_Word   sh_type;
        Elf32_Word   sh_flags;
        Elf32_Addr   sh_addr;
        Elf32_Off    sh_offset;
        Elf32_Word   sh_size;
        Elf32_Word   sh_link;
        Elf32_Word   sh_info;
        Elf32_Word   sh_addralign;
        Elf32_Word   sh_entsize;
    } Elf32_Shdr;
    """

    def __init__(self, header, data, index):
        """Unpack `header` and slice this section's bytes out of `data`."""
        self.sh_name, self.sh_type, self.sh_flags, self.sh_addr, self.sh_offset, self.sh_size, self.sh_link, self.sh_info, self.sh_addralign, self.sh_entsize = struct.unpack('>IIIIIIIIII', header)
        assert not self.sh_flags & SHF_LINK_ORDER
        if self.sh_entsize != 0:
            assert self.sh_size % self.sh_entsize == 0
        if self.sh_type == SHT_NOBITS:
            # SHT_NOBITS sections get no data bytes, whatever sh_size says.
            self.data = b''
        else:
            self.data = data[self.sh_offset:self.sh_offset + self.sh_size]
        self.index = index
        # Filled in by late_init() of the relocation sections targeting us.
        self.relocated_by = []

    @staticmethod
    def from_parts(sh_name, sh_type, sh_flags, sh_link, sh_info, sh_addralign, sh_entsize, data, index):
        """Build a Section from explicit fields; address and offset start at 0."""
        header = struct.pack('>IIIIIIIIII', sh_name, sh_type, sh_flags, 0, 0, len(data), sh_link, sh_info, sh_addralign, sh_entsize)
        return Section(header, data, index)

    def lookup_str(self, index):
        """Return the NUL-terminated string starting at byte `index`."""
        assert self.sh_type == SHT_STRTAB
        to = self.data.find(b'\0', index)
        assert to != -1
        return self.data[index:to].decode('latin1')

    def add_str(self, string):
        """Append `string` plus a NUL terminator; return its starting offset."""
        assert self.sh_type == SHT_STRTAB
        ret = len(self.data)
        self.data += string.encode('latin1') + b'\0'
        return ret

    def is_rel(self):
        """Return True for SHT_REL and SHT_RELA sections."""
        return self.sh_type == SHT_REL or self.sh_type == SHT_RELA

    def header_to_bin(self):
        """Serialize the header, refreshing sh_size from the data if it has any."""
        if self.sh_type != SHT_NOBITS:
            self.sh_size = len(self.data)
        return struct.pack('>IIIIIIIIII', self.sh_name, self.sh_type, self.sh_flags, self.sh_addr, self.sh_offset, self.sh_size, self.sh_link, self.sh_info, self.sh_addralign, self.sh_entsize)

    def late_init(self, sections):
        """Resolve cross-section references once all sections are available."""
        if self.sh_type == SHT_SYMTAB:
            self.init_symbols(sections)
        elif self.is_rel():
            self.rel_target = sections[self.sh_info]
            self.rel_target.relocated_by.append(self)
            self.init_relocs()

    def find_symbol(self, name):
        """Return (st_shndx, st_value) of the first symbol called `name`, or None."""
        assert self.sh_type == SHT_SYMTAB
        for s in self.symbol_entries:
            if s.name == name:
                return (s.st_shndx, s.st_value)
        return None

    def find_symbol_in_section(self, name, section):
        """Return the value of symbol `name`, asserting it lives in `section`."""
        pos = self.find_symbol(name)
        assert pos is not None
        assert pos[0] == section.index
        return pos[1]

    def init_symbols(self, sections):
        """Parse the symbol entries, using sections[sh_link] as string table."""
        assert self.sh_type == SHT_SYMTAB
        assert self.sh_entsize == 16
        self.strtab = sections[self.sh_link]
        self.symbol_entries = [
            Symbol(self.data[i:i + self.sh_entsize], self.strtab)
            for i in range(0, self.sh_size, self.sh_entsize)
        ]

    def init_relocs(self):
        """Parse the relocation entries held in this section's data."""
        assert self.is_rel()
        self.relocations = [
            Relocation(self.data[i:i + self.sh_entsize], self.sh_type)
            for i in range(0, self.sh_size, self.sh_entsize)
        ]

    def local_symbols(self):
        """Return the first sh_info symbol entries."""
        assert self.sh_type == SHT_SYMTAB
        return self.symbol_entries[:self.sh_info]

    def global_symbols(self):
        """Return the symbol entries from index sh_info onwards."""
        assert self.sh_type == SHT_SYMTAB
        return self.symbol_entries[self.sh_info:]


class ElfFile:
    """In-memory object file: header, sections and the single symbol table."""

    def __init__(self, data):
        """Parse the complete contents (`bytes`) of an object file."""
        self.data = data
        assert data[:4] == b'\x7fELF', "not an ELF file"

        self.elf_header = ElfHeader(data[0:52])

        offset, size = self.elf_header.e_shoff, self.elf_header.e_shentsize
        null_section = Section(data[offset:offset + size], data, 0)
        # If e_shnum is 0, the count is read from the null section's sh_size.
        num_sections = self.elf_header.e_shnum or null_section.sh_size

        self.sections = [null_section]
        for i in range(1, num_sections):
            ind = offset + i * size
            self.sections.append(Section(data[ind:ind + size], data, i))

        symtab = None
        for s in self.sections:
            if s.sh_type == SHT_SYMTAB:
                assert not symtab
                symtab = s
        assert symtab is not None
        self.symtab = symtab

        shstr = self.sections[self.elf_header.e_shstrndx]
        for s in self.sections:
            s.name = shstr.lookup_str(s.sh_name)
            s.late_init(self.sections)

    def find_section(self, name):
        """Return the first section called `name`, or None."""
        for s in self.sections:
            if s.name == name:
                return s
        return None

    def add_section(self, name, sh_type, sh_flags, sh_link, sh_info, sh_addralign, sh_entsize, data):
        """Append a new section (registering its name) and return it."""
        shstr = self.sections[self.elf_header.e_shstrndx]
        sh_name = shstr.add_str(name)
        s = Section.from_parts(sh_name=sh_name, sh_type=sh_type,
                sh_flags=sh_flags, sh_link=sh_link, sh_info=sh_info,
                sh_addralign=sh_addralign, sh_entsize=sh_entsize, data=data,
                index=len(self.sections))
        self.sections.append(s)
        s.name = name
        s.late_init(self.sections)
        return s

    def drop_irrelevant_sections(self):
        """Remove trailing SHT_MIPS_DEBUG / SHT_MIPS_GPTAB sections."""
        # We can only drop sections at the end, since otherwise section
        # references might be wrong. Luckily, these sections typically are.
        while self.sections[-1].sh_type in [SHT_MIPS_DEBUG, SHT_MIPS_GPTAB]:
            self.sections.pop()

    def write(self, filename):
        """Write the file to `filename`: header, section data, section headers."""
        # The context manager closes the file even if serialization raises.
        with open(filename, 'wb') as outfile:
            outidx = 0

            def write_out(data):
                nonlocal outidx
                outfile.write(data)
                outidx += len(data)

            def pad_out(align):
                if align and outidx % align:
                    write_out(b'\0' * (align - outidx % align))

            self.elf_header.e_shnum = len(self.sections)
            write_out(self.elf_header.to_bin())

            for s in self.sections:
                if s.sh_type != SHT_NOBITS and s.sh_type != SHT_NULL:
                    pad_out(s.sh_addralign)
                    s.sh_offset = outidx
                    write_out(s.data)

            pad_out(4)
            self.elf_header.e_shoff = outidx
            for s in self.sections:
                write_out(s.header_to_bin())

            # e_shoff is only known now, so rewrite the header in place.
            outfile.seek(0)
            outfile.write(self.elf_header.to_bin())


def is_temp_name(name):
    """Return True if `name` carries the '_asmpp_' prefix of generated names."""
    return name.startswith('_asmpp_')


# https://stackoverflow.com/a/241506
def re_comment_replacer(match):
    """Replace a matched comment by a space; return matched strings unchanged."""
    s = match.group(0)
    if s[0] in "/#":
        return " "
    else:
        return s


re_comment_or_string = re.compile(
    r'#.*|/\*.*?\*/|"(?:\\.|[^\\"])*"'
)


class Failure(Exception):
    """Error carrying a message meant to be shown to the user."""

    def __init__(self, message):
        # Pass the message on so that `args` (and thus repr/pickling) is set.
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message


class GlobalState:
    """Counters and options shared by all GLOBAL_ASM blocks of one source."""

    def __init__(self, min_instr_count, skip_instr_count, use_jtbl_for_rodata):
        # A value that hopefully never appears as a 32-bit rodata constant (or we
        # miscompile late rodata). Increases by 1 in each step.
        self.late_rodata_hex = 0xE0123456
        self.namectr = 0
        self.min_instr_count = min_instr_count
        self.skip_instr_count = skip_instr_count
        self.use_jtbl_for_rodata = use_jtbl_for_rodata

    def next_late_rodata_hex(self):
        """Return the current dummy constant as 4 big-endian bytes and advance."""
        dummy_bytes = struct.pack('>I', self.late_rodata_hex)
        if (self.late_rodata_hex & 0xffff) == 0:
            # Avoid lui
            self.late_rodata_hex += 1
        self.late_rodata_hex += 1
        return dummy_bytes

    def make_name(self, cat):
        """Return a fresh '_asmpp_<cat><n>' name."""
        self.namectr += 1
        return '_asmpp_{}{}'.format(cat, self.namectr)


Function = namedtuple('Function', ['text_glabels', 'asm_conts', 'late_rodata_dummy_bytes', 'jtbl_rodata_size', 'late_rodata_asm_conts', 'fn_desc', 'data'])


class GlobalAsmBlock:
    """Accumulates the assembly lines of one GLOBAL_ASM block and their sizes."""

    def __init__(self, fn_desc):
        """`fn_desc` is a human-readable description used in error messages."""
        self.fn_desc = fn_desc
        self.cur_section = '.text'
        self.asm_conts = []
        self.late_rodata_asm_conts = []
        self.late_rodata_alignment = 0
        self.late_rodata_alignment_from_content = False
        self.text_glabels = []
        self.fn_section_sizes = {
            '.text': 0,
            '.data': 0,
            '.bss': 0,
            '.rodata': 0,
            '.late_rodata': 0,
        }
        # (line index, instruction count) for every sized .text line.
        self.fn_ins_inds = []
        self.glued_line = ''
        self.num_lines = 0

    def fail(self, message, line=None):
        """Raise Failure with `message` plus block (and optional line) context."""
        context = self.fn_desc
        if line:
            context += ", at line \"" + line + "\""
        raise Failure(message + "\nwithin " + context)

    def count_quoted_size(self, line, z, real_line, output_enc):
        """Return the byte size of the quoted strings on an .ascii-style line.

        Sizes are counted after encoding with `output_enc`. With `z` true, one
        terminator byte is added per string. `real_line` is only used for
        error context.
        """
        line = line.encode(output_enc).decode('latin1')
        in_quote = False
        num_parts = 0
        ret = 0
        i = 0
        digits = "0123456789" # 0-7 would be more sane, but this matches GNU as
        while i < len(line):
            c = line[i]
            i += 1
            if not in_quote:
                if c == '"':
                    in_quote = True
                    num_parts += 1
            else:
                if c == '"':
                    in_quote = False
                    continue
                ret += 1
                if c != '\\':
                    continue
                if i == len(line):
                    self.fail("backslash at end of line not supported", real_line)
                c = line[i]
                i += 1
                # (if c is in "bfnrtv", we have a real escaped literal)
                if c == 'x':
                    # hex literal, consume any number of hex chars, possibly none
                    while i < len(line) and line[i] in digits + "abcdefABCDEF":
                        i += 1
                elif c in digits:
                    # octal literal, consume up to two more digits
                    it = 0
                    while i < len(line) and line[i] in digits and it < 2:
                        i += 1
                        it += 1

        if in_quote:
            self.fail("unterminated string literal", real_line)
        if num_parts == 0:
            self.fail(".ascii with no string", real_line)
        return ret + num_parts if z else ret

    def align2(self):
        """Round the current section's size up to a multiple of 2."""
        while self.fn_section_sizes[self.cur_section] % 2 != 0:
            self.fn_section_sizes[self.cur_section] += 1

    def align4(self):
        """Round the current section's size up to a multiple of 4."""
        while self.fn_section_sizes[self.cur_section] % 4 != 0:
            self.fn_section_sizes[self.cur_section] += 1

    def add_sized(self, size, line):
        """Account for `size` bytes emitted by `line` in the current section."""
        if self.cur_section in ['.text', '.late_rodata']:
            if size % 4 != 0:
                self.fail("size must be a multiple of 4", line)
        if size < 0:
            self.fail("size cannot be negative", line)
        self.fn_section_sizes[self.cur_section] += size
        if self.cur_section == '.text':
            if not self.text_glabels:
                self.fail(".text block without an initial glabel", line)
            self.fn_ins_inds.append((self.num_lines - 1, size // 4))

    def process_line(self, line, output_enc):
        """Consume one assembly line: update section sizes and store the line.

        Lines ending in a backslash are glued to the following line. Lines in
        .late_rodata are collected separately from all other lines.
        """
        self.num_lines += 1
        if line.endswith('\\'):
            self.glued_line += line[:-1]
            return
        line = self.glued_line + line
        self.glued_line = ''

        real_line = line
        line = re.sub(re_comment_or_string, re_comment_replacer, line)
        line = line.strip()
        line = re.sub(r'^[a-zA-Z0-9_]+:\s*', '', line)
        changed_section = False
        emitting_double = False
        if line.startswith('glabel ') and self.cur_section == '.text':
            self.text_glabels.append(line.split()[1])
        if not line:
            pass # empty line
        elif line.startswith('glabel ') or (' ' not in line and line.endswith(':')):
            pass # label
        elif line.startswith('.section') or line in ['.text', '.data', '.rdata', '.rodata', '.bss', '.late_rodata']:
            # section change
            self.cur_section = '.rodata' if line == '.rdata' else line.split(',')[0].split()[-1]
            if self.cur_section not in ['.data', '.text', '.rodata', '.late_rodata', '.bss']:
                self.fail("unrecognized .section directive", real_line)
            changed_section = True
        elif line.startswith('.late_rodata_alignment'):
            if self.cur_section != '.late_rodata':
                self.fail(".late_rodata_alignment must occur within .late_rodata section", real_line)
            value = int(line.split()[1])
            if value not in [4, 8]:
                self.fail(".late_rodata_alignment argument must be 4 or 8", real_line)
            if self.late_rodata_alignment and self.late_rodata_alignment != value:
                self.fail(".late_rodata_alignment alignment assumption conflicts with earlier .double directive. Make sure to provide explicit alignment padding.", real_line)
            self.late_rodata_alignment = value
            # Treated like a section change so the directive itself is not
            # copied into the late rodata assembly.
            changed_section = True
        elif line.startswith('.incbin'):
            self.add_sized(int(line.split(',')[-1].strip(), 0), real_line)
        elif line.startswith(('.word', '.float')):
            self.align4()
            self.add_sized(4 * len(line.split(',')), real_line)
        elif line.startswith('.double'):
            self.align4()
            if self.cur_section == '.late_rodata':
                align8 = self.fn_section_sizes[self.cur_section] % 8
                # Automatically set late_rodata_alignment, so the generated C code uses doubles.
                # This gives us correct alignment for the transferred doubles even when the
                # late_rodata_alignment is wrong, e.g. for non-matching compilation.
                if not self.late_rodata_alignment:
                    self.late_rodata_alignment = 8 - align8
                    self.late_rodata_alignment_from_content = True
                elif self.late_rodata_alignment != 8 - align8:
                    if self.late_rodata_alignment_from_content:
                        self.fail("found two .double directives with different start addresses mod 8. Make sure to provide explicit alignment padding.", real_line)
                    else:
                        self.fail(".double at address that is not 0 mod 8 (based on .late_rodata_alignment assumption). Make sure to provide explicit alignment padding.", real_line)
            self.add_sized(8 * len(line.split(',')), real_line)
            emitting_double = True
        elif line.startswith('.space'):
            self.add_sized(int(line.split()[1], 0), real_line)
        elif line.startswith(('.balign', '.align')):
            align = int(line.split()[1])
            if align != 4:
                self.fail("only .balign 4 is supported", real_line)
            self.align4()
        elif line.startswith('.asci'):
            z = line.startswith(('.asciz', '.asciiz'))
            self.add_sized(self.count_quoted_size(line, z, real_line, output_enc), real_line)
        elif line.startswith('.byte'):
            self.add_sized(len(line.split(',')), real_line)
        elif line.startswith('.half'):
            self.align2()
            self.add_sized(2*len(line.split(',')), real_line)
        elif line.startswith('.'):
            # .macro, ...
            self.fail("asm directive not supported", real_line)
        else:
            # Unfortunately, macros are hard to support for .rodata --
            # we don't know how much space they will expand to before
            # running the assembler, but we need that information to
            # construct the C code. So if we need that we'll either
            # need to run the assembler twice (at least in some rare
            # cases), or change how this program is invoked.
            # Similarly, we can't currently deal with pseudo-instructions
            # that expand to several real instructions.
            if self.cur_section != '.text':
                self.fail("instruction or macro call in non-.text section? not supported", real_line)
            self.add_sized(4, real_line)
        if self.cur_section == '.late_rodata':
            if not changed_section:
                if emitting_double:
                    self.late_rodata_asm_conts.append(".align 0")
                self.late_rodata_asm_conts.append(real_line)
                if emitting_double:
                    self.late_rodata_asm_conts.append(".align 2")
        else:
            self.asm_conts.append(real_line)

    def finish(self, state):
        """Generate the placeholder C code for this block.

        Returns `(src, fn)`: `src` is a list of `num_lines + 1` strings (one
        per consumed line plus one for the closing line) and `fn` is the
        `Function` record describing the block. `state` supplies fresh names,
        dummy rodata constants and the instruction-count options.
        """
        src = [''] * (self.num_lines + 1)
        late_rodata_dummy_bytes = []
        jtbl_rodata_size = 0
        late_rodata_fn_output = []

        num_instr = self.fn_section_sizes['.text'] // 4

        if self.fn_section_sizes['.late_rodata'] > 0:
            # Generate late rodata by emitting unique float constants.
            # This requires 3 instructions for each 4 bytes of rodata.
            # If we know alignment, we can use doubles, which give 3
            # instructions for 8 bytes of rodata.
            size = self.fn_section_sizes['.late_rodata'] // 4
            skip_next = False
            needs_double = (self.late_rodata_alignment != 0)
            for i in range(size):
                if skip_next:
                    skip_next = False
                    continue
                # Jump tables give 9 instructions for >= 5 words of rodata, and should be
                # emitted when:
                # - -O2 or -O2 -g3 are used, which give the right codegen
                # - we have emitted our first .float/.double (to ensure that we find the
                #   created rodata in the binary)
                # - we have emitted our first .double, if any (to ensure alignment of doubles
                #   in shifted rodata sections)
                # - we have at least 5 words of rodata left to emit (otherwise IDO does not
                #   generate a jump table)
                # - we have at least 10 more instructions to go in this function (otherwise our
                #   function size computation will be wrong since the delay slot goes unused)
                if (not needs_double and state.use_jtbl_for_rodata and i >= 1 and
                        size - i >= 5 and num_instr - len(late_rodata_fn_output) >= 10):
                    cases = " ".join("case {}:".format(case) for case in range(size - i))
                    late_rodata_fn_output.append("switch (*(volatile int*)0) { " + cases + " ; }")
                    late_rodata_fn_output.extend([""] * 8)
                    jtbl_rodata_size = (size - i) * 4
                    break
                dummy_bytes = state.next_late_rodata_hex()
                late_rodata_dummy_bytes.append(dummy_bytes)
                if self.late_rodata_alignment == 4 * ((i + 1) % 2 + 1) and i + 1 < size:
                    dummy_bytes2 = state.next_late_rodata_hex()
                    late_rodata_dummy_bytes.append(dummy_bytes2)
                    fval, = struct.unpack('>d', dummy_bytes + dummy_bytes2)
                    late_rodata_fn_output.append('*(volatile double*)0 = {};'.format(fval))
                    skip_next = True
                    needs_double = True
                else:
                    fval, = struct.unpack('>f', dummy_bytes)
                    late_rodata_fn_output.append('*(volatile float*)0 = {}f;'.format(fval))
                late_rodata_fn_output.append('')
                late_rodata_fn_output.append('')

        text_name = None
        if self.fn_section_sizes['.text'] > 0 or late_rodata_fn_output:
            text_name = state.make_name('func')
            src[0] = 'void {}(void) {{'.format(text_name)
            src[self.num_lines] = '}'
            instr_count = self.fn_section_sizes['.text'] // 4
            if instr_count < state.min_instr_count:
                self.fail("too short .text block")
            tot_emitted = 0
            tot_skipped = 0
            fn_emitted = 0
            fn_skipped = 0
            # Reversed so that pop() yields the entries in emission order.
            rodata_stack = late_rodata_fn_output[::-1]
            for (line, count) in self.fn_ins_inds:
                for _ in range(count):
                    if (fn_emitted > MAX_FN_SIZE and instr_count - tot_emitted > state.min_instr_count and
                            (not rodata_stack or rodata_stack[-1])):
                        # Don't let functions become too large. When a function reaches 284
                        # instructions, and -O2 -framepointer flags are passed, the IRIX
                        # compiler decides it is a great idea to start optimizing more.
                        fn_emitted = 0
                        fn_skipped = 0
                        src[line] += ' }} void {}(void) {{ '.format(state.make_name('large_func'))
                    if fn_skipped < state.skip_instr_count:
                        fn_skipped += 1
                        tot_skipped += 1
                    elif rodata_stack:
                        src[line] += rodata_stack.pop()
                    else:
                        src[line] += '*(volatile int*)0 = 0;'
                    tot_emitted += 1
                    fn_emitted += 1
            if rodata_stack:
                size = len(late_rodata_fn_output) // 3
                available = instr_count - tot_skipped
                self.fail(
                    "late rodata to text ratio is too high: {} / {} must be <= 1/3\n"
                    "add .late_rodata_alignment (4|8) to the .late_rodata "
                    "block to double the allowed ratio."
                        .format(size, available))

        rodata_name = None
        if self.fn_section_sizes['.rodata'] > 0:
            rodata_name = state.make_name('rodata')
            src[self.num_lines] += ' const char {}[{}] = {{1}};'.format(rodata_name, self.fn_section_sizes['.rodata'])

        data_name = None
        if self.fn_section_sizes['.data'] > 0:
            data_name = state.make_name('data')
            src[self.num_lines] += ' char {}[{}] = {{1}};'.format(data_name, self.fn_section_sizes['.data'])

        bss_name = None
        if self.fn_section_sizes['.bss'] > 0:
            bss_name = state.make_name('bss')
            src[self.num_lines] += ' char {}[{}];'.format(bss_name, self.fn_section_sizes['.bss'])

        fn = Function(
                text_glabels=self.text_glabels,
                asm_conts=self.asm_conts,
                late_rodata_dummy_bytes=late_rodata_dummy_bytes,
                jtbl_rodata_size=jtbl_rodata_size,
                late_rodata_asm_conts=self.late_rodata_asm_conts,
                fn_desc=self.fn_desc,
                data={
                    '.text': (text_name, self.fn_section_sizes['.text']),
                    '.data': (data_name, self.fn_section_sizes['.data']),
                    '.rodata': (rodata_name, self.fn_section_sizes['.rodata']),
                    '.bss': (bss_name, self.fn_section_sizes['.bss']),
                })
        return src, fn

cutscene_data_regexpr = re.compile(r"CutsceneData (.|\n)*\[\] = {")
float_regexpr = re.compile(r"[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?f")

def repl_float_hex(m):
    """Return the matched 'f'-suffixed float literal as the decimal string of
    its big-endian single-precision bit pattern."""
    return str(struct.unpack(">I", struct.pack(">f", float(m.group(0).strip().rstrip("f"))))[0])

def parse_source(f, opt, framepointer, input_enc, output_enc, print_source=None):
    """Translate a C source containing GLOBAL_ASM blocks into plain C.

    `f` is the open source file (iterated line by line; `f.name` is used to
    resolve `#include "..." EARLY` paths). `opt` is one of 'O1', 'O2', 'g',
    'g3'. If `print_source` is given, the generated C is written to it: as
    text for a StringIO, otherwise encoded with `output_enc` (and the stream
    is closed unless it is sys.stdout.buffer).

    Returns the list of `Function` records of the GLOBAL_ASM blocks found.
    Raises Failure for an unknown `opt` or unsupported assembly.
    """
    if opt in ['O2', 'O1']:
        if framepointer:
            min_instr_count = 6
            skip_instr_count = 5
        else:
            min_instr_count = 2
            skip_instr_count = 1
    elif opt == 'g':
        if framepointer:
            min_instr_count = 7
            skip_instr_count = 7
        else:
            min_instr_count = 4
            skip_instr_count = 4
    else:
        if opt != 'g3':
            raise Failure("must pass one of -g, -O1, -O2, -O2 -g3")
        if framepointer:
            min_instr_count = 4
            skip_instr_count = 4
        else:
            min_instr_count = 2
            skip_instr_count = 2

    use_jtbl_for_rodata = False
    if opt in ['O2', 'g3'] and not framepointer:
        use_jtbl_for_rodata = True

    state = GlobalState(min_instr_count, skip_instr_count, use_jtbl_for_rodata)

    global_asm = None
    asm_functions = []
    output_lines = []

    is_cutscene_data = False

    for line_no, raw_line in enumerate(f, 1):
        raw_line = raw_line.rstrip()
        line = raw_line.lstrip()

        # Print exactly one output line per source line, to make compiler
        # errors have correct line numbers. These will be overridden with
        # reasonable content further down.
        output_lines.append('')

        if global_asm is not None:
            if line.startswith(')'):
                src, fn = global_asm.finish(state)
                for i, line2 in enumerate(src):
                    output_lines[start_index + i] = line2
                asm_functions.append(fn)
                global_asm = None
            else:
                global_asm.process_line(raw_line, output_enc)
        else:
            if line in ['GLOBAL_ASM(', '#pragma GLOBAL_ASM(']:
                global_asm = GlobalAsmBlock("GLOBAL_ASM block at line " + str(line_no))
                start_index = len(output_lines)
            elif (line.startswith(('GLOBAL_ASM("', '#pragma GLOBAL_ASM("'))
                    and line.endswith('")')):
                fname = line[line.index('(') + 2 : -2]
                global_asm = GlobalAsmBlock(fname)
                # Use a distinct name: rebinding `f` here would make later
                # EARLY includes resolve relative to the asm file.
                with open(fname, encoding=input_enc) as asm_file:
                    for line2 in asm_file:
                        global_asm.process_line(line2.rstrip(), output_enc)
                src, fn = global_asm.finish(state)
                output_lines[-1] = ''.join(src)
                asm_functions.append(fn)
                global_asm = None
            elif line.startswith('#include "') and line.endswith('" EARLY'):
                # C includes qualified with EARLY (i.e. #include "file.c" EARLY) will be
                # processed recursively when encountered
                fpath = os.path.dirname(f.name)
                fname = line[line.index(' ') + 2 : -7]
                include_src = StringIO()
                # os.path.join keeps the path relative when fpath is empty.
                with open(os.path.join(fpath, fname), encoding=input_enc) as include_file:
                    parse_source(include_file, opt, framepointer, input_enc, output_enc, include_src)
                # The directive must be written before the value is captured,
                # otherwise it never reaches the output.
                include_src.write('#line ' + str(line_no) + '\n')
                output_lines[-1] = include_src.getvalue()
                include_src.close()
            else:
                # This is a hack to replace all floating-point numbers in an array of a particular type
                # (in this case CutsceneData) with their corresponding IEEE-754 hexadecimal representation
                if cutscene_data_regexpr.search(line) is not None:
                    is_cutscene_data = True
                elif line.endswith("};"):
                    is_cutscene_data = False
                if is_cutscene_data:
                    raw_line = re.sub(float_regexpr, repl_float_hex, raw_line)
                output_lines[-1] = raw_line

    if print_source:
        if isinstance(print_source, StringIO):
            for line in output_lines:
                print_source.write(line + '\n')
        else:
            for line in output_lines:
                print_source.write(line.encode(output_enc) + b'\n')
            print_source.flush()
            if print_source != sys.stdout.buffer:
                print_source.close()

    return asm_functions

def fixup_objfile(objfile_name, functions, asm_prelude, assembler, output_enc):
    SECTIONS = ['.data', '.text', '.rodata', '.bss']

    with open(objfile_name, 'rb') as f:
        objfile = ElfFile(f.read())

    prev_locs = {
        '.text': 0,
        '.data': 0,
        '.rodata': 0,
        '.bss': 0,
    }
    to_copy = {
        '.text': [],
        '.data': [],
        '.rodata': [],
        '.bss': [],
    }
    asm = []
    all_late_rodata_dummy_bytes = []
    all_jtbl_rodata_size = []
    late_rodata_asm = []
    late_rodata_source_name_start = None
    late_rodata_source_name_end = None

    # Generate an assembly file with all the assembly we need to fill in.
For847# simplicity we pad with nops/.space so that addresses match exactly, so we848# don't have to fix up relocations/symbol references.849all_text_glabels = set()850for function in functions:851ifdefed = False852for sectype, (temp_name, size) in function.data.items():853if temp_name is None:854continue855assert size > 0856loc = objfile.symtab.find_symbol(temp_name)857if loc is None:858ifdefed = True859break860loc = loc[1]861prev_loc = prev_locs[sectype]862if loc < prev_loc:863raise Failure("Wrongly computed size for section {} (diff {}). This is an asm-processor bug!".format(sectype, prev_loc- loc))864if loc != prev_loc:865asm.append('.section ' + sectype)866if sectype == '.text':867for i in range((loc - prev_loc) // 4):868asm.append('nop')869else:870asm.append('.space {}'.format(loc - prev_loc))871to_copy[sectype].append((loc, size, temp_name, function.fn_desc))872prev_locs[sectype] = loc + size873if not ifdefed:874all_text_glabels.update(function.text_glabels)875all_late_rodata_dummy_bytes.append(function.late_rodata_dummy_bytes)876all_jtbl_rodata_size.append(function.jtbl_rodata_size)877late_rodata_asm.append(function.late_rodata_asm_conts)878for sectype, (temp_name, size) in function.data.items():879if temp_name is not None:880asm.append('.section ' + sectype)881asm.append('glabel ' + temp_name + '_asm_start')882asm.append('.text')883for line in function.asm_conts:884asm.append(line)885for sectype, (temp_name, size) in function.data.items():886if temp_name is not None:887asm.append('.section ' + sectype)888asm.append('glabel ' + temp_name + '_asm_end')889if any(late_rodata_asm):890late_rodata_source_name_start = '_asmpp_late_rodata_start'891late_rodata_source_name_end = '_asmpp_late_rodata_end'892asm.append('.rdata')893asm.append('glabel {}'.format(late_rodata_source_name_start))894for conts in late_rodata_asm:895asm.extend(conts)896asm.append('glabel {}'.format(late_rodata_source_name_end))897898o_file = tempfile.NamedTemporaryFile(prefix='asm-processor', 
suffix='.o', delete=False)899o_name = o_file.name900o_file.close()901s_file = tempfile.NamedTemporaryFile(prefix='asm-processor', suffix='.s', delete=False)902s_name = s_file.name903try:904s_file.write(asm_prelude + b'\n')905for line in asm:906s_file.write(line.encode(output_enc) + b'\n')907s_file.close()908ret = os.system(assembler + " " + s_name + " -o " + o_name)909if ret != 0:910raise Failure("failed to assemble")911with open(o_name, 'rb') as f:912asm_objfile = ElfFile(f.read())913914# Remove some clutter from objdump output915objfile.drop_irrelevant_sections()916917# Unify reginfo sections918target_reginfo = objfile.find_section('.reginfo')919source_reginfo_data = list(asm_objfile.find_section('.reginfo').data)920data = list(target_reginfo.data)921for i in range(20):922data[i] |= source_reginfo_data[i]923target_reginfo.data = bytes(data)924925# Move over section contents926modified_text_positions = set()927jtbl_rodata_positions = set()928last_rodata_pos = 0929for sectype in SECTIONS:930if not to_copy[sectype]:931continue932source = asm_objfile.find_section(sectype)933assert source is not None, "didn't find source section: " + sectype934for (pos, count, temp_name, fn_desc) in to_copy[sectype]:935loc1 = asm_objfile.symtab.find_symbol_in_section(temp_name + '_asm_start', source)936loc2 = asm_objfile.symtab.find_symbol_in_section(temp_name + '_asm_end', source)937assert loc1 == pos, "assembly and C files don't line up for section " + sectype + ", " + fn_desc938if loc2 - loc1 != count:939raise Failure("incorrectly computed size for section " + sectype + ", " + fn_desc + ". 
If using .double, make sure to provide explicit alignment padding.")940if sectype == '.bss':941continue942target = objfile.find_section(sectype)943assert target is not None, "missing target section of type " + sectype944data = list(target.data)945for (pos, count, _, _) in to_copy[sectype]:946data[pos:pos + count] = source.data[pos:pos + count]947if sectype == '.text':948assert count % 4 == 0949assert pos % 4 == 0950for i in range(count // 4):951modified_text_positions.add(pos + 4 * i)952elif sectype == '.rodata':953last_rodata_pos = pos + count954target.data = bytes(data)955956# Move over late rodata. This is heuristic, sadly, since I can't think957# of another way of doing it.958moved_late_rodata = {}959if any(all_late_rodata_dummy_bytes) or any(all_jtbl_rodata_size):960source = asm_objfile.find_section('.rodata')961target = objfile.find_section('.rodata')962source_pos = asm_objfile.symtab.find_symbol_in_section(late_rodata_source_name_start, source)963source_end = asm_objfile.symtab.find_symbol_in_section(late_rodata_source_name_end, source)964if source_end - source_pos != sum(map(len, all_late_rodata_dummy_bytes)) * 4 + sum(all_jtbl_rodata_size):965raise Failure("computed wrong size of .late_rodata")966new_data = list(target.data)967for dummy_bytes_list, jtbl_rodata_size in zip(all_late_rodata_dummy_bytes, all_jtbl_rodata_size):968for index, dummy_bytes in enumerate(dummy_bytes_list):969pos = target.data.index(dummy_bytes, last_rodata_pos)970# This check is nice, but makes time complexity worse for large files:971if SLOW_CHECKS and target.data.find(dummy_bytes, pos + 4) != -1:972raise Failure("multiple occurrences of late_rodata hex magic. 
Change asm-processor to use something better than 0xE0123456!")973if index == 0 and len(dummy_bytes_list) > 1 and target.data[pos+4:pos+8] == b'\0\0\0\0':974# Ugly hack to handle double alignment for non-matching builds.975# We were told by .late_rodata_alignment (or deduced from a .double)976# that a function's late_rodata started out 4 (mod 8), and emitted977# a float and then a double. But it was actually 0 (mod 8), so our978# double was moved by 4 bytes. To make them adjacent to keep jump979# tables correct, move the float by 4 bytes as well.980new_data[pos:pos+4] = b'\0\0\0\0'981pos += 4982new_data[pos:pos+4] = source.data[source_pos:source_pos+4]983moved_late_rodata[source_pos] = pos984last_rodata_pos = pos + 4985source_pos += 4986if jtbl_rodata_size > 0:987assert dummy_bytes_list, "should always have dummy bytes before jtbl data"988pos = last_rodata_pos989new_data[pos : pos + jtbl_rodata_size] = \990source.data[source_pos : source_pos + jtbl_rodata_size]991for i in range(0, jtbl_rodata_size, 4):992moved_late_rodata[source_pos + i] = pos + i993jtbl_rodata_positions.add(pos + i)994last_rodata_pos += jtbl_rodata_size995source_pos += jtbl_rodata_size996target.data = bytes(new_data)997998# Merge strtab data.999strtab_adj = len(objfile.symtab.strtab.data)1000objfile.symtab.strtab.data += asm_objfile.symtab.strtab.data10011002# Find relocated symbols1003relocated_symbols = set()1004for sectype in SECTIONS:1005for obj in [asm_objfile, objfile]:1006sec = obj.find_section(sectype)1007if sec is None:1008continue1009for reltab in sec.relocated_by:1010for rel in reltab.relocations:1011relocated_symbols.add(obj.symtab.symbol_entries[rel.sym_index])10121013# Move over symbols, deleting the temporary function labels.1014# Sometimes this naive procedure results in duplicate symbols, or UNDEF1015# symbols that are also defined the same .o file. 
Hopefully that's fine.1016# Skip over local symbols that aren't used relocated against, to avoid1017# conflicts.1018new_local_syms = [s for s in objfile.symtab.local_symbols() if not is_temp_name(s.name)]1019new_global_syms = [s for s in objfile.symtab.global_symbols() if not is_temp_name(s.name)]1020for i, s in enumerate(asm_objfile.symtab.symbol_entries):1021is_local = (i < asm_objfile.symtab.sh_info)1022if is_local and s not in relocated_symbols:1023continue1024if is_temp_name(s.name):1025continue1026if s.st_shndx not in [SHN_UNDEF, SHN_ABS]:1027section_name = asm_objfile.sections[s.st_shndx].name1028if section_name not in SECTIONS:1029raise Failure("generated assembly .o must only have symbols for .text, .data, .rodata, ABS and UNDEF, but found " + section_name)1030s.st_shndx = objfile.find_section(section_name).index1031# glabel's aren't marked as functions, making objdump output confusing. Fix that.1032if s.name in all_text_glabels:1033s.type = STT_FUNC1034if objfile.sections[s.st_shndx].name == '.rodata' and s.st_value in moved_late_rodata:1035s.st_value = moved_late_rodata[s.st_value]1036s.st_name += strtab_adj1037if is_local:1038new_local_syms.append(s)1039else:1040new_global_syms.append(s)1041new_syms = new_local_syms + new_global_syms1042for i, s in enumerate(new_syms):1043s.new_index = i1044objfile.symtab.data = b''.join(s.to_bin() for s in new_syms)1045objfile.symtab.sh_info = len(new_local_syms)10461047# Move over relocations1048for sectype in SECTIONS:1049source = asm_objfile.find_section(sectype)1050target = objfile.find_section(sectype)10511052if target is not None:1053# fixup relocation symbol indices, since we butchered them above1054for reltab in target.relocated_by:1055nrels = []1056for rel in reltab.relocations:1057if (sectype == '.text' and rel.r_offset in modified_text_positions or1058sectype == '.rodata' and rel.r_offset in jtbl_rodata_positions):1059# don't include relocations for late_rodata dummy code1060continue1061# hopefully we don't 
have relocations for local or1062# temporary symbols, so new_index exists1063rel.sym_index = objfile.symtab.symbol_entries[rel.sym_index].new_index1064nrels.append(rel)1065reltab.relocations = nrels1066reltab.data = b''.join(rel.to_bin() for rel in nrels)10671068if not source:1069continue10701071target_reltab = objfile.find_section('.rel' + sectype)1072target_reltaba = objfile.find_section('.rela' + sectype)1073for reltab in source.relocated_by:1074for rel in reltab.relocations:1075rel.sym_index = asm_objfile.symtab.symbol_entries[rel.sym_index].new_index1076if sectype == '.rodata' and rel.r_offset in moved_late_rodata:1077rel.r_offset = moved_late_rodata[rel.r_offset]1078new_data = b''.join(rel.to_bin() for rel in reltab.relocations)1079if reltab.sh_type == SHT_REL:1080if not target_reltab:1081target_reltab = objfile.add_section('.rel' + sectype,1082sh_type=SHT_REL, sh_flags=0,1083sh_link=objfile.symtab.index, sh_info=target.index,1084sh_addralign=4, sh_entsize=8, data=b'')1085target_reltab.data += new_data1086else:1087if not target_reltaba:1088target_reltaba = objfile.add_section('.rela' + sectype,1089sh_type=SHT_RELA, sh_flags=0,1090sh_link=objfile.symtab.index, sh_info=target.index,1091sh_addralign=4, sh_entsize=12, data=b'')1092target_reltaba.data += new_data10931094objfile.write(objfile_name)1095finally:1096s_file.close()1097os.remove(s_name)1098try:1099os.remove(o_name)1100except:1101pass11021103def run_wrapped(argv, outfile, functions):1104parser = argparse.ArgumentParser(description="Pre-process .c files and post-process .o files to enable embedding assembly into C.")1105parser.add_argument('filename', help="path to .c code")1106parser.add_argument('--post-process', dest='objfile', help="path to .o file to post-process")1107parser.add_argument('--assembler', dest='assembler', help="assembler command (e.g. 
\"mips-linux-gnu-as -march=vr4300 -mabi=32\")")1108parser.add_argument('--asm-prelude', dest='asm_prelude', help="path to a file containing a prelude to the assembly file (with .set and .macro directives, e.g.)")1109parser.add_argument('--input-enc', default='latin1', help="Input encoding (default: latin1)")1110parser.add_argument('--output-enc', default='latin1', help="Output encoding (default: latin1)")1111parser.add_argument('-framepointer', dest='framepointer', action='store_true')1112parser.add_argument('-g3', dest='g3', action='store_true')1113group = parser.add_mutually_exclusive_group(required=True)1114group.add_argument('-O1', dest='opt', action='store_const', const='O1')1115group.add_argument('-O2', dest='opt', action='store_const', const='O2')1116group.add_argument('-g', dest='opt', action='store_const', const='g')1117args = parser.parse_args(argv)1118opt = args.opt1119if args.g3:1120if opt != 'O2':1121raise Failure("-g3 is only supported together with -O2")1122opt = 'g3'11231124if args.objfile is None:1125with open(args.filename, encoding=args.input_enc) as f:1126return parse_source(f, opt=opt, framepointer=args.framepointer, input_enc=args.input_enc, output_enc=args.output_enc, print_source=outfile)1127else:1128if args.assembler is None:1129raise Failure("must pass assembler command")1130if functions is None:1131with open(args.filename, encoding=args.input_enc) as f:1132functions = parse_source(f, opt=opt, framepointer=args.framepointer, input_enc=args.input_enc, output_enc=args.output_enc)1133if not functions:1134return1135asm_prelude = b''1136if args.asm_prelude:1137with open(args.asm_prelude, 'rb') as f:1138asm_prelude = f.read()1139fixup_objfile(args.objfile, functions, asm_prelude, args.assembler, args.output_enc)11401141def run(argv, outfile=sys.stdout.buffer, functions=None):1142try:1143return run_wrapped(argv, outfile, functions)1144except Failure as e:1145print("Error:", e, file=sys.stderr)1146sys.exit(1)11471148if __name__ == 
"__main__":1149run(sys.argv[1:])115011511152