You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
151 lines
4.3 KiB
151 lines
4.3 KiB
#!/usr/bin/env python |
|
|
|
""" |
|
strip_asm.py - Cleanup ASM output for the specified file |
|
""" |
|
|
|
from argparse import ArgumentParser |
|
import sys |
|
import os |
|
import re |
|
|
|
def find_used_labels(asm):
    """Collect the '.L' labels that are the operand of a jump instruction.

    Args:
        asm: The assembly listing as a single string.

    Returns:
        A set of label names, each including its leading '.L', for every
        line that starts (after optional whitespace) with a ``j*`` mnemonic
        followed by a '.L'-prefixed operand.
    """
    # Raw string: '\s' and '\.' are regex escapes, not Python string escapes.
    jump_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    found = set()
    for line in asm.splitlines():
        m = jump_re.match(line)
        if m:
            found.add('.L%s' % m.group(1))
    return found
|
|
|
|
|
def normalize_labels(asm):
    """Prefix dot-less 'L...' label names with '.' throughout the listing.

    Labels are discovered from their declarations (a line beginning with
    ``Lname:`` or ``.Lname:``). Every declared label that lacks the leading
    dot is rewritten to ``.Lname`` wherever it appears at the start of the
    text or after whitespace and is followed by ':', whitespace, or the end
    of the text. Labels that already carry the dot are left untouched.

    Args:
        asm: The assembly listing as a single string.

    Returns:
        The listing with dot-less label names normalized; the input is
        returned unchanged when no such label is declared.
    """
    label_decl = re.compile(r"^\.?L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    decls = set()
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m:
            decls.add(m.group(0))

    # Only rewrite labels that really lack the dot. Deciding from an
    # arbitrary element of the set would make the result depend on set
    # iteration order when both spellings are present.
    undotted = sorted(d for d in decls if not d.startswith('.'))
    if not undotted:
        return asm

    for label in undotted:
        # The lookahead keeps 'L1' from matching inside 'L10'; '$' also
        # covers a reference that ends the text without a newline.
        pattern = r"(^|\s+)" + re.escape(label) + r"(?=:|\s|$)"
        asm = re.sub(pattern, r"\1." + label, asm)
    return asm
|
|
|
|
|
def transform_labels(asm):
    """Normalize label names and drop '.L' declarations nothing jumps to.

    The listing is first passed through normalize_labels(); the set of jump
    targets is then gathered with find_used_labels(). A line that declares a
    '.L' label is kept only if that label is in the set; every other line is
    kept as-is.

    Args:
        asm: The assembly listing as a single string.

    Returns:
        The filtered listing, with each kept line terminated by a newline.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")

    kept = []
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m is None or m.group(0) in used_decls:
            kept.append(line)
    # Join once instead of growing a string line by line.
    return ''.join(line + '\n' for line in kept)
|
|
|
|
|
def is_identifier(tk):
    """Return True if *tk* looks like an identifier.

    An identifier here is a non-empty string whose first character is a
    letter or '_' and whose remaining characters are letters, digits, or '_'.

    Args:
        tk: The token string to test.

    Returns:
        True when the token has identifier shape, False otherwise
        (including for the empty string).
    """
    if not tk:
        return False
    first = tk[0]
    if not (first.isalpha() or first == '_'):
        return False
    return all(c.isalnum() or c == '_' for c in tk[1:])
|
|
|
def process_identifiers(l):
    """
    process_identifiers - process all identifiers and modify them to have
    consistent names across all platforms; specifically across ELF and MachO.
    For example, MachO inserts an additional underscore at the beginning of
    names. This function removes that.

    Args:
        l: A single line of assembly text.

    Returns:
        The line with one leading underscore removed from each identifier
        that starts with '__Z', or that starts with '_' followed by a letter
        other than 'Z'. All other text is passed through unchanged.
    """
    # The capturing group makes re.split keep the word-like runs, so the
    # pieces concatenate back to the original line.
    parts = re.split(r'([a-zA-Z0-9_]+)', l)
    out = []
    for tk in parts:
        if is_identifier(tk):
            if tk.startswith('__Z'):
                tk = tk[1:]
            elif (tk.startswith('_') and len(tk) > 1 and
                  tk[1].isalpha() and tk[1] != 'Z'):
                # A name starting with '_Z' is deliberately left alone.
                tk = tk[1:]
        out.append(tk)
    return ''.join(out)
|
|
|
|
|
def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines

    The listing is first run through transform_labels(). Each remaining line
    has any '@GOTPCREL' text removed, is dropped if it matches one of the
    discard patterns (unless a keep pattern also matches), and otherwise has
    its identifiers rewritten by process_identifiers(). A blank line is
    inserted before every function-label line except at the very start of
    the output.

    Args:
        asm: The assembly listing as a single string.

    Returns:
        The cleaned listing, with each kept line terminated by a newline.
    """
    asm = transform_labels(asm)

    # TODO: Add more things we want to remove
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
        re.compile(r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
    ]
    # Patterns that force a line to be kept even if a discard pattern
    # matched it; currently none.
    keep_regexes = [
    ]
    fn_label_def = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")

    out = []
    for line in asm.splitlines():
        # Drop the '@GOTPCREL' suffix so operands read the same everywhere.
        line = line.replace('@GOTPCREL', '')
        add_line = True
        for reg in discard_regexes:
            if reg.match(line) is not None:
                add_line = False
                break
        for reg in keep_regexes:
            if reg.match(line) is not None:
                add_line = True
                break
        if add_line:
            # Separate functions with a blank line, but not before the
            # first emitted line.
            if fn_label_def.match(line) and out:
                out.append('\n')
            out.append(process_identifiers(line))
            out.append('\n')
    return ''.join(out)
|
|
|
def main():
    """Command-line entry point: read an assembly file, strip it, write it.

    Takes two positional arguments, the input assembly file and the output
    file. Any additional arguments are accepted and ignored. Prints an error
    and exits with status 1 if the input path is not an existing file.
    """
    parser = ArgumentParser(
        description='generate a stripped assembly file')
    parser.add_argument(
        'input', metavar='input', type=str, nargs=1,
        help='An input assembly file')
    parser.add_argument(
        'out', metavar='output', type=str, nargs=1,
        help='The output file')
    # parse_known_args tolerates extra arguments; they are not used here.
    args, _ = parser.parse_known_args()
    # nargs=1 yields one-element lists.
    in_path = args.input[0]
    out_path = args.out[0]
    if not os.path.isfile(in_path):
        print("ERROR: input file '%s' does not exist" % in_path)
        sys.exit(1)
    with open(in_path, 'r') as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(out_path, 'w') as f:
        f.write(new_contents)
|
|
|
|
|
# Run the command-line entry point only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()
|
|
|
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 |
|
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; |
|
# kate: indent-mode python; remove-trailing-spaces modified;
|
|
|