|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
|
|
import os, sys, re
|
|
|
|
|
|
|
|
def cleanup_text(s):
    """Return *s* with whitespace and markup line breaks tidied up.

    The following clean-ups are applied, in order:

    1. ``\\r\\n`` line endings become ``\\n``.
    2. Spaces and tabs at the end of a line are removed.
    3. Runs of three or more newlines are collapsed to two.
    4. The empty line before a line consisting only of ``..`` is removed.
    5. A ``::`` standing alone after an empty line is moved to the end of
       the previous line.
    6. A ``_`` or ``,`` standing alone on a line is joined to the
       surrounding lines.
    7. Lines consisting only of a ```id=<digit>...`__`` link are removed.
    8. Line breaks directly after or before a backtick become a single
       space.
    9. Trailing spaces and tabs are removed once more.
    """
    # normalize line endings
    s = re.sub(r"\r\n", "\n", s)

    # remove trailing whitespace
    s = re.sub(r"[ \t]+\n", "\n", s)

    # compress multiple empty lines; the quantifier collapses a run of any
    # length in one pass (a fixed number of "\n\n\n" passes leaves long runs
    # only partially compressed)
    s = re.sub(r"\n{3,}", "\n\n", s)

    # remove empty line before ".." that terminates a code block
    s = re.sub(r"\n\n\.\.\n", "\n..\n", s)

    # move :: starting a code block to the end of previous line
    s = re.sub(r"\n\n::\n", " ::\n", s)

    # remove extra line breaks before/after _ or ,
    s = re.sub(r"\n[ \t]*([_,])\n", r"\1", s)

    # remove links to wiki; this has to run before the backtick joining
    # below, which replaces the newline this pattern is anchored on and
    # would otherwise prevent it from ever matching
    s = re.sub(r"\n[ \t]*`id=\d[^`]+`__\n", "", s)

    # remove extra line breaks after `
    s = re.sub(r"`\n", "` ", s)

    # remove extra line breaks before `
    s = re.sub(r"\n[ \t]*`", " `", s)

    # remove trailing whitespace one more time (the joins above can leave
    # a space at the end of a line)
    s = re.sub(r"[ \t]+\n", "\n", s)

    return s


def main():
    """Read the file named by argv[1], clean it up, write it to argv[2]."""
    if len(sys.argv) < 3:
        sys.exit("usage: %s INPUT OUTPUT" % sys.argv[0])

    # read the whole file content; "with" closes the handle even on error
    with open(sys.argv[1], "rt") as finput:
        s = finput.read()

    s = cleanup_text(s)

    with open(sys.argv[2], "wt") as foutput:
        foutput.write(s)


if __name__ == "__main__":
    main()
|