HarfBuzz text shaping engine
http://harfbuzz.github.io/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
53 lines
1.4 KiB
53 lines
1.4 KiB
#!/usr/bin/python |
|
|
|
import sys |
|
import re |
|
import unicodedata |
|
|
|
# Abbreviations substituted for certain full Unicode character names.
# Keys are compared against the name left after pretty_name () has stripped
# its prefixes; values are the short labels emitted instead.
shorthands = {
    # Joiners and spacing.
    "ZERO WIDTH NON-JOINER": "ZWNJ",
    "ZERO WIDTH JOINER": "ZWJ",
    "NARROW NO-BREAK SPACE": "NNBSP",
    "COMBINING GRAPHEME JOINER": "CGJ",
    # Directional marks, embeddings and overrides.
    "LEFT-TO-RIGHT MARK": "LRM",
    "RIGHT-TO-LEFT MARK": "RLM",
    "LEFT-TO-RIGHT EMBEDDING": "LRE",
    "RIGHT-TO-LEFT EMBEDDING": "RLE",
    "POP DIRECTIONAL FORMATTING": "PDF",
    "LEFT-TO-RIGHT OVERRIDE": "LRO",
    "RIGHT-TO-LEFT OVERRIDE": "RLO",
}
|
def pretty_name (x):
    """Return a short, human-readable label for the single character *x*.

    The label is derived from the character's Unicode name:

    * everything up to and including " LETTER " is dropped;
    * "... VOWEL SIGN FOO" becomes "FOO-MATRA";
    * everything up to and including " SIGN " or " COMBINING " is dropped;
    * any virama becomes "HALANT";
    * names listed in ``shorthands`` are replaced by their abbreviation.

    Returns "XXX" when unicodedata has no name for *x*.
    """
    try:
        s = unicodedata.name (x)
    except ValueError:
        # unicodedata.name () raises ValueError for characters without a name.
        return "XXX"

    s = re.sub (r".* LETTER ", "", s)
    s = re.sub (r".* VOWEL SIGN (.*)", r"\1-MATRA", s)
    s = re.sub (r".* SIGN ", "", s)
    s = re.sub (r".* COMBINING ", "", s)

    # The " SIGN " substitution above has already reduced names such as
    # "DEVANAGARI SIGN VIRAMA" to a bare "VIRAMA", so the leading "<prefix> "
    # must be optional here; names like "BRAHMI VIRAMA" still carry it.
    if re.match (r"(?:.* )?VIRAMA", s):
        s = "HALANT"

    return shorthands.get (s, s)
|
|
|
|
|
def pretty_names (s):
    """Return the pretty names of the code points listed in *s*, joined by " + ".

    *s* is a string of hexadecimal code points separated by commas, spaces
    or newlines.  The decorations "<", ">", "+", "\\", "u", "U" and a "0x" /
    "0X" prefix are treated as separators, so "U+0915", "\\u0915", "0x0915"
    and "<U0915>" are all accepted.  Empty tokens are ignored; an input with
    no tokens yields the empty string.

    Raises ValueError if a token is not a valid hexadecimal number or is
    outside the range of code points the interpreter can represent.
    """
    # Python 2 spells the code-point-to-character builtin unichr (); Python 3
    # removed it and made chr () cover the whole Unicode range.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    s = re.sub (r"[<+>\\uU]", " ", s)
    s = re.sub (r"0[xX]", " ", s)
    chars = [to_char (int (x, 16)) for x in re.split ('[, \n]', s) if x]
    return ' + '.join (pretty_name (c) for c in chars)
|
|
|
if __name__ == '__main__':

    # Command-line entry point.  Either every argument is a code point list,
    # or the sole argument is --stdin and code points are read from standard
    # input, one sequence per line.  Anything else prints usage and exits 1.
    #
    # print is always called with exactly one parenthesized argument so the
    # statements below behave the same under Python 2 (print statement) and
    # Python 3 (print function).
    if len (sys.argv) == 1 or ('--stdin' in sys.argv and len (sys.argv) != 2):
        print ("Usage:\n %s UNICODE_CODEPOINTS...\nor:\n %s --stdin" % (sys.argv[0], sys.argv[0]))
        sys.exit (1)

    if '--stdin' in sys.argv:
        sys.argv.remove ('--stdin')
        # Iterate the stream directly instead of readlines () so input is
        # not accumulated into a list before the first line is printed.
        for line in sys.stdin:
            print (pretty_names (line))
    else:
        print (pretty_names (','.join (sys.argv[1:])))
|
|
|