Fix code point iteration in narrow Python

pull/584/head
David Corbett 7 years ago committed by Behdad Esfahbod
parent 33ca3b67bf
commit d8df714251
  1. 27
      test/shaping/hb_test_tools.py

@ -7,6 +7,9 @@ from itertools import *
diff_symbols = "-+=*&^%$#@!~/" diff_symbols = "-+=*&^%$#@!~/"
diff_colors = ['red', 'green', 'blue'] diff_colors = ['red', 'green', 'blue']
def codepoints(s):
    """Return a lazy iterator over ``ord()`` of each element of *s*.

    Elements are taken exactly as iteration over *s* produces them; no
    surrogate-pair merging is done here.
    """
    ordinals = (ord(ch) for ch in s)
    return ordinals
try: try:
unichr = unichr unichr = unichr
@ -43,6 +46,28 @@ try:
except UnicodeDecodeError: except UnicodeDecodeError:
raise ValueError('unichr() arg not in range(0x110000)') raise ValueError('unichr() arg not in range(0x110000)')
def codepoints(s):
    """Yield the code points of *s*, merging UTF-16 surrogate pairs.

    Each element of *s* is converted with ``ord()``.  A high surrogate
    (U+D800..U+DBFF) immediately followed by a low surrogate
    (U+DC00..U+DFFF) is combined into one supplementary code point.  Any
    unpaired surrogate -- a low surrogate with no pending high surrogate,
    a high surrogate followed by anything other than a low surrogate, or a
    high surrogate at the end of the input -- is reported as 0xFFFC.

    NOTE(review): 0xFFFC is U+FFFC OBJECT REPLACEMENT CHARACTER; the
    conventional substitute is U+FFFD REPLACEMENT CHARACTER.  The value is
    kept as-is because callers may depend on it -- confirm the intent.
    """
    high_surrogate = None  # high surrogate still waiting for its low half
    for u in s:
        cp = ord(u)
        if 0xDC00 <= cp <= 0xDFFF:
            # Low surrogate: completes a pair only if a high one is pending.
            if high_surrogate is not None:
                yield 0x10000 + (high_surrogate - 0xD800) * 0x400 + (cp - 0xDC00)
                high_surrogate = None
            else:
                yield 0xFFFC
            continue
        if high_surrogate is not None:
            # The pending high surrogate was not followed by a low one.
            yield 0xFFFC
            high_surrogate = None
        if 0xD800 <= cp <= 0xDBFF:
            # Hold the high surrogate until the next element is seen.
            high_surrogate = cp
        else:
            yield cp
    if high_surrogate is not None:
        # Input ended in the middle of a pair.
        yield 0xFFFC
except NameError: except NameError:
unichr = chr unichr = chr
@ -456,7 +481,7 @@ class Unicode:
@staticmethod @staticmethod
def decode (s): def decode (s):
return u','.join ("U+%04X" % ord (u) for u in tounicode (s, 'utf-8')) return u','.join ("U+%04X" % cp for cp in codepoints (tounicode (s, 'utf-8')))
@staticmethod @staticmethod
def parse (s): def parse (s):

Loading…
Cancel
Save