[USE] Update the data files

This uses the data files from
<3254c94606/USE>
and closes #3817.
pull/3828/head
David Corbett 2 years ago committed by Behdad Esfahbod
parent b78d4b1e58
commit 3ca5fbda76
  1. 23
      src/gen-use-table.py
  2. 120
      src/hb-ot-shaper-use-table.hh
  3. 34
      src/hb-ot-shaper-vowel-constraints.cc
  4. 15
      src/ms-use/IndicPositionalCategory-Additional.txt
  5. 198
      src/ms-use/IndicShapingInvalidCluster.txt
  6. 91
      src/ms-use/IndicSyllabicCategory-Additional.txt

@ -134,6 +134,7 @@ property_names = [
'Number_Joiner',
'Number',
'Brahmi_Joining_Number',
'Symbol_Modifier',
'Hieroglyph',
'Hieroglyph_Joiner',
'Hieroglyph_Segment_Begin',
@ -214,8 +215,7 @@ def is_CONS_MED(U, UISC, UDI, UGC, AJT):
return (UISC == Consonant_Medial and UGC != Lo or
UISC == Consonant_Initial_Postfixed)
def is_CONS_MOD(U, UISC, UDI, UGC, AJT):
return (UISC in [Nukta, Gemination_Mark, Consonant_Killer] and
not is_SYM_MOD(U, UISC, UDI, UGC, AJT))
return UISC in [Nukta, Gemination_Mark, Consonant_Killer]
def is_CONS_SUB(U, UISC, UDI, UGC, AJT):
return UISC == Consonant_Subjoined and UGC != Lo
def is_CONS_WITH_STACKER(U, UISC, UDI, UGC, AJT):
@ -257,7 +257,7 @@ def is_SAKOT(U, UISC, UDI, UGC, AJT):
# Split off of HALANT
return U == 0x1A60
def is_SYM_MOD(U, UISC, UDI, UGC, AJT):
return U in [0x1B6B, 0x1B6C, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73]
return UISC == Symbol_Modifier
def is_VOWEL(U, UISC, UDI, UGC, AJT):
return (UISC == Pure_Killer or
UGC != Lo and UISC in [Vowel, Vowel_Dependent])
@ -359,9 +359,6 @@ def map_to_use(data):
# TODO: These don't have UISC assigned in Unicode 13.0.0, but have UIPC
if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F: UISC = Vowel_Dependent
# TODO: https://github.com/harfbuzz/harfbuzz/pull/627
if 0x1BF2 <= U <= 0x1BF3: UISC = Nukta; UIPC = Bottom
# TODO: U+1CED should only be allowed after some of
# the nasalization marks, maybe only for U+1CE9..U+1CF1.
if U == 0x1CED: UISC = Tone_Mark
@ -372,23 +369,9 @@ def map_to_use(data):
# Resolve Indic_Positional_Category
# TODO: These should die, but have UIPC in Unicode 13.0.0
if U in [0x953, 0x954]: UIPC = Not_Applicable
# TODO: These are not in USE's override list that we have, nor are they in Unicode 13.0.0
if 0xA926 <= U <= 0xA92A: UIPC = Top
# TODO: https://github.com/harfbuzz/harfbuzz/pull/1037
# and https://github.com/harfbuzz/harfbuzz/issues/1631
if U in [0x11302, 0x11303, 0x114C1]: UIPC = Top
if 0x1CF8 <= U <= 0x1CF9: UIPC = Top
# TODO: https://github.com/harfbuzz/harfbuzz/issues/3550
if U == 0x10A38: UIPC = Bottom
# TODO: https://github.com/harfbuzz/harfbuzz/pull/982
# also https://github.com/harfbuzz/harfbuzz/issues/1012
if 0x1112A <= U <= 0x1112B: UIPC = Top
if 0x11131 <= U <= 0x11132: UIPC = Top
assert (UIPC in [Not_Applicable, Visual_Order_Left] or U == 0x0F7F or
USE in use_positions), "%s %s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UDI, UGC, AJT)

@ -25,6 +25,7 @@
* # Updated for Unicode 12.1 by Andrew Glass 2019-05-24
* # Updated for Unicode 13.0 by Andrew Glass 2020-07-28
* # Updated for Unicode 14.0 by Andrew Glass 2021-09-25
* # Updated for Unicode 15.0 by Andrew Glass 2022-09-16
* # Override values For Indic_Positional_Category
* # Not derivable
* # Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17
@ -34,6 +35,7 @@
* # Updated for Unicode 12.1 by Andrew Glass 2019-05-30
* # Updated for Unicode 13.0 by Andrew Glass 2020-07-28
* # Updated for Unicode 14.0 by Andrew Glass 2021-09-28
* # Updated for Unicode 15.0 by Andrew Glass 2022-09-16
* UnicodeData.txt does not have a header.
*/
@ -90,7 +92,7 @@
#pragma GCC diagnostic pop
static const uint8_t
hb_use_u8[3115] =
hb_use_u8[3141] =
{
16, 50, 51, 51, 51, 52, 51, 83, 118, 131, 51, 57, 58, 179, 195, 61,
51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
@ -125,11 +127,11 @@ hb_use_u8[3115] =
2, 2, 2, 2, 2, 2, 2, 2, 2, 88, 89, 2, 2, 2, 2, 2,
2, 2, 2, 90, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 91, 2, 2, 92, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 93, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 94, 94, 95, 96, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94,
94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94,
94, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
2, 2, 2, 91, 2, 2, 92, 2, 2, 2, 93, 2, 2, 2, 2, 2,
2, 2, 2, 94, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 95, 95, 96, 97, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,
95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,
95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 2, 2, 2, 2, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 4,
0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 0, 0, 0, 0, 0, 0,
@ -226,70 +228,72 @@ hb_use_u8[3115] =
0, 9, 47, 2, 2, 2, 2, 2, 2, 2, 2, 2, 125, 18, 20, 151,
20, 19, 152, 153, 2, 2, 2, 2, 2, 0, 0, 63, 154, 0, 0, 0,
0, 2, 11, 0, 0, 0, 0, 0, 0, 2, 63, 23, 18, 18, 18, 20,
20, 106, 155, 0, 0, 156, 157, 29, 158, 28, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 21, 17, 20, 20, 159, 42, 0, 0, 0,
20, 106, 155, 0, 0, 54, 156, 29, 157, 28, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 21, 17, 20, 20, 158, 42, 0, 0, 0,
47, 125, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 7, 7, 2, 2,
28, 2, 2, 2, 2, 2, 2, 2, 28, 2, 2, 2, 2, 2, 2, 2,
8, 16, 17, 19, 20, 160, 29, 0, 0, 9, 9, 28, 2, 2, 2, 7,
8, 16, 17, 19, 20, 159, 29, 0, 0, 9, 9, 28, 2, 2, 2, 7,
28, 7, 2, 28, 2, 2, 56, 15, 21, 14, 21, 45, 30, 31, 30, 32,
0, 0, 0, 0, 33, 0, 0, 0, 2, 2, 21, 0, 9, 9, 9, 44,
0, 9, 9, 44, 0, 0, 0, 0, 0, 2, 2, 63, 23, 18, 18, 18,
20, 21, 123, 13, 15, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0,
161, 162, 0, 0, 0, 0, 0, 0, 0, 16, 17, 18, 18, 64, 97, 23,
158, 9, 163, 7, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2,
63, 23, 18, 18, 0, 46, 46, 9, 164, 35, 0, 0, 0, 0, 0, 0,
160, 161, 0, 0, 0, 0, 0, 0, 0, 16, 17, 18, 18, 64, 97, 23,
157, 9, 162, 7, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2,
63, 23, 18, 18, 0, 46, 46, 9, 163, 35, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 2, 2, 18, 0, 21, 17, 18, 18, 19, 14, 80,
164, 36, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 8, 165,
23, 18, 20, 20, 163, 7, 0, 0, 0, 2, 2, 2, 2, 2, 7, 41,
163, 36, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 8, 164,
23, 18, 20, 20, 162, 7, 0, 0, 0, 2, 2, 2, 2, 2, 7, 41,
133, 21, 20, 18, 74, 19, 20, 0, 0, 2, 2, 2, 7, 0, 0, 0,
0, 2, 2, 2, 2, 2, 2, 16, 17, 18, 19, 20, 103, 164, 35, 0,
0, 2, 2, 2, 2, 2, 2, 16, 17, 18, 19, 20, 103, 163, 35, 0,
0, 2, 2, 2, 7, 28, 0, 2, 2, 2, 2, 28, 7, 2, 2, 2,
2, 21, 21, 16, 30, 31, 10, 166, 167, 168, 169, 0, 0, 0, 0, 0,
2, 21, 21, 16, 30, 31, 10, 165, 166, 167, 168, 0, 0, 0, 0, 0,
0, 2, 2, 2, 2, 0, 2, 2, 2, 63, 23, 18, 18, 0, 20, 21,
27, 106, 0, 31, 0, 0, 0, 0, 0, 50, 18, 20, 20, 20, 137, 2,
2, 2, 170, 171, 9, 13, 172, 70, 173, 0, 0, 1, 144, 0, 0, 0,
0, 50, 18, 20, 14, 17, 18, 2, 2, 2, 2, 155, 155, 155, 174, 174,
174, 174, 174, 174, 13, 175, 0, 28, 0, 20, 18, 18, 29, 20, 20, 9,
164, 0, 59, 59, 59, 59, 59, 59, 59, 64, 19, 80, 44, 0, 0, 0,
2, 2, 169, 170, 9, 13, 171, 70, 172, 0, 0, 1, 144, 0, 0, 0,
0, 50, 18, 20, 14, 17, 18, 2, 2, 2, 2, 155, 155, 155, 173, 173,
173, 173, 173, 173, 13, 174, 0, 28, 0, 20, 18, 18, 29, 20, 20, 9,
163, 0, 59, 59, 59, 59, 59, 59, 59, 64, 19, 80, 44, 0, 0, 0,
0, 2, 2, 2, 7, 2, 28, 2, 2, 50, 20, 20, 29, 0, 36, 20,
25, 9, 157, 176, 172, 0, 0, 0, 0, 2, 2, 2, 28, 7, 2, 2,
25, 9, 156, 175, 171, 0, 0, 0, 0, 2, 2, 2, 28, 7, 2, 2,
2, 2, 2, 2, 2, 2, 21, 21, 45, 20, 33, 80, 66, 0, 0, 0,
0, 2, 177, 64, 45, 0, 0, 0, 0, 9, 178, 2, 2, 2, 2, 2,
0, 2, 176, 64, 45, 0, 0, 0, 0, 9, 177, 2, 2, 2, 2, 2,
2, 2, 2, 21, 20, 18, 29, 0, 46, 14, 140, 0, 0, 0, 0, 0,
0, 179, 179, 179, 106, 7, 0, 0, 0, 9, 9, 9, 44, 0, 0, 0,
0, 2, 2, 2, 2, 2, 7, 0, 56, 180, 18, 18, 18, 18, 18, 18,
0, 178, 178, 178, 106, 179, 178, 0, 0, 145, 2, 2, 180, 114, 114, 114,
114, 114, 114, 114, 0, 0, 0, 0, 0, 9, 9, 9, 44, 0, 0, 0,
0, 2, 2, 2, 2, 2, 7, 0, 56, 181, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 0, 0, 0,
38, 114, 24, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0,
0, 2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 56,
35, 0, 4, 118, 118, 118, 119, 0, 0, 9, 9, 9, 47, 2, 2, 2,
0, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
44, 2, 2, 2, 2, 2, 2, 9, 9, 2, 2, 42, 42, 42, 90, 0,
0, O, O, O, GB, B, B, GB, O, O, WJ,FMPst,FMPst, O, CGJ, B,
O, B,VMAbv,VMAbv,VMAbv, O,VMAbv, B,CMBlw,CMBlw,CMBlw,VMAbv,VMPst, VAbv, VPst,CMBlw,
B, VPst, VPre, VPst, VBlw, VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VPst, VPst, VPst, H, VPre,
VPst,VMBlw, O, O, VAbv, GB,VMAbv,VMPst,VMPst, O, B, VBlw, O, O, VPre, VPre,
O, VPre, H, O, VPst,FMAbv, O,CMBlw, O, VAbv, O, VAbv, H, O,VMBlw,VMAbv,
CMAbv, GB, GB, O, MBlw,CMAbv,CMAbv, VPst, VAbv,VMAbv, O, VPst, O, VPre, VPre,VMAbv,
B, O, CS, CS,VMPst, B, VAbv, VAbv, B, R, O, HVM, O, O, FBlw, O,
CMAbv, O,CMBlw, VAbv, VBlw, B, SUB, SUB, SUB, O, SUB, SUB, O, FBlw, O, B,
VPst, VBlw, VPre,VMAbv,VMBlw,VMPst, IS, VAbv, MPst, MPre, MBlw, MBlw, B, MBlw, MBlw, VPst,
VMPst,VMPst, B, MBlw, VPst, VPre, VAbv, VAbv,VMPst,VMPst,VMBlw, B,VMPst, VBlw, VPst, CGJ,
CGJ, VPst,VMAbv,VMAbv,FMAbv, FAbv,CMAbv,FMAbv,VMAbv,FMAbv, VAbv, IS,FMAbv, B,FMAbv, B,
CGJ, WJ, CGJ, GB,CMAbv,CMAbv, B, GB, B, VAbv, SUB, FPst, FPst,VMBlw, FPst, FPst,
FBlw,VMAbv,FMBlw, VAbv, VPre, B, MPre, MBlw, SUB, FAbv, FAbv, MAbv, SUB, Sk, VPst, VAbv,
VMAbv,VMAbv, FAbv,CMAbv, VPst, H, B, O,SMAbv,SMBlw,SMAbv,SMAbv,SMAbv, VPst, IS, VBlw,
FAbv,VMPre,VMPre,FMAbv,CMBlw,VMBlw,VMBlw,VMAbv, CS, O,FMAbv, ZWNJ, CGJ, WJ, WJ, WJ,
O,FMPst, O, O, H, MPst, VPst, H,VMAbv, VAbv,VMBlw, B, VBlw, FPst, VPst, FAbv,
VMPst, B,CMAbv, VAbv, MBlw, MPst, MBlw, H, O, VBlw, MPst, MPre, MAbv, MBlw, O, B,
FAbv, FAbv, FPst, VBlw, B, B, VPre, O,VMPst, IS, O,VMPst, VBlw, VPst,VMBlw,VMBlw,
VMAbv, O, IS,VMBlw, B,VMPst,VMAbv,VMPst, CS, CS, B, N, N, O, HN, VPre,
VBlw, VAbv, IS,CMAbv, O, VPst, B, R, R, O,FMBlw,CMBlw, VAbv, VPre,VMAbv,VMAbv,
H, VAbv,CMBlw,FMAbv, B, CS, CS, H,CMBlw,VMPst, H,VMPst, VAbv,VMAbv, VPst, IS,
R, MPst, R, MPst,CMBlw, B,FMBlw, VBlw,VMAbv, R, MBlw, MBlw, GB, FBlw, FBlw,CMAbv,
IS, VBlw, IS, GB, VAbv, R,VMPst, H, H, O, VBlw,
44, 2, 2, 2, 2, 2, 2, 9, 9, 2, 2, 2, 2, 2, 2, 20,
20, 2, 2, 42, 42, 42, 90, 0, 0, O, O, O, GB, B, B, GB,
O, O, WJ,FMPst,FMPst, O, CGJ, B, O, B,VMAbv,VMAbv,VMAbv, O,VMAbv, B,
CMBlw,CMBlw,CMBlw,VMAbv,VMPst, VAbv, VPst,CMBlw, B, VPst, VPre, VPst, VBlw, VBlw, VBlw, VBlw,
VAbv, VAbv, VAbv, VPst, VPst, VPst, H, VPre, VPst,VMBlw, O, O, VAbv, GB,VMAbv,VMPst,
VMPst, O, B, VBlw, O, O, VPre, VPre, O, VPre, H, O, VPst,FMAbv, O,CMBlw,
O, VAbv, O, VAbv, H, O,VMBlw,VMAbv,CMAbv, GB, GB, O, MBlw,CMAbv,CMAbv, VPst,
VAbv,VMAbv, O, VPst, O, VPre, VPre,VMAbv, B, O, CS, CS,VMPst, B, VAbv, VAbv,
B, R, O, HVM, O, O,FMBlw, O,CMAbv, O,CMBlw, VAbv, VBlw, B, SUB, SUB,
SUB, O, SUB, SUB, O,FMBlw, O, B, VPst, VBlw, VPre,VMAbv,VMBlw,VMPst, IS, VAbv,
MPst, MPre, MBlw, MBlw, B, MBlw, MBlw, VPst,VMPst,VMPst, B, MBlw, VPst, VPre, VAbv, VAbv,
VMPst,VMPst,VMBlw, B,VMPst, VBlw, VPst, CGJ, CGJ, VPst,VMAbv,VMAbv,FMAbv, FAbv,CMAbv,FMAbv,
VMAbv,FMAbv, VAbv, IS,FMAbv, B,FMAbv, B, CGJ, WJ, CGJ, GB,CMAbv,CMAbv, B, GB,
B, VAbv, SUB, FPst, FPst,VMBlw, FPst, FPst, FBlw,VMAbv,FMBlw, VAbv, VPre, B, MPre, MBlw,
SUB, FAbv, FAbv, MAbv, SUB, Sk, VPst, VAbv,VMAbv,VMAbv, FAbv,CMAbv, VPst, H, B, O,
SMAbv,SMBlw,SMAbv,SMAbv,SMAbv, VPst, IS, VBlw, FAbv,VMPre,VMPre,FMAbv,CMBlw,VMBlw,VMBlw,VMAbv,
CS, O,FMAbv, ZWNJ, CGJ, WJ, WJ, WJ, O,FMPst, O, O, H, MPst, VPst, H,
VMAbv, VAbv,VMBlw, B, VBlw, FPst, VPst, FAbv,VMPst, B,CMAbv, VAbv, MBlw, MPst, MBlw, H,
O, VBlw, MPst, MPre, MAbv, MBlw, O, B, FAbv, FAbv, FPst, VBlw, B, B, VPre, O,
VMPst, IS, O,VMPst, VBlw, VPst,VMBlw,VMBlw,VMAbv, O, IS,VMBlw, B,VMPst,VMAbv,VMPst,
CS, CS, B, N, N, O, HN, VPre, VBlw, VAbv, IS,CMAbv, O, VPst, B, R,
R,CMBlw, VAbv, VPre,VMAbv,VMAbv, H, VAbv,CMBlw,FMAbv, B, CS, CS, H,CMBlw,VMPst,
H,VMPst, VAbv,VMAbv, VPst, IS, R, MPst, R, MPst,CMBlw, B,FMBlw, VBlw,VMAbv, R,
MBlw, MBlw, GB, FBlw, FBlw,CMAbv, IS, VBlw, IS, GB, VAbv, R,VMPst, H, H, B,
H, B,VMBlw, O, VBlw,
};
static const uint16_t
hb_use_u16[776] =
hb_use_u16[784] =
{
0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 5, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0,
@ -332,14 +336,14 @@ hb_use_u16[776] =
9,242, 73,243, 0, 0, 0, 0,244, 9, 9,245,246, 2,247, 9,
248,249, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9,250,
251, 48, 9,252,253, 2, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 98,254, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
9, 9, 9,255, 0, 0, 0, 0, 9, 9, 9, 9,256,257,258,258,
259,260, 0, 0, 0, 0,261, 0, 9, 9, 9, 9, 9,262, 0, 0,
9, 9, 9, 9, 9, 9,105, 70, 94,263, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,264, 9, 9, 70,265,266, 0, 0, 0,
0, 9,267, 0, 9, 9,268, 2, 9, 9, 9, 9,269, 2, 0, 0,
129,129,129,129,129,129,129,129,160,160,160,160,160,160,160,160,
160,160,160,160,160,160,160,129,
9, 9, 9,254,255,256, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
9, 9, 9,257, 0, 0, 0, 0, 9, 9, 9, 9,258,259,260,260,
261,262, 0, 0, 0, 0,263, 0, 9, 9, 9, 9, 9,264, 0, 0,
9, 9, 9, 9, 9, 9,105, 70, 94,265, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,266, 9, 9, 70,267,268, 0, 0, 0,
0, 9,269, 0, 9, 9,270, 2, 0, 0, 0, 0, 0, 9,271, 2,
9, 9, 9, 9,272, 2, 0, 0,129,129,129,129,129,129,129,129,
160,160,160,160,160,160,160,160,160,160,160,160,160,160,160,129,
};
static inline unsigned
@ -350,7 +354,7 @@ hb_use_b4 (const uint8_t* a, unsigned i)
static inline uint_fast8_t
hb_use_get_category (unsigned u)
{
return u<921600u?hb_use_u8[2753+(((hb_use_u8[593+(((hb_use_u16[((hb_use_u8[113+(((hb_use_b4(hb_use_u8,u>>1>>3>>3>>5))<<5)+((u>>1>>3>>3)&31u))])<<3)+((u>>1>>3)&7u)])<<3)+((u>>1)&7u))])<<1)+((u)&1u))]:O;
return u<921600u?hb_use_u8[2777+(((hb_use_u8[593+(((hb_use_u16[((hb_use_u8[113+(((hb_use_b4(hb_use_u8,u>>1>>3>>3>>5))<<5)+((u>>1>>3>>3)&31u))])<<3)+((u>>1>>3)&7u)])<<3)+((u>>1)&7u))])<<1)+((u)&1u))]:O;
}
#undef B

@ -342,6 +342,40 @@ _hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan HB_UNUSED,
}
break;
case HB_SCRIPT_KHOJKI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x11200u:
switch (buffer->cur (1).codepoint)
{
case 0x1122Cu: case 0x11231u: case 0x11233u:
matched = true;
break;
}
break;
case 0x11206u:
matched = 0x1122Cu == buffer->cur (1).codepoint;
break;
case 0x1122Cu:
switch (buffer->cur (1).codepoint)
{
case 0x11230u: case 0x11231u:
matched = true;
break;
}
break;
case 0x11240u:
matched = 0x1122Eu == buffer->cur (1).codepoint;
break;
}
(void) buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
break;
case HB_SCRIPT_KHUDAWADI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{

@ -7,6 +7,7 @@
# Updated for Unicode 12.1 by Andrew Glass 2019-05-30
# Updated for Unicode 13.0 by Andrew Glass 2020-07-28
# Updated for Unicode 14.0 by Andrew Glass 2021-09-28
# Updated for Unicode 15.0 by Andrew Glass 2022-09-16
# ================================================
# ================================================
@ -19,9 +20,12 @@
0F7A..0F7D ; Bottom # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO # Not really below, but need to override to fit into Universal model
0F80 ; Bottom # Mn TIBETAN VOWEL SIGN REVERSED I # Not really below, but need to override to fit into Universal model
A9BF ; Bottom # Mc JAVANESE CONSONANT SIGN CAKRA
10A38 ; Bottom # Mn KHAROSHTHI SIGN BAR ABOVE # Overriden, ccc controls order USE issue #26
11127..11129 ; Bottom # Mn [3] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN II
1112D ; Bottom # Mn CHAKMA VOWEL SIGN AI
11130 ; Bottom # Mn CHAKMA VOWEL SIGN OI
1BF2..1BF3 ; Bottom # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN # see USE issue #20
# ================================================
@ -42,12 +46,15 @@ A9BE ; Right # Mc JAVANESE CONSONANT SIGN PENGKAL # Reduced from
0F74 ; Top # Mn TIBETAN VOWEL SIGN U # Not really above, but need to override to fit into Universal model
1A18 ; Top # Mn BUGINESE VOWEL SIGN U # Workaround to allow below to occur before above by treating all below marks as above
AA35   ; Top # Mn       CHAM CONSONANT SIGN
1112A..1112B ; Top # Mn [2] CHAKMA VOWEL SIGN U..CHAKMA VOWEL SIGN UU # see USE issue #25
11131..11132 ; Top # Mn [2] CHAKMA O MARK..CHAKMA AU MARK # see USE issue #25
1E4EC..1E4EF ; Top # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH # 1E4EE is below, but made to for ccc
# ================================================
# Indic_Positional_Category=Top_And_Right
0E33 ; Top_And_Right # Lo THAI CHARACTER SARA AM # IMC has Right, which seems to be a mistake.
0EB3 ; Top_And_Right # Lo LAO VOWEL SIGN AM # IMC has Right, which seems to be a mistake.
0E33 ; Top_And_Right # Lo THAI CHARACTER SARA AM # IPC has Right, which seems to be a mistake.
0EB3 ; Top_And_Right # Lo LAO VOWEL SIGN AM # IPC has Right, which seems to be a mistake.
# ================================================
# ================================================
@ -72,6 +79,9 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN
16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR
16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
16F8F..16F92 ; Bottom # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
#HIEROGLYPHS defined here while ISC is being used as a proxy for dedicated Hieroglyph cluster
13440 ; Bottom # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
13447..13455 ; Bottom # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
# ================================================
@ -84,6 +94,7 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN
07EB..07F3 ; Top # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07FD ; Top # Mn NKO DANTAYALAN # Not really top, but assigned here to allow ccc to control mark order
1885..1886 ; Top # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
1CF8..1CF9 ; Top # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
16B30..16B36 ; Top # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM

@ -8,98 +8,106 @@
#
# Scope: This file enumerates sequences of characters that should be treated as invalid clusters
0905 0946 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E
0905 093E ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA
0930 094D 0907 ; # DEVANAGARI LETTER RA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER I
0909 0941 ; # DEVANAGARI LETTER U, DEVANAGARI VOWEL SIGN U
090F 0945 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN CANDRA E
090F 0946 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN SHORT E
090F 0947 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN E
0905 0949 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA O
0906 0945 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN CANDRA E
0905 094A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT O
0906 0946 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN SHORT E
0905 094B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN O
0906 0947 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN E
0905 094C ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AU
0906 0948 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN AI
0905 0945 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA E
0905 093A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OE
0905 093B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OOE
0906 093A ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN OE
0905 094F ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AW
0905 0956 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UE
0905 0957 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UUE
0985 09BE ; # BENGALI LETTER A, BENGALI VOWEL SIGN AA
098B 09C3 ; # BENGALI LETTER VOCALIC R, BENGALI VOWEL SIGN VOCALIC R
098C 09E2 ; # BENGALI LETTER VOCALIC L, BENGALI VOWEL SIGN VOCALIC L
0A05 0A3E ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AA
0A72 0A3F ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN I
0A72 0A40 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN II
0A73 0A41 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN U
0A73 0A42 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN UU
0A72 0A47 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN EE
0A05 0A48 ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AI
0A73 0A4B ; # GURMUKHI URA, GURMUKHI VOWEL SIGN OO
0A05 0A4C ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AU
0A85 0ABE ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA
0A85 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA E
0A85 0AC7 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN E
0A85 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AI
0A85 0AC9 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA O
0A85 0ACB ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN O
0A85 0ABE 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN CANDRA E
0A85 0ACC ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AU
0A85 0ABE 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN AI
0AC5 0ABE ; # GUJARATI VOWEL SIGN CANDRA E, GUJARATI VOWEL SIGN AA
0B05 0B3E ; # ORIYA LETTER A, ORIYA VOWEL SIGN AA
0B0F 0B57 ; # ORIYA LETTER E, ORIYA AU LENGTH MARK
0B13 0B57 ; # ORIYA LETTER O, ORIYA AU LENGTH MARK
0B85 0BC2 ; # TAMIL LETTER A, TAMIL VOWEL SIGN UU
0C12 0C55 ; # TELUGU LETTER O, TELUGU LENGTH MARK
0C12 0C4C ; # TELUGU LETTER O, TELUGU VOWEL SIGN AU
0C3F 0C55 ; # TELUGU VOWEL SIGN I, TELUGU LENGTH MARK
0C46 0C55 ; # TELUGU VOWEL SIGN E, TELUGU LENGTH MARK
0C4A 0C55 ; # TELUGU VOWEL SIGN O, TELUGU LENGTH MARK
0C89 0CBE ; # KANNADA LETTER U, KANNADA VOWEL SIGN AA
0C92 0CCC ; # KANNADA LETTER O, KANNADA VOWEL SIGN AU
0C8B 0CBE ; # KANNADA LETTER VOCALIC R, KANNADA VOWEL SIGN AA
0D07 0D57 ; # MALAYALAM LETTER I, MALAYALAM AU LENGTH MARK
0D09 0D57 ; # MALAYALAM LETTER U, MALAYALAM AU LENGTH MARK
0D0E 0D46 ; # MALAYALAM LETTER E, MALAYALAM VOWEL SIGN E
0D12 0D3E ; # MALAYALAM LETTER O, MALAYALAM VOWEL SIGN AA
0D12 0D57 ; # MALAYALAM LETTER O, MALAYALAM AU LENGTH MARK
0D85 0DCF ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN AELA-PILLA
0D85 0DD0 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN KETTI AEDA-PILLA
0D85 0DD1 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN DIGA AEDA-PILLA
0D8B 0DDF ; # SINHALA LETTER UYANNA, SINHALA VOWEL SIGN GAYANUKITTA
0D8D 0DD8 ; # SINHALA LETTER IRUYANNA, SINHALA VOWEL SIGN GAETTA-PILLA
0D8F 0DDF ; # SINHALA LETTER ILUYANNA, SINHALA VOWEL SIGN GAYANUKITTA
0D91 0DCA ; # SINHALA LETTER EYANNA, SINHALA SIGN AL-LAKUNA
0D91 0DD9 ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA
0D91 0DDA ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN DIGA KOMBUVA
0D91 0DDC ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA
0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA
0D91 0DDE ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA
0D94 0DDF ; # SINHALA LETTER OYANNA, SINHALA VOWEL SIGN GAYANUKITTA
11005 11038 ; # BRAHMI LETTER A, BRAHMI VOWEL SIGN AA
1100B 1103E ; # BRAHMI LETTER VOCALIC R, BRAHMI VOWEL SIGN VOCALIC R
1100F 11042 ; # BRAHMI LETTER E, BRAHMI VOWEL SIGN E
11680 116AD ; # TAKRI LETTER A, TAKRI VOWEL SIGN AA
11686 116B2 ; # TAKRI LETTER E, TAKRI VOWEL SIGN E
11680 116B4 ; # TAKRI LETTER A, TAKRI VOWEL SIGN O
11680 116B5 ; # TAKRI LETTER A, TAKRI VOWEL SIGN AU
112B0 112E0 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AA
112B0 112E5 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN E
112B0 112E6 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AI
112B0 112E7 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN O
112B0 112E8 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AU
11481 114B0 ; # TIRHUTA LETTER A, TIRHUTA VOWEL SIGN AA
114AA 114B5 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC R
114AA 114B6 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC RR
1148B 114BA ; # TIRHUTA LETTER E, TIRHUTA VOWEL SIGN SHORT E
1148D 114BA ; # TIRHUTA LETTER O, TIRHUTA VOWEL SIGN SHORT E
11600 11639 ; # MODI LETTER A, MODI VOWEL SIGN E
11600 1163A ; # MODI LETTER A, MODI VOWEL SIGN AI
11601 11639 ; # MODI LETTER AA, MODI VOWEL SIGN E
11601 1163A ; # MODI LETTER AA, MODI VOWEL SIGN AI
0905 0946 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E
0905 093E ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA
0930 094D 0907 ; # DEVANAGARI LETTER RA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER I
0909 0941 ; # DEVANAGARI LETTER U, DEVANAGARI VOWEL SIGN U
090F 0945 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN CANDRA E
090F 0946 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN SHORT E
090F 0947 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN E
0905 0949 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA O
0906 0945 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN CANDRA E
0905 094A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT O
0906 0946 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN SHORT E
0905 094B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN O
0906 0947 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN E
0905 094C ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AU
0906 0948 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN AI
0905 0945 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA E
0905 093A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OE
0905 093B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OOE
0906 093A ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN OE
0905 094F ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AW
0905 0956 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UE
0905 0957 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UUE
0985 09BE ; # BENGALI LETTER A, BENGALI VOWEL SIGN AA
098B 09C3 ; # BENGALI LETTER VOCALIC R, BENGALI VOWEL SIGN VOCALIC R
098C 09E2 ; # BENGALI LETTER VOCALIC L, BENGALI VOWEL SIGN VOCALIC L
0A05 0A3E ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AA
0A72 0A3F ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN I
0A72 0A40 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN II
0A73 0A41 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN U
0A73 0A42 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN UU
0A72 0A47 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN EE
0A05 0A48 ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AI
0A73 0A4B ; # GURMUKHI URA, GURMUKHI VOWEL SIGN OO
0A05 0A4C ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AU
0A85 0ABE ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA
0A85 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA E
0A85 0AC7 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN E
0A85 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AI
0A85 0AC9 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA O
0A85 0ACB ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN O
0A85 0ABE 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN CANDRA E
0A85 0ACC ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AU
0A85 0ABE 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN AI
0AC5 0ABE ; # GUJARATI VOWEL SIGN CANDRA E, GUJARATI VOWEL SIGN AA
0B05 0B3E ; # ORIYA LETTER A, ORIYA VOWEL SIGN AA
0B0F 0B57 ; # ORIYA LETTER E, ORIYA AU LENGTH MARK
0B13 0B57 ; # ORIYA LETTER O, ORIYA AU LENGTH MARK
0B85 0BC2 ; # TAMIL LETTER A, TAMIL VOWEL SIGN UU
0C12 0C55 ; # TELUGU LETTER O, TELUGU LENGTH MARK
0C12 0C4C ; # TELUGU LETTER O, TELUGU VOWEL SIGN AU
0C3F 0C55 ; # TELUGU VOWEL SIGN I, TELUGU LENGTH MARK
0C46 0C55 ; # TELUGU VOWEL SIGN E, TELUGU LENGTH MARK
0C4A 0C55 ; # TELUGU VOWEL SIGN O, TELUGU LENGTH MARK
0C89 0CBE ; # KANNADA LETTER U, KANNADA VOWEL SIGN AA
0C92 0CCC ; # KANNADA LETTER O, KANNADA VOWEL SIGN AU
0C8B 0CBE ; # KANNADA LETTER VOCALIC R, KANNADA VOWEL SIGN AA
0D07 0D57 ; # MALAYALAM LETTER I, MALAYALAM AU LENGTH MARK
0D09 0D57 ; # MALAYALAM LETTER U, MALAYALAM AU LENGTH MARK
0D0E 0D46 ; # MALAYALAM LETTER E, MALAYALAM VOWEL SIGN E
0D12 0D3E ; # MALAYALAM LETTER O, MALAYALAM VOWEL SIGN AA
0D12 0D57 ; # MALAYALAM LETTER O, MALAYALAM AU LENGTH MARK
0D85 0DCF ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN AELA-PILLA
0D85 0DD0 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN KETTI AEDA-PILLA
0D85 0DD1 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN DIGA AEDA-PILLA
0D8B 0DDF ; # SINHALA LETTER UYANNA, SINHALA VOWEL SIGN GAYANUKITTA
0D8D 0DD8 ; # SINHALA LETTER IRUYANNA, SINHALA VOWEL SIGN GAETTA-PILLA
0D8F 0DDF ; # SINHALA LETTER ILUYANNA, SINHALA VOWEL SIGN GAYANUKITTA
0D91 0DCA ; # SINHALA LETTER EYANNA, SINHALA SIGN AL-LAKUNA
0D91 0DD9 ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA
0D91 0DDA ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN DIGA KOMBUVA
0D91 0DDC ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA
0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA
0D91 0DDE ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA
0D94 0DDF ; # SINHALA LETTER OYANNA, SINHALA VOWEL SIGN GAYANUKITTA
11005 11038 ; # BRAHMI LETTER A, BRAHMI VOWEL SIGN AA
1100B 1103E ; # BRAHMI LETTER VOCALIC R, BRAHMI VOWEL SIGN VOCALIC R
1100F 11042 ; # BRAHMI LETTER E, BRAHMI VOWEL SIGN E
11680 116AD ; # TAKRI LETTER A, TAKRI VOWEL SIGN AA
11686 116B2 ; # TAKRI LETTER E, TAKRI VOWEL SIGN E
11680 116B4 ; # TAKRI LETTER A, TAKRI VOWEL SIGN O
11680 116B5 ; # TAKRI LETTER A, TAKRI VOWEL SIGN AU
11200 1122C ; # KHOJKI LETTER A, KHOJKI VOWEL SIGN AA
11240 1122E ; # KHOJKI LETTER SHORT I, KHOJKI VOWEL SIGN II
11206 1122C ; # KHOJKI LETTER O, KHOJKI VOWEL SIGN AA
11200 11231 ; # KHOJKI LETTER A, KHOJKI VOWEL SIGN AI
11200 11233 ; # KHOJKI LETTER A, KHOJKI VOWEL SIGN AU
11200 1122C 11231 ; # KHOJKI LETTER A, KHOJKI VOWEL SIGN AA, KHOJKI VOWEL SIGN AI
1122C 11230 ; # KHOJKI VOWEL SIGN AA, KHOJKI VOWEL SIGN E
1122C 11231 ; # KHOJKI VOWEL SIGN AA, KHOJKI VOWEL SIGN AI
112B0 112E0 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AA
112B0 112E5 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN E
112B0 112E6 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AI
112B0 112E7 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN O
112B0 112E8 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AU
11481 114B0 ; # TIRHUTA LETTER A, TIRHUTA VOWEL SIGN AA
114AA 114B5 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC R
114AA 114B6 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC RR
1148B 114BA ; # TIRHUTA LETTER E, TIRHUTA VOWEL SIGN SHORT E
1148D 114BA ; # TIRHUTA LETTER O, TIRHUTA VOWEL SIGN SHORT E
11600 11639 ; # MODI LETTER A, MODI VOWEL SIGN E
11600 1163A ; # MODI LETTER A, MODI VOWEL SIGN AI
11601 11639 ; # MODI LETTER AA, MODI VOWEL SIGN E
11601 1163A ; # MODI LETTER AA, MODI VOWEL SIGN AI

@ -5,6 +5,7 @@
# Updated for Unicode 12.1 by Andrew Glass 2019-05-24
# Updated for Unicode 13.0 by Andrew Glass 2020-07-28
# Updated for Unicode 14.0 by Andrew Glass 2021-09-25
# Updated for Unicode 15.0 by Andrew Glass 2022-09-16
# ================================================
# OVERRIDES TO ASSIGNED VALUES
@ -18,23 +19,13 @@ AA29 ; Bindu # Mn  CHAM VOWEL SIGN AA
# ================================================
# Indic_Syllabic_Category=Consonant
0840..0858 ; Consonant # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0F00..0F01 ; Consonant # Lo [2] TIBETAN SYLLABLE OM..TIBETAN MARK GTER YIG MGO TRUNCATED
0F04..0F06 ; Consonant # Po TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK CARET YIG MGO PHUR SHAD MA
19C1..19C7 ; Consonant # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B # Reassigned to avoid clustering with a base consonant
25CC ; Consonant # So DOTTED CIRCLE
25CC ; Consonant # So DOTTED CIRCLE #Reassigned to allow it to cluster as a generic base
# ================================================
# Indic_Syllabic_Category=Consonant_Dead
0F7F ; Consonant_Dead # Mc TIBETAN SIGN RNAM BCAD # reassigned so that visarga will form an independent cluster
# ================================================
# Indic_Syllabic_Category=Consonant_Final
0F35 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
0F37 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
0FC6 ; Consonant_Final # Mn TIBETAN SYMBOL PADMA GDAN
0F7F ; Consonant_Dead # Mc TIBETAN SIGN RNAM BCAD # reassigned so that visarga can form an independent cluster, but see #19
# ================================================
@ -49,8 +40,8 @@ AA29 ; Bindu # Mn  CHAM VOWEL SIGN AA
# ================================================
# Indic_Syllabic_Category=Nukta
0F71 ; Nukta # Mn TIBETAN VOWEL SIGN AA # Reassigned to get this before an above vowel
10A38..10A3A ; Nukta # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
0F71 ; Nukta # Mn TIBETAN VOWEL SIGN AA # Reassigned to get this before an above vowel, but see #22
1BF2..1BF3 ; Nukta # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN # see USE issue #20
# ================================================
@ -73,6 +64,9 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
# Indic_Syllabic_Category=Consonant
0800..0815 ; Consonant # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
0840..0858 ; Consonant # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0F00..0F01 ; Consonant # Lo [2] TIBETAN SYLLABLE OM..TIBETAN MARK GTER YIG MGO TRUNCATED
0F04..0F06 ; Consonant # Po TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK CARET YIG MGO PHUR SHAD MA
1800 ; Consonant # Po MONGOLIAN BIRGA # Reassigned so that legacy Birga + MFVS sequences still work
1807 ; Consonant # Po MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER
180A ; Consonant # Po MONGOLIAN NIRUGU
@ -94,11 +88,13 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
10E80..10EA9 ; Consonant # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EB0..10EB1 ; Consonant # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
10F30..10F45 ; Consonant # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
10F70..10F81 ; Consonant # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH
111DA ; Consonant # Lo SHARADA EKAM
#HIEROGLYPHS to be moved to new category
13000..1342E ; Consonant # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
13000..1342F ; Consonant # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
#For the Begin and End segment to be handled fully correctly, the cluster model needs to be modified.
13437..13438 ; Consonant # Lo [2] EGYPTIAN HIEROGLYPH BEGIN SEGMENT..EGYPTIAN HIEROGLYPH END SEGMENT
13441..13446 ; Consonant # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..HIEROGLYPH WIDE LOST SIGN
16B00..16B2F ; Consonant # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU
16F00..16F4A ; Consonant # Lo [75] MIAO LETTER PA..MIAO LETTER RTE
16FE4 ; Consonant # Mn KHITAN SMALL SCRIPT FILLER # Avoids Mn pushing this into VOWEL class
@ -113,6 +109,8 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
1E14F ; Consonant # So NYIAKENG PUACHUE HMONG CIRCLED CA
1E290..1E2AD ; Consonant # Lo [30] TOTO LETTER PA..TOTO LETTER A
1E2C0..1E2EB ; Consonant # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH
1E4D0..1E4EA ; Consonant # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
1E4EB ; Consonant # Lm NAG MUNDARI SIGN OJOD
1E900..1E921 ; Consonant # Lu [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
1E922..1E943 ; Consonant # Ll [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
1E94B ; Consonant # Lm ADLAM NASALIZATION MARK
@ -140,7 +138,6 @@ FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SEL
0F39 ; Nukta # Mn TIBETAN MARK TSA -PHRU # NOW IN UNICODE 10.0
1885..1886 ; Nukta # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
18A9 ; Nukta # Mn MONGOLIAN LETTER ALI GALI DAGALGA
1B6B..1B73 ; Nukta # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
10AE5..10AE6 ; Nukta # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
16F4F ; Nukta # Mn MIAO SIGN CONSONANT MODIFIER BAR
1BC9D..1BC9E ; Nukta # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
@ -155,6 +152,7 @@ FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SEL
16AC0..16AC9 ; Number # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE
1E140..1E149 ; Number # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
1E2F0..1E2F9 ; Number # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
1E4F0..1E4F9 ; Number # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
1E950..1E959 ; Number # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
# ================================================
@ -176,7 +174,9 @@ FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SEL
# Indic_Syllabic_Category=Virama
2D7F ; Virama # Mn TIFINAGH CONSONANT JOINER
#HIEROGLYPHS to be moved to new category
13430..13436 ; Virama # Cf [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE
13439..1343B ; Virama # Cf [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM
# ================================================
@ -191,6 +191,21 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
0B55 ; Vowel_Dependent # Mn ORIYA SIGN OVERLINE
10EAB..10EAC ; Vowel_Dependent # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
16F51..16F87 ; Vowel_Dependent # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
1E4EC..1E4EF ; Vowel_Dependent # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
# ================================================
# Indic_Syllabic_Category=Cantillation_Mark
1CF8..1CF9 ; Cantillation_Mark # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
#HIEROGLYPHS to be moved to new category
13440 ; Cantillation_Mark # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
13447..13455 ; Cantillation_Mark # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
# ================================================
# Indic_Syllabic_Category=Symbol_Modifier
1B6B..1B73 ; Symbol_Modifier # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
# ================================================
# ================================================
@ -198,24 +213,56 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
# ================================================
# ================================================
# USE_Syllabic_Category=Hieroglyph
# 13000..1342E ; Hieroglyph # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
# USE, Extended_Syllabic_Category=Hieroglyph
# 13000..1342F ; Hieroglyph # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
# 13441..13446 ; Hieroglyph # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..HIEROGLYPH WIDE LOST SIGN
# ================================================
# USE_Syllabic_Category=Hieroglyph_Joiner
# 13430..13436 ; Hieroglyph_Joiner # Cf EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE
# USE, Extended_Syllabic_Category=Hieroglyph_Joiner
# 13430..13436 ; Hieroglyph_Joiner # Cf [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE
# 13439..1343B ; Hieroglyph_Joiner # Cf [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM
# ================================================
# USE_Syllabic_Category= Hieroglyph_Segment_Begin
# USE, Extended_Syllabic_Category=Hieroglyph_Mark_Begin
# 005B ; Hieroglyph_Mark_Begin # Ps LEFT SQUARE BRACKET
# 007B ; Hieroglyph_Mark_Begin # Ps LEFT CURLY BRACKET
# 27E6 ; Hieroglyph_Mark_Begin # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET
# 27E8 ; Hieroglyph_Mark_Begin # Ps MATHEMATICAL LEFT ANGLE BRACKET
# 2E22 ; Hieroglyph_Mark_Begin # Ps TOP LEFT HALF BRACKET
# 2E24 ; Hieroglyph_Mark_Begin # Ps BOTTOM LEFT HALF BRACKET
# ================================================
# USE, Extended_Syllabic_Category=Hieroglyph_Mark_End
# 005D ; Hieroglyph_Mark_Begin # Pe RIGHT SQUARE BRACKET
# 007D ; Hieroglyph_Mark_Begin # Pe RIGHT CURLY BRACKET
# 27E7 ; Hieroglyph_Mark_Begin # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET
# 27E9 ; Hieroglyph_Mark_Begin # Pe MATHEMATICAL RIGHT ANGLE BRACKET
# 2E23 ; Hieroglyph_Mark_Begin # Pe TOP RIGHT HALF BRACKET
# 2E25 ; Hieroglyph_Mark_Begin # Pe BOTTOM RIGHT HALF BRACKET
# ================================================
# USE, Extended_Syllabic_Category=Hieroglyph_Segment_Begin
# 13437 ; Hieroglyph_Segment_Begin # Cf EGYPTIAN HIEROGLYPH BEGIN SEGMENT
# ================================================
# USE_Syllabic_Category= Hieroglyph_Segment_End
# USE, Extended_Syllabic_Category=Hieroglyph_Segment_End
# 13438 ; Hieroglyph_Segment_End # Cf EGYPTIAN HIEROGLYPH END SEGMENT
# ================================================
# USE, Extended_Syllabic_Category=Hieroglyph_Mirror
# 13440 ; Hieroglyph_Mirror # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
# ================================================
# USE, Extended_Syllabic_Category=Hieroglyph_Modifier
# 13447..13455 ; Hieroglyph_Modifier # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
# ================================================
# eof

Loading…
Cancel
Save