[USE, Unicode 16] Update the data files

This uses the data files from the USE directory at revision 0fbf022acc
(the link is truncated in this capture: <0fbf022acc/USE>)
and closes #4858.
pull/4859/head
David Corbett 5 months ago
parent 98353ecefe
commit a5c9cc4e22
  1. 8
      src/ms-use/IndicPositionalCategory-Additional.txt
  2. 34
      src/ms-use/IndicSyllabicCategory-Additional.txt

@ -9,6 +9,7 @@
# Updated for Unicode 14.0 by Andrew Glass 2021-09-28
# Updated for Unicode 15.0 by Andrew Glass 2022-09-16
# Updated for Unicode 15.1 by Andrew Glass 2023-09-14
# Updated for Unicode 16.0 by Andrew Glass 2024-09-11
# ================================================
# ================================================
@ -27,7 +28,6 @@ A9BF ; Bottom # Mc JAVANESE CONSONANT SIGN CAKRA
11130 ; Bottom # Mn CHAKMA VOWEL SIGN OI
1BF2..1BF3 ; Bottom # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN # see USE issue #20
# ================================================
# Indic_Positional_Category=Left
@ -80,9 +80,8 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN
16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR
16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
16F8F..16F92 ; Bottom # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
#HIEROGLYPHS defined here while ISC is being used as a proxy for dedicated Hieroglyph cluster
13440 ; Bottom # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
13447..13455 ; Bottom # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
1E5EE ; Bottom # Mn OL ONAL SIGN MU # Not really below, but need to override to fit into Universal model
1E5EF ; Bottom # Mn OL ONAL SIGN IKIR
# ================================================
@ -98,6 +97,7 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN
1CF8..1CF9 ; Top # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10D69..10D6D ; Top # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK
16B30..16B36 ; Top # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
1E130..1E136 ; Top # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
1E2AE ; Top # Mn TOTO SIGN RISING TONE

@ -7,6 +7,7 @@
# Updated for Unicode 14.0 by Andrew Glass 2021-09-25
# Updated for Unicode 15.0 by Andrew Glass 2022-09-16
# Updated for Unicode 15.1 by Andrew Glass 2023-09-14
# Updated for Unicode 16.0 by Andrew Glass 2024-09-11
# ================================================
# OVERRIDES TO ASSIGNED VALUES
@ -16,6 +17,7 @@
193A ; Bindu # Mn LIMBU SIGN KEMPHRENG
AA29 ; Bindu # Mn CHAM VOWEL SIGN AA
10A0D ; Bindu # Mn KHAROSHTHI SIGN DOUBLE RING BELOW
113CE ; Bindu # Mn TULU-TIGALARI SIGN VIRAMA
# ================================================
@ -30,6 +32,16 @@ AA29 ; Bindu # Mn CHAM VOWEL SIGN AA
# ================================================
# Indic_Syllabic_Category=Consonant_With_Stacker
11A3A ; Consonant_With_Stacker # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
# ================================================
# Indic_Syllabic_Category=Consonant_Subjoined
11A3B..11A3E ; Consonant_Subjoined # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
# ================================================
# Indic_Syllabic_Category=Consonant_Final_Modifier
1C36 ; Consonant_Final_Modifier # Mn LEPCHA SIGN RAN
@ -43,6 +55,7 @@ AA29 ; Bindu # Mn CHAM VOWEL SIGN AA
# Indic_Syllabic_Category=Nukta
0F71 ; Nukta # Mn TIBETAN VOWEL SIGN AA # Reassigned to get this before an above vowel, but see #22
1BF2..1BF3 ; Nukta # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN # see USE issue #20
113CF ; Nukta # Mc TULU-TIGALARI SIGN LOOPED VIRAMA
# ================================================
@ -71,8 +84,9 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
1800 ; Consonant # Po MONGOLIAN BIRGA # Reassigned so that legacy Birga + MFVS sequences still work
1807 ; Consonant # Po MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER
180A ; Consonant # Po MONGOLIAN NIRUGU
1820..1878 ; Consonant # Lo [88] MONGOLIAN LETTER A..MONGOLIAN LETTER CHA WITH TWO DOTS
1820..1842 ; Consonant # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
1843 ; Consonant # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
1844..1878 ; Consonant # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS
2D30..2D67 ; Consonant # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO
2D6F ; Consonant # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
10570..1057A ; Consonant # Lo [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA
@ -86,6 +100,10 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
10AC0..10AC7 ; Consonant # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW
10AC9..10AE4 ; Consonant # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW
10D00..10D23 ; Consonant # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
10D4A..10D4F ; Consonant # Lo [6] GARAY VOWEL SIGN A..GARAY SUKUN
10D50..10D65 ; Consonant # Lu [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA
10D70..10D85 ; Consonant # Ll [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA
10D6F ; Consonant # Lm GARAY REDUPLICATION MARK
10E80..10EA9 ; Consonant # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EB0..10EB1 ; Consonant # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
10F30..10F45 ; Consonant # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@ -95,6 +113,7 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
16F00..16F4A ; Consonant # Lo [75] MIAO LETTER PA..MIAO LETTER RTE
16FE4 ; Consonant # Mn KHITAN SMALL SCRIPT FILLER # Avoids Mn pushing this into VOWEL class
18B00..18CD5 ; Consonant # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5
18CFF ; Consonant # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF
1BC00..1BC6A ; Consonant # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
1BC70..1BC7C ; Consonant # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
1BC80..1BC88 ; Consonant # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
@ -107,6 +126,8 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
1E2C0..1E2EB ; Consonant # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH
1E4D0..1E4EA ; Consonant # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
1E4EB ; Consonant # Lm NAG MUNDARI SIGN OJOD
1E5D0..1E5ED ; Consonant # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG
1E5F0 ; Consonant # Lo OL ONAL SIGN HODDOND
1E900..1E921 ; Consonant # Lu [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
1E922..1E943 ; Consonant # Ll [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
1E94B ; Consonant # Lm ADLAM NASALIZATION MARK
@ -149,6 +170,7 @@ FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SEL
1E140..1E149 ; Number # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
1E2F0..1E2F9 ; Number # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
1E4F0..1E4F9 ; Number # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
1E5F1..1E5FA ; Number # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE
1E950..1E959 ; Number # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
# ================================================
@ -173,18 +195,13 @@ FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SEL
# ================================================
# Indic_Syllabic_Category=Vowel_Independent
AAB1 ; Vowel_Independent # Lo TAI VIET VOWEL AA
AABA ; Vowel_Independent # Lo TAI VIET VOWEL UA
AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
# ================================================
# Indic_Syllabic_Category=Vowel_Dependent
0B55 ; Vowel_Dependent # Mn ORIYA SIGN OVERLINE
10D69..10D6D ; Vowel_Dependent # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK
10EAB..10EAC ; Vowel_Dependent # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
16F51..16F87 ; Vowel_Dependent # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
1E4EC..1E4EF ; Vowel_Dependent # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
1E5EE..1E5EF ; Vowel_Dependent # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR
# ================================================
@ -207,6 +224,7 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
13000..1342F ; Hieroglyph # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
1343C..1343F ; Hieroglyph # Cf [4] EGYPTIAN HIEROGLYPH BEGIN ENCLOSURE..END WALLED ENCLOSURE
13441..13446 ; Hieroglyph # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..HIEROGLYPH WIDE LOST SIGN
13460..143FA ; Hieroglyph # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA
# ================================================

Loading…
Cancel
Save