Merge pull request #3206 from harfbuzz/unicode-14

Update to Unicode 14.0.0
pull/3207/head
Behdad Esfahbod 3 years ago committed by GitHub
commit 175f24a459
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 3
      src/hb-common.cc
  2. 14
      src/hb-common.h
  3. 9
      src/hb-ot-shape-complex-arabic-joining-list.hh
  4. 38
      src/hb-ot-shape-complex-arabic-table.hh
  5. 42
      src/hb-ot-shape-complex-indic-table.cc
  6. 76
      src/hb-ot-shape-complex-use-table.hh
  7. 4
      src/hb-ot-shape-complex-vowel-constraints.cc
  8. 7
      src/hb-ot-shape-complex.hh
  9. 6796
      src/hb-ucd-table.hh
  10. 8
      src/hb-unicode-emoji-table.hh
  11. 4
      src/update-unicode-tables.make
  12. 13
      test/api/test-unicode.c

@ -595,6 +595,9 @@ hb_script_get_horizontal_direction (hb_script_t script)
case HB_SCRIPT_CHORASMIAN:
case HB_SCRIPT_YEZIDI:
/* Unicode-14.0 additions */
case HB_SCRIPT_OLD_UYGHUR:
return HB_DIRECTION_RTL;

@ -476,6 +476,11 @@ hb_language_get_default (void);
* @HB_SCRIPT_DIVES_AKURU: `Diak`, Since: 2.6.7
* @HB_SCRIPT_KHITAN_SMALL_SCRIPT: `Kits`, Since: 2.6.7
* @HB_SCRIPT_YEZIDI: `Yezi`, Since: 2.6.7
* @HB_SCRIPT_CYPRO_MINOAN: `Cpmn`, Since: REPLACEME
* @HB_SCRIPT_OLD_UYGHUR: `Ougr`, Since: REPLACEME
* @HB_SCRIPT_TANGSA: `Tnsa`, Since: REPLACEME
* @HB_SCRIPT_TOTO: `Toto`, Since: REPLACEME
* @HB_SCRIPT_VITHKUQI: `Vith`, Since: REPLACEME
* @HB_SCRIPT_INVALID: No script set
*
* Data type for scripts. Each #hb_script_t's value is an #hb_tag_t corresponding
@ -683,6 +688,15 @@ typedef enum
HB_SCRIPT_KHITAN_SMALL_SCRIPT = HB_TAG ('K','i','t','s'), /*13.0*/
HB_SCRIPT_YEZIDI = HB_TAG ('Y','e','z','i'), /*13.0*/
/*
* Since REPLACEME
*/
HB_SCRIPT_CYPRO_MINOAN = HB_TAG ('C','p','m','n'), /*14.0*/
HB_SCRIPT_OLD_UYGHUR = HB_TAG ('O','u','g','r'), /*14.0*/
HB_SCRIPT_TANGSA = HB_TAG ('T','n','s','a'), /*14.0*/
HB_SCRIPT_TOTO = HB_TAG ('T','o','t','o'), /*14.0*/
HB_SCRIPT_VITHKUQI = HB_TAG ('V','i','t','h'), /*14.0*/
/* No script set. */
HB_SCRIPT_INVALID = HB_TAG_NONE,

@ -6,10 +6,10 @@
*
* on files with these headers:
*
* # ArabicShaping-13.0.0.txt
* # Date: 2020-01-31, 23:55:00 GMT [KW, RP]
* # Scripts-13.0.0.txt
* # Date: 2020-01-22, 00:07:43 GMT
* # ArabicShaping-14.0.0.txt
* # Date: 2021-05-21, 01:54:00 GMT [KW, RP]
* # Scripts-14.0.0.txt
* # Date: 2021-07-10, 00:35:31 GMT
*/
#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH
@ -29,6 +29,7 @@ has_arabic_joining (hb_script_t script)
case HB_SCRIPT_MANICHAEAN:
case HB_SCRIPT_MONGOLIAN:
case HB_SCRIPT_NKO:
case HB_SCRIPT_OLD_UYGHUR:
case HB_SCRIPT_PHAGS_PA:
case HB_SCRIPT_PSALTER_PAHLAVI:
case HB_SCRIPT_SOGDIAN:

@ -6,10 +6,10 @@
*
* on files with these headers:
*
* # ArabicShaping-13.0.0.txt
* # Date: 2020-01-31, 23:55:00 GMT [KW, RP]
* # Blocks-13.0.0.txt
* # Date: 2019-07-10, 19:06:00 GMT [KW]
* # ArabicShaping-14.0.0.txt
* # Date: 2021-05-21, 01:54:00 GMT [KW, RP]
* # Blocks-14.0.0.txt
* # Date: 2021-01-22, 23:29:00 GMT [KW]
* UnicodeData.txt does not have a header.
*/
@ -75,13 +75,17 @@ static const uint8_t joining_table[] =
/* Syriac Supplement */
/* 0860 */ D,U,D,D,D,D,U,R,D,R,R,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 0880 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 0860 */ D,U,D,D,D,D,U,R,D,R,R,X,X,X,X,X,
/* Arabic Extended-B */
/* 0860 */ R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,
/* 0880 */ R,R,R,C,C,C,D,U,U,D,D,D,D,D,R,X,U,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* Arabic Extended-A */
/* 08A0 */ D,D,D,D,D,D,D,D,D,D,R,R,R,U,R,D,D,R,R,D,D,X,D,D,D,R,D,D,D,D,D,D,
/* 08C0 */ D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 08A0 */ D,D,D,D,D,D,D,D,D,D,R,R,R,U,R,D,D,R,R,D,D,D,D,D,D,R,D,D,D,D,D,D,
/* 08C0 */ D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 08E0 */ X,X,U,
#define joining_offset_0x1806u 739
@ -137,23 +141,28 @@ static const uint8_t joining_table[] =
/* Sogdian */
/* 10F20 */ D,D,D,R,D,D,D,D,D,D,D,D,D,D,D,D,
/* 10F40 */ D,D,D,D,D,U,X,X,X,X,X,X,X,X,X,X,X,D,D,D,R,
/* 10F40 */ D,D,D,D,D,U,X,X,X,X,X,X,X,X,X,X,X,D,D,D,R,X,X,X,X,X,X,X,X,X,X,X,
/* 10F60 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* Old Uyghur */
#define joining_offset_0x10fb0u 1219
/* 10F60 */ D,D,D,D,R,R,D,D,D,D,D,D,D,D,D,D,
/* 10F80 */ D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 10FA0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* Chorasmian */
/* 10FA0 */ D,U,D,D,R,R,R,U,D,R,R,D,D,R,D,D,
/* 10FC0 */ U,D,R,R,D,U,U,U,U,R,D,L,
#define joining_offset_0x110bdu 1247
#define joining_offset_0x110bdu 1338
/* Kaithi */
/* 110A0 */ U,X,X,
/* 110C0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,U,
#define joining_offset_0x1e900u 1264
#define joining_offset_0x1e900u 1355
/* Adlam */
@ -161,7 +170,7 @@ static const uint8_t joining_table[] =
/* 1E920 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
/* 1E940 */ D,D,D,D,X,X,X,X,X,X,X,T,
}; /* Table items: 1340; occupancy: 57% */
}; /* Table items: 1431; occupancy: 57% */
static unsigned int
@ -189,8 +198,7 @@ joining_type (hb_codepoint_t u)
if (hb_in_range<hb_codepoint_t> (u, 0x10AC0u, 0x10AEFu)) return joining_table[u - 0x10AC0u + joining_offset_0x10ac0u];
if (hb_in_range<hb_codepoint_t> (u, 0x10B80u, 0x10BAFu)) return joining_table[u - 0x10B80u + joining_offset_0x10b80u];
if (hb_in_range<hb_codepoint_t> (u, 0x10D00u, 0x10D23u)) return joining_table[u - 0x10D00u + joining_offset_0x10d00u];
if (hb_in_range<hb_codepoint_t> (u, 0x10F30u, 0x10F54u)) return joining_table[u - 0x10F30u + joining_offset_0x10f30u];
if (hb_in_range<hb_codepoint_t> (u, 0x10FB0u, 0x10FCBu)) return joining_table[u - 0x10FB0u + joining_offset_0x10fb0u];
if (hb_in_range<hb_codepoint_t> (u, 0x10F30u, 0x10FCBu)) return joining_table[u - 0x10F30u + joining_offset_0x10f30u];
break;
case 0x11u:

@ -6,12 +6,12 @@
*
* on files with these headers:
*
* # IndicSyllabicCategory-13.0.0.txt
* # Date: 2019-07-22, 19:55:00 GMT [KW, RP]
* # IndicPositionalCategory-13.0.0.txt
* # Date: 2019-07-23, 00:01:00 GMT [KW, RP]
* # Blocks-13.0.0.txt
* # Date: 2019-07-10, 19:06:00 GMT [KW]
* # IndicSyllabicCategory-14.0.0.txt
* # Date: 2021-05-22, 01:01:00 GMT [KW, RP]
* # IndicPositionalCategory-14.0.0.txt
* # Date: 2021-05-22, 01:01:00 GMT [KW, RP]
* # Blocks-14.0.0.txt
* # Date: 2021-01-22, 23:29:00 GMT [KW]
*/
#include "hb.hh"
@ -27,9 +27,9 @@
#define ISC_Bi INDIC_SYLLABIC_CATEGORY_BINDU /* 91 chars; Bindu */
#define ISC_BJN INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER /* 20 chars; Brahmi_Joining_Number */
#define ISC_Ca INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK /* 59 chars; Cantillation_Mark */
#define ISC_C INDIC_SYLLABIC_CATEGORY_CONSONANT /* 2195 chars; Consonant */
#define ISC_CD INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD /* 12 chars; Consonant_Dead */
#define ISC_CF INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL /* 67 chars; Consonant_Final */
#define ISC_C INDIC_SYLLABIC_CATEGORY_CONSONANT /* 2206 chars; Consonant */
#define ISC_CD INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD /* 14 chars; Consonant_Dead */
#define ISC_CF INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL /* 70 chars; Consonant_Final */
#define ISC_CHL INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER /* 5 chars; Consonant_Head_Letter */
#define ISC_CIP INDIC_SYLLABIC_CATEGORY_CONSONANT_INITIAL_POSTFIXED /* 1 chars; Consonant_Initial_Postfixed */
#define ISC_CK INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER /* 2 chars; Consonant_Killer */
@ -38,18 +38,18 @@
#define ISC_CPR INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA /* 3 chars; Consonant_Preceding_Repha */
#define ISC_CPrf INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED /* 10 chars; Consonant_Prefixed */
#define ISC_CS INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED /* 94 chars; Consonant_Subjoined */
#define ISC_CSR INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA /* 4 chars; Consonant_Succeeding_Repha */
#define ISC_CSR INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA /* 1 chars; Consonant_Succeeding_Repha */
#define ISC_CWS INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER /* 8 chars; Consonant_With_Stacker */
#define ISC_GM INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK /* 3 chars; Gemination_Mark */
#define ISC_IS INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER /* 12 chars; Invisible_Stacker */
#define ISC_ZWJ INDIC_SYLLABIC_CATEGORY_JOINER /* 1 chars; Joiner */
#define ISC_ML INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER /* 1 chars; Modifying_Letter */
#define ISC_ZWNJ INDIC_SYLLABIC_CATEGORY_NON_JOINER /* 1 chars; Non_Joiner */
#define ISC_N INDIC_SYLLABIC_CATEGORY_NUKTA /* 31 chars; Nukta */
#define ISC_N INDIC_SYLLABIC_CATEGORY_NUKTA /* 32 chars; Nukta */
#define ISC_Nd INDIC_SYLLABIC_CATEGORY_NUMBER /* 491 chars; Number */
#define ISC_NJ INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER /* 1 chars; Number_Joiner */
#define ISC_x INDIC_SYLLABIC_CATEGORY_OTHER /* 1 chars; Other */
#define ISC_PK INDIC_SYLLABIC_CATEGORY_PURE_KILLER /* 23 chars; Pure_Killer */
#define ISC_PK INDIC_SYLLABIC_CATEGORY_PURE_KILLER /* 25 chars; Pure_Killer */
#define ISC_RS INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER /* 2 chars; Register_Shifter */
#define ISC_SM INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER /* 25 chars; Syllable_Modifier */
#define ISC_TL INDIC_SYLLABIC_CATEGORY_TONE_LETTER /* 7 chars; Tone_Letter */
@ -57,18 +57,18 @@
#define ISC_V INDIC_SYLLABIC_CATEGORY_VIRAMA /* 27 chars; Virama */
#define ISC_Vs INDIC_SYLLABIC_CATEGORY_VISARGA /* 35 chars; Visarga */
#define ISC_Vo INDIC_SYLLABIC_CATEGORY_VOWEL /* 30 chars; Vowel */
#define ISC_M INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT /* 683 chars; Vowel_Dependent */
#define ISC_VI INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT /* 484 chars; Vowel_Independent */
#define ISC_M INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT /* 686 chars; Vowel_Dependent */
#define ISC_VI INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT /* 486 chars; Vowel_Independent */
#define IMC_B INDIC_MATRA_CATEGORY_BOTTOM /* 351 chars; Bottom */
#define IMC_B INDIC_MATRA_CATEGORY_BOTTOM /* 352 chars; Bottom */
#define IMC_BL INDIC_MATRA_CATEGORY_BOTTOM_AND_LEFT /* 1 chars; Bottom_And_Left */
#define IMC_BR INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT /* 4 chars; Bottom_And_Right */
#define IMC_L INDIC_MATRA_CATEGORY_LEFT /* 64 chars; Left */
#define IMC_LR INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT /* 22 chars; Left_And_Right */
#define IMC_x INDIC_MATRA_CATEGORY_NOT_APPLICABLE /* 1 chars; Not_Applicable */
#define IMC_O INDIC_MATRA_CATEGORY_OVERSTRUCK /* 10 chars; Overstruck */
#define IMC_R INDIC_MATRA_CATEGORY_RIGHT /* 288 chars; Right */
#define IMC_T INDIC_MATRA_CATEGORY_TOP /* 415 chars; Top */
#define IMC_R INDIC_MATRA_CATEGORY_RIGHT /* 290 chars; Right */
#define IMC_T INDIC_MATRA_CATEGORY_TOP /* 418 chars; Top */
#define IMC_TB INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM /* 10 chars; Top_And_Bottom */
#define IMC_TBL INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_LEFT /* 2 chars; Top_And_Bottom_And_Left */
#define IMC_TBR INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT /* 1 chars; Top_And_Bottom_And_Right */
@ -231,11 +231,11 @@ static const uint16_t indic_table[] = {
/* 0C20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0C28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0C30 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0C38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(A,x), _(M,T), _(M,T),
/* 0C38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,T), _(M,T),
/* 0C40 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(x,x), _(M,T), _(M,T),
/* 0C48 */ _(M,TB), _(x,x), _(M,T), _(M,T), _(M,T), _(V,T), _(x,x), _(x,x),
/* 0C50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), _(M,B), _(x,x),
/* 0C58 */ _(C,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 0C58 */ _(C,x), _(C,x), _(C,x), _(x,x), _(x,x), _(CD,x), _(x,x), _(x,x),
/* 0C60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
/* 0C68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 0C70 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
@ -254,7 +254,7 @@ static const uint16_t indic_table[] = {
/* 0CC0 */ _(M,TR), _(M,R), _(M,R), _(M,R), _(M,R), _(x,x), _(M,T), _(M,TR),
/* 0CC8 */ _(M,TR), _(x,x), _(M,TR), _(M,TR), _(M,T), _(V,T), _(x,x), _(x,x),
/* 0CD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), _(M,R), _(x,x),
/* 0CD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(x,x),
/* 0CD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(CD,x), _(C,x), _(x,x),
/* 0CE0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
/* 0CE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 0CF0 */ _(x,x),_(CWS,x),_(CWS,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
@ -402,7 +402,7 @@ static const uint16_t indic_table[] = {
/* AA70 */ _(x,x), _(C,x), _(C,x), _(C,x), _(CP,x), _(CP,x), _(CP,x), _(x,x),
/* AA78 */ _(x,x), _(x,x), _(C,x), _(TM,R), _(TM,T), _(TM,R), _(C,x), _(C,x),
}; /* Table items: 1792; occupancy: 70% */
}; /* Table items: 1792; occupancy: 71% */
uint16_t
hb_indic_get_categories (hb_codepoint_t u)

@ -6,14 +6,14 @@
*
* on files with these headers:
*
* # IndicSyllabicCategory-13.0.0.txt
* # Date: 2019-07-22, 19:55:00 GMT [KW, RP]
* # IndicPositionalCategory-13.0.0.txt
* # Date: 2019-07-23, 00:01:00 GMT [KW, RP]
* # ArabicShaping-13.0.0.txt
* # Date: 2020-01-31, 23:55:00 GMT [KW, RP]
* # Blocks-13.0.0.txt
* # Date: 2019-07-10, 19:06:00 GMT [KW]
* # IndicSyllabicCategory-14.0.0.txt
* # Date: 2021-05-22, 01:01:00 GMT [KW, RP]
* # IndicPositionalCategory-14.0.0.txt
* # Date: 2021-05-22, 01:01:00 GMT [KW, RP]
* # ArabicShaping-14.0.0.txt
* # Date: 2021-05-21, 01:54:00 GMT [KW, RP]
* # Blocks-14.0.0.txt
* # Date: 2021-01-22, 23:29:00 GMT [KW]
* # Override values For Indic_Syllabic_Category
* # Not derivable
* # Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17
@ -199,7 +199,7 @@ static const uint8_t use_table[] = {
/* 0C00 */ VMAbv, VMPst, VMPst, VMPst, VMAbv, B, B, B, B, B, B, B, B, O, B, B,
/* 0C10 */ B, O, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 0C20 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B,
/* 0C30 */ B, B, B, B, B, B, B, B, B, B, O, O, O, B, VAbv, VAbv,
/* 0C30 */ B, B, B, B, B, B, B, B, B, B, O, O, CMBlw, B, VAbv, VAbv,
/* 0C40 */ VAbv, VPst, VPst, VPst, VPst, O, VAbv, VAbv, VAbv, O, VAbv, VAbv, VAbv, H, O, O,
/* 0C50 */ O, O, O, O, O, VAbv, VBlw, O, B, B, B, O, O, O, O, O,
/* 0C60 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B,
@ -278,13 +278,13 @@ static const uint8_t use_table[] = {
/* Tagalog */
/* 1700 */ B, B, B, B, B, B, B, B, B, B, B, B, B, O, B, B,
/* 1710 */ B, B, VAbv, VBlw, VBlw, O, O, O, O, O, O, O, O, O, O, O,
/* 1700 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 1710 */ B, B, VAbv, VBlw, VBlw, VPst, O, O, O, O, O, O, O, O, O, B,
/* Hanunoo */
/* 1720 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 1730 */ B, B, VAbv, VBlw, VBlw, O, O, O, O, O, O, O, O, O, O, O,
/* 1730 */ B, B, VAbv, VBlw, VPst, O, O, O, O, O, O, O, O, O, O, O,
/* Buhid */
@ -374,7 +374,7 @@ static const uint8_t use_table[] = {
/* 1B10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 1B20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 1B30 */ B, B, B, B, CMAbv, VPst, VAbv, VAbv, VBlw, VBlw, VBlw, VBlw, VAbv, VAbv, VPre, VPre,
/* 1B40 */ VPre, VPre, VAbv, VAbv, H, B, B, B, B, B, B, B, O, O, O, O,
/* 1B40 */ VPre, VPre, VAbv, VAbv, H, B, B, B, B, B, B, B, B, O, O, O,
/* 1B50 */ B, B, B, B, B, B, B, B, B, B, O, GB, GB, O, O, GB,
/* 1B60 */ O, S, GB, S, S, S, S, S, GB, S, S, SMAbv, SMBlw, SMAbv, SMAbv, SMAbv,
/* 1B70 */ SMAbv, SMAbv, SMAbv, SMAbv, O, O, O, O, O, O, O, O, O, O, O, O,
@ -630,7 +630,7 @@ static const uint8_t use_table[] = {
/* 11040 */ VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, HVM, O, O, O, O, O, O, O, O, O,
/* 11050 */ O, O, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
/* 11060 */ N, N, N, N, N, N, B, B, B, B, B, B, B, B, B, B,
/* 11070 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, HN,
/* 11070 */ VAbv, B, B, VAbv, VAbv, B, O, O, O, O, O, O, O, O, O, HN,
/* Kaithi */
@ -638,8 +638,9 @@ static const uint8_t use_table[] = {
/* 11090 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 110A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 110B0 */ VPst, VPre, VPst, VBlw, VBlw, VAbv, VAbv, VPst, VPst, H, CMBlw, O, O, O, O, O,
/* 110C0 */ O, O, VBlw, O, O, O, O, O,
#define use_offset_0x11100u 4608
#define use_offset_0x11100u 4616
/* Chakma */
@ -677,7 +678,7 @@ static const uint8_t use_table[] = {
/* 11220 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPst, VPst, VBlw,
/* 11230 */ VAbv, VAbv, VAbv, VAbv, VMAbv, H, CMAbv, CMAbv, O, O, O, O, O, O, VMAbv, O,
#define use_offset_0x11280u 4928
#define use_offset_0x11280u 4936
/* Multani */
@ -705,7 +706,7 @@ static const uint8_t use_table[] = {
/* 11360 */ B, B, VPst, VPst, O, O, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O,
/* 11370 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O,
#define use_offset_0x11400u 5176
#define use_offset_0x11400u 5184
/* Newa */
@ -728,7 +729,7 @@ static const uint8_t use_table[] = {
/* 114C0 */ VMAbv, VMAbv, H, CMBlw, B, O, O, O, O, O, O, O, O, O, O, O,
/* 114D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x11580u 5400
#define use_offset_0x11580u 5408
/* Siddham */
@ -770,8 +771,9 @@ static const uint8_t use_table[] = {
/* 11710 */ B, B, B, B, B, B, B, B, B, B, B, O, O, MBlw, MPre, MAbv,
/* 11720 */ VPst, VPst, VAbv, VAbv, VBlw, VBlw, VPre, VAbv, VBlw, VAbv, VAbv, VAbv, O, O, O, O,
/* 11730 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O,
/* 11740 */ B, B, B, B, B, B, B, O,
#define use_offset_0x11800u 5848
#define use_offset_0x11800u 5864
/* Dogra */
@ -781,7 +783,7 @@ static const uint8_t use_table[] = {
/* 11820 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPre, VPst, VBlw,
/* 11830 */ VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VMAbv, VMPst, H, CMBlw, O, O, O, O, O,
#define use_offset_0x11900u 5912
#define use_offset_0x11900u 5928
/* Dives Akuru */
@ -793,7 +795,7 @@ static const uint8_t use_table[] = {
/* 11940 */ MPst, R, MPst, CMBlw, O, O, O, O, O, O, O, O, O, O, O, O,
/* 11950 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x119a0u 6008
#define use_offset_0x119a0u 6024
/* Nandinagari */
@ -821,7 +823,7 @@ static const uint8_t use_table[] = {
/* 11A80 */ B, B, B, B, R, R, R, R, R, R, FBlw, FBlw, FBlw, FBlw, FBlw, FBlw,
/* 11A90 */ FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, VMAbv, VMPst, CMAbv, H, O, O, O, B, O, O,
#define use_offset_0x11c00u 6264
#define use_offset_0x11c00u 6280
/* Bhaiksuki */
@ -842,7 +844,7 @@ static const uint8_t use_table[] = {
/* 11CA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB,
/* 11CB0 */ VBlw, VPre, VBlw, VAbv, VPst, VMAbv, VMAbv, O,
#define use_offset_0x11d00u 6448
#define use_offset_0x11d00u 6464
/* Masaram Gondi */
@ -862,7 +864,7 @@ static const uint8_t use_table[] = {
/* 11D90 */ VAbv, VAbv, O, VPst, VPst, VMAbv, VMPst, H, O, O, O, O, O, O, O, O,
/* 11DA0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x11ee0u 6624
#define use_offset_0x11ee0u 6640
/* Makasar */
@ -870,7 +872,7 @@ static const uint8_t use_table[] = {
/* 11EE0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 11EF0 */ B, B, GB, VAbv, VBlw, VPre, VPst, O,
#define use_offset_0x13000u 6648
#define use_offset_0x13000u 6664
/* Egyptian Hieroglyphs */
@ -947,7 +949,7 @@ static const uint8_t use_table[] = {
/* 13430 */ J, J, J, J, J, J, J, SB, SE, O, O, O, O, O, O, O,
#define use_offset_0x16b00u 7736
#define use_offset_0x16b00u 7752
/* Pahawh Hmong */
@ -957,7 +959,7 @@ static const uint8_t use_table[] = {
/* 16B20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 16B30 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O,
#define use_offset_0x16f00u 7792
#define use_offset_0x16f00u 7808
/* Miao */
@ -973,14 +975,14 @@ static const uint8_t use_table[] = {
/* 16F80 */ VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, O, O, O, O, O, O, O, VMBlw,
/* 16F90 */ VMBlw, VMBlw, VMBlw, O, O, O, O, O,
#define use_offset_0x16fe0u 7944
#define use_offset_0x16fe0u 7960
/* Ideographic Symbols and Punctuation */
/* 16FE0 */ O, O, O, O, B, O, O, O,
#define use_offset_0x18b00u 7952
#define use_offset_0x18b00u 7968
/* Khitan Small Script */
@ -1016,7 +1018,7 @@ static const uint8_t use_table[] = {
/* 18CC0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 18CD0 */ B, B, B, B, B, B, O, O,
#define use_offset_0x1bc00u 8424
#define use_offset_0x1bc00u 8440
/* Duployan */
@ -1032,7 +1034,7 @@ static const uint8_t use_table[] = {
/* 1BC80 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O,
/* 1BC90 */ B, B, B, B, B, B, B, B, B, B, O, O, O, CMBlw, CMBlw, O,
#define use_offset_0x1e100u 8584
#define use_offset_0x1e100u 8600
/* Nyiakeng Puachue Hmong */
@ -1043,7 +1045,7 @@ static const uint8_t use_table[] = {
/* 1E130 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, B, B, B, B, B, B, B, O, O,
/* 1E140 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, B, B,
#define use_offset_0x1e2c0u 8664
#define use_offset_0x1e2c0u 8680
/* Wancho */
@ -1053,7 +1055,7 @@ static const uint8_t use_table[] = {
/* 1E2E0 */ B, B, B, B, B, B, B, B, B, B, B, B, VMAbv, VMAbv, VMAbv, VMAbv,
/* 1E2F0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x1e900u 8728
#define use_offset_0x1e900u 8744
/* Adlam */
@ -1065,7 +1067,7 @@ static const uint8_t use_table[] = {
/* 1E940 */ B, B, B, B, CMAbv, CMAbv, CMAbv, CMAbv, CMAbv, CMAbv, CMAbv, B, O, O, O, O,
/* 1E950 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
}; /* Table items: 8824; occupancy: 79% */
}; /* Table items: 8840; occupancy: 79% */
static inline uint8_t
hb_use_get_category (hb_codepoint_t u)
@ -1111,15 +1113,15 @@ hb_use_get_category (hb_codepoint_t u)
if (hb_in_range<hb_codepoint_t> (u, 0x10D00u, 0x10D3Fu)) return use_table[u - 0x10D00u + use_offset_0x10d00u];
if (hb_in_range<hb_codepoint_t> (u, 0x10E80u, 0x10EB7u)) return use_table[u - 0x10E80u + use_offset_0x10e80u];
if (hb_in_range<hb_codepoint_t> (u, 0x10F30u, 0x10F57u)) return use_table[u - 0x10F30u + use_offset_0x10f30u];
if (hb_in_range<hb_codepoint_t> (u, 0x10FB0u, 0x110BFu)) return use_table[u - 0x10FB0u + use_offset_0x10fb0u];
if (hb_in_range<hb_codepoint_t> (u, 0x10FB0u, 0x110C7u)) return use_table[u - 0x10FB0u + use_offset_0x10fb0u];
break;
case 0x11u:
if (hb_in_range<hb_codepoint_t> (u, 0x10FB0u, 0x110BFu)) return use_table[u - 0x10FB0u + use_offset_0x10fb0u];
if (hb_in_range<hb_codepoint_t> (u, 0x10FB0u, 0x110C7u)) return use_table[u - 0x10FB0u + use_offset_0x10fb0u];
if (hb_in_range<hb_codepoint_t> (u, 0x11100u, 0x1123Fu)) return use_table[u - 0x11100u + use_offset_0x11100u];
if (hb_in_range<hb_codepoint_t> (u, 0x11280u, 0x11377u)) return use_table[u - 0x11280u + use_offset_0x11280u];
if (hb_in_range<hb_codepoint_t> (u, 0x11400u, 0x114DFu)) return use_table[u - 0x11400u + use_offset_0x11400u];
if (hb_in_range<hb_codepoint_t> (u, 0x11580u, 0x1173Fu)) return use_table[u - 0x11580u + use_offset_0x11580u];
if (hb_in_range<hb_codepoint_t> (u, 0x11580u, 0x11747u)) return use_table[u - 0x11580u + use_offset_0x11580u];
if (hb_in_range<hb_codepoint_t> (u, 0x11800u, 0x1183Fu)) return use_table[u - 0x11800u + use_offset_0x11800u];
if (hb_in_range<hb_codepoint_t> (u, 0x11900u, 0x1195Fu)) return use_table[u - 0x11900u + use_offset_0x11900u];
if (hb_in_range<hb_codepoint_t> (u, 0x119A0u, 0x11A9Fu)) return use_table[u - 0x119A0u + use_offset_0x119a0u];

@ -10,8 +10,8 @@
* # Date: 2015-03-12, 21:17:00 GMT [AG]
* # Date: 2019-11-08, 23:22:00 GMT [AG]
*
* # Scripts-13.0.0.txt
* # Date: 2020-01-22, 00:07:43 GMT
* # Scripts-14.0.0.txt
* # Date: 2021-07-10, 00:35:31 GMT
*/
#include "hb.hh"

@ -374,6 +374,13 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
case HB_SCRIPT_CHORASMIAN:
case HB_SCRIPT_DIVES_AKURU:
/* Unicode-14.0 additions */
case HB_SCRIPT_CYPRO_MINOAN:
case HB_SCRIPT_OLD_UYGHUR:
case HB_SCRIPT_TANGSA:
case HB_SCRIPT_TOTO:
case HB_SCRIPT_VITHKUQI:
/* If the designer designed the font for the 'DFLT' script,
* (or we ended up arbitrarily picking 'latn'), use the default shaper.
* Otherwise, use the specific shaper.

File diff suppressed because it is too large Load Diff

@ -6,14 +6,14 @@
*
* on file with this header:
*
* # emoji-data.txt
* # Date: 2020-01-28, 20:52:38 GMT
* # © 2020 Unicode®, Inc.
* # emoji-data-14.0.0.txt
* # Date: 2021-08-26, 17:22:22 GMT
* # © 2021 Unicode®, Inc.
* # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
* # For terms of use, see http://www.unicode.org/terms_of_use.html
* #
* # Emoji Data for UTS #51
* # Version: 13.0
* # Used with Emoji Version 14.0 and subsequent minor revisions (if any)
* #
* # For documentation and usage, see http://www.unicode.org/reports/tr51
*/

@ -13,7 +13,7 @@ hb-ot-shape-complex-arabic-joining-list.hh: gen-arabic-joining-list.py ArabicSha
./$^ > $@ || ($(RM) $@; false)
hb-ot-shape-complex-arabic-table.hh: gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt
./$^ > $@ || ($(RM) $@; false)
hb-unicode-emoji-table.hh: gen-emoji-table.py emoji-data.txt
hb-unicode-emoji-table.hh: gen-emoji-table.py emoji-data.txt emoji-test.txt
./$^ > $@ || ($(RM) $@; false)
hb-ot-shape-complex-indic-table.cc: gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt
./$^ > $@ || ($(RM) $@; false)
@ -37,6 +37,8 @@ Blocks.txt:
curl -O https://unicode.org/Public/UCD/latest/ucd/Blocks.txt
emoji-data.txt:
curl -O https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
emoji-test.txt:
curl -O https://www.unicode.org/Public/emoji/latest/emoji-test.txt
IndicSyllabicCategory.txt:
curl -O https://unicode.org/Public/UCD/latest/ucd/IndicSyllabicCategory.txt
IndicPositionalCategory.txt:

@ -181,6 +181,9 @@ static const test_pair_t combining_class_tests_more[] =
/* Unicode-13.0 character additions */
{ 0x1ABF, 220 },
/* Unicode-14.0 character additions */
{ 0x1DFA, 218 },
{ 0x111111, 0 }
};
@ -261,6 +264,9 @@ static const test_pair_t general_category_tests_more[] =
/* Unicode-13.0 character additions */
{ 0x08BE, HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER },
/* Unicode-14.0 character additions */
{ 0x20C0, HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL },
{ 0x111111, HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED }
};
@ -511,6 +517,13 @@ static const test_pair_t script_tests_more[] =
{ 0x11900, HB_SCRIPT_DIVES_AKURU },
{ 0x18B00, HB_SCRIPT_KHITAN_SMALL_SCRIPT },
/* Unicode-14.0 additions */
{ 0x10570, HB_SCRIPT_VITHKUQI },
{ 0x10F70, HB_SCRIPT_OLD_UYGHUR },
{ 0x12F90, HB_SCRIPT_CYPRO_MINOAN },
{ 0x16A70, HB_SCRIPT_TANGSA },
{ 0x1E290, HB_SCRIPT_TOTO },
{ 0x111111, HB_SCRIPT_UNKNOWN }
};

Loading…
Cancel
Save