diff --git a/src/gen-tag-table.py b/src/gen-tag-table.py index 925ffb439..7cbf3a79e 100755 --- a/src/gen-tag-table.py +++ b/src/gen-tag-table.py @@ -884,7 +884,7 @@ def print_subtag_matches (subtag): for language, tags in sorted (ot.from_bcp_47.items (), key=lambda i: (-len (i[0]), i[0])): lt = LanguageTag (language) - if len (lt.subtags) == 1 or lt.grandfathered and ot.from_bcp_47[lt.subtags[0]] == tags: + if len (lt.subtags) == 1 or lt.grandfathered and len (lt.subtags[1]) != 3 and ot.from_bcp_47[lt.subtags[0]] == tags: continue print (' if (', end='') if (lt.language == 'und' or diff --git a/src/hb-ot-tag-table.hh b/src/hb-ot-tag-table.hh index 0d06255a3..721136883 100644 --- a/src/hb-ot-tag-table.hh +++ b/src/hb-ot-tag-table.hh @@ -1279,6 +1279,13 @@ hb_ot_tags_from_complex_language (const char *lang_str, *count = 1; return true; } + if (0 == strcmp (lang_str, "zh-min-nan")) + { + /* Minnan, Hokkien, Amoy, Taiwanese, Southern Min, Southern Fujian, Hoklo, Southern Fukien, Ho-lo */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } if (lang_matches (lang_str, "cdo-hans")) { /* Min Dong Chinese; Han (Simplified variant) */ @@ -1791,6 +1798,13 @@ hb_ot_tags_from_complex_language (const char *lang_str, *count = 1; return true; } + if (0 == strcmp (lang_str, "no-bok")) + { + /* Norwegian Bokmal */ + tags[0] = HB_TAG('N','O','R',' '); /* Norwegian */ + *count = 1; + return true; + } if (0 == strcmp (lang_str, "no-nyn")) { /* Norwegian Nynorsk */ @@ -1822,6 +1836,13 @@ hb_ot_tags_from_complex_language (const char *lang_str, *count = 1; return true; } + if (0 == strcmp (lang_str, "zh-min")) + { + /* Min, Fuzhou, Hokkien, Amoy, or Taiwanese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } if (0 == strcmp (lang_str, "i-hak")) { /* Hakka */ diff --git a/src/hb-ot-tag.cc b/src/hb-ot-tag.cc index 4d8cb5989..0d4c06030 100644 --- a/src/hb-ot-tag.cc +++ b/src/hb-ot-tag.cc @@ -249,8 +249,17 @@ hb_ot_tags_from_language (const char *lang_str, return; /* Find a language matching in the first component. */ + s = strchr (lang_str, '-'); { const LangTag *lang_tag; + if (s && limit - lang_str >= 6) + { + const char *extlang_end = strchr (s + 1, '-'); + /* If there is an extended language tag, use it. */ + if (3 == (extlang_end ? extlang_end - s - 1 : strlen (s + 1)) && + ISALPHA (s[1])) + lang_str = s + 1; + } lang_tag = (LangTag *) bsearch (lang_str, ot_languages, ARRAY_LENGTH (ot_languages), sizeof (LangTag), lang_compare_first_component); @@ -264,7 +273,6 @@ hb_ot_tags_from_language (const char *lang_str, } } - s = strchr (lang_str, '-'); if (!s) s = lang_str + strlen (lang_str); if (s - lang_str == 3) { diff --git a/test/api/test-ot-tag.c b/test/api/test-ot-tag.c index 6d64d131b..350289812 100644 --- a/test/api/test-ot-tag.c +++ b/test/api/test-ot-tag.c @@ -369,9 +369,13 @@ test_ot_tag_language (void) test_tag_from_language ("ABC", "xyz-xy-x-hbotabc-zxc"); /* Unnormalized BCP 47 tags */ + test_tag_from_language ("ARA", "ar-aao"); test_tag_from_language ("JBO", "art-lojban"); + test_tag_from_language ("KOK", "kok-gom"); test_tag_from_language ("LTZ", "i-lux"); test_tag_from_language ("MNG", "drh"); + test_tag_from_language ("MOR", "ar-ary"); + test_tag_from_language ("MOR", "ar-ary-DZ"); test_tag_from_language ("NOR", "no-bok"); test_tag_from_language ("NYN", "no-nyn"); test_tag_from_language ("ZHS", "i-hak"); @@ -379,6 +383,9 @@ test_ot_tag_language (void) test_tag_from_language ("ZHS", "zh-min"); test_tag_from_language ("ZHS", "zh-min-nan"); test_tag_from_language ("ZHS", "zh-xiang"); + + /* A UN M.49 region code, not an extended language subtag */ + test_tag_from_language ("ARA", "ar-001"); } static void