From 7afb14407e59dfeaa79c33aca1ffa60e7982e349 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 30 Jul 2012 13:54:46 -0400 Subject: [PATCH] [Indic] Recategorize Telugu length marks Fixes 8 more Telugu tests. Failures at 15 (0.00154548%). --- src/hb-unicode.cc | 8 ++++++++ src/indic.cc | 11 ++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/hb-unicode.cc b/src/hb-unicode.cc index 3569b2059..b05b290c4 100644 --- a/src/hb-unicode.cc +++ b/src/hb-unicode.cc @@ -369,6 +369,14 @@ _hb_unicode_modified_combining_class (hb_unicode_funcs_t *ufuncs, * Uniscribe does this. */ c = 104; } + else if (unlikely (hb_in_range (unicode, 0x0C55, 0x0C56))) + { + /* Telugu length marks. + * These are the only matras in the main Indic script range that have + * a non-zero ccc. That makes them reorder with the Halant that is + * ccc=9. Just zero them, we don't need them in our Indic shaper. */ + c = 0; + } return c; } diff --git a/src/indic.cc b/src/indic.cc index 3b440761e..991a7723b 100644 --- a/src/indic.cc +++ b/src/indic.cc @@ -39,8 +39,13 @@ main (void) unsigned int category = type & 0x0F; unsigned int position = type >> 4; - hb_codepoint_t a, b; - if (!hb_unicode_decompose (funcs, u, &a, &b)) - printf ("U+%04X %x %x\n", u, category, position); + hb_unicode_general_category_t cat = hb_unicode_general_category (funcs, u); + unsigned int ccc = hb_unicode_combining_class (funcs, u); + if (category == OT_M && ccc) + printf ("U+%04X %d\n", u, ccc); + +// hb_codepoint_t a, b; +// if (!hb_unicode_decompose (funcs, u, &a, &b)) +// printf ("U+%04X %x %x\n", u, category, position); } }