From 43b653150081a2f9dc6b7481229ac4cd952575dc Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 16 Nov 2012 13:12:35 -0800 Subject: [PATCH] [Indic] Another try to unbreak Sinhala split matras Just read the comments... --- src/hb-ot-shape-complex-indic.cc | 43 +++++++++++++++---- .../indic/script-sinhala/misc/MANIFEST | 1 + .../script-sinhala/misc/split-matras.txt | 4 ++ 3 files changed, 40 insertions(+), 8 deletions(-) create mode 100644 test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/split-matras.txt diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index b18582403..d924d1a5a 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -1317,15 +1317,42 @@ decompose_indic (const hb_ot_shape_normalize_context_t *c, #endif } - if (indic_options ().uniscribe_bug_compatible) - switch (ab) + if ((ab == 0x0DDA || hb_in_range (ab, 0x0DDC, 0x0DDE))) { - /* These Sinhala ones have Unicode decompositions, but Uniscribe - * decomposes them "Khmer-style". */ - case 0x0DDA : *a = 0x0DD9; *b= 0x0DDA; return true; - case 0x0DDC : *a = 0x0DD9; *b= 0x0DDC; return true; - case 0x0DDD : *a = 0x0DD9; *b= 0x0DDD; return true; - case 0x0DDE : *a = 0x0DD9; *b= 0x0DDE; return true; + /* + * Sinhala split matras... Let the fun begin. + * + * These four characters have Unicode decompositions. However, Uniscribe + * decomposes them "Khmer-style", that is, it uses the character itself to + * get the second half. The first half of all four decompositions is always + * U+0DD9. + * + * Now, there are buggy fonts, namely, the widely used lklug.ttf, that are + * broken with Uniscribe. But we need to support them. As such, we only + * do the Uniscribe-style decomposition if the character is transformed into + * its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to + * Unicode decomposition. + * + * Note that we can't unconditionally use Unicode decomposition. That would + * break some other fonts, that are designed to work with Uniscribe, and + * don't have positioning features for the Unicode-style decomposition. + * + * Argh... + */ + + const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) c->plan->data; + + hb_codepoint_t glyph; + + if (indic_options ().uniscribe_bug_compatible || + (c->font->get_glyph (ab, 0, &glyph) && + indic_plan->pstf.would_substitute (&glyph, 1, true, c->font->face))) + { + /* Ok, safe to use Uniscribe-style decomposition. */ + *a = 0x0DD9; + *b = ab; + return true; + } } return c->unicode->decompose (ab, a, b); diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST index 7eff9e198..a00d7aee5 100644 --- a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST +++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/MANIFEST @@ -1,3 +1,4 @@ extensive.txt misc.txt reph.txt +split-matras.txt diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/split-matras.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/split-matras.txt new file mode 100644 index 000000000..2a73a403a --- /dev/null +++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-sinhala/misc/split-matras.txt @@ -0,0 +1,4 @@ +කේ +කො +කෝ +කෞ