Merge remote-tracking branch 'fdo/master'

pull/683/head
Behdad Esfahbod 7 years ago
commit 127bcc6ece
  1. 1
      src/hb-buffer-private.hh
  2. 11
      src/hb-ot-layout-private.hh
  3. 37
      src/hb-ot-shape-complex-arabic.cc
  4. 30
      src/hb-ot-shape-normalize.cc
  5. 4
      test/shaping/tests/arabic-mark-order.tests

@ -69,6 +69,7 @@ enum hb_buffer_scratch_flags_t {
HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK = 0x00000004u, HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK = 0x00000004u,
HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT = 0x00000008u, HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT = 0x00000008u,
HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK = 0x00000010u, HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK = 0x00000010u,
HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000020u,
/* Reserved for complex shapers' internal use. */ /* Reserved for complex shapers' internal use. */
HB_BUFFER_SCRATCH_FLAG_COMPLEX0 = 0x01000000u, HB_BUFFER_SCRATCH_FLAG_COMPLEX0 = 0x01000000u,

@ -280,7 +280,11 @@ _hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_buffer_t *buffer)
else if (unlikely (hb_in_range (u, 0xE0020u, 0xE007Fu))) props |= UPROPS_MASK_HIDDEN; else if (unlikely (hb_in_range (u, 0xE0020u, 0xE007Fu))) props |= UPROPS_MASK_HIDDEN;
/* COMBINING GRAPHEME JOINER should not be skipped; at least some times. /* COMBINING GRAPHEME JOINER should not be skipped; at least some times.
* https://github.com/harfbuzz/harfbuzz/issues/554 */ * https://github.com/harfbuzz/harfbuzz/issues/554 */
else if (unlikely (u == 0x034Fu)) props |= UPROPS_MASK_HIDDEN; else if (unlikely (u == 0x034Fu))
{
buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_CGJ;
props |= UPROPS_MASK_HIDDEN;
}
} }
else if (unlikely (HB_UNICODE_GENERAL_CATEGORY_IS_NON_ENCLOSING_MARK_OR_MODIFIER_SYMBOL (gen_cat))) else if (unlikely (HB_UNICODE_GENERAL_CATEGORY_IS_NON_ENCLOSING_MARK_OR_MODIFIER_SYMBOL (gen_cat)))
{ {
@ -388,6 +392,11 @@ _hb_glyph_info_is_default_ignorable_and_not_hidden (const hb_glyph_info_t *info)
== UPROPS_MASK_IGNORABLE) && == UPROPS_MASK_IGNORABLE) &&
!_hb_glyph_info_ligated (info); !_hb_glyph_info_ligated (info);
} }
static inline void
_hb_glyph_info_unhide (hb_glyph_info_t *info)
{
info->unicode_props() &= ~ UPROPS_MASK_HIDDEN;
}
static inline bool static inline bool
_hb_glyph_info_is_unicode_format (const hb_glyph_info_t *info) _hb_glyph_info_is_unicode_format (const hb_glyph_info_t *info)

@ -644,13 +644,15 @@ reorder_marks_arabic (const hb_ot_shape_plan_t *plan,
{ {
hb_glyph_info_t *info = buffer->info; hb_glyph_info_t *info = buffer->info;
DEBUG_MSG (ARABIC, buffer, "Reordering marks from %d to %d", start, end);
unsigned int i = start; unsigned int i = start;
for (unsigned int cc = 220; cc <= 230; cc += 10) for (unsigned int cc = 220; cc <= 230; cc += 10)
{ {
DEBUG_MSG (ARABIC, buffer, "Looking for %d's starting at %d\n", cc, i); DEBUG_MSG (ARABIC, buffer, "Looking for %d's starting at %d", cc, i);
while (i < end && info_cc(info[i]) < cc) while (i < end && info_cc(info[i]) < cc)
i++; i++;
DEBUG_MSG (ARABIC, buffer, "Looking for %d's stopped at %d\n", cc, i); DEBUG_MSG (ARABIC, buffer, "Looking for %d's stopped at %d", cc, i);
if (i == end) if (i == end)
break; break;
@ -658,20 +660,17 @@ reorder_marks_arabic (const hb_ot_shape_plan_t *plan,
if (info_cc(info[i]) > cc) if (info_cc(info[i]) > cc)
continue; continue;
/* Technically we should also check "info_cc(info[j]) == cc"
* in the following loop. But not doing it is safe; we might
* end up moving all the 220 MCMs and 230 MCMs together in one
* move and be done. */
unsigned int j = i; unsigned int j = i;
while (j < end && info_is_mcm (info[j])) while (j < end && info_cc(info[j]) == cc && info_is_mcm (info[j]))
j++; j++;
DEBUG_MSG (ARABIC, buffer, "Found %d's from %d to %d\n", cc, i, j);
if (i == j) if (i == j)
continue; continue;
DEBUG_MSG (ARABIC, buffer, "Found %d's from %d to %d", cc, i, j);
/* Shift it! */ /* Shift it! */
DEBUG_MSG (ARABIC, buffer, "Shifting %d's: %d %d\n", cc, i, j); DEBUG_MSG (ARABIC, buffer, "Shifting %d's: %d %d", cc, i, j);
hb_glyph_info_t temp[HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS]; hb_glyph_info_t temp[HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS];
assert (j - i <= ARRAY_LENGTH (temp)); assert (j - i <= ARRAY_LENGTH (temp));
buffer->merge_clusters (start, j); buffer->merge_clusters (start, j);
@ -679,7 +678,25 @@ reorder_marks_arabic (const hb_ot_shape_plan_t *plan,
memmove (&info[start + j - i], &info[start], (i - start) * sizeof (hb_glyph_info_t)); memmove (&info[start + j - i], &info[start], (i - start) * sizeof (hb_glyph_info_t));
memmove (&info[start], temp, (j - i) * sizeof (hb_glyph_info_t)); memmove (&info[start], temp, (j - i) * sizeof (hb_glyph_info_t));
start += j - i; /* Renumber CC such that the reordered sequence is still sorted.
* 22 and 26 are chosen because they are smaller than all Arabic categories,
* and are folded back to 220/230 respectively during fallback mark positioning.
*
* We do this because the CGJ-handling logic in the normalizer relies on
* mark sequences having an increasing order even after this reordering.
* https://github.com/harfbuzz/harfbuzz/issues/554
* This, however, does break some obscure sequences, where the normalizer
* might compose a sequence that it should not. For example, in the seequence
* ALEF, HAMZAH, MADDAH, we should NOT try to compose ALEF+MADDAH, but with this
* renumbering, we will.
*/
unsigned int new_start = start + j - i;
unsigned int new_cc = cc == 220 ? HB_MODIFIED_COMBINING_CLASS_CCC22 : HB_MODIFIED_COMBINING_CLASS_CCC26;
while (start < new_start)
{
_hb_glyph_info_set_modified_combining_class (&info[start], new_cc);
start++;
}
i = j; i = j;
} }

@ -345,9 +345,8 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0) if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0)
break; break;
/* We are going to do a O(n^2). Only do this if the sequence is short, /* We are going to do a O(n^2). Only do this if the sequence is short. */
* but not too short ;). */ if (end - i > HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS) {
if (end - i < 2 || end - i > HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS) {
i = end; i = end;
continue; continue;
} }
@ -373,13 +372,11 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
buffer->clear_output (); buffer->clear_output ();
count = buffer->len; count = buffer->len;
unsigned int starter = 0; unsigned int starter = 0;
bool combine = true;
buffer->next_glyph (); buffer->next_glyph ();
while (buffer->idx < count && !buffer->in_error) while (buffer->idx < count && !buffer->in_error)
{ {
hb_codepoint_t composed, glyph; hb_codepoint_t composed, glyph;
if (combine && if (/* We don't try to compose a non-mark character with it's preceding starter.
/* We don't try to compose a non-mark character with it's preceding starter.
* This is both an optimization to avoid trying to compose every two neighboring * This is both an optimization to avoid trying to compose every two neighboring
* glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul * glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul
* fonts are not designed to mix-and-match pre-composed syllables and Jamo. */ * fonts are not designed to mix-and-match pre-composed syllables and Jamo. */
@ -410,22 +407,27 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
continue; continue;
} }
else if (/* We sometimes custom-tailor the sorted order of marks. In that case, stop
* trying to combine as soon as combining-class drops. */
starter < buffer->out_len - 1 &&
info_cc (buffer->prev()) > info_cc (buffer->cur()))
combine = false;
} }
/* Blocked, or doesn't compose. */ /* Blocked, or doesn't compose. */
buffer->next_glyph (); buffer->next_glyph ();
if (info_cc (buffer->prev()) == 0) if (info_cc (buffer->prev()) == 0)
{
starter = buffer->out_len - 1; starter = buffer->out_len - 1;
combine = true;
}
} }
buffer->swap_buffers (); buffer->swap_buffers ();
if (buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_CGJ)
{
/* For all CGJ, check if it prevented any reordering at all.
* If it did NOT, then make it skippable.
* https://github.com/harfbuzz/harfbuzz/issues/554
*/
for (unsigned int i = 1; i + 1 < buffer->len; i++)
if (buffer->info[i].codepoint == 0x034Fu/*CGJ*/ &&
info_cc(buffer->info[i-1]) <= info_cc(buffer->info[i+1]))
{
_hb_glyph_info_unhide (&buffer->info[i]);
}
}
} }

@ -1,2 +1,6 @@
fonts/sha1sum/94a5d6fb15a27521fba9ea4aee9cb39b2d03322a.ttf::U+064A,U+064E,U+0670,U+0653,U+0640,U+0654,U+064E,U+0627:[afii57415.zz04=7+481|afii57454=4@25,975+0|uni0654=4@-50,50+0|afii57440=4+650|uni0670_uni0653=0@75,400+0|afii57454=0@750,1125+0|afii57450.calt=0+1331] fonts/sha1sum/94a5d6fb15a27521fba9ea4aee9cb39b2d03322a.ttf::U+064A,U+064E,U+0670,U+0653,U+0640,U+0654,U+064E,U+0627:[afii57415.zz04=7+481|afii57454=4@25,975+0|uni0654=4@-50,50+0|afii57440=4+650|uni0670_uni0653=0@75,400+0|afii57454=0@750,1125+0|afii57450.calt=0+1331]
fonts/sha1sum/24b8d24d00ae86f49791b746da4c9d3f717a51a8.ttf::U+0628,U+0618,U+0619,U+064E,U+064F,U+0654,U+0658,U+0653,U+0654,U+0651,U+0656,U+0651,U+065C,U+0655,U+0650:[uni0653.small=0@266,2508+0|uni0654=0@308,2151+0|uni0655=0@518,-1544+0|uni065C=0@501,-1453+0|uni0656=0@573,-659+0|uni0650=0@500,133+0|uni0619=0@300,1807+0|uni0618=0@357,1674+0|uni0651064E=0@387,1178+0|uni0651=0@402,764+0|uni0658=0@424,404+0|uni0654064F=0@540,-435+0|uni0628=0+1352] fonts/sha1sum/24b8d24d00ae86f49791b746da4c9d3f717a51a8.ttf::U+0628,U+0618,U+0619,U+064E,U+064F,U+0654,U+0658,U+0653,U+0654,U+0651,U+0656,U+0651,U+065C,U+0655,U+0650:[uni0653.small=0@266,2508+0|uni0654=0@308,2151+0|uni0655=0@518,-1544+0|uni065C=0@501,-1453+0|uni0656=0@573,-659+0|uni0650=0@500,133+0|uni0619=0@300,1807+0|uni0618=0@357,1674+0|uni0651064E=0@387,1178+0|uni0651=0@402,764+0|uni0658=0@424,404+0|uni0654064F=0@540,-435+0|uni0628=0+1352]
fonts/sha1sum/21b7fb9c1eeae260473809fbc1fe330f66a507cd.ttf::U+0649,U+0655,U+034F,U+0650:[uni0650.small2=0@727,-774+0|space=0+0|uni0655=0@727,-209+0|uni0649=0+1566]
fonts/sha1sum/21b7fb9c1eeae260473809fbc1fe330f66a507cd.ttf::U+0649,U+0655,U+0650:[uni0650.small2=0@727,-774+0|uni0655=0@727,-209+0|uni0649=0+1566]
fonts/sha1sum/21b7fb9c1eeae260473809fbc1fe330f66a507cd.ttf::U+0649,U+0650,U+0655:[uni0650.small2=0@727,-774+0|uni0655=0@727,-209+0|uni0649=0+1566]
fonts/sha1sum/21b7fb9c1eeae260473809fbc1fe330f66a507cd.ttf::U+0649,U+0650,U+034F,U+0655:[uni0655=0+0|space=0+0|uni0650=0@166,0+0|uni0649=0+1566]

Loading…
Cancel
Save