|
|
|
@ -1,7 +1,7 @@ |
|
|
|
|
/*
|
|
|
|
|
* Copyright © 2009 Red Hat, Inc. |
|
|
|
|
* Copyright © 2011 Codethink Limited |
|
|
|
|
* Copyright © 2010,2011 Google, Inc. |
|
|
|
|
* Copyright © 2011 Codethink Limited |
|
|
|
|
* Copyright © 2010,2011,2012 Google, Inc. |
|
|
|
|
* |
|
|
|
|
* This is part of HarfBuzz, a text shaping library. |
|
|
|
|
* |
|
|
|
@ -287,69 +287,148 @@ hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs, |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
unsigned int |
|
|
|
|
hb_unicode_funcs_t::modified_combining_class (hb_codepoint_t unicode) |
|
|
|
|
const uint8_t |
|
|
|
|
_hb_modified_combining_class[256] = |
|
|
|
|
{ |
|
|
|
|
int c = combining_class (unicode); |
|
|
|
|
|
|
|
|
|
if (unlikely (hb_in_range<int> (c, 27, 33))) |
|
|
|
|
{ |
|
|
|
|
/* Modify the combining-class to suit Arabic better. See:
|
|
|
|
|
* http://unicode.org/faq/normalization.html#8
|
|
|
|
|
* http://unicode.org/faq/normalization.html#9
|
|
|
|
|
*/ |
|
|
|
|
c = c == 33 ? 27 : c + 1; |
|
|
|
|
} |
|
|
|
|
else if (unlikely (hb_in_range<int> (c, 10, 26))) |
|
|
|
|
{ |
|
|
|
|
/* The equivalent fix for Hebrew is more complex.
|
|
|
|
|
* |
|
|
|
|
* We permute the "fixed-position" classes 10-26 into the order |
|
|
|
|
* described in the SBL Hebrew manual: |
|
|
|
|
* |
|
|
|
|
* http://www.sbl-site.org/Fonts/SBLHebrewUserManual1.5x.pdf
|
|
|
|
|
* |
|
|
|
|
* (as recommended by: |
|
|
|
|
* http://forum.fontlab.com/archive-old-microsoft-volt-group/vista-and-diacritic-ordering-t6751.0.html)
|
|
|
|
|
* |
|
|
|
|
* More details here: |
|
|
|
|
* https://bugzilla.mozilla.org/show_bug.cgi?id=662055
|
|
|
|
|
*/ |
|
|
|
|
static const int permuted_hebrew_classes[26 - 10 + 1] = { |
|
|
|
|
/* 10 sheva */ 22, |
|
|
|
|
/* 11 hataf segol */ 15, |
|
|
|
|
/* 12 hataf patah */ 16, |
|
|
|
|
/* 13 hataf qamats */ 17, |
|
|
|
|
/* 14 hiriq */ 23, |
|
|
|
|
/* 15 tsere */ 18, |
|
|
|
|
/* 16 segol */ 19, |
|
|
|
|
/* 17 patah */ 20, |
|
|
|
|
/* 18 qamats */ 21, |
|
|
|
|
/* 19 holam */ 14, |
|
|
|
|
/* 20 qubuts */ 24, |
|
|
|
|
/* 21 dagesh */ 12, |
|
|
|
|
/* 22 meteg */ 25, |
|
|
|
|
/* 23 rafe */ 13, |
|
|
|
|
/* 24 shin dot */ 10, |
|
|
|
|
/* 25 sin dot */ 11, |
|
|
|
|
/* 26 point varika */ 26, |
|
|
|
|
}; |
|
|
|
|
c = permuted_hebrew_classes[c - 10]; |
|
|
|
|
} |
|
|
|
|
else if (unlikely (unicode == 0x0E3A)) /* THAI VOWEL SIGN PHINTHU */ |
|
|
|
|
{ |
|
|
|
|
/* Assign 104, so it reorders after the THAI ccc=103 marks.
|
|
|
|
|
* Uniscribe does this. */ |
|
|
|
|
c = 104; |
|
|
|
|
} |
|
|
|
|
else if (unlikely (hb_in_range<hb_codepoint_t> (unicode, 0x0C55, 0x0C56))) |
|
|
|
|
{ |
|
|
|
|
/* Telugu length marks.
|
|
|
|
|
* These are the only matras in the main Indic script range that have |
|
|
|
|
* a non-zero ccc. That makes them reorder with the Halant that is |
|
|
|
|
* ccc=9. Just zero them, we don't need them in our Indic shaper. */ |
|
|
|
|
c = 0; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return c; |
|
|
|
|
} |
|
|
|
|
0, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */ |
|
|
|
|
1, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */ |
|
|
|
|
2, 3, 4, 5, 6, |
|
|
|
|
7, /* HB_UNICODE_COMBINING_CLASS_NUKTA */ |
|
|
|
|
8, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */ |
|
|
|
|
9, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */ |
|
|
|
|
|
|
|
|
|
/* Hebrew */ |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We permute the "fixed-position" classes 10-26 into the order |
|
|
|
|
* described in the SBL Hebrew manual: |
|
|
|
|
* |
|
|
|
|
* http://www.sbl-site.org/Fonts/SBLHebrewUserManual1.5x.pdf
|
|
|
|
|
* |
|
|
|
|
* (as recommended by: |
|
|
|
|
* http://forum.fontlab.com/archive-old-microsoft-volt-group/vista-and-diacritic-ordering-t6751.0.html)
|
|
|
|
|
* |
|
|
|
|
* More details here: |
|
|
|
|
* https://bugzilla.mozilla.org/show_bug.cgi?id=662055
|
|
|
|
|
*/ |
|
|
|
|
22, /* HB_UNICODE_COMBINING_CLASS_CCC10 sheva */ |
|
|
|
|
15, /* HB_UNICODE_COMBINING_CLASS_CCC11 hataf segol */ |
|
|
|
|
16, /* HB_UNICODE_COMBINING_CLASS_CCC12 hataf patah*/ |
|
|
|
|
17, /* HB_UNICODE_COMBINING_CLASS_CCC13 hataf qamats */ |
|
|
|
|
23, /* HB_UNICODE_COMBINING_CLASS_CCC14 hiriq */ |
|
|
|
|
18, /* HB_UNICODE_COMBINING_CLASS_CCC15 tsere */ |
|
|
|
|
19, /* HB_UNICODE_COMBINING_CLASS_CCC16 segol */ |
|
|
|
|
20, /* HB_UNICODE_COMBINING_CLASS_CCC17 patah */ |
|
|
|
|
21, /* HB_UNICODE_COMBINING_CLASS_CCC18 qamats */ |
|
|
|
|
14, /* HB_UNICODE_COMBINING_CLASS_CCC19 holam */ |
|
|
|
|
24, /* HB_UNICODE_COMBINING_CLASS_CCC20 qubuts */ |
|
|
|
|
12, /* HB_UNICODE_COMBINING_CLASS_CCC21 dagesh */ |
|
|
|
|
25, /* HB_UNICODE_COMBINING_CLASS_CCC22 meteg */ |
|
|
|
|
13, /* HB_UNICODE_COMBINING_CLASS_CCC23 rafe */ |
|
|
|
|
10, /* HB_UNICODE_COMBINING_CLASS_CCC24 shin dot */ |
|
|
|
|
11, /* HB_UNICODE_COMBINING_CLASS_CCC25 sin dot */ |
|
|
|
|
|
|
|
|
|
26, /* HB_UNICODE_COMBINING_CLASS_CCC26 */ |
|
|
|
|
|
|
|
|
|
/* Arabic */ |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Modify to move Shadda (ccc=33) before other marks. See: |
|
|
|
|
* http://unicode.org/faq/normalization.html#8
|
|
|
|
|
* http://unicode.org/faq/normalization.html#9
|
|
|
|
|
*/ |
|
|
|
|
28, /* HB_UNICODE_COMBINING_CLASS_CCC27 */ |
|
|
|
|
29, /* HB_UNICODE_COMBINING_CLASS_CCC28 */ |
|
|
|
|
30, /* HB_UNICODE_COMBINING_CLASS_CCC29 */ |
|
|
|
|
31, /* HB_UNICODE_COMBINING_CLASS_CCC30 */ |
|
|
|
|
32, /* HB_UNICODE_COMBINING_CLASS_CCC31 */ |
|
|
|
|
33, /* HB_UNICODE_COMBINING_CLASS_CCC32 */ |
|
|
|
|
27, /* HB_UNICODE_COMBINING_CLASS_CCC33 shadda */ |
|
|
|
|
|
|
|
|
|
34, /* HB_UNICODE_COMBINING_CLASS_CCC34 */ |
|
|
|
|
35, /* HB_UNICODE_COMBINING_CLASS_CCC35 */ |
|
|
|
|
|
|
|
|
|
/* Syriac */ |
|
|
|
|
36, /* HB_UNICODE_COMBINING_CLASS_CCC36 */ |
|
|
|
|
|
|
|
|
|
37, 38, 39, |
|
|
|
|
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, |
|
|
|
|
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, |
|
|
|
|
80, 81, 82, 83, |
|
|
|
|
|
|
|
|
|
/* Telugu */ |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Modify Telugu length marks (ccc=84, ccc=91). |
|
|
|
|
* These are the only matras in the main Indic scripts range that have |
|
|
|
|
* a non-zero ccc. That makes them reorder with the Halant that is |
|
|
|
|
* ccc=9. Just zero them, we don't need them in our Indic shaper. |
|
|
|
|
*/ |
|
|
|
|
0, /* HB_UNICODE_COMBINING_CLASS_CCC84 */ |
|
|
|
|
85, 86, 87, 88, 89, 90, |
|
|
|
|
0, /* HB_UNICODE_COMBINING_CLASS_CCC91 */ |
|
|
|
|
92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, |
|
|
|
|
|
|
|
|
|
/* Thai */ |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Modify U+0E38 and U+0E39 (ccc=103) to be reordered before U+0E3A (ccc=9). |
|
|
|
|
* Uniscribe does this too. |
|
|
|
|
*/ |
|
|
|
|
3, /* HB_UNICODE_COMBINING_CLASS_CCC103 */ |
|
|
|
|
|
|
|
|
|
104, 105, 106, |
|
|
|
|
107, /* HB_UNICODE_COMBINING_CLASS_CCC107 */ |
|
|
|
|
108, 109, 110, 111, 112, 113, 114, 115, 116, 117, |
|
|
|
|
|
|
|
|
|
/* Lao */ |
|
|
|
|
118, /* HB_UNICODE_COMBINING_CLASS_CCC118 */ |
|
|
|
|
119, 120, 121, |
|
|
|
|
122, /* HB_UNICODE_COMBINING_CLASS_CCC122 */ |
|
|
|
|
123, 124, 125, 126, 127, 128, |
|
|
|
|
|
|
|
|
|
/* Tibetan */ |
|
|
|
|
129, /* HB_UNICODE_COMBINING_CLASS_CCC129 */ |
|
|
|
|
130, /* HB_UNICODE_COMBINING_CLASS_CCC130 */ |
|
|
|
|
131, |
|
|
|
|
132, /* HB_UNICODE_COMBINING_CLASS_CCC133 */ |
|
|
|
|
133, 134, 135, 136, 137, 138, 139, |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140, 141, 142, 143, 144, 145, 146, 147, 148, 149, |
|
|
|
|
150, 151, 152, 153, 154, 155, 156, 157, 158, 159, |
|
|
|
|
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, |
|
|
|
|
170, 171, 172, 173, 174, 175, 176, 177, 178, 179, |
|
|
|
|
180, 181, 182, 183, 184, 185, 186, 187, 188, 189, |
|
|
|
|
190, 191, 192, 193, 194, 195, 196, 197, 198, 199, |
|
|
|
|
|
|
|
|
|
200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */ |
|
|
|
|
201, |
|
|
|
|
202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */ |
|
|
|
|
203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, |
|
|
|
|
214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */ |
|
|
|
|
215, |
|
|
|
|
216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */ |
|
|
|
|
217, |
|
|
|
|
218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */ |
|
|
|
|
219, |
|
|
|
|
220, /* HB_UNICODE_COMBINING_CLASS_BELOW */ |
|
|
|
|
221, |
|
|
|
|
222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */ |
|
|
|
|
223, |
|
|
|
|
224, /* HB_UNICODE_COMBINING_CLASS_LEFT */ |
|
|
|
|
225, |
|
|
|
|
226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */ |
|
|
|
|
227, |
|
|
|
|
228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */ |
|
|
|
|
229, |
|
|
|
|
230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */ |
|
|
|
|
231, |
|
|
|
|
232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */ |
|
|
|
|
233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */ |
|
|
|
|
234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */ |
|
|
|
|
235, 236, 237, 238, 239, |
|
|
|
|
240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */ |
|
|
|
|
241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, |
|
|
|
|
255, /* HB_UNICODE_COMBINING_CLASS_INVALID */ |
|
|
|
|
}; |
|
|
|
|