@ -25,6 +25,7 @@
*/
*/
# include "hb-ot-shape-normalize-private.hh"
# include "hb-ot-shape-normalize-private.hh"
# include "hb-ot-shape-complex-private.hh"
# include "hb-ot-shape-private.hh"
# include "hb-ot-shape-private.hh"
@ -82,180 +83,23 @@
*/
*/
/* Canonical decomposition hook: tries to split `ab` into the pair (a, b).
 *
 * NOTE(review): this span is rendered from a diff, so unchanged lines show up
 * twice and the old name (decompose_func) sits next to the new one
 * (decompose_unicode).  The switch below appears only once -- presumably it is
 * the part being removed from this file; confirm against the real sources.
 *
 * As written: returns false for the three code points listed first (no
 * decomposition is applied), returns true with hard-coded values stored
 * through `a` and `b` for the remaining cases, and otherwise returns whatever
 * unicode->decompose() reports. */
static hb_bool_t
static hb_bool_t
decompose_f unc ( hb_unicode_funcs_t * unicode ,
decompose_uni code ( hb_unicode_funcs_t * unicode ,
hb_codepoint_t ab ,
hb_codepoint_t ab ,
hb_codepoint_t * a ,
hb_codepoint_t * a ,
hb_codepoint_t * b )
hb_codepoint_t * b )
{
{
/* XXX FIXME, move these to complex shapers and propagate to normalizer.*/
switch ( ab ) {
case 0x0AC9 : return false ;
case 0x0931 : return false ;
case 0x0B94 : return false ;
/* These ones have Unicode decompositions, but we do it
* this way to be close to what Uniscribe does . */
case 0x0DDA : * a = 0x0DD9 ; * b = 0x0DDA ; return true ;
case 0x0DDC : * a = 0x0DD9 ; * b = 0x0DDC ; return true ;
case 0x0DDD : * a = 0x0DD9 ; * b = 0x0DDD ; return true ;
case 0x0DDE : * a = 0x0DD9 ; * b = 0x0DDE ; return true ;
case 0x0F77 : * a = 0x0FB2 ; * b = 0x0F81 ; return true ;
case 0x0F79 : * a = 0x0FB3 ; * b = 0x0F81 ; return true ;
case 0x17BE : * a = 0x17C1 ; * b = 0x17BE ; return true ;
case 0x17BF : * a = 0x17C1 ; * b = 0x17BF ; return true ;
case 0x17C0 : * a = 0x17C1 ; * b = 0x17C0 ; return true ;
case 0x17C4 : * a = 0x17C1 ; * b = 0x17C4 ; return true ;
case 0x17C5 : * a = 0x17C1 ; * b = 0x17C5 ; return true ;
case 0x1925 : * a = 0x1920 ; * b = 0x1923 ; return true ;
case 0x1926 : * a = 0x1920 ; * b = 0x1924 ; return true ;
case 0x1B3C : * a = 0x1B42 ; * b = 0x1B3C ; return true ;
case 0x1112E : * a = 0x11127 ; * b = 0x11131 ; return true ;
case 0x1112F : * a = 0x11127 ; * b = 0x11132 ; return true ;
#if 0
case 0x0B57 : * a = 0 xno decomp , - > RIGHT ; return true ;
case 0x1C29 : * a = 0 xno decomp , - > LEFT ; return true ;
case 0xA9C0 : * a = 0 xno decomp , - > RIGHT ; return true ;
case 0x111BF : * a = 0 xno decomp , - > ABOVE ; return true ;
# endif
}
return unicode - > decompose ( ab , a , b ) ;
return unicode - > decompose ( ab , a , b ) ;
}
}
/* Canonical composition hook: tries to combine `a` followed by `b` into a
 * single code point stored through `ab`; returns true on success.
 *
 * NOTE(review): this span is rendered from a diff, so unchanged lines show up
 * twice and the old name (compose_func) sits next to the new one
 * (compose_unicode).  The plain `return unicode->compose(...)` near the top
 * and everything after it appear only once -- presumably the former is the
 * new body and the rest (mark check, the 0x09AF + 0x09BC special case and the
 * Hebrew presentation-form handling) is old code being moved out of this
 * file; confirm against the real sources. */
static hb_bool_t
static hb_bool_t
compose_f unc ( hb_unicode_funcs_t * unicode ,
compose_unicode ( hb_unicode_funcs_t * unicode ,
hb_codepoint_t a ,
hb_codepoint_t a ,
hb_codepoint_t b ,
hb_codepoint_t b ,
hb_codepoint_t * ab )
hb_codepoint_t * ab )
{
{
/* XXX, this belongs to indic normalizer. */
return unicode - > compose ( a , b , ab ) ;
if ( HB_UNICODE_GENERAL_CATEGORY_IS_MARK ( unicode - > general_category ( a ) ) )
return false ;
/* XXX, add composition-exclusion exceptions to Indic shaper. */
if ( a = = 0x09AF & & b = = 0x09BC ) { * ab = 0x09DF ; return true ; }
/* XXX, these belong to the hebrew / default shaper. */
/* Hebrew presentation-form shaping.
* https : //bugzilla.mozilla.org/show_bug.cgi?id=728866 */
// Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
// note that some letters do not have a dagesh presForm encoded
static const hb_codepoint_t sDageshForms [ 0x05EA - 0x05D0 + 1 ] = {
0xFB30 , // ALEF
0xFB31 , // BET
0xFB32 , // GIMEL
0xFB33 , // DALET
0xFB34 , // HE
0xFB35 , // VAV
0xFB36 , // ZAYIN
0 , // HET
0xFB38 , // TET
0xFB39 , // YOD
0xFB3A , // FINAL KAF
0xFB3B , // KAF
0xFB3C , // LAMED
0 , // FINAL MEM
0xFB3E , // MEM
0 , // FINAL NUN
0xFB40 , // NUN
0xFB41 , // SAMEKH
0 , // AYIN
0xFB43 , // FINAL PE
0xFB44 , // PE
0 , // FINAL TSADI
0xFB46 , // TSADI
0xFB47 , // QOF
0xFB48 , // RESH
0xFB49 , // SHIN
0xFB4A // TAV
} ;
hb_bool_t found = unicode - > compose ( a , b , ab ) ;
/* The mask test below is true exactly when b lies in 0x0580..0x05FF, so the
 * special cases are only consulted when the generic composition failed and
 * b is in that range. */
if ( ! found & & ( b & ~ 0x7F ) = = 0x0580 ) {
// special-case Hebrew presentation forms that are excluded from
// standard normalization, but wanted for old fonts
switch ( b ) {
case 0x05B4 : // HIRIQ
if ( a = = 0x05D9 ) { // YOD
* ab = 0xFB1D ;
found = true ;
}
break ;
case 0x05B7 : // patah
if ( a = = 0x05F2 ) { // YIDDISH YOD YOD
* ab = 0xFB1F ;
found = true ;
} else if ( a = = 0x05D0 ) { // ALEF
* ab = 0xFB2E ;
found = true ;
}
break ;
case 0x05B8 : // QAMATS
if ( a = = 0x05D0 ) { // ALEF
* ab = 0xFB2F ;
found = true ;
}
break ;
case 0x05B9 : // HOLAM
if ( a = = 0x05D5 ) { // VAV
* ab = 0xFB4B ;
found = true ;
}
break ;
case 0x05BC : // DAGESH
if ( a > = 0x05D0 & & a < = 0x05EA ) {
* ab = sDageshForms [ a - 0x05D0 ] ;
/* A zero table entry marks a letter with no dagesh form encoded. */
found = ( * ab ! = 0 ) ;
} else if ( a = = 0xFB2A ) { // SHIN WITH SHIN DOT
* ab = 0xFB2C ;
found = true ;
} else if ( a = = 0xFB2B ) { // SHIN WITH SIN DOT
* ab = 0xFB2D ;
found = true ;
}
break ;
case 0x05BF : // RAFE
switch ( a ) {
case 0x05D1 : // BET
* ab = 0xFB4C ;
found = true ;
break ;
case 0x05DB : // KAF
* ab = 0xFB4D ;
found = true ;
break ;
case 0x05E4 : // PE
* ab = 0xFB4E ;
found = true ;
break ;
}
break ;
case 0x05C1 : // SHIN DOT
if ( a = = 0x05E9 ) { // SHIN
* ab = 0xFB2A ;
found = true ;
} else if ( a = = 0xFB49 ) { // SHIN WITH DAGESH
* ab = 0xFB2C ;
found = true ;
}
break ;
case 0x05C2 : // SIN DOT
if ( a = = 0x05E9 ) { // SHIN
* ab = 0xFB2B ;
found = true ;
} else if ( a = = 0xFB49 ) { // SHIN WITH DAGESH
* ab = 0xFB2D ;
found = true ;
}
break ;
}
}
return found ;
}
}
static inline void
static inline void
set_glyph ( hb_glyph_info_t & info , hb_font_t * font )
set_glyph ( hb_glyph_info_t & info , hb_font_t * font )
{
{
@ -283,40 +127,54 @@ skip_char (hb_buffer_t *buffer)
buffer - > skip_glyph ( ) ;
buffer - > skip_glyph ( ) ;
}
}
/* Bundles the state shared by the decomposition helpers, so they receive one
 * context pointer instead of separate font and buffer arguments.  It is
 * filled in by _hb_ot_shape_normalize, which uses the shaper's decompose and
 * compose callbacks when they are set and the Unicode defaults otherwise. */
struct normalize_context_t
{
/* Buffer being normalized; also the source of the unicode funcs. */
hb_buffer_t * buffer ;
/* Font queried with get_glyph() to check whether a code point has a glyph. */
hb_font_t * font ;
/* Callback that splits `ab` into `a` and `b`; returns false if it cannot. */
hb_bool_t ( * decompose ) ( hb_unicode_funcs_t * unicode ,
hb_codepoint_t ab ,
hb_codepoint_t * a ,
hb_codepoint_t * b ) ;
/* Callback that combines `a` and `b` into `ab`; returns false if it cannot. */
hb_bool_t ( * compose ) ( hb_unicode_funcs_t * unicode ,
hb_codepoint_t a ,
hb_codepoint_t b ,
hb_codepoint_t * ab ) ;
} ;
/* Returns 0 if didn't decompose, number of resulting characters otherwise. */
/* Returns 0 if didn't decompose, number of resulting characters otherwise. */
static inline unsigned int
static inline unsigned int
decompose ( hb_font_t * font , hb_buffer_t * buffer , bool shortest , hb_codepoint_t ab )
decompose ( const normalize_context_t * c , bool shortest , hb_codepoint_t ab )
{
{
hb_codepoint_t a , b , a_glyph , b_glyph ;
hb_codepoint_t a , b , a_glyph , b_glyph ;
if ( ! decompose_func ( buffer - > unicode , ab , & a , & b ) | |
if ( ! c - > decompose ( c - > buffer - > unicode , ab , & a , & b ) | |
( b & & ! font - > get_glyph ( b , 0 , & b_glyph ) ) )
( b & & ! c - > font - > get_glyph ( b , 0 , & b_glyph ) ) )
return 0 ;
return 0 ;
bool has_a = font - > get_glyph ( a , 0 , & a_glyph ) ;
bool has_a = c - > font - > get_glyph ( a , 0 , & a_glyph ) ;
if ( shortest & & has_a ) {
if ( shortest & & has_a ) {
/* Output a and b */
/* Output a and b */
output_char ( buffer , a , a_glyph ) ;
output_char ( c - > buffer , a , a_glyph ) ;
if ( likely ( b ) ) {
if ( likely ( b ) ) {
output_char ( buffer , b , b_glyph ) ;
output_char ( c - > buffer , b , b_glyph ) ;
return 2 ;
return 2 ;
}
}
return 1 ;
return 1 ;
}
}
unsigned int ret ;
unsigned int ret ;
if ( ( ret = decompose ( font , buffer , shortest , a ) ) ) {
if ( ( ret = decompose ( c , shortest , a ) ) ) {
if ( b ) {
if ( b ) {
output_char ( buffer , b , b_glyph ) ;
output_char ( c - > buffer , b , b_glyph ) ;
return ret + 1 ;
return ret + 1 ;
}
}
return ret ;
return ret ;
}
}
if ( has_a ) {
if ( has_a ) {
output_char ( buffer , a , a_glyph ) ;
output_char ( c - > buffer , a , a_glyph ) ;
if ( likely ( b ) ) {
if ( likely ( b ) ) {
output_char ( buffer , b , b_glyph ) ;
output_char ( c - > buffer , b , b_glyph ) ;
return 2 ;
return 2 ;
}
}
return 1 ;
return 1 ;
@ -327,41 +185,42 @@ decompose (hb_font_t *font, hb_buffer_t *buffer, bool shortest, hb_codepoint_t a
/* Returns 0 if didn't decompose, number of resulting characters otherwise. */
/* Returns 0 if didn't decompose, number of resulting characters otherwise. */
/* Compatibility-decomposes `u` and, only if the font has a glyph for every
 * resulting code point, writes them all to the output with output_char().
 * Returns 0 when there is no decomposition or when any glyph is missing, so
 * nothing is emitted for a partially supported sequence.
 *
 * NOTE(review): this span is rendered from a diff; duplicated lines are
 * unchanged context and the paired lines are the old (font, buffer) and new
 * (context pointer) forms of the same statement.
 * NOTE(review): the declared return type is bool while the count `len` is
 * returned, so callers would see any non-zero count as true rather than the
 * count itself -- confirm whether that is intended. */
static inline bool
static inline bool
decompose_compatibility ( hb_font_t * font , hb_buffer_t * buffer , hb_codepoint_t u )
decompose_compatibility ( const normalize_context_t * c , hb_codepoint_t u )
{
{
unsigned int len , i ;
unsigned int len , i ;
hb_codepoint_t decomposed [ HB_UNICODE_MAX_DECOMPOSITION_LEN ] ;
hb_codepoint_t decomposed [ HB_UNICODE_MAX_DECOMPOSITION_LEN ] ;
hb_codepoint_t glyphs [ HB_UNICODE_MAX_DECOMPOSITION_LEN ] ;
hb_codepoint_t glyphs [ HB_UNICODE_MAX_DECOMPOSITION_LEN ] ;
len = buffer - > unicode - > decompose_compatibility ( u , decomposed ) ;
len = c - > buffer - > unicode - > decompose_compatibility ( u , decomposed ) ;
if ( ! len )
if ( ! len )
return 0 ;
return 0 ;
/* First pass: look up every glyph before emitting anything. */
for ( i = 0 ; i < len ; i + + )
for ( i = 0 ; i < len ; i + + )
if ( ! font - > get_glyph ( decomposed [ i ] , 0 , & glyphs [ i ] ) )
if ( ! c - > font - > get_glyph ( decomposed [ i ] , 0 , & glyphs [ i ] ) )
return 0 ;
return 0 ;
/* Second pass: all glyphs exist, so output the whole sequence. */
for ( i = 0 ; i < len ; i + + )
for ( i = 0 ; i < len ; i + + )
output_char ( buffer , decomposed [ i ] , glyphs [ i ] ) ;
output_char ( c - > buffer , decomposed [ i ] , glyphs [ i ] ) ;
return len ;
return len ;
}
}
/* Returns true if recomposition may be beneficial. */
/* Returns true if recomposition may be beneficial. */
static inline bool
static inline bool
decompose_current_character ( hb_font_t * font , hb_buffer_t * buffer , bool shortest )
decompose_current_character ( const normalize_context_t * c , bool shortest )
{
{
hb_buffer_t * const buffer = c - > buffer ;
hb_codepoint_t glyph ;
hb_codepoint_t glyph ;
unsigned int len = 1 ;
unsigned int len = 1 ;
/* Kind of a cute waterfall here... */
/* Kind of a cute waterfall here... */
if ( shortest & & font - > get_glyph ( buffer - > cur ( ) . codepoint , 0 , & glyph ) )
if ( shortest & & c - > font - > get_glyph ( buffer - > cur ( ) . codepoint , 0 , & glyph ) )
next_char ( buffer , glyph ) ;
next_char ( buffer , glyph ) ;
else if ( ( len = decompose ( font , buffer , shortest , buffer - > cur ( ) . codepoint ) ) )
else if ( ( len = decompose ( c , shortest , buffer - > cur ( ) . codepoint ) ) )
skip_char ( buffer ) ;
skip_char ( buffer ) ;
else if ( ! shortest & & font - > get_glyph ( buffer - > cur ( ) . codepoint , 0 , & glyph ) )
else if ( ! shortest & & c - > font - > get_glyph ( buffer - > cur ( ) . codepoint , 0 , & glyph ) )
next_char ( buffer , glyph ) ;
next_char ( buffer , glyph ) ;
else if ( ( len = decompose_compatibility ( font , buffer , buffer - > cur ( ) . codepoint ) ) )
else if ( ( len = decompose_compatibility ( c , buffer - > cur ( ) . codepoint ) ) )
skip_char ( buffer ) ;
skip_char ( buffer ) ;
else
else
next_char ( buffer , glyph ) ; /* glyph is initialized in earlier branches. */
next_char ( buffer , glyph ) ; /* glyph is initialized in earlier branches. */
@ -374,49 +233,51 @@ decompose_current_character (hb_font_t *font, hb_buffer_t *buffer, bool shortest
}
}
/* Assigns glyphs to the characters from buffer->idx up to `end` for a cluster
 * that contains a variation selector; no decomposition is attempted here.
 * When the character after the current one is a variation selector, the pair
 * is looked up together with get_glyph() and passed to replace_glyphs(2, 1,
 * ...) -- presumably collapsing the two input items into one output item.
 * Otherwise the current character gets its glyph via set_glyph() and is
 * advanced with next_glyph().
 *
 * NOTE(review): this span is rendered from a diff; duplicated lines are
 * unchanged context and the paired lines are the old (font, buffer) and new
 * (context pointer) forms of the same statement. */
static inline void
static inline void
handle_variation_selector_cluster ( hb_font_t * font , hb_buffer_t * buffer , unsigned int end )
handle_variation_selector_cluster ( const normalize_context_t * c , unsigned int end )
{
{
hb_buffer_t * const buffer = c - > buffer ;
/* Stop one short of `end` because the body peeks at cur(+1). */
for ( ; buffer - > idx < end - 1 ; ) {
for ( ; buffer - > idx < end - 1 ; ) {
if ( unlikely ( buffer - > unicode - > is_variation_selector ( buffer - > cur ( + 1 ) . codepoint ) ) ) {
if ( unlikely ( buffer - > unicode - > is_variation_selector ( buffer - > cur ( + 1 ) . codepoint ) ) ) {
/* The next two lines are some ugly lines... But work. */
/* The next two lines are some ugly lines... But work. */
font - > get_glyph ( buffer - > cur ( ) . codepoint , buffer - > cur ( + 1 ) . codepoint , & buffer - > cur ( ) . glyph_index ( ) ) ;
c - > font - > get_glyph ( buffer - > cur ( ) . codepoint , buffer - > cur ( + 1 ) . codepoint , & buffer - > cur ( ) . glyph_index ( ) ) ;
buffer - > replace_glyphs ( 2 , 1 , & buffer - > cur ( ) . codepoint ) ;
buffer - > replace_glyphs ( 2 , 1 , & buffer - > cur ( ) . codepoint ) ;
} else {
} else {
set_glyph ( buffer - > cur ( ) , font ) ;
set_glyph ( buffer - > cur ( ) , c - > font ) ;
buffer - > next_glyph ( ) ;
buffer - > next_glyph ( ) ;
}
}
}
}
/* Handle the last character, which the loop above does not visit unless it
 * was consumed as a variation selector. */
if ( likely ( buffer - > idx < end ) ) {
if ( likely ( buffer - > idx < end ) ) {
set_glyph ( buffer - > cur ( ) , font ) ;
set_glyph ( buffer - > cur ( ) , c - > font ) ;
buffer - > next_glyph ( ) ;
buffer - > next_glyph ( ) ;
}
}
}
}
/* Returns true if recomposition may be beneficial. */
/* Returns true if recomposition may be beneficial. */
/* Processes a cluster spanning buffer->idx up to `end`.  If any character in
 * that range is a variation selector, the whole cluster is handed to
 * handle_variation_selector_cluster() and false is returned.  Otherwise each
 * character is decomposed with shortest == false and true is returned.
 *
 * NOTE(review): this span is rendered from a diff; duplicated lines are
 * unchanged context and the paired lines are the old (font, buffer) and new
 * (context pointer) forms of the same statement. */
static inline bool
static inline bool
decompose_multi_char_cluster ( hb_font_t * font , hb_buffer_t * buffer , unsigned int end )
decompose_multi_char_cluster ( const normalize_context_t * c , unsigned int end )
{
{
hb_buffer_t * const buffer = c - > buffer ;
/* TODO Currently if there's a variation-selector we give-up, it's just too hard. */
/* TODO Currently if there's a variation-selector we give-up, it's just too hard. */
for ( unsigned int i = buffer - > idx ; i < end ; i + + )
for ( unsigned int i = buffer - > idx ; i < end ; i + + )
if ( unlikely ( buffer - > unicode - > is_variation_selector ( buffer - > info [ i ] . codepoint ) ) ) {
if ( unlikely ( buffer - > unicode - > is_variation_selector ( buffer - > info [ i ] . codepoint ) ) ) {
handle_variation_selector_cluster ( font , buffer , end ) ;
handle_variation_selector_cluster ( c , end ) ;
return false ;
return false ;
}
}
while ( buffer - > idx < end )
while ( buffer - > idx < end )
decompose_current_character ( font , buffer , false ) ;
decompose_current_character ( c , false ) ;
/* We can be smarter here and only return true if there are at least two ccc!=0 marks.
/* We can be smarter here and only return true if there are at least two ccc!=0 marks.
* But does not matter . */
* But does not matter . */
return true ;
return true ;
}
}
/* Dispatches one cluster ending at `end`: a single-character cluster
 * (idx + 1 == end) goes to decompose_current_character() with the
 * short_circuit flag; anything longer goes to decompose_multi_char_cluster(),
 * which does not take the flag.  Returns the callee's result.
 *
 * NOTE(review): this span is rendered from a diff; duplicated lines are
 * unchanged context and the paired lines are the old (font, buffer) and new
 * (context pointer) forms of the same statement. */
static inline bool
static inline bool
decompose_cluster ( hb_font_t * font , hb_buffer_t * buffer , bool short_circuit , unsigned int end )
decompose_cluster ( const normalize_context_t * c , bool short_circuit , unsigned int end )
{
{
if ( likely ( buffer - > idx + 1 = = end ) )
if ( likely ( c - > buffer - > idx + 1 = = end ) )
return decompose_current_character ( font , buffer , short_circuit ) ;
return decompose_current_character ( c , short_circuit ) ;
else
else
return decompose_multi_char_cluster ( font , buffer , end ) ;
return decompose_multi_char_cluster ( c , end ) ;
}
}
@ -431,9 +292,20 @@ compare_combining_class (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
void
void
_hb_ot_shape_normalize ( hb_font_t * font , hb_buffer_t * buffer ,
_hb_ot_shape_normalize ( const hb_ot_complex_shaper_t * shaper ,
hb_ot_shape_normalization_mode_t mode )
hb_buffer_t * buffer ,
hb_font_t * font )
{
{
hb_ot_shape_normalization_mode_t mode = shaper - > normalization_preference ?
shaper - > normalization_preference ( & buffer - > props ) :
HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT ;
const normalize_context_t c = {
buffer ,
font ,
shaper - > decompose ? shaper - > decompose : decompose_unicode ,
shaper - > compose ? shaper - > compose : compose_unicode
} ;
bool short_circuit = mode ! = HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED & &
bool short_circuit = mode ! = HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED & &
mode ! = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT ;
mode ! = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT ;
bool can_use_recompose = false ;
bool can_use_recompose = false ;
@ -457,7 +329,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
if ( buffer - > cur ( ) . cluster ! = buffer - > info [ end ] . cluster )
if ( buffer - > cur ( ) . cluster ! = buffer - > info [ end ] . cluster )
break ;
break ;
can_use_recompose = decompose_cluster ( font , buffer , short_circuit , end ) | | can_use_recompose ;
can_use_recompose = decompose_cluster ( & c , short_circuit , end ) | | can_use_recompose ;
}
}
buffer - > swap_buffers ( ) ;
buffer - > swap_buffers ( ) ;
@ -517,7 +389,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
( starter = = buffer - > out_len - 1 | |
( starter = = buffer - > out_len - 1 | |
_hb_glyph_info_get_modified_combining_class ( & buffer - > prev ( ) ) < _hb_glyph_info_get_modified_combining_class ( & buffer - > cur ( ) ) ) & &
_hb_glyph_info_get_modified_combining_class ( & buffer - > prev ( ) ) < _hb_glyph_info_get_modified_combining_class ( & buffer - > cur ( ) ) ) & &
/* And compose. */
/* And compose. */
compose_func ( buffer - > unicode ,
c . c ompose ( buffer - > unicode ,
buffer - > out_info [ starter ] . codepoint ,
buffer - > out_info [ starter ] . codepoint ,
buffer - > cur ( ) . codepoint ,
buffer - > cur ( ) . codepoint ,
& composed ) & &
& composed ) & &