Major update to distinguish between unsorted and overlapped segments

for cmap format 4.  For overlapped but sorted segments, which is
previously considered unsorted, we still use binary search.

* src/sfnt/ttcmap.h (struct  TT_CMapRec_): Replace `unsorted' by
`flags'.
(TT_CMAP_FLAG_UNSORTED, TT_CMAP_FLAG_OVERLAPPED): New macros.

* src/sfnt/ttcmap.c (OPT_CMAP4): Removed as it is always defined.
(struct TT_CMap4Rec_): Remove `old_charcode' and `table_length'.
(tt_cmap4_reset): Removed.
(tt_cmap4_init): Updated accordingly.
(tt_cmap4_next): Updated accordingly.
Take care of overlapped segments.
(tt_cmap4_validate): Make sure the subtable is large enough.
Do not check glyph_ids because some fonts set the length wrongly.
Also, when all segments have offset 0, glyph_ids is always invalid. It
does not cause any problem so far only because the check misses
equality.
Distinguish between unsorted and overlapped segments.
(tt_cmap4_char_map_linear, tt_cmap4_char_map_binary): New functions to
do "charcode => glyph index" by linear/binary search.
(tt_cmap4_char_index, tt_cmap4_char_next): Use
tt_cmap4_char_map_linear and tt_cmap4_char_map_binary.
(tt_face_build_cmaps): Treat the return value of validator as flags
for cmap.
david-pic-changes
Wu, Chia-I (吳佳一) 19 years ago
parent 569ec4ee2a
commit a0911343ee
  1. 29
      ChangeLog
  2. 607
      src/sfnt/ttcmap.c
  3. 6
      src/sfnt/ttcmap.h

@ -1,3 +1,32 @@
2005-11-29 Chia-I Wu <b90201047@ntu.edu.tw>
Major update to distinguish between unsorted and overlapped segments
for cmap format 4. For overlapped but sorted segments, which is
previously considered unsorted, we still use binary search.
* src/sfnt/ttcmap.h (struct TT_CMapRec_): Replace `unsorted' by
`flags'.
(TT_CMAP_FLAG_UNSORTED, TT_CMAP_FLAG_OVERLAPPED): New macros.
* src/sfnt/ttcmap.c (OPT_CMAP4): Removed as it is always defined.
(struct TT_CMap4Rec_): Remove `old_charcode' and `table_length'.
(tt_cmap4_reset): Removed.
(tt_cmap4_init): Updated accordingly.
(tt_cmap4_next): Updated accordingly.
Take care of overlapped segments.
(tt_cmap4_validate): Make sure the subtable is large enough.
Do not check glyph_ids because some fonts set the length wrongly.
Also, when all segments have offset 0, glyph_ids is always invalid. It
does not cause any problem so far only because the check misses
equality.
Distinguish between unsorted and overlapped segments.
(tt_cmap4_char_map_linear, tt_cmap4_char_map_binary): New functions to
do "charcode => glyph index" by linear/binary search.
(tt_cmap4_char_index, tt_cmap4_char_next): Use
tt_cmap4_char_map_linear and tt_cmap4_char_map_binary.
(tt_face_build_cmaps): Treat the return value of validator as flags
for cmap.
2005-11-29 Chia-I Wu <b90201047@ntu.edu.tw>
* src/sfnt/ttcmap.c (struct TT_CMap12Rec_, tt_cmap12_init,

@ -625,18 +625,12 @@
#ifdef TT_CONFIG_CMAP_FORMAT_4
#define OPT_CMAP4
#ifdef OPT_CMAP4
typedef struct TT_CMap4Rec_
{
TT_CMapRec cmap;
FT_UInt32 old_charcode; /* old charcode */
FT_UInt32 cur_charcode; /* current charcode */
FT_UInt cur_gindex; /* current glyph index */
FT_UInt table_length;
FT_UInt num_ranges;
FT_UInt cur_range;
FT_UInt cur_start;
@ -656,14 +650,9 @@
cmap->cmap.data = table;
p = table + 2;
cmap->table_length = FT_PEEK_USHORT( p );
p = table + 6;
cmap->num_ranges = FT_PEEK_USHORT( p ) >> 1;
cmap->cur_range = cmap->num_ranges;
cmap->old_charcode = 0xFFFFFFFFUL;
cmap->cur_charcode = 0;
cmap->cur_charcode = 0xFFFFFFFFUL;
cmap->cur_gindex = 0;
return SFNT_Err_Ok;
@ -707,21 +696,27 @@
range_index++;
}
cmap->old_charcode = 0xFFFFFFFFUL;
cmap->cur_charcode = 0;
cmap->cur_gindex = 0;
cmap->cur_range = num_ranges;
return -1;
}
/* find the index of the charcode next to cmap->cur_charcode; */
/* caller should call tt_cmap4_set_range with proper range */
/* before calling this function */
/* */
static void
tt_cmap4_next( TT_CMap4 cmap )
{
FT_UInt charcode = cmap->cur_charcode + 1;
FT_UInt charcode;
if ( cmap->cur_charcode >= 0xFFFFUL )
goto Fail;
charcode = cmap->cur_charcode + 1;
cmap->old_charcode = cmap->cur_charcode;
if ( charcode < cmap->cur_start )
charcode = cmap->cur_start;
for ( ;; )
{
@ -775,26 +770,15 @@
if ( tt_cmap4_set_range( cmap, cmap->cur_range + 1 ) < 0 )
break;
charcode = cmap->cur_start;
if ( charcode < cmap->cur_start )
charcode = cmap->cur_start;
}
}
static void
tt_cmap4_reset( TT_CMap4 cmap,
FT_UInt code,
FT_UInt range_index )
{
if ( tt_cmap4_set_range( cmap, range_index ) >= 0 )
{
cmap->cur_charcode = code;
tt_cmap4_next( cmap );
}
Fail:
cmap->cur_charcode = 0xFFFFFFFFUL;
cmap->cur_gindex = 0;
}
#endif /* OPT_CMAP4 */
FT_CALLBACK_DEF( FT_Error )
tt_cmap4_validate( FT_Byte* table,
@ -807,11 +791,11 @@
FT_Error error = SFNT_Err_Ok;
/* in certain fonts, the `length' field is invalid and goes */
/* out of bound. We try to correct this here... */
if ( length < 16 )
FT_INVALID_TOO_SHORT;
/* in certain fonts, the `length' field is invalid and goes */
/* out of bound. We try to correct this here... */
if ( table + length > valid->limit )
{
if ( valid->level >= FT_VALIDATE_TIGHT )
@ -832,6 +816,9 @@
num_segs /= 2;
if ( length < 16 + num_segs * 2 * 4 )
FT_INVALID_TOO_SHORT;
/* check the search parameters - even though we never use them */
/* */
if ( valid->level >= FT_VALIDATE_PARANOID )
@ -863,9 +850,6 @@
offsets = deltas + num_segs * 2;
glyph_ids = offsets + num_segs * 2;
if ( glyph_ids > table + length )
FT_INVALID_TOO_SHORT;
/* check last segment, its end count must be FFFF */
if ( valid->level >= FT_VALIDATE_PARANOID )
{
@ -874,10 +858,9 @@
FT_INVALID_DATA;
}
/* check that segments are sorted in increasing order and do not */
/* overlap; check also the offsets */
{
FT_UInt start, end, last = 0, offset, n;
FT_UInt start, end, offset, n;
FT_UInt last_start = 0, last_end = 0;
FT_Int delta;
@ -899,12 +882,20 @@
/* unfortunately, some popular Asian fonts present overlapping */
/* ranges in their charmaps */
/* */
if ( n > 0 && start <= last )
if ( start <= last_end && n > 0 )
{
if ( valid->level >= FT_VALIDATE_TIGHT )
FT_INVALID_DATA;
else
error = SFNT_Err_Invalid_CharMap_Format;
{
/* allow overlapping segments, provided their start points */
/* and end points, respectively, are in ascending order. */
/* */
if ( last_start > start || last_end > end )
error |= TT_CMAP_FLAG_UNSORTED;
else
error |= TT_CMAP_FLAG_OVERLAPPED;
}
}
if ( offset && offset != 0xFFFFU )
@ -955,7 +946,8 @@
FT_INVALID_DATA;
}
last = end;
last_start = start;
last_end = end;
}
}
@ -963,327 +955,333 @@
}
FT_CALLBACK_DEF( FT_UInt )
tt_cmap4_char_index( TT_CMap cmap,
FT_UInt32 char_code )
static FT_UInt
tt_cmap4_char_map_linear( TT_CMap cmap,
FT_UInt* pcharcode,
FT_Bool next )
{
FT_Byte* table = cmap->data;
FT_UInt result = 0;
FT_UInt num_segs2, start, end, offset;
FT_Int delta;
FT_UInt i, num_segs;
FT_UInt32 charcode = *pcharcode;
FT_UInt gindex = 0;
FT_Byte* p;
if ( char_code < 0x10000UL )
{
FT_UInt idx, num_segs2;
FT_Int delta;
FT_UInt code = (FT_UInt)char_code;
FT_Byte* p;
p = table + 6;
num_segs2 = FT_PAD_FLOOR( TT_PEEK_USHORT( p ), 2 ); /* be paranoid! */
p = cmap->data + 6;
num_segs2 = FT_PAD_FLOOR( TT_PEEK_USHORT( p ), 2 );
if ( !cmap->unsorted )
{
/* Some fonts have more than 170 segments in their charmaps! */
/* We changed this function to use a more efficient binary */
/* search for improving performance */
FT_UInt min = 0;
FT_UInt max = num_segs2 >> 1;
FT_UInt mid, start, end, offset;
num_segs = num_segs2 >> 1;
if ( !num_segs )
return 0;
while ( min < max )
{
mid = ( min + max ) >> 1;
p = table + 14 + mid * 2;
end = TT_NEXT_USHORT( p );
p += num_segs2;
start = TT_PEEK_USHORT( p);
if ( code < start )
max = mid;
else if ( code > end )
min = mid + 1;
else
{
/* we found the segment */
idx = code;
if ( next )
charcode++;
p += num_segs2;
delta = TT_PEEK_SHORT( p );
/* linear search */
for ( ; charcode <= 0xFFFFU; charcode++ )
{
FT_Byte* q;
p += num_segs2;
offset = TT_PEEK_USHORT( p );
p = cmap->data + 14; /* ends table */
q = cmap->data + 16 + num_segs2; /* starts table */
if ( offset == 0xFFFFU )
goto Exit;
for ( i = 0; i < num_segs; i++ )
{
end = TT_NEXT_USHORT( p );
start = TT_NEXT_USHORT( q );
if ( offset != 0 )
{
p += offset + 2 * ( idx - start );
idx = TT_PEEK_USHORT( p );
}
if ( charcode >= start && charcode <= end )
{
p = q - 2 + num_segs2;
delta = TT_PEEK_SHORT( p );
p += num_segs2;
offset = TT_PEEK_USHORT( p );
if ( idx != 0 )
result = (FT_UInt)( idx + delta ) & 0xFFFFU;
if ( offset == 0xFFFFU )
continue;
goto Exit;
if ( offset )
{
p += offset + ( charcode - start ) * 2;
gindex = TT_PEEK_USHORT( p );
if ( gindex != 0 )
gindex = (FT_UInt)( gindex + delta ) & 0xFFFFU;
}
else
gindex = (FT_UInt)( charcode + delta ) & 0xFFFFU;
break;
}
}
else
{
FT_UInt n;
FT_Byte* q;
if ( !next || gindex )
break;
}
p = table + 14; /* ends table */
q = table + 16 + num_segs2; /* starts table */
if ( next && gindex )
*pcharcode = charcode;
return gindex;
}
for ( n = 0; n < num_segs2; n += 2 )
{
FT_UInt end = TT_NEXT_USHORT( p );
FT_UInt start = TT_NEXT_USHORT( q );
FT_UInt offset;
static FT_UInt
tt_cmap4_char_map_binary( TT_CMap cmap,
FT_UInt* pcharcode,
FT_Bool next )
{
FT_UInt num_segs2, start, end, offset;
FT_Int delta;
FT_UInt max, min, mid, num_segs;
FT_UInt charcode = *pcharcode;
FT_UInt gindex = 0;
FT_Byte* p;
if ( code < start )
break;
p = cmap->data + 6;
num_segs2 = FT_PAD_FLOOR( TT_PEEK_USHORT( p ), 2 );
if ( code <= end )
{
idx = code;
num_segs = num_segs2 >> 1;
p = q + num_segs2 - 2;
delta = TT_PEEK_SHORT( p );
p += num_segs2;
offset = TT_PEEK_USHORT( p );
if ( !num_segs )
return 0;
if ( offset == 0xFFFFU )
goto Exit;
if ( next )
charcode++;
if ( offset != 0 )
{
p += offset + 2 * ( idx - start );
idx = TT_PEEK_USHORT( p );
}
min = 0;
max = num_segs;
if ( idx != 0 )
result = (FT_UInt)( idx + delta ) & 0xFFFFU;
}
}
}
}
/* binary search */
while ( min < max )
{
mid = ( min + max ) >> 1;
p = cmap->data + 14 + mid * 2;
end = TT_PEEK_USHORT( p );
p += 2 + num_segs2;
start = TT_PEEK_USHORT( p );
Exit:
return result;
}
if ( charcode < start )
max = mid;
else if ( charcode > end )
min = mid + 1;
else
{
p += num_segs2;
delta = TT_PEEK_SHORT( p );
p += num_segs2;
offset = TT_PEEK_USHORT( p );
/* find the first segment containing `charcode' */
if ( cmap->flags & TT_CMAP_FLAG_OVERLAPPED )
{
FT_UInt i;
FT_CALLBACK_DEF( FT_UInt )
tt_cmap4_char_next( TT_CMap cmap,
FT_UInt32 *pchar_code )
{
FT_Byte* table = cmap->data;
FT_UInt32 result = 0;
FT_UInt gindex = 0;
FT_UInt32 char_code = *pchar_code;
FT_Byte* p;
FT_UInt code, num_segs2;
/* call the current segment `max' */
max = mid;
if ( char_code >= 0xFFFFUL )
goto Exit;
if ( offset == 0xFFFFU )
mid = max + 1;
#ifdef OPT_CMAP4
{
TT_CMap4 cmap4 = (TT_CMap4)cmap;
/* find in segments before the current segment */
for ( i = max ; i > 0; i-- )
{
FT_UInt prev_end;
if ( char_code == cmap4->old_charcode )
{
result = cmap4->cur_charcode;
gindex = cmap4->cur_gindex;
if ( result != 0 )
{
tt_cmap4_next( cmap4 );
goto Exit;
}
}
}
#endif /* OPT_CMAP4 */
p = cmap->data + 14 + ( i - 1 ) * 2;
prev_end = TT_PEEK_USHORT( p );
code = (FT_UInt)char_code + 1;
p = table + 6;
num_segs2 = FT_PAD_FLOOR( TT_PEEK_USHORT( p ), 2 ); /* ensure even-ness */
if ( charcode > prev_end )
break;
if ( !cmap->unsorted )
{
for (;;)
{
/* Some fonts have more than 170 segments in their charmaps! */
/* We changed this function to use a more efficient binary */
/* search */
FT_UInt offset;
FT_Int delta;
FT_UInt min = 0;
FT_UInt max = num_segs2 >> 1;
FT_UInt mid, start, end;
FT_UInt hi;
/* we begin by finding the segment which end is
closer to our code point */
hi = max + 1;
while ( min < max )
{
mid = ( min + max ) >> 1;
p = table + 14 + mid * 2;
end = TT_PEEK_USHORT( p );
end = prev_end;
p += 2 + num_segs2;
start = TT_PEEK_USHORT( p );
p += num_segs2;
delta = TT_PEEK_SHORT( p );
p += num_segs2;
offset = TT_PEEK_USHORT( p );
if ( end < code )
min = mid + 1;
else
{
hi = mid;
max = mid;
if ( offset != 0xFFFFU )
mid = i - 1;
}
}
if ( hi > max )
{
/* the point is behind the last segment;
we will exit right now */
goto Exit;
}
/* no luck */
if ( mid == max + 1 )
{
if ( i != max )
{
p = cmap->data + 14 + max * 2;
end = TT_PEEK_USHORT( p );
p += 2 + num_segs2;
start = TT_PEEK_USHORT( p );
p += num_segs2;
delta = TT_PEEK_SHORT( p );
p += num_segs2;
offset = TT_PEEK_USHORT( p );
}
p = table + 14 + hi * 2;
end = TT_PEEK_USHORT( p );
mid = max;
p += 2 + num_segs2;
start = TT_PEEK_USHORT( p );
/* find in segments after the current segment */
for ( i = max + 1; i < num_segs; i++ )
{
FT_UInt next_end, next_start;
if ( code < start )
code = start;
p += num_segs2;
delta = TT_PEEK_USHORT( p );
p = cmap->data + 14 + i * 2;
next_end = TT_PEEK_USHORT( p );
p += 2 + num_segs2;
next_start = TT_PEEK_USHORT( p );
p += num_segs2;
offset = TT_PEEK_USHORT( p );
if ( charcode < next_start )
break;
if ( offset != 0 && offset != 0xFFFFU )
{
/* parse the glyph ids array for non-zero index */
p += offset + ( code - start ) * 2;
while ( code <= end )
{
gindex = TT_NEXT_USHORT( p );
if ( gindex != 0 )
end = next_end;
start = next_start;
p += num_segs2;
delta = TT_PEEK_SHORT( p );
p += num_segs2;
offset = TT_PEEK_USHORT( p );
if ( offset != 0xFFFFU )
mid = i;
}
i--;
/* still no luck */
if ( mid == max )
{
gindex = (FT_UInt)( gindex + delta ) & 0xFFFFU;
if ( gindex != 0 )
{
result = code;
#ifdef OPT_CMAP4
tt_cmap4_reset( (TT_CMap4)cmap, code, hi );
#endif
goto Exit;
}
mid = i;
break;
}
code++;
}
/* end, start, delta and offset are for the i'th segment */
if ( mid != i )
{
p = cmap->data + 14 + mid * 2;
end = TT_PEEK_USHORT( p );
p += 2 + num_segs2;
start = TT_PEEK_USHORT( p );
p += num_segs2;
delta = TT_PEEK_SHORT( p );
p += num_segs2;
offset = TT_PEEK_USHORT( p );
}
}
else if ( offset == 0xFFFFU )
else
{
/* an offset of 0xFFFF means an empty segment in certain fonts! */
code = end + 1;
if ( offset == 0xFFFFU )
break;
}
else /* offset == 0 */
if ( offset )
{
gindex = (FT_UInt)( code + delta ) & 0xFFFFU;
p += offset + ( charcode - start ) * 2;
gindex = TT_PEEK_USHORT( p );
if ( gindex != 0 )
{
result = code;
#ifdef OPT_CMAP4
tt_cmap4_reset( (TT_CMap4)cmap, code, hi );
#endif
goto Exit;
}
code++;
gindex = (FT_UInt)( gindex + delta ) & 0xFFFFU;
}
else
gindex = (FT_UInt)( charcode + delta ) & 0xFFFFU;
break;
}
}
else
if ( next )
{
for ( ;; )
{
FT_UInt offset, n;
FT_Int delta;
FT_Byte* q;
TT_CMap4 cmap4 = (TT_CMap4)cmap;
/* if `charcode' is not in any segment, then `mid' is */
/* the segment nearest to `charcode' */
/* */
p = table + 14; /* ends table */
q = table + 16 + num_segs2; /* starts table */
if ( charcode > end )
{
mid++;
if ( mid == num_segs )
return 0;
}
for ( n = 0; n < num_segs2; n += 2 )
if ( tt_cmap4_set_range( cmap4, mid ) )
{
if ( gindex )
*pcharcode = charcode;
}
else
{
cmap4->cur_charcode = charcode;
if ( gindex )
cmap4->cur_gindex = gindex;
else
{
FT_UInt end = TT_NEXT_USHORT( p );
FT_UInt start = TT_NEXT_USHORT( q );
cmap4->cur_charcode = charcode;
tt_cmap4_next( cmap4 );
gindex = cmap4->cur_gindex;
}
if ( gindex )
*pcharcode = cmap4->cur_charcode;
}
}
if ( code < start )
code = start;
return gindex;
}
if ( code <= end )
{
p = q + num_segs2 - 2;
delta = TT_PEEK_SHORT( p );
p += num_segs2;
offset = TT_PEEK_USHORT( p );
if ( offset != 0 && offset != 0xFFFFU )
{
/* parse the glyph ids array for non-0 index */
p += offset + ( code - start ) * 2;
while ( code <= end )
{
gindex = TT_NEXT_USHORT( p );
if ( gindex != 0 )
{
gindex = (FT_UInt)( gindex + delta ) & 0xFFFFU;
if ( gindex != 0 )
break;
}
code++;
}
}
else if ( offset == 0xFFFFU )
{
/* an offset of 0xFFFF means an empty glyph in certain fonts! */
code = end;
break;
}
else
gindex = (FT_UInt)( code + delta ) & 0xFFFFU;
FT_CALLBACK_DEF( FT_UInt )
tt_cmap4_char_index( TT_CMap cmap,
FT_UInt32 char_code )
{
if ( char_code >= 0x10000U )
return 0;
if ( gindex == 0 )
break;
if ( cmap->flags & TT_CMAP_FLAG_UNSORTED )
return tt_cmap4_char_map_linear( cmap, &char_code, 0 );
else
return tt_cmap4_char_map_binary( cmap, &char_code, 0 );
}
result = code;
goto Exit;
}
}
/* loop to next trial charcode */
if ( code >= 0xFFFFU )
break;
code++;
FT_CALLBACK_DEF( FT_UInt )
tt_cmap4_char_next( TT_CMap cmap,
FT_UInt32 *pchar_code )
{
FT_UInt gindex;
if ( *pchar_code >= 0xFFFFU )
return 0;
if ( cmap->flags & TT_CMAP_FLAG_UNSORTED )
gindex = tt_cmap4_char_map_linear( cmap, pchar_code, 1 );
else
{
TT_CMap4 cmap4 = (TT_CMap4)cmap;
/* no need to search */
if ( *pchar_code == cmap4->cur_charcode )
{
tt_cmap4_next( cmap4 );
gindex = cmap4->cur_gindex;
if ( gindex )
*pchar_code = cmap4->cur_charcode;
}
else
gindex = tt_cmap4_char_map_binary( cmap, pchar_code, 1 );
}
Exit:
*pchar_code = result;
return gindex;
}
@ -1305,13 +1303,8 @@
const TT_CMap_ClassRec tt_cmap4_class_rec =
{
{
#ifdef OPT_CMAP4
sizeof ( TT_CMap4Rec ),
(FT_CMap_InitFunc) tt_cmap4_init,
#else
sizeof ( TT_CMapRec ),
(FT_CMap_InitFunc) tt_cmap_init,
#endif
(FT_CMap_DoneFunc) NULL,
(FT_CMap_CharIndexFunc)tt_cmap4_char_index,
(FT_CMap_CharNextFunc) tt_cmap4_char_next
@ -2300,13 +2293,15 @@
if ( valid.validator.error == 0 )
{
(void)FT_CMap_New( (FT_CMap_Class)clazz, cmap, &charmap, NULL );
FT_CMap ttcmap;
/* it is simpler to directly set the `unsorted' flag instead */
/* of adding a parameter to FT_CMap_New */
((TT_CMap)(face->root.charmaps
[face->root.num_charmaps - 1]))->unsorted =
FT_BOOL( error );
if ( !FT_CMap_New( (FT_CMap_Class)clazz, cmap, &charmap, &ttcmap ) )
{
/* it is simpler to directly set `flags' than adding */
/* a parameter to FT_CMap_New */
((TT_CMap)ttcmap)->flags = (FT_Int)error;
}
}
else
{

@ -27,11 +27,15 @@
FT_BEGIN_HEADER
#define TT_CMAP_FLAG_UNSORTED 1
#define TT_CMAP_FLAG_OVERLAPPED 2
typedef struct TT_CMapRec_
{
FT_CMapRec cmap;
FT_Byte* data; /* pointer to in-memory cmap table */
FT_Bool unsorted; /* for format 4 only */
FT_Int flags; /* for format 4 only */
} TT_CMapRec, *TT_CMap;

Loading…
Cancel
Save