[name] Flesh out UTF-X to UTF-X conversion routines

pull/1319/head
Behdad Esfahbod 6 years ago
parent 84811a06a2
commit 5531bd068e
  1. 84
      src/hb-ot-name.cc
  2. 10
      src/hb-ot-name.h
  3. 107
      src/hb-utf.hh

@ -51,6 +51,51 @@ hb_ot_name_get_names (hb_face_t *face,
}
/*
 * Converts the string held in `bytes` from the in_utf_t encoding to the
 * out_utf_t encoding.
 *
 * bytes:     source bytes; the length is truncated down to a whole number of
 *            in_utf_t code units.
 * text_size: IN:  capacity of `text`, in out_utf_t code units, including room
 *                 for the terminating NUL.
 *            OUT: number of code units written, not counting the NUL.
 *            May be NULL or point to 0, in which case nothing is written and
 *            only the length is computed.
 * text:      destination buffer; only touched when *text_size is non-zero.
 *
 * Returns the number of code units written plus out_utf_t::encode_len() of
 * every codepoint that did not fit, i.e. the length the fully converted
 * string would have (excluding the NUL).
 */
template <typename in_utf_t, typename out_utf_t>
static inline unsigned int
hb_ot_name_convert_utf (const hb_bytes_t *bytes,
                        unsigned int *text_size /* IN/OUT */,
                        typename out_utf_t::codepoint_t *text /* OUT */)
{
  unsigned int src_len = bytes->len / sizeof (typename in_utf_t::codepoint_t);
  const typename in_utf_t::codepoint_t *src = (const typename in_utf_t::codepoint_t *) bytes->arrayZ;
  const typename in_utf_t::codepoint_t *src_end = src + src_len;

  typename out_utf_t::codepoint_t *dst = text;

  hb_codepoint_t unicode;
  const hb_codepoint_t replacement = HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT;

  if (text_size && *text_size)
  {
    (*text_size)--; /* Save room for NUL-termination. */
    const typename out_utf_t::codepoint_t *dst_end = text + *text_size;

    while (src < src_end && dst < dst_end)
    {
      const typename in_utf_t::codepoint_t *src_next = in_utf_t::next (src, src_end, &unicode, replacement);
      typename out_utf_t::codepoint_t *dst_next = out_utf_t::encode (dst, dst_end, unicode);
      if (dst_next == dst)
        break; /* Out-of-room. */

      /* Commit both cursors only once the codepoint was fully written, so a
       * codepoint that did not fit is re-read by the length pass below. */
      dst = dst_next;
      src = src_next;
    }

    *text_size = dst - text;
    /* NUL-terminate right after the last unit written.  Writing through
     * `text` here would overwrite the first converted unit instead. */
    *dst = 0;
  }

  /* Accumulate length of rest. */
  unsigned int dst_len = dst - text;
  while (src < src_end)
  {
    src = in_utf_t::next (src, src_end, &unicode, replacement);
    dst_len += out_utf_t::encode_len (unicode);
  }

  return dst_len;
}
template <typename utf_t>
static inline unsigned int
hb_ot_name_get_utf (hb_face_t *face,
@ -63,22 +108,27 @@ hb_ot_name_get_utf (hb_face_t *face,
unsigned int idx = 0; // XXX bsearch and find
hb_bytes_t bytes = name.table->get_name (idx);
unsigned int full_length = 0;
const typename utf_t::codepoint_t *src = (const typename utf_t::codepoint_t *) bytes.arrayZ;
unsigned int src_len = bytes.len / sizeof (typename utf_t::codepoint_t);
if (true /*UTF16-BE*/)
return hb_ot_name_convert_utf<hb_utf16_be_t, utf_t> (&bytes, text_size, text);
if (text_size && *text_size)
if (text_size)
{
*text_size--; /* Leave room for nul-termination. */
/* TODO Switch to walking string and validating. */
memcpy (text,
src,
MIN (*text_size, src_len) * sizeof (typename utf_t::codepoint_t));
if (*text_size)
*text = 0;
*text_size = 0;
}
return 0;
}
/* Walk the rest, accumulate the full length. */
return *text_size; //XXX
/* UTF-8 flavour of the name getter: views the caller's char buffer as
 * hb_utf8_t code units and forwards everything to hb_ot_name_get_utf<>. */
unsigned int
hb_ot_name_get_utf8 (hb_face_t     *face,
                     hb_name_id_t   name_id,
                     hb_language_t  language,
                     unsigned int  *text_size /* IN/OUT */,
                     char          *text      /* OUT */)
{
  hb_utf8_t::codepoint_t *utf8_text = (hb_utf8_t::codepoint_t *) text;
  return hb_ot_name_get_utf<hb_utf8_t> (face, name_id, language, text_size, utf8_text);
}
unsigned int
@ -90,3 +140,13 @@ hb_ot_name_get_utf16 (hb_face_t *face,
{
return hb_ot_name_get_utf<hb_utf16_t> (face, name_id, language, text_size, text);
}
/* UTF-32 flavour of the name getter: forwards all arguments unchanged to
 * hb_ot_name_get_utf<hb_utf32_t> and returns its result. */
unsigned int
hb_ot_name_get_utf32 (hb_face_t     *face,
                      hb_name_id_t   name_id,
                      hb_language_t  language,
                      unsigned int  *text_size /* IN/OUT */,
                      uint32_t      *text      /* OUT */)
{
  unsigned int result = hb_ot_name_get_utf<hb_utf32_t> (face, name_id, language, text_size, text);
  return result;
}

@ -49,14 +49,12 @@ typedef unsigned int hb_name_id_t;
#define HB_NAME_ID_INVALID 0xFFFF
#if 0
HB_EXTERN unsigned int
Xhb_ot_name_get_utf8 (hb_face_t *face,
unsigned int
hb_ot_name_get_utf8 (hb_face_t *face,
hb_name_id_t name_id,
hb_language_t language,
unsigned int *text_size /* IN/OUT */,
char *text /* OUT */);
#endif
HB_EXTERN unsigned int
hb_ot_name_get_utf16 (hb_face_t *face,
@ -65,14 +63,12 @@ hb_ot_name_get_utf16 (hb_face_t *face,
unsigned int *text_size /* IN/OUT */,
uint16_t *text /* OUT */);
#if 0
HB_EXTERN unsigned int
Xhb_ot_name_get_utf32 (hb_face_t *face,
hb_ot_name_get_utf32 (hb_face_t *face,
hb_name_id_t name_id,
hb_language_t language,
unsigned int *text_size /* IN/OUT */,
uint32_t *text /* OUT */);
#endif
typedef struct hb_ot_name_entry_t

@ -127,6 +127,55 @@ struct hb_utf8_t
{
return ::strlen ((const char *) text);
}
/* Number of UTF-8 bytes reported for `unicode`.  Values at or above 0x110000
 * are sized as 3 bytes, which is what U+FFFD itself occupies. */
static inline unsigned int
encode_len (hb_codepoint_t unicode)
{
  unsigned int n_bytes;

  if (unicode >= 0x110000u)
    n_bytes = 3; /* Out of range. */
  else if (unicode >= 0x10000u)
    n_bytes = 4;
  else if (unicode >= 0x0800u)
    n_bytes = 3;
  else if (unicode >= 0x0080u)
    n_bytes = 2;
  else
    n_bytes = 1;

  return n_bytes;
}
/* Writes `unicode` as UTF-8 at `text` and returns the position just past the
 * bytes written.  Surrogates and values above U+10FFFF are written as U+FFFD.
 * Multi-byte sequences are written only if they fit entirely before `end`;
 * otherwise `text` is returned unchanged.  The single-byte case writes without
 * consulting `end`, so the caller must pass text < end. */
static inline codepoint_t *
encode (codepoint_t *text,
        const codepoint_t *end,
        hb_codepoint_t unicode)
{
  const bool is_surrogate = unicode >= 0xD800u && unicode <= 0xDFFFu;
  const bool beyond_unicode = unicode > 0x10FFFFu;
  if (unlikely (is_surrogate || beyond_unicode))
    unicode = 0xFFFDu;

  /* One byte: plain ASCII. */
  if (unicode < 0x0080u)
  {
    *text++ = unicode;
    return text;
  }

  /* Two bytes. */
  if (unicode < 0x0800u)
  {
    if (end - text < 2)
      return text;
    *text++ = 0xC0u | (0x1Fu & (unicode >> 6));
    *text++ = 0x80u | (0x3Fu & (unicode     ));
    return text;
  }

  /* Three bytes. */
  if (unicode < 0x10000u)
  {
    if (end - text < 3)
      return text;
    *text++ = 0xE0u | (0x0Fu & (unicode >> 12));
    *text++ = 0x80u | (0x3Fu & (unicode >>  6));
    *text++ = 0x80u | (0x3Fu & (unicode      ));
    return text;
  }

  /* Four bytes. */
  if (end - text < 4)
    return text;
  *text++ = 0xF0u | (0x07u & (unicode >> 18));
  *text++ = 0x80u | (0x3Fu & (unicode >> 12));
  *text++ = 0x80u | (0x3Fu & (unicode >>  6));
  *text++ = 0x80u | (0x3Fu & (unicode      ));
  return text;
}
};
@ -208,6 +257,30 @@ struct hb_utf16_xe_t
while (*text++) l++;
return l;
}
/* Number of UTF-16 code units encode() emits for `unicode`.
 *
 * Only U+10000..U+10FFFF need a surrogate pair.  Values above U+10FFFF are
 * replaced with U+FFFD by encode(), which is a single unit, so they must be
 * counted as 1 here as well or the reported length would be too large. */
static inline unsigned int
encode_len (hb_codepoint_t unicode)
{
  return (unicode >= 0x10000u && unicode <= 0x10FFFFu) ? 2 : 1;
}
/* Writes `unicode` as UTF-16 at `text` and returns the position just past the
 * units written.  Surrogates and values above U+10FFFF are written as U+FFFD.
 * A surrogate pair is written only if both units fit before `end`; otherwise
 * `text` is returned unchanged.  The single-unit case writes without
 * consulting `end`, so the caller must pass text < end. */
static inline codepoint_t *
encode (codepoint_t *text,
        const codepoint_t *end,
        hb_codepoint_t unicode)
{
  const bool is_surrogate = unicode >= 0xD800u && unicode <= 0xDFFFu;
  const bool beyond_unicode = unicode > 0x10FFFFu;
  if (unlikely (is_surrogate || beyond_unicode))
    unicode = 0xFFFDu;

  /* Single code unit. */
  if (unicode < 0x10000u)
  {
    *text++ = unicode;
    return text;
  }

  /* Surrogate pair; bail out untouched if it does not fit. */
  if (end - text < 2)
    return text;

  unicode -= 0x10000u;
  *text++ = 0xD800u + (unicode >> 10);
  *text++ = 0xDC00u + (unicode & 0x03FFu);
  return text;
}
};
typedef hb_utf16_xe_t<uint16_t> hb_utf16_t;
@ -251,6 +324,23 @@ struct hb_utf32_xe_t
while (*text++) l++;
return l;
}
/* Every codepoint is reported as exactly one UTF-32 code unit; the argument
 * is not inspected. */
static inline unsigned int
encode_len (hb_codepoint_t unicode HB_UNUSED)
{
  const unsigned int units_per_codepoint = 1;
  return units_per_codepoint;
}
/* Stores `unicode` as one code unit at `text` and returns text + 1.  `end` is
 * not consulted, so the caller must guarantee room for one unit.
 * When `validate` is true, surrogates and values above U+10FFFF are stored as
 * U+FFFD.  NOTE(review): `validate` is declared outside this block —
 * presumably a template parameter of the enclosing struct; confirm. */
static inline codepoint_t *
encode (codepoint_t *text,
        const codepoint_t *end HB_UNUSED,
        hb_codepoint_t unicode)
{
  if (validate)
  {
    const bool is_surrogate = unicode >= 0xD800u && unicode <= 0xDFFFu;
    const bool beyond_unicode = unicode > 0x10FFFFu;
    if (unlikely (is_surrogate || beyond_unicode))
      unicode = 0xFFFDu;
  }

  *text = unicode;
  return text + 1;
}
};
typedef hb_utf32_xe_t<uint32_t> hb_utf32_t;
@ -289,6 +379,23 @@ struct hb_latin1_t
while (*text++) l++;
return l;
}
/* Every codepoint is reported as exactly one Latin-1 code unit; the argument
 * is not inspected. */
static inline unsigned int
encode_len (hb_codepoint_t unicode HB_UNUSED)
{
  const unsigned int units_per_codepoint = 1;
  return units_per_codepoint;
}
/* Stores `unicode` as one code unit at `text` and returns text + 1.
 * Codepoints at or above 0x100 cannot be represented and are stored as '?'.
 * `end` is not consulted, so the caller must guarantee room for one unit. */
static inline codepoint_t *
encode (codepoint_t *text,
        const codepoint_t *end HB_UNUSED,
        hb_codepoint_t unicode)
{
  const bool representable = !unlikely (unicode >= 0x0100u);
  if (!representable)
    unicode = '?';

  *text = unicode;
  return text + 1;
}
};
#endif /* HB_UTF_HH */

Loading…
Cancel
Save