|
|
|
@ -29,14 +29,19 @@ |
|
|
|
|
|
|
|
|
|
#include "hb-private.hh" |
|
|
|
|
|
|
|
|
|
/* Primary template for the per-encoding UTF helpers.  It is only declared
 * here; the specializations further down (uint8_t and uint16_t with
 * validate=true, and uint32_t for either value of validate) each provide
 * static next(), prev() and strlen().  validate defaults to true. */
template <typename T, bool validate=true> struct hb_utf_t;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* UTF-8 */ |
|
|
|
|
|
|
|
|
|
static inline const uint8_t * |
|
|
|
|
hb_utf_next (const uint8_t *text, |
|
|
|
|
template <> |
|
|
|
|
struct hb_utf_t<uint8_t, true> |
|
|
|
|
{ |
|
|
|
|
static inline const uint8_t * |
|
|
|
|
next (const uint8_t *text, |
|
|
|
|
const uint8_t *end, |
|
|
|
|
hb_codepoint_t *unicode) |
|
|
|
|
{ |
|
|
|
|
{ |
|
|
|
|
/* Written to only accept well-formed sequences.
|
|
|
|
|
* Based on ideas from ICU's U8_NEXT. |
|
|
|
|
* Generates a -1 for each ill-formed byte. */ |
|
|
|
@ -95,42 +100,45 @@ hb_utf_next (const uint8_t *text, |
|
|
|
|
*unicode = c; |
|
|
|
|
return text; |
|
|
|
|
|
|
|
|
|
error: |
|
|
|
|
error: |
|
|
|
|
*unicode = -1; |
|
|
|
|
return text; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline const uint8_t * |
|
|
|
|
hb_utf_prev (const uint8_t *text, |
|
|
|
|
static inline const uint8_t * |
|
|
|
|
prev (const uint8_t *text, |
|
|
|
|
const uint8_t *start, |
|
|
|
|
hb_codepoint_t *unicode) |
|
|
|
|
{ |
|
|
|
|
{ |
|
|
|
|
const uint8_t *end = text--; |
|
|
|
|
while (start < text && (*text & 0xc0) == 0x80 && end - text < 4) |
|
|
|
|
text--; |
|
|
|
|
|
|
|
|
|
if (likely (hb_utf_next (text, end, unicode) == end)) |
|
|
|
|
if (likely (next (text, end, unicode) == end)) |
|
|
|
|
return text; |
|
|
|
|
|
|
|
|
|
*unicode = -1; |
|
|
|
|
return end - 1; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* Removed side of the diff: free-function spelling.
 *
 * Returns the number of bytes that precede the first zero byte in text,
 * as computed by the C library strlen().  The size_t result is narrowed
 * to unsigned int, which is the declared return type. */
static inline unsigned int
hb_utf_strlen (const uint8_t *text)
{
  return (unsigned int) strlen ((const char *) text);
}
|
|
|
|
/* Added side of the diff: static member of hb_utf_t<uint8_t, true>.
 *
 * Returns the number of bytes that precede the first zero byte in text.
 * The call is qualified as ::strlen so that it reaches the C library
 * function rather than this member of the same name.  The size_t result
 * is narrowed to unsigned int, which is the declared return type. */
static inline unsigned int
strlen (const uint8_t *text)
{
  return (unsigned int) ::strlen ((const char *) text);
}
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* UTF-16 */ |
|
|
|
|
|
|
|
|
|
static inline const uint16_t * |
|
|
|
|
hb_utf_next (const uint16_t *text, |
|
|
|
|
template <> |
|
|
|
|
struct hb_utf_t<uint16_t, true> |
|
|
|
|
{ |
|
|
|
|
static inline const uint16_t * |
|
|
|
|
next (const uint16_t *text, |
|
|
|
|
const uint16_t *end, |
|
|
|
|
hb_codepoint_t *unicode) |
|
|
|
|
{ |
|
|
|
|
{ |
|
|
|
|
hb_codepoint_t c = *text++; |
|
|
|
|
|
|
|
|
|
if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu))) |
|
|
|
@ -155,13 +163,13 @@ hb_utf_next (const uint16_t *text, |
|
|
|
|
/* Lonely / out-of-order surrogate. */ |
|
|
|
|
*unicode = -1; |
|
|
|
|
return text; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline const uint16_t * |
|
|
|
|
hb_utf_prev (const uint16_t *text, |
|
|
|
|
static inline const uint16_t * |
|
|
|
|
prev (const uint16_t *text, |
|
|
|
|
const uint16_t *start, |
|
|
|
|
hb_codepoint_t *unicode) |
|
|
|
|
{ |
|
|
|
|
{ |
|
|
|
|
const uint16_t *end = text--; |
|
|
|
|
hb_codepoint_t c = *text; |
|
|
|
|
|
|
|
|
@ -174,57 +182,62 @@ hb_utf_prev (const uint16_t *text, |
|
|
|
|
if (likely (start < text && hb_in_range (c, 0xDC00u, 0xDFFFu))) |
|
|
|
|
text--; |
|
|
|
|
|
|
|
|
|
if (likely (hb_utf_next (text, end, unicode) == end)) |
|
|
|
|
if (likely (next (text, end, unicode) == end)) |
|
|
|
|
return text; |
|
|
|
|
|
|
|
|
|
*unicode = -1; |
|
|
|
|
return end - 1; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Removed side of the diff: free-function spelling.
 *
 * Returns the number of 16-bit units that precede the first zero unit in
 * text.  Units are counted one by one; surrogate pairs are not combined. */
static inline unsigned int
hb_utf_strlen (const uint16_t *text)
{
  unsigned int l = 0;
  while (*text++) l++;
  return l;
}

/* Added side of the diff: the same count as a static member of
 * hb_utf_t<uint16_t, true>. */
static inline unsigned int
strlen (const uint16_t *text)
{
  unsigned int l = 0;
  while (*text++) l++;
  return l;
}
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* UTF-32 */ |
|
|
|
|
|
|
|
|
|
static inline const uint32_t * |
|
|
|
|
hb_utf_next (const uint32_t *text, |
|
|
|
|
template <bool validate> |
|
|
|
|
struct hb_utf_t<uint32_t, validate> |
|
|
|
|
{ |
|
|
|
|
static inline const uint32_t * |
|
|
|
|
next (const uint32_t *text, |
|
|
|
|
const uint32_t *end HB_UNUSED, |
|
|
|
|
hb_codepoint_t *unicode) |
|
|
|
|
{ |
|
|
|
|
{ |
|
|
|
|
hb_codepoint_t c = *text++; |
|
|
|
|
if (unlikely (c > 0x10FFFFu || hb_in_range (c, 0xD800u, 0xDFFFu))) |
|
|
|
|
if (validate && unlikely (c > 0x10FFFFu || hb_in_range (c, 0xD800u, 0xDFFFu))) |
|
|
|
|
goto error; |
|
|
|
|
*unicode = c; |
|
|
|
|
return text; |
|
|
|
|
|
|
|
|
|
error: |
|
|
|
|
error: |
|
|
|
|
*unicode = -1; |
|
|
|
|
return text; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline const uint32_t * |
|
|
|
|
hb_utf_prev (const uint32_t *text, |
|
|
|
|
static inline const uint32_t * |
|
|
|
|
prev (const uint32_t *text, |
|
|
|
|
const uint32_t *start HB_UNUSED, |
|
|
|
|
hb_codepoint_t *unicode) |
|
|
|
|
{ |
|
|
|
|
hb_utf_next (text - 1, text, unicode); |
|
|
|
|
{ |
|
|
|
|
next (text - 1, text, unicode); |
|
|
|
|
return text - 1; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline unsigned int |
|
|
|
|
hb_utf_strlen (const uint32_t *text) |
|
|
|
|
{ |
|
|
|
|
static inline unsigned int |
|
|
|
|
strlen (const uint32_t *text) |
|
|
|
|
{ |
|
|
|
|
unsigned int l = 0; |
|
|
|
|
while (*text++) l++; |
|
|
|
|
return l; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#endif /* HB_UTF_PRIVATE_HH */ |
|
|
|
|