Update the bundled UCDN sources and the HarfBuzz script translation table.

What the hunks below do:
  * src/hb-ucdn.cc, src/hb-ucdn/ucdn.h: add four script codes (Masaram Gondi,
    Nushu, Soyombo, Zanabazar Square; UCDN_SCRIPT_* 138-141, appended to
    ucdn_script_translate[] in the same order) and three line-break classes
    (ZWJ, EB, EM; UCDN_LINEBREAK_CLASS_* 40-42).
  * src/hb-ucdn/Makefile.sources, ucdn.c: the data header is now ucdn_db.h
    instead of unicodedata_db.h.
  * ucdn.c: the per-record `mirrored` field is removed. ucdn_get_mirrored()
    now reports whether ucdn_mirror() maps the code point to a different one,
    and ucdn_mirror() always searches mirror_pairs.
  * ucdn.c: get_comp_index() takes an explicit table length and uses
    bsearch() with compare_reindex() instead of a linear scan that stopped at
    an entry whose start is 0.
  * ucdn.c: bsearch() results are cast explicitly to the target pointer type.
  * ucdn.c: hangul_pair_compose() validates both operands per branch.

Review fixes folded into the '+' lines (hunk headers adjusted to match):
  1. decode_utf16(): only 0xd800..0xdbff is treated as the first unit of a
     surrogate pair; the previous bound (> 0xdc00) also caught 0xdc00, which
     is a low surrogate.
  2. hangul_pair_compose(): the LV,T branch additionally requires that `a` is
     an LV syllable ((a - SBASE) % TCOUNT == 0) and that b > TBASE, so an
     already complete LVT syllable is not recombined and b == TBASE is not
     silently dropped.
  3. compare_reindex() / get_comp_index(): keep the const qualifier on the
     comparator arguments and on the bsearch() result.

Note: the index lines are kept from the original patch, so the post-image
hash recorded for ucdn.c does not reflect the review fixes.

diff --git a/src/hb-ucdn.cc b/src/hb-ucdn.cc
index a884e3ffd..d8404b294 100644
--- a/src/hb-ucdn.cc
+++ b/src/hb-ucdn.cc
@@ -160,6 +160,10 @@ static const hb_script_t ucdn_script_translate[] =
   HB_SCRIPT_NEWA,
   HB_SCRIPT_OSAGE,
   HB_SCRIPT_TANGUT,
+  HB_SCRIPT_MASARAM_GONDI,
+  HB_SCRIPT_NUSHU,
+  HB_SCRIPT_SOYOMBO,
+  HB_SCRIPT_ZANABAZAR_SQUARE,
 };
 
 static hb_unicode_combining_class_t
diff --git a/src/hb-ucdn/Makefile.sources b/src/hb-ucdn/Makefile.sources
index 52778d46c..cb823b605 100644
--- a/src/hb-ucdn/Makefile.sources
+++ b/src/hb-ucdn/Makefile.sources
@@ -3,5 +3,5 @@ NULL =
 LIBHB_UCDN_sources = \
 	ucdn.h \
 	ucdn.c \
-	unicodedata_db.h \
+	ucdn_db.h \
 	$(NULL)
diff --git a/src/hb-ucdn/ucdn.c b/src/hb-ucdn/ucdn.c
index f4e9be17c..30747fea2 100644
--- a/src/hb-ucdn/ucdn.c
+++ b/src/hb-ucdn/ucdn.c
@@ -23,7 +23,6 @@ typedef struct {
     unsigned char category;
     unsigned char combining;
     unsigned char bidi_class;
-    unsigned char mirrored;
     unsigned char east_asian_width;
     unsigned char script;
     unsigned char linebreak_class;
@@ -43,7 +42,7 @@ typedef struct {
     short count, index;
 } Reindex;
 
-#include "unicodedata_db.h"
+#include "ucdn_db.h"
 
 /* constants required for Hangul (de)composition */
 #define SBASE 0xAC00
@@ -91,20 +90,34 @@ static const unsigned short *get_decomp_record(uint32_t code)
     return &decomp_data[index];
 }
 
-static int get_comp_index(uint32_t code, const Reindex *idx)
+/* bsearch() comparator: a is the key (code point in .start), b a table
+ * entry; equal when the key lies in [b.start, b.start + b.count]. */
+static int compare_reindex(const void *a, const void *b)
 {
-    int i;
-
-    for (i = 0; idx[i].start; i++) {
-        const Reindex *cur = &idx[i];
-        if (code < cur->start)
-            return -1;
-        if (code <= cur->start + cur->count) {
-            return cur->index + (code - cur->start);
-        }
-    }
+    const Reindex *ra = (const Reindex *)a;
+    const Reindex *rb = (const Reindex *)b;
 
-    return -1;
+    if (ra->start < rb->start)
+        return -1;
+    else if (ra->start > (rb->start + rb->count))
+        return 1;
+    else
+        return 0;
+}
+
+/* Look code up in the len-entry range table idx (assumed sorted by start,
+ * as bsearch() requires); returns the remapped index, or -1 if absent. */
+static int get_comp_index(uint32_t code, const Reindex *idx, size_t len)
+{
+    const Reindex *res;
+    Reindex r = {0, 0, 0};
+    r.start = code;
+    res = (const Reindex *) bsearch(&r, idx, len, sizeof(Reindex), compare_reindex);
+
+    if (res != NULL)
+        return res->index + (code - res->start);
+    else
+        return -1;
 }
 
 static int compare_mp(const void *a, const void *b)
@@ -127,8 +140,8 @@ static BracketPair *search_bp(uint32_t code)
     BracketPair *res;
 
     bp.from = code;
-    res = bsearch(&bp, bracket_pairs, BIDI_BRACKET_LEN, sizeof(BracketPair),
-            compare_bp);
+    res = (BracketPair *) bsearch(&bp, bracket_pairs, BIDI_BRACKET_LEN,
+                                  sizeof(BracketPair), compare_bp);
     return res;
 }
 
@@ -154,23 +167,21 @@ static int hangul_pair_decompose(uint32_t code, uint32_t *a, uint32_t *b)
 
 static int hangul_pair_compose(uint32_t *code, uint32_t a, uint32_t b)
 {
-    if (b < VBASE || b >= (TBASE + TCOUNT))
-        return 0;
-
-    if ((a < LBASE || a >= (LBASE + LCOUNT))
-            && (a < SBASE || a >= (SBASE + SCOUNT)))
-        return 0;
-
-    if (a >= SBASE) {
+    /* Only a syllable with no trailing part yet may take one; b == TBASE
+     * would add an offset of 0 and silently drop b, so it is excluded. */
+    if (a >= SBASE && a < (SBASE + SCOUNT) && (a - SBASE) % TCOUNT == 0 &&
+        b > TBASE && b < (TBASE + TCOUNT)) {
         /* LV,T */
         *code = a + (b - TBASE);
         return 3;
-    } else {
+    } else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT)) {
         /* L,V */
         int li = a - LBASE;
         int vi = b - VBASE;
         *code = SBASE + li * NCOUNT + vi * TCOUNT;
         return 2;
+    } else {
+        return 0;
     }
 }
 
@@ -178,7 +189,8 @@ static uint32_t decode_utf16(const unsigned short **code_ptr)
 {
     const unsigned short *code = *code_ptr;
 
-    if ((code[0] & 0xd800) != 0xd800) {
+    /* Only a high surrogate (0xd800..0xdbff) starts a two-unit sequence. */
+    if (code[0] < 0xd800 || code[0] > 0xdbff) {
         *code_ptr += 1;
         return (uint32_t)code[0];
     } else {
@@ -215,7 +227,7 @@ int ucdn_get_bidi_class(uint32_t code)
 
 int ucdn_get_mirrored(uint32_t code)
 {
-    return get_ucd_record(code)->mirrored;
+    return ucdn_mirror(code) != code;
 }
 
 int ucdn_get_script(uint32_t code)
@@ -264,12 +276,9 @@ uint32_t ucdn_mirror(uint32_t code)
     MirrorPair mp = {0};
     MirrorPair *res;
 
-    if (get_ucd_record(code)->mirrored == 0)
-        return code;
-
     mp.from = code;
-    res = bsearch(&mp, mirror_pairs, BIDI_MIRROR_LEN, sizeof(MirrorPair),
-            compare_mp);
+    res = (MirrorPair *) bsearch(&mp, mirror_pairs, BIDI_MIRROR_LEN,
+                                 sizeof(MirrorPair), compare_mp);
 
     if (res == NULL)
         return code;
@@ -326,8 +335,8 @@ int ucdn_compose(uint32_t *code, uint32_t a, uint32_t b)
     if (hangul_pair_compose(code, a, b))
         return 1;
 
-    l = get_comp_index(a, nfc_first);
-    r = get_comp_index(b, nfc_last);
+    l = get_comp_index(a, nfc_first, sizeof(nfc_first) / sizeof(Reindex));
+    r = get_comp_index(b, nfc_last, sizeof(nfc_last) / sizeof(Reindex));
     if (l < 0 || r < 0)
         return 0;
 
diff --git a/src/hb-ucdn/ucdn.h b/src/hb-ucdn/ucdn.h
index f694dc5a8..71a1e4b30 100644
--- a/src/hb-ucdn/ucdn.h
+++ b/src/hb-ucdn/ucdn.h
@@ -206,6 +206,10 @@ typedef unsigned __int64 uint64_t;
 #define UCDN_SCRIPT_NEWA 135
 #define UCDN_SCRIPT_OSAGE 136
 #define UCDN_SCRIPT_TANGUT 137
+#define UCDN_SCRIPT_MASARAM_GONDI 138
+#define UCDN_SCRIPT_NUSHU 139
+#define UCDN_SCRIPT_SOYOMBO 140
+#define UCDN_SCRIPT_ZANABAZAR_SQUARE 141
 
 #define UCDN_LINEBREAK_CLASS_OP 0
 #define UCDN_LINEBREAK_CLASS_CL 1
@@ -247,6 +251,9 @@ typedef unsigned __int64 uint64_t;
 #define UCDN_LINEBREAK_CLASS_SG 37
 #define UCDN_LINEBREAK_CLASS_SP 38
 #define UCDN_LINEBREAK_CLASS_XX 39
+#define UCDN_LINEBREAK_CLASS_ZWJ 40
+#define UCDN_LINEBREAK_CLASS_EB 41
+#define UCDN_LINEBREAK_CLASS_EM 42
 
 #define UCDN_GENERAL_CATEGORY_CC 0
 #define UCDN_GENERAL_CATEGORY_CF 1