From d5300241680844f5625f32792f7dd7181ed05f9b Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 21 Jan 2012 19:07:22 -0500 Subject: [PATCH] [util] Make clusters work with char offset instead of UTF-8 offset This means the --features indices also refer to char position instead of byte position now. Same for cluster values reported by hb-shape. Will add an option for byte indices later. --- util/options.cc | 7 +++++-- util/options.hh | 12 ++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/util/options.cc b/util/options.cc index c2d4ec902..c394fd5aa 100644 --- a/util/options.cc +++ b/util/options.cc @@ -404,8 +404,11 @@ shape_options_t::add_options (option_parser_t *parser) " Comma-separated list of font features to apply to text\n" "\n" " Features can be enabled or disabled, either globally or limited to\n" - " specific byte ranges. The format is Python-esque. Here is how it all\n" - " works:\n" + " specific character ranges. The range indices refer to the positions\n" + " between Unicode characters. The position before the first character\n" + " is 0, and the position after the first character is 1, and so on.\n" + "\n" + " The format is Python-esque. Here is how it all works:\n" "\n" " Syntax: Value: Start: End:\n" "\n" diff --git a/util/options.hh b/util/options.hh index 444569e07..d6322cdbe 100644 --- a/util/options.hh +++ b/util/options.hh @@ -153,6 +153,18 @@ struct shape_options_t : option_group_t hb_font_t *font, hb_buffer_t *buffer) { hb_buffer_reset (buffer); hb_buffer_add_utf8 (buffer, text, text_len, 0, text_len); + + /* Reset cluster values to refer to Unicode character index + * instead of UTF-8 index. + * TODO: Add an option for this. */ + unsigned int num_glyphs = hb_buffer_get_length (buffer); + hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, NULL); + for (unsigned int i = 0; i < num_glyphs; i++) + { + info->cluster = i; + info++; + } + setup_buffer (buffer); return hb_shape_full (font, buffer, features, num_features, NULL, shapers); }