From 8f4f47df7c42294c06d6bd4f2d0e1b35c4040eb5 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Wed, 25 Aug 2021 13:34:05 -0700 Subject: [PATCH] [subset] use inverted set for all unicodes. Modify the code that handles input->unicodes to be safe with possibly inverted sets. Also adds --unicodes-= and --unicodes+= flags. --- src/hb-subset-plan.cc | 45 ++++++++++++++++++++++++++----------------- util/hb-subset.cc | 15 ++++++++++++--- 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/src/hb-subset-plan.cc b/src/hb-subset-plan.cc index 9b74ac2ec..4c273a6be 100644 --- a/src/hb-subset-plan.cc +++ b/src/hb-subset-plan.cc @@ -240,22 +240,24 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, { OT::cmap::accelerator_t cmap; cmap.init (plan->source); + constexpr static const int size_threshold = 65000; - for (hb_codepoint_t cp : *unicodes) + if (unicodes->get_population () < size_threshold && glyphs->is_empty ()) { - hb_codepoint_t gid; - if (!cmap.get_nominal_glyph (cp, &gid)) + // This is the fast path if it's anticipated that size of unicodes + // is << than the number of codepoints in the font. 
+ for (hb_codepoint_t cp : *unicodes) { - DEBUG_MSG(SUBSET, nullptr, "Drop U+%04X; no gid", cp); - continue; + hb_codepoint_t gid; + if (!cmap.get_nominal_glyph (cp, &gid)) + { + DEBUG_MSG(SUBSET, nullptr, "Drop U+%04X; no gid", cp); + continue; + } + plan->unicodes->add (cp); + plan->codepoint_to_glyph->set (cp, gid); + plan->_glyphset_gsub->add (gid); } - plan->unicodes->add (cp); - plan->codepoint_to_glyph->set (cp, gid); - plan->_glyphset_gsub->add (gid); - } - - if (glyphs->is_empty ()) - { cmap.fini (); return; } @@ -265,17 +267,27 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, cmap.fini (); for (hb_pair_t cp_gid : - + unicode_glyphid_map.iter () | hb_filter (glyphs, hb_second)) + + unicode_glyphid_map.iter ()) { + if (!unicodes->has (cp_gid.first) && !glyphs->has (cp_gid.second)) + continue; + plan->unicodes->add (cp_gid.first); plan->codepoint_to_glyph->set (cp_gid.first, cp_gid.second); + plan->_glyphset_gsub->add (cp_gid.second); + } + + // Add gids which were requested, but not mapped in cmap + for (hb_codepoint_t gid : glyphs->iter ()) + { + if (gid >= plan->source->get_num_glyphs ()) + break; + plan->_glyphset_gsub->add (gid); } } static void _populate_gids_to_retain (hb_subset_plan_t* plan, - const hb_set_t *unicodes, - const hb_set_t *input_glyphs_to_retain, bool close_over_gsub, bool close_over_gpos, bool close_over_gdef) @@ -292,7 +304,6 @@ _populate_gids_to_retain (hb_subset_plan_t* plan, colr.init (plan->source); plan->_glyphset_gsub->add (0); // Not-def - hb_set_union (plan->_glyphset_gsub, input_glyphs_to_retain); _cmap_closure (plan->source, plan->unicodes, plan->_glyphset_gsub); @@ -477,8 +488,6 @@ hb_subset_plan_create (hb_face_t *face, _populate_unicodes_to_retain (input->unicodes, input->glyphs, plan); _populate_gids_to_retain (plan, - input->unicodes, - input->glyphs, !input->drop_tables->has (HB_OT_TAG_GSUB), !input->drop_tables->has (HB_OT_TAG_GPOS), !input->drop_tables->has (HB_OT_TAG_GDEF)); diff --git a/util/hb-subset.cc 
b/util/hb-subset.cc index 8456ae93a..fc7a156c9 100644 --- a/util/hb-subset.cc +++ b/util/hb-subset.cc @@ -265,20 +265,26 @@ parse_text (const char *name G_GNUC_UNUSED, GError **error G_GNUC_UNUSED) { subset_main_t *subset_main = (subset_main_t *) data; + hb_bool_t is_remove = (name[strlen (name) - 1] == '-'); + hb_set_t *unicodes = hb_subset_input_unicode_set (subset_main->input); if (0 == strcmp (arg, "*")) { - subset_main->add_all_unicodes (); + hb_set_clear (unicodes); + if (!is_remove) + hb_set_invert (unicodes); return true; } - hb_set_t *unicodes = hb_subset_input_unicode_set (subset_main->input); for (gchar *c = (gchar *) arg; *c; c = g_utf8_find_next_char(c, nullptr)) { gunichar cp = g_utf8_get_char(c); - hb_set_add (unicodes, cp); + if (!is_remove) + hb_set_add (unicodes, cp); + else + hb_set_del (unicodes, cp); } return true; } @@ -674,6 +680,9 @@ subset_main_t::add_options () {"text", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_text, "Specify text to include in the subset", "string"}, {"text-file", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_file_for,"Specify file to read text from", "filename"}, {"unicodes", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_unicodes, "Specify Unicode codepoints or ranges to include in the subset", "list of hex numbers/ranges"}, + {"unicodes-", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_unicodes, "Specify Unicode codepoints or ranges to include in the subset", "list of hex numbers/ranges"}, + {"unicodes+", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_unicodes, "Specify Unicode codepoints or ranges to include in the subset", "list of hex numbers/ranges"}, + {"unicodes-file", 0, 0, G_OPTION_ARG_CALLBACK, (gpointer) &parse_file_for,"Specify file to read Unicode codepoints or ranges from", "filename"}, {nullptr} };