Merge pull request #3763 from harfbuzz/split_pair_pos

[repacker] Add ability for repacker to pre split PairPosFormat1 subtables.
pull/3771/head
Behdad Esfahbod 2 years ago committed by GitHub
commit 065f1e33c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      src/Makefile.am
  2. 6
      src/Makefile.sources
  3. 3
      src/OT/Layout/GPOS/MarkArray.hh
  4. 2
      src/OT/Layout/GPOS/PairValueRecord.hh
  5. 80
      src/graph/coverage-graph.hh
  6. 64
      src/graph/graph.hh
  7. 27
      src/graph/gsubgpos-context.cc
  8. 67
      src/graph/gsubgpos-context.hh
  9. 175
      src/graph/gsubgpos-graph.hh
  10. 300
      src/graph/pairpos-graph.hh
  11. 14
      src/harfbuzz-subset.cc
  12. 67
      src/hb-repacker.hh
  13. 8
      src/meson.build
  14. 267
      src/test-repacker.cc

@ -411,7 +411,7 @@ test_priority_queue_SOURCES = test-priority-queue.cc hb-static.cc
test_priority_queue_CPPFLAGS = $(HBCFLAGS)
test_priority_queue_LDADD = libharfbuzz.la $(HBLIBS)
test_repacker_SOURCES = test-repacker.cc hb-static.cc graph/gsubgpos-graph.cc
test_repacker_SOURCES = test-repacker.cc hb-static.cc graph/gsubgpos-context.cc
test_repacker_CPPFLAGS = $(HBCFLAGS)
test_repacker_LDADD = libharfbuzz.la libharfbuzz-subset.la $(HBLIBS)

@ -349,7 +349,11 @@ HB_SUBSET_sources = \
hb-repacker.hh \
graph/graph.hh \
graph/gsubgpos-graph.hh \
graph/gsubgpos-graph.cc \
graph/gsubgpos-context.hh \
graph/gsubgpos-context.cc \
graph/pairpos-graph.hh \
graph/coverage-graph.hh \
graph/pairpos-graph.hh \
graph/serialize.hh \
$(NULL)

@ -97,7 +97,8 @@ struct MarkArray : Array16Of<MarkRecord> /* Array of MarkRecords--in Cove
}
};
static void Markclass_closure_and_remap_indexes (const Coverage &mark_coverage,
HB_INTERNAL inline
void Markclass_closure_and_remap_indexes (const Coverage &mark_coverage,
const MarkArray &mark_array,
const hb_set_t &glyphset,
hb_map_t* klass_mapping /* INOUT */)

@ -1,6 +1,8 @@
#ifndef OT_LAYOUT_GPOS_PAIRVALUERECORD_HH
#define OT_LAYOUT_GPOS_PAIRVALUERECORD_HH
#include "ValueFormat.hh"
namespace OT {
namespace Layout {
namespace GPOS_impl {

@ -0,0 +1,80 @@
/*
* Copyright © 2022 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Garret Rieger
*/
#include "graph.hh"
#include "../OT/Layout/Common/Coverage.hh"
#ifndef GRAPH_COVERAGE_GRAPH_HH
#define GRAPH_COVERAGE_GRAPH_HH
namespace graph {
struct CoverageFormat1 : public OT::Layout::Common::CoverageFormat1_3<SmallTypes>
{
  // Returns true when the vertex's byte range [head, tail) is large enough to
  // hold the fixed header plus every entry of the glyph array.
  bool sanitize (graph_t::vertex_t& vertex) const
  {
    constexpr unsigned header_size = OT::Layout::Common::CoverageFormat1_3<SmallTypes>::min_size;
    const int64_t available = vertex.obj.tail - vertex.obj.head;

    // The array length field can only be trusted once the header is known to fit.
    if (available < header_size)
      return false;

    const auto required = header_size + glyphArray.get_size () - glyphArray.len.get_size ();
    return available >= required;
  }
};
struct CoverageFormat2 : public OT::Layout::Common::CoverageFormat2_4<SmallTypes>
{
  // Returns true when the vertex's byte range [head, tail) is large enough to
  // hold the fixed header plus every range record.
  bool sanitize (graph_t::vertex_t& vertex) const
  {
    constexpr unsigned header_size = OT::Layout::Common::CoverageFormat2_4<SmallTypes>::min_size;
    const int64_t available = vertex.obj.tail - vertex.obj.head;

    // The record count can only be trusted once the header is known to fit.
    if (available < header_size)
      return false;

    const auto required = header_size + rangeRecord.get_size () - rangeRecord.len.get_size ();
    return available >= required;
  }
};
struct Coverage : public OT::Layout::Common::Coverage
{
  // Validates the coverage table stored in the vertex by dispatching on its
  // format. Only formats 1 and 2 are accepted.
  bool sanitize (graph_t::vertex_t& vertex) const
  {
    const int64_t available = vertex.obj.tail - vertex.obj.head;
    if (available < OT::Layout::Common::Coverage::min_size)
      return false;

    // Only read the format field after confirming it lies inside the vertex.
    const unsigned format = u.format;
    if (format == 1)
      return ((CoverageFormat1*)this)->sanitize (vertex);
    if (format == 2)
      return ((CoverageFormat2*)this)->sanitize (vertex);

    // Formats 3 and 4 are not currently supported; neither is anything else.
    return false;
  }
};
}
#endif // GRAPH_COVERAGE_GRAPH_HH

@ -80,6 +80,22 @@ struct graph_t
}
}
void remove_real_link (unsigned child_index, const void* offset)
{
  // Removes the first real link that both targets child_index and is located
  // at the given offset address within this object. Does nothing if no link
  // matches.
  const unsigned link_count = obj.real_links.length;
  for (unsigned idx = 0; idx < link_count; idx++)
  {
    auto& candidate = obj.real_links[idx];
    const bool is_match = candidate.objidx == child_index
                       && (obj.head + candidate.position) == offset;
    if (!is_match)
      continue;

    obj.real_links.remove (idx);
    return;
  }
}
void remap_parents (const hb_vector_t<unsigned>& id_map)
{
for (unsigned i = 0; i < parents.length; i++)
@ -212,6 +228,17 @@ struct graph_t
return vertices_[i].obj;
}
/*
 * Re-runs the shortest distance topological sort, but only when the
 * graph's positions have been flagged as invalid. Otherwise a no-op.
 */
void sort_shortest_distance_if_needed ()
{
  if (positions_invalid)
    sort_shortest_distance ();
}
/*
* Generates a new topological sorting of graph ordered by the shortest
* distance to each node.
@ -264,12 +291,12 @@ struct graph_t
check_success (!queue.in_error ());
check_success (!sorted_graph.in_error ());
if (!check_success (new_id == -1))
print_orphaned_nodes ();
remap_all_obj_indices (id_map, &sorted_graph);
hb_swap (vertices_, sorted_graph);
if (!check_success (new_id == -1))
print_orphaned_nodes ();
}
/*
@ -515,6 +542,37 @@ struct graph_t
}
}
/*
* Moves the child of old_parent_idx pointed to by old_offset to a new
* vertex at the new_offset.
*
* old_offset must point at an offset field inside the old parent's object and
* new_offset at an offset field inside the new parent's object; O is the
* offset type and determines the width of the link that is created.
*
* Both distances and positions are marked invalid, so callers need to re-sort
* the graph before relying on either.
*/
template<typename O>
void move_child (unsigned old_parent_idx,
const O* old_offset,
unsigned new_parent_idx,
const O* new_offset)
{
distance_invalid = true;
positions_invalid = true;
auto& old_v = vertices_[old_parent_idx];
auto& new_v = vertices_[new_parent_idx];
// Resolve which child object the old offset currently links to.
unsigned child_id = index_for_offset (old_parent_idx,
old_offset);
// Add the link on the new parent first. Its position is the byte offset of
// new_offset relative to the start of the new parent's object.
auto* new_link = new_v.obj.real_links.push ();
new_link->width = O::static_size;
new_link->objidx = child_id;
new_link->position = (const char*) new_offset - (const char*) new_v.obj.head;
auto& child = vertices_[child_id];
child.parents.push (new_parent_idx);
// Then detach the child from the old parent, on both sides of the edge.
old_v.remove_real_link (child_id, old_offset);
child.remove_parent (old_parent_idx);
}
/*
* duplicates all nodes in the subgraph reachable from node_idx. Does not re-assign
* links. index_map is updated with mappings from old id to new id. If a duplication has already

@ -28,26 +28,37 @@
namespace graph {
make_extension_context_t::make_extension_context_t (hb_tag_t table_tag_,
graph_t& graph_,
hb_vector_t<char>& buffer_)
gsubgpos_graph_context_t::gsubgpos_graph_context_t (hb_tag_t table_tag_,
graph_t& graph_)
: table_tag (table_tag_),
graph (graph_),
buffer (buffer_),
lookup_list_index (0),
lookups ()
lookups (),
buffers ()
{
if (table_tag_ != HB_OT_TAG_GPOS
&& table_tag_ != HB_OT_TAG_GSUB)
return;
GSTAR* gstar = graph::GSTAR::graph_to_gstar (graph_);
if (gstar) {
gstar->find_lookups (graph, lookups);
lookup_list_index = gstar->get_lookup_list_index (graph_);
}
}
unsigned gsubgpos_graph_context_t::create_node (unsigned size)
{
char* buffer = (char*) hb_calloc (1, size);
if (!buffer)
return -1;
buffers.push (buffer);
unsigned extension_size = OT::ExtensionFormat1<OT::Layout::GSUB_impl::ExtensionSubst>::static_size;
buffer.alloc (num_non_ext_subtables () * extension_size);
return graph.new_node (buffer, buffer + size);
}
unsigned make_extension_context_t::num_non_ext_subtables () {
unsigned gsubgpos_graph_context_t::num_non_ext_subtables () {
unsigned count = 0;
for (auto l : lookups.values ())
{

@ -0,0 +1,67 @@
/*
* Copyright © 2022 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Garret Rieger
*/
#include "graph.hh"
#include "../hb-ot-layout-gsubgpos.hh"
#ifndef GRAPH_GSUBGPOS_CONTEXT_HH
#define GRAPH_GSUBGPOS_CONTEXT_HH
namespace graph {
struct Lookup;
// Shared state used while manipulating a GSUB/GPOS object graph. Also owns
// every heap buffer that was allocated to back newly created or resized
// nodes; those buffers are released when the context is destroyed.
struct gsubgpos_graph_context_t
{
  hb_tag_t table_tag;
  graph_t& graph;
  unsigned lookup_list_index;
  // Maps a lookup's object index to the lookup data.
  hb_hashmap_t<unsigned, graph::Lookup*> lookups;
  // Buffers owned by this context.
  hb_vector_t<char*> buffers;

  HB_INTERNAL gsubgpos_graph_context_t (hb_tag_t table_tag_,
                                        graph_t& graph_);

  ~gsubgpos_graph_context_t ()
  {
    for (unsigned i = 0; i < buffers.length; i++)
      hb_free (buffers[i]);
  }

  // Allocates a zeroed buffer of the given size and adds it to the graph as a
  // new node. Returns the new node's index.
  HB_INTERNAL unsigned create_node (unsigned size);

  // Hands ownership of buffer to this context.
  void add_buffer (char* buffer)
  {
    buffers.push (buffer);
  }

 private:
  HB_INTERNAL unsigned num_non_ext_subtables ();
};
}
#endif // GRAPH_GSUBGPOS_CONTEXT_HH

@ -27,6 +27,8 @@
#include "graph.hh"
#include "../hb-ot-layout-gsubgpos.hh"
#include "../OT/Layout/GSUB/ExtensionSubst.hh"
#include "gsubgpos-context.hh"
#include "pairpos-graph.hh"
#ifndef GRAPH_GSUBGPOS_GRAPH_HH
#define GRAPH_GSUBGPOS_GRAPH_HH
@ -35,27 +37,6 @@ namespace graph {
struct Lookup;
struct make_extension_context_t
{
hb_tag_t table_tag;
graph_t& graph;
hb_vector_t<char>& buffer;
unsigned lookup_list_index;
hb_hashmap_t<unsigned, graph::Lookup*> lookups;
HB_INTERNAL make_extension_context_t (hb_tag_t table_tag_,
graph_t& graph_,
hb_vector_t<char>& buffer_);
bool in_error () const
{
return buffer.in_error ();
}
private:
HB_INTERNAL unsigned num_non_ext_subtables ();
};
template<typename T>
struct ExtensionFormat1 : public OT::ExtensionFormat1<T>
{
@ -65,6 +46,22 @@ struct ExtensionFormat1 : public OT::ExtensionFormat1<T>
this->extensionLookupType = type;
this->extensionOffset = 0;
}
bool sanitize (graph_t::vertex_t& vertex) const
{
  // The vertex must span at least the fixed size of an extension subtable.
  const int64_t available = vertex.obj.tail - vertex.obj.head;
  if (available < OT::ExtensionFormat1<T>::static_size)
    return false;
  return true;
}
unsigned get_lookup_type () const
{
  // Lookup type of the subtable this extension wraps.
  const unsigned wrapped_type = this->extensionLookupType;
  return wrapped_type;
}
unsigned get_subtable_index (graph_t& graph, unsigned this_index) const
{
  // Resolve the object that this extension's offset field links to.
  const auto* offset_field = &this->extensionOffset;
  return graph.index_for_offset (this_index, offset_field);
}
};
struct Lookup : public OT::Lookup
@ -86,7 +83,7 @@ struct Lookup : public OT::Lookup
return lookupType == extension_type (table_tag);
}
bool make_extension (make_extension_context_t& c,
bool make_extension (gsubgpos_graph_context_t& c,
unsigned this_index)
{
unsigned type = lookupType;
@ -115,24 +112,127 @@ struct Lookup : public OT::Lookup
return true;
}
bool make_subtable_extension (make_extension_context_t& c,
unsigned lookup_index,
unsigned subtable_index)
// Splits every PairPos subtable of this lookup (directly, or behind an
// extension subtable) that needs splitting and appends the resulting new
// subtables to the lookup.
//
// this_index is the object index of this lookup in c.graph.
// Returns false if splitting or attaching the new subtables failed.
bool split_subtables_if_needed (gsubgpos_graph_context_t& c,
                                unsigned this_index)
{
  const unsigned pair_type = OT::Layout::GPOS_impl::PosLookupSubTable::Type::Pair;
  unsigned type = lookupType;
  bool is_ext = is_extension (c.table_tag);

  if (c.table_tag != HB_OT_TAG_GPOS)
    return true;

  if (!is_ext && type != pair_type)
    return true;

  hb_vector_t<unsigned> all_new_subtables;
  for (unsigned i = 0; i < subTable.len; i++)
  {
    unsigned subtable_index = c.graph.index_for_offset (this_index, &subTable[i]);
    if (is_ext) {
      // Look through the extension to the subtable it wraps.
      unsigned ext_subtable_index = subtable_index;
      ExtensionFormat1<OT::Layout::GSUB_impl::ExtensionSubst>* extension =
          (ExtensionFormat1<OT::Layout::GSUB_impl::ExtensionSubst>*)
          c.graph.object (ext_subtable_index).head;
      if (!extension->sanitize (c.graph.vertices_[ext_subtable_index]))
        continue;

      subtable_index = extension->get_subtable_index (c.graph, ext_subtable_index);
      if (extension->get_lookup_type () != pair_type)
        continue;
    }

    PairPos* pairPos = (PairPos*) c.graph.object (subtable_index).head;
    if (!pairPos->sanitize (c.graph.vertices_[subtable_index])) continue;

    hb_vector_t<unsigned> new_sub_tables = pairPos->split_subtables (c, subtable_index);
    if (new_sub_tables.in_error ()) return false;
    + new_sub_tables.iter() | hb_sink (all_new_subtables);
  }

  // A failed append would otherwise silently drop freshly split subtables.
  if (all_new_subtables.in_error ())
    return false;

  // New subtables only ever come from PairPos subtables, so always attach
  // them as Pair regardless of what other extension subtables declared.
  if (all_new_subtables)
    return add_sub_tables (c, this_index, pair_type, all_new_subtables);

  return true;
}

// Appends the given subtables to this lookup. The lookup's object is copied
// into a larger buffer (owned by the context) with room for the extra
// offsets, and a link is added for each new subtable. For extension lookups
// each subtable is first wrapped in a new extension subtable of the given
// type.
//
// Returns false on allocation failure or if an extension subtable could not
// be created.
//
// NOTE(review): the whole old table is copied verbatim and the new offsets are
// written directly after the old subtable array; any data the lookup stores
// after that array is not relocated — confirm this is handled by callers.
bool add_sub_tables (gsubgpos_graph_context_t& c,
                     unsigned this_index,
                     unsigned type,
                     hb_vector_t<unsigned>& subtable_indices)
{
  bool is_ext = is_extension (c.table_tag);

  size_t old_size = c.graph.vertices_[this_index].table_size ();
  size_t new_size = old_size
                    + subtable_indices.length * OT::Offset16::static_size;
  char* buffer = (char*) hb_calloc (1, new_size);
  if (!buffer)
    return false;
  c.add_buffer (buffer);

  {
    auto& v = c.graph.vertices_[this_index];
    memcpy (buffer, v.obj.head, old_size);
    v.obj.head = buffer;
    v.obj.tail = buffer + new_size;
  }

  Lookup* new_lookup = (Lookup*) buffer;
  new_lookup->subTable.len = subTable.len + subtable_indices.length;

  unsigned offset_index = subTable.len;
  for (unsigned subtable_id : subtable_indices)
  {
    if (is_ext)
    {
      unsigned ext_id = create_extension_subtable (c, subtable_id, type);
      if (ext_id == (unsigned) -1)
        return false;
      c.graph.vertices_[subtable_id].parents.push (ext_id);
      subtable_id = ext_id;
    }

    // Creating an extension adds a node to the graph, which may move the
    // vertex storage; look the lookup vertex up again instead of holding a
    // reference across iterations.
    auto& v = c.graph.vertices_[this_index];
    auto* link = v.obj.real_links.push ();
    link->width = 2;
    link->objidx = subtable_id;
    link->position = (char*) &new_lookup->subTable[offset_index++] -
                     (char*) new_lookup;
    c.graph.vertices_[subtable_id].parents.push (this_index);
  }

  // The head location of the lookup has changed, invalidating the lookups map entry
  // in the context. Update the map.
  c.lookups.set (this_index, new_lookup);
  return true;
}
unsigned create_extension_subtable (gsubgpos_graph_context_t& c,
unsigned subtable_index,
unsigned type)
{
unsigned extension_size = OT::ExtensionFormat1<OT::Layout::GSUB_impl::ExtensionSubst>::static_size;
unsigned start = c.buffer.length;
unsigned end = start + extension_size;
if (!c.buffer.resize (c.buffer.length + extension_size))
return false;
unsigned ext_index = c.create_node (extension_size);
if (ext_index == (unsigned) -1)
return -1;
auto& ext_vertex = c.graph.vertices_[ext_index];
ExtensionFormat1<OT::Layout::GSUB_impl::ExtensionSubst>* extension =
(ExtensionFormat1<OT::Layout::GSUB_impl::ExtensionSubst>*) &c.buffer[start];
(ExtensionFormat1<OT::Layout::GSUB_impl::ExtensionSubst>*) ext_vertex.obj.head;
extension->reset (type);
unsigned ext_index = c.graph.new_node (&c.buffer.arrayZ[start],
&c.buffer.arrayZ[end]);
if (ext_index == (unsigned) -1) return false;
// Make extension point at the subtable.
auto* l = ext_vertex.obj.real_links.push ();
l->width = 4;
l->objidx = subtable_index;
l->position = 4;
return ext_index;
}
bool make_subtable_extension (gsubgpos_graph_context_t& c,
unsigned lookup_index,
unsigned subtable_index)
{
unsigned type = lookupType;
unsigned ext_index = create_extension_subtable(c, subtable_index, type);
if (ext_index == (unsigned) -1)
return false;
auto& lookup_vertex = c.graph.vertices_[lookup_index];
for (auto& l : lookup_vertex.obj.real_links.writer ())
@ -145,15 +245,6 @@ struct Lookup : public OT::Lookup
// Make extension point at the subtable.
auto& ext_vertex = c.graph.vertices_[ext_index];
auto& subtable_vertex = c.graph.vertices_[subtable_index];
auto* l = ext_vertex.obj.real_links.push ();
l->width = 4;
l->objidx = subtable_index;
l->is_signed = 0;
l->whence = 0;
l->position = 4;
l->bias = 0;
ext_vertex.parents.push (lookup_index);
subtable_vertex.remap_parent (lookup_index, ext_index);

@ -0,0 +1,300 @@
/*
* Copyright © 2022 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Garret Rieger
*/
#ifndef GRAPH_PAIRPOS_GRAPH_HH
#define GRAPH_PAIRPOS_GRAPH_HH
#include "coverage-graph.hh"
#include "../OT/Layout/GPOS/PairPos.hh"
#include "../OT/Layout/GPOS/PosLookupSubTable.hh"
namespace graph {
struct PairPosFormat1 : public OT::Layout::GPOS_impl::PairPosFormat1_3<SmallTypes>
{
bool sanitize (graph_t::vertex_t& vertex) const
{
  // Checks that the vertex's byte range can hold the fixed header plus the
  // full array of PairSet offsets.
  const int64_t available = vertex.obj.tail - vertex.obj.head;
  unsigned header_size = OT::Layout::GPOS_impl::PairPosFormat1_3<SmallTypes>::min_size;

  // The array length is only readable once the header is known to fit.
  if (available < header_size)
    return false;

  const auto required = header_size + pairSet.get_size () - pairSet.len.get_size();
  return available >= required;
}
// Walks the PairSets in order, accumulating the size of the subgraph each one
// adds, and starts a new subtable whenever the running total would exceed
// 64kb. Returns the object ids of the newly created PairPos subtables (empty
// if no split was needed).
hb_vector_t<unsigned> split_subtables (gsubgpos_graph_context_t& c, unsigned this_index)
{
  hb_set_t visited;

  const unsigned coverage_id = c.graph.index_for_offset (this_index, &coverage);
  const unsigned coverage_size = c.graph.vertices_[coverage_id].table_size ();
  const unsigned base_size = OT::Layout::GPOS_impl::PairPosFormat1_3<SmallTypes>::min_size
                             + coverage_size;

  unsigned accumulated = base_size;
  hb_vector_t<unsigned> split_points;
  for (unsigned i = 0; i < pairSet.len; i++)
  {
    unsigned pair_set_index = pair_set_graph_index (c, this_index, i);
    accumulated += c.graph.find_subgraph_size (pair_set_index, visited);
    accumulated += SmallTypes::size; // for PairSet offset.

    // TODO(garretrieger): don't count the size of the largest pairset against the limit, since
    //                     it will be packed last in the order and does not contribute to
    //                     the 64kb limit.

    if (accumulated > (1 << 16))
    {
      split_points.push (i);
      visited.clear (); // Pretend node sharing isn't allowed between splits.

      // PairSet i is the first entry of the new subtable, so its size has to
      // be carried into the new running total. It is re-measured against the
      // cleared visited set because nothing is shared with the previous split.
      accumulated = base_size
                    + c.graph.find_subgraph_size (pair_set_index, visited)
                    + SmallTypes::size; // for PairSet offset.
    }
  }

  return do_split (c, this_index, split_points);
}
private:
// Split this PairPos into two or more PairPos's. split_points defines
// the indices (first index to include in the new table) to split at.
// Returns the object id's of the newly created PairPos subtables. On failure
// the returned vector is empty and marked as in error.
//
// split_points is taken by reference: a by-value parameter would deep copy
// the vector on every call, and a failed copy would look like "no split".
hb_vector_t<unsigned> do_split (gsubgpos_graph_context_t& c,
                                unsigned this_index,
                                const hb_vector_t<unsigned>& split_points)
{
  hb_vector_t<unsigned> new_objects;
  if (!split_points)
    return new_objects;

  for (unsigned i = 0; i < split_points.length; i++)
  {
    // Each new subtable covers [this split point, next split point), with the
    // last one running to the end of the PairSet array.
    unsigned start = split_points[i];
    unsigned end = (i < split_points.length - 1) ? split_points[i + 1] : pairSet.len;
    unsigned id = clone_range (c, this_index, start, end);

    if (id == (unsigned) -1)
    {
      new_objects.reset ();
      new_objects.allocated = -1; // mark error
      return new_objects;
    }
    new_objects.push (id);
  }

  // Everything from the first split point onwards now lives in the clones.
  if (!shrink (c, this_index, split_points[0]))
  {
    new_objects.reset ();
    new_objects.allocated = -1; // mark error
  }

  return new_objects;
}
// Truncates this PairPos in place so that it only keeps its first `count`
// PairSets, and replaces the contents of its coverage object with a coverage
// table restricted to those entries.
//
// Returns true if nothing had to be removed or the coverage was rebuilt, and
// false if the existing coverage fails sanitization or rebuilding it fails.
// Note that the PairSet count and the vertex tail have already been modified
// by the time a failure is reported.
bool shrink (gsubgpos_graph_context_t& c,
unsigned this_index,
unsigned count)
{
DEBUG_MSG (SUBSET_REPACK, nullptr,
" Shrinking PairPosFormat1 (%u) to [0, %u).",
this_index,
count);
unsigned old_count = pairSet.len;
if (count >= old_count)
return true;
// Drop the trailing offsets: lower the array length and pull the object's
// tail back by the size of the removed offsets.
pairSet.len = count;
c.graph.vertices_[this_index].obj.tail -= (old_count - count) * SmallTypes::size;
unsigned coverage_id = c.graph.index_for_offset (this_index, &coverage);
unsigned coverage_size = c.graph.vertices_[coverage_id].table_size ();
auto& coverage_v = c.graph.vertices_[coverage_id];
Coverage* coverage_table = (Coverage*) coverage_v.obj.head;
if (!coverage_table->sanitize (coverage_v))
return false;
// Pair each covered glyph with its position in iteration order and keep only
// the glyphs whose position is below the new count.
auto new_coverage =
+ hb_zip (coverage_table->iter (), hb_range ())
| hb_filter ([&] (hb_pair_t<unsigned, unsigned> p) {
return p.second < count;
})
| hb_map_retains_sorting (hb_first)
;
// The old coverage size is passed as the size limit for the rebuilt table.
return make_coverage (c, new_coverage, coverage_id, coverage_size);
}
// Create a new PairPos including PairSet's from start (inclusive) to end (exclusive).
// The PairSets in that range are moved from this object to the new one, and a
// new coverage object restricted to the same range is attached to it.
// Returns object id of the new object, or (unsigned) -1 on failure.
unsigned clone_range (gsubgpos_graph_context_t& c,
                      unsigned this_index,
                      unsigned start, unsigned end) const
{
  DEBUG_MSG (SUBSET_REPACK, nullptr,
             " Cloning PairPosFormat1 (%u) range [%u, %u).", this_index, start, end);

  unsigned num_pair_sets = end - start;
  unsigned prime_size = OT::Layout::GPOS_impl::PairPosFormat1_3<SmallTypes>::min_size
                        + num_pair_sets * SmallTypes::size;

  unsigned pair_pos_prime_id = c.create_node (prime_size);
  if (pair_pos_prime_id == (unsigned) -1) return -1;

  PairPosFormat1* pair_pos_prime = (PairPosFormat1*) c.graph.object (pair_pos_prime_id).head;
  pair_pos_prime->format = this->format;
  pair_pos_prime->valueFormat[0] = this->valueFormat[0];
  pair_pos_prime->valueFormat[1] = this->valueFormat[1];
  pair_pos_prime->pairSet.len = num_pair_sets;

  for (unsigned i = start; i < end; i++)
  {
    c.graph.move_child<> (this_index,
                          &pairSet[i],
                          pair_pos_prime_id,
                          &pair_pos_prime->pairSet[i - start]);
  }

  unsigned coverage_id = c.graph.index_for_offset (this_index, &coverage);
  unsigned coverage_size = c.graph.vertices_[coverage_id].table_size ();
  auto& coverage_v = c.graph.vertices_[coverage_id];
  Coverage* coverage_table = (Coverage*) coverage_v.obj.head;
  // Failure must be reported with the -1 sentinel: returning false here would
  // be read by the caller as the valid object id 0.
  if (!coverage_table->sanitize (coverage_v))
    return -1;

  // Pair each covered glyph with its position in iteration order and keep only
  // the glyphs whose position falls inside [start, end).
  auto new_coverage =
      + hb_zip (coverage_table->iter (), hb_range ())
      | hb_filter ([&] (hb_pair_t<unsigned, unsigned> p) {
        return p.second >= start && p.second < end;
      })
      | hb_map_retains_sorting (hb_first)
      ;

  unsigned coverage_prime_id = c.graph.new_node (nullptr, nullptr);
  if (coverage_prime_id == (unsigned) -1)
    return -1;

  if (!make_coverage (c, new_coverage, coverage_prime_id, coverage_size))
    return -1;

  // Link the new PairPos to its coverage; the coverage offset sits right
  // after the 2 byte format field.
  auto* coverage_link = c.graph.vertices_[pair_pos_prime_id].obj.real_links.push ();
  coverage_link->width = SmallTypes::size;
  coverage_link->objidx = coverage_prime_id;
  coverage_link->position = 2;
  c.graph.vertices_[coverage_prime_id].parents.push (pair_pos_prime_id);

  return pair_pos_prime_id;
}
// Serializes a coverage table for the given glyphs and makes it the contents
// of the graph object dest_obj. max_size bounds the size of the serialized
// table. The resulting buffer is owned by the context.
// Returns false if allocation or serialization fails; dest_obj is left
// untouched in that case.
template<typename It>
bool make_coverage (gsubgpos_graph_context_t& c,
                    It glyphs,
                    unsigned dest_obj,
                    unsigned max_size) const
{
  char* buffer = (char*) hb_calloc (1, max_size);
  // Never hand a null scratch buffer to the serializer.
  if (!buffer)
    return false;

  hb_serialize_context_t serializer = hb_serialize_context_t (buffer,
                                                              max_size);
  Coverage_serialize (&serializer, glyphs);
  serializer.end_serialize ();
  if (serializer.in_error ())
  {
    hb_free (buffer);
    return false;
  }

  hb_bytes_t coverage_copy = serializer.copy_bytes ();
  hb_free (buffer);
  // Without a copy there is nothing valid to point the object at.
  if (!coverage_copy.arrayZ)
    return false;

  c.add_buffer ((char *) coverage_copy.arrayZ); // Give ownership to the context, it will cleanup the buffer.

  auto& obj = c.graph.vertices_[dest_obj].obj;
  obj.head = (char *) coverage_copy.arrayZ;
  obj.tail = obj.head + coverage_copy.length;

  return true;
}
unsigned pair_set_graph_index (gsubgpos_graph_context_t& c, unsigned this_index, unsigned i) const
{
  // Resolve the graph object that the i'th PairSet offset links to.
  const auto* offset_field = &pairSet[i];
  return c.graph.index_for_offset (this_index, offset_field);
}
};
// Graph view of a format 2 PairPos subtable. Neither validation nor splitting
// is implemented yet: sanitize accepts everything and no splits are produced.
struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallTypes>
{
  bool sanitize (graph_t::vertex_t& vertex) const
  {
    // TODO(garretrieger): implement me!
    return true;
  }

  hb_vector_t<unsigned> split_subtables (gsubgpos_graph_context_t& c, unsigned this_index)
  {
    // TODO(garretrieger): implement me!
    hb_vector_t<unsigned> no_new_subtables;
    return no_new_subtables;
  }
};
// Graph view of a PairPos subtable which dispatches to the format specific
// implementation.
struct PairPos : public OT::Layout::GPOS_impl::PairPos
{
  // Splits the subtable if needed, returning the object ids of any newly
  // created subtables. Only formats 1 and 2 are dispatched.
  hb_vector_t<unsigned> split_subtables (gsubgpos_graph_context_t& c, unsigned this_index)
  {
    const unsigned format = u.format;
    if (format == 1)
      return ((PairPosFormat1*)(&u.format1))->split_subtables (c, this_index);
    if (format == 2)
      return ((PairPosFormat2*)(&u.format2))->split_subtables (c, this_index);

    // Don't split 24bit PairPos's (formats 3 and 4), nor unknown formats.
    return hb_vector_t<unsigned> ();
  }

  // Validates that the vertex holds a PairPos of a supported format.
  bool sanitize (graph_t::vertex_t& vertex) const
  {
    const int64_t available = vertex.obj.tail - vertex.obj.head;
    if (available < u.format.get_size ())
      return false;

    // The format field is only read once it is known to be inside the vertex.
    const unsigned format = u.format;
    if (format == 1)
      return ((PairPosFormat1*)(&u.format1))->sanitize (vertex);
    if (format == 2)
      return ((PairPosFormat2*)(&u.format2))->sanitize (vertex);

    // We don't handle format 3 and 4 here.
    return false;
  }
};
}
#endif // GRAPH_PAIRPOS_GRAPH_HH

@ -1,10 +1,10 @@
#include "graph/gsubgpos-graph.cc"
#include "graph/gsubgpos-context.cc"
#include "hb-aat-layout.cc"
#include "hb-aat-map.cc"
#include "hb-blob.cc"
#include "hb-buffer.cc"
#include "hb-buffer-serialize.cc"
#include "hb-buffer-verify.cc"
#include "hb-buffer.cc"
#include "hb-common.cc"
#include "hb-draw.cc"
#include "hb-face.cc"
@ -23,15 +23,15 @@
#include "hb-ot-meta.cc"
#include "hb-ot-metrics.cc"
#include "hb-ot-name.cc"
#include "hb-ot-shape.cc"
#include "hb-ot-shape-fallback.cc"
#include "hb-ot-shape-normalize.cc"
#include "hb-ot-shape.cc"
#include "hb-ot-shaper-arabic.cc"
#include "hb-ot-shaper-default.cc"
#include "hb-ot-shaper-hangul.cc"
#include "hb-ot-shaper-hebrew.cc"
#include "hb-ot-shaper-indic-table.cc"
#include "hb-ot-shaper-indic.cc"
#include "hb-ot-shaper-indic-table.cc"
#include "hb-ot-shaper-khmer.cc"
#include "hb-ot-shaper-myanmar.cc"
#include "hb-ot-shaper-syllabic.cc"
@ -41,17 +41,17 @@
#include "hb-ot-tag.cc"
#include "hb-ot-var.cc"
#include "hb-set.cc"
#include "hb-shape-plan.cc"
#include "hb-shape.cc"
#include "hb-shape-plan.cc"
#include "hb-shaper.cc"
#include "hb-static.cc"
#include "hb-style.cc"
#include "hb-subset-cff-common.cc"
#include "hb-subset.cc"
#include "hb-subset-cff1.cc"
#include "hb-subset-cff2.cc"
#include "hb-subset-cff-common.cc"
#include "hb-subset-input.cc"
#include "hb-subset-plan.cc"
#include "hb-subset-repacker.cc"
#include "hb-subset.cc"
#include "hb-ucd.cc"
#include "hb-unicode.cc"

@ -46,24 +46,47 @@ struct lookup_size_t
unsigned lookup_index;
size_t size;
unsigned num_subtables;
};
inline int compare_sizes (const void* a, const void* b)
{
lookup_size_t* size_a = (lookup_size_t*) a;
lookup_size_t* size_b = (lookup_size_t*) b;
double subtables_per_byte_a = (double) size_a->num_subtables / (double) size_a->size;
double subtables_per_byte_b = (double) size_b->num_subtables / (double) size_b->size;
static int cmp (const void* a, const void* b)
{
return cmp ((const lookup_size_t*) a,
(const lookup_size_t*) b);
}
static int cmp (const lookup_size_t* a, const lookup_size_t* b)
{
double subtables_per_byte_a = (double) a->num_subtables / (double) a->size;
double subtables_per_byte_b = (double) b->num_subtables / (double) b->size;
if (subtables_per_byte_a == subtables_per_byte_b) {
return size_b->lookup_index - size_a->lookup_index;
return b->lookup_index - a->lookup_index;
}
double cmp = subtables_per_byte_b - subtables_per_byte_a;
if (cmp < 0) return -1;
if (cmp > 0) return 1;
return 0;
}
};
static inline
bool _presplit_subtables_if_needed (graph::gsubgpos_graph_context_t& ext_context)
{
  // Visits every lookup known to the context and asks it to split any
  // subtables that are at risk of overflowing (for the subtable types where
  // splitting is supported). Stops and reports failure as soon as one lookup
  // fails to split.
  //
  // TODO(grieger): de-dup newly added nodes as necessary. Probably just want a full de-dup
  //                pass after this processing is done. Not super necessary as splits are
  //                only done where overflow is likely, so de-dup probably will get undone
  //                later anyways.
  for (unsigned lookup_index : ext_context.lookups.keys ())
  {
    graph::Lookup* current = ext_context.lookups.get(lookup_index);
    const bool split_ok = current->split_subtables_if_needed (ext_context, lookup_index);
    if (!split_ok)
      return false;
  }

  return true;
}
/*
@ -71,28 +94,24 @@ inline int compare_sizes (const void* a, const void* b)
* to extension lookups.
*/
static inline
bool _promote_extensions_if_needed (graph::make_extension_context_t& ext_context)
bool _promote_extensions_if_needed (graph::gsubgpos_graph_context_t& ext_context)
{
// Simple Algorithm (v1, current):
// 1. Calculate how many bytes each non-extension lookup consumes.
// 2. Select up to 64k of those to remain as non-extension (greedy, smallest first).
// 2. Select up to 64k of those to remain as non-extension (greedy, highest subtables per byte first)
// 3. Promote the rest.
//
// Advanced Algorithm (v2, not implemented):
// 1. Perform connected component analysis using lookups as roots.
// 2. Compute size of each connected component.
// 3. Select up to 64k worth of connected components to remain as non-extensions.
// (greedy, smallest first)
// (greedy, highest subtables per byte first)
// 4. Promote the rest.
// TODO(garretrieger): support extension demotion, then consider all lookups. Requires advanced algo.
// TODO(garretrieger): also support extension promotion during iterative resolution phase, then
// we can use a less conservative threshold here.
// TODO(grieger): skip this for the 24 bit case.
// TODO(grieger): sort by # subtables / size instead (high to low). Goal is to get as many subtables
// as possible into space 0 to minimize the number of extension subtables added.
// A fully optimal solution will require a backpack problem dynamic programming type
// solution.
if (!ext_context.lookups) return true;
hb_vector_t<lookup_size_t> lookup_sizes;
@ -109,7 +128,7 @@ bool _promote_extensions_if_needed (graph::make_extension_context_t& ext_context
});
}
lookup_sizes.qsort (compare_sizes);
lookup_sizes.qsort ();
size_t lookup_list_size = ext_context.graph.vertices_[ext_context.lookup_list_index].table_size ();
size_t l2_l3_size = lookup_list_size; // Lookup List + Lookups
@ -285,18 +304,20 @@ hb_resolve_overflows (const T& packed,
return graph::serialize (sorted_graph);
}
hb_vector_t<char> extension_buffer; // Needs to live until serialization is done.
graph::gsubgpos_graph_context_t ext_context (table_tag, sorted_graph);
if ((table_tag == HB_OT_TAG_GPOS
|| table_tag == HB_OT_TAG_GSUB)
&& will_overflow)
{
if (recalculate_extensions)
{
graph::make_extension_context_t ext_context (table_tag, sorted_graph, extension_buffer);
if (ext_context.in_error ())
DEBUG_MSG (SUBSET_REPACK, nullptr, "Splitting subtables if needed.");
if (!_presplit_subtables_if_needed (ext_context)) {
DEBUG_MSG (SUBSET_REPACK, nullptr, "Subtable splitting failed.");
return nullptr;
}
DEBUG_MSG (SUBSET_REPACK, nullptr, "Promoting lookups to extensions if needed.");
if (!_promote_extensions_if_needed (ext_context)) {
DEBUG_MSG (SUBSET_REPACK, nullptr, "Extensions promotion failed.");
return nullptr;
@ -306,6 +327,8 @@ hb_resolve_overflows (const T& packed,
DEBUG_MSG (SUBSET_REPACK, nullptr, "Assigning spaces to 32 bit subgraphs.");
if (sorted_graph.assign_spaces ())
sorted_graph.sort_shortest_distance ();
else
sorted_graph.sort_shortest_distance_if_needed ();
}
unsigned round = 0;

@ -345,7 +345,11 @@ hb_subset_sources = files(
'hb-subset-plan.cc',
'hb-subset-plan.hh',
'hb-subset-repacker.cc',
'graph/gsubgpos-graph.cc',
'graph/gsubgpos-context.cc',
'graph/gsubgpos-context.hh',
'graph/gsubgpos-graph.hh',
'graph/pairpos-graph.hh',
'graph/coverage-graph.hh',
'hb-subset.cc',
'hb-subset.hh',
)
@ -574,7 +578,7 @@ if get_option('tests').enabled()
'test-number': ['test-number.cc', 'hb-number.cc'],
'test-ot-tag': ['hb-ot-tag.cc'],
'test-priority-queue': ['test-priority-queue.cc', 'hb-static.cc'],
'test-repacker': ['test-repacker.cc', 'hb-static.cc', 'graph/gsubgpos-graph.cc'],
'test-repacker': ['test-repacker.cc', 'hb-static.cc', 'graph/gsubgpos-context.cc'],
'test-set': ['test-set.cc', 'hb-static.cc'],
'test-serialize': ['test-serialize.cc', 'hb-static.cc'],
'test-unicode-ranges': ['test-unicode-ranges.cc'],

@ -79,11 +79,125 @@ static void add_wide_offset (unsigned id,
c->add_link (*offset, id);
}
/* Writes the root GSUB/GPOS header object: an 8 byte prefix (version 1.0 with
 * the script list and feature list fields zeroed) followed by an offset to
 * lookup_list. The resulting object id is not returned to the caller. */
static void add_gsubgpos_header (unsigned lookup_list,
                                 hb_serialize_context_t* c)
{
  char table_prefix[] = {
    0, 1, // major
    0, 0, // minor
    0, 0, // script list
    0, 0, // feature list
  };

  start_object (table_prefix, 8, c);
  // The lookup list offset directly follows the fixed prefix.
  add_offset (lookup_list, c);
  c->pop_pack (false);
}
/* Writes a lookup list object: a two byte count ({0, count}) followed by one
 * offset per entry of lookups. Returns the id produced by pop_pack (). */
static unsigned add_lookup_list (const unsigned* lookups,
                                 char count,
                                 hb_serialize_context_t* c)
{
  char count_field[] = {0, count};
  start_object (count_field, 2, c);

  int idx = 0;
  while (idx < count)
  {
    add_offset (lookups[idx], c);
    idx++;
  }

  return c->pop_pack (false);
}
/* Begins a lookup object by writing its 6 byte fixed prefix: lookup type,
 * a zeroed flag field and the subtable count. The object is not popped here;
 * callers append the subtable offsets and then close it (see finish_lookup ()).
 *
 * type:          value stored in the low byte of the type field.
 * num_subtables: value stored in the low byte of the subtable count field.
 */
static void start_lookup (int8_t type,
                          int8_t num_subtables,
                          hb_serialize_context_t* c)
{
  // Cast explicitly: brace-initializing a char from a non-constant int8_t is a
  // narrowing conversion on platforms where plain char is unsigned. This also
  // matches how add_extension () stores its type byte.
  char lookup[] = {
    0, (char) type, // type
    0, 0, // flag
    0, (char) num_subtables, // num subtables
  };

  start_object (lookup, 6, c);
}
/* Closes the object currently being built: appends two zero bytes (named
 * "filter" by the original author; presumably the lookup's trailing mark
 * filtering set field -- confirm) and returns the id produced by pop_pack (). */
static unsigned finish_lookup (hb_serialize_context_t* c)
{
  char trailing_filter[] = {0, 0};
  extend (trailing_filter, 2, c);

  const unsigned lookup_id = c->pop_pack (false);
  return lookup_id;
}
/* Writes an extension object wrapping child: a 4 byte prefix ({0, 1} then
 * {0, type}) followed by a wide offset to child. Returns the id produced by
 * pop_pack (). */
static unsigned add_extension (unsigned child,
                               uint8_t type,
                               hb_serialize_context_t* c)
{
  char ext_prefix[] = {
    0, 1,
    0, (char) type,
  };

  start_object (ext_prefix, 4, c);
  add_wide_offset (child, c);

  const unsigned ext_id = c->pop_pack (false);
  return ext_id;
}
/* Adds a coverage object for the glyphs start..end and returns its id.
 *
 * When end is exactly start + 1 a format 1 table listing both glyphs is
 * written (8 bytes); in every other case a format 2 table holding a single
 * range is written (10 bytes). */
static unsigned add_coverage (char start, char end,
                              hb_serialize_context_t* c)
{
  const bool two_adjacent_glyphs = (end - start == 1);

  if (!two_adjacent_glyphs)
  {
    char range_coverage[] = {
      0, 2, // format
      0, 1, // range count
      0, start, // start
      0, end, // end
      0, 0,
    };
    return add_object (range_coverage, 10, c);
  }

  char glyph_coverage[] = {
    0, 1, // format
    0, 2, // count
    0, start, // glyph[0]
    0, end, // glyph[1]
  };
  return add_object (glyph_coverage, 8, c);
}
/* Writes a pair pos format 1 object and returns the id produced by pop_pack ().
 *
 * Layout written: format ({0, 1}), an offset to coverage, two zeroed two-byte
 * value format fields, the pair set count ({0, count}) and finally one offset
 * per entry of pair_sets. */
static unsigned add_pair_pos_1 (unsigned* pair_sets,
                                char count,
                                unsigned coverage,
                                hb_serialize_context_t* c)
{
  char format_field[] = {
    0, 1
  };
  start_object (format_field, 2, c);
  add_offset (coverage, c);

  char formats_and_count[] = {
    0, 0,
    0, 0,
    0, count,
  };
  extend (formats_and_count, 6, c);

  for (int n = 0; n < count; n++)
    add_offset (pair_sets[n], c);

  return c->pop_pack (false);
}
static void run_resolve_overflow_test (const char* name,
hb_serialize_context_t& overflowing,
hb_serialize_context_t& expected,
unsigned num_iterations = 0,
bool recalculate_extensions = false)
bool recalculate_extensions = false,
hb_tag_t tag = HB_TAG ('G', 'S', 'U', 'B'))
{
printf (">>> Testing overflowing resolution for %s\n",
name);
@ -93,7 +207,7 @@ static void run_resolve_overflow_test (const char* name,
assert (overflowing.offset_overflow ());
hb_blob_t* out = hb_resolve_overflows (overflowing.object_graph (),
HB_TAG ('G', 'S', 'U', 'B'),
tag,
num_iterations,
recalculate_extensions);
assert (out);
@ -103,6 +217,12 @@ static void run_resolve_overflow_test (const char* name,
assert (!expected.offset_overflow ());
hb_bytes_t expected_result = expected.copy_bytes ();
if (result.length != expected_result.length)
{
printf("result.length (%u) != expected.length (%u).\n",
result.length,
expected_result.length);
}
assert (result.length == expected_result.length);
bool equal = true;
@ -884,7 +1004,6 @@ populate_serializer_with_24_and_32_bit_offsets (hb_serialize_context_t* c)
c->end_serialize();
}
static void
populate_serializer_with_extension_promotion (hb_serialize_context_t* c,
int num_extensions = 0)
@ -906,31 +1025,19 @@ populate_serializer_with_extension_promotion (hb_serialize_context_t* c,
i >= (num_lookups - num_extensions) * 2;
i--)
{
char ext[] = {
0, 1,
0, 5
};
unsigned ext_index = i - (num_lookups - num_extensions) * 2; // 5
unsigned subtable_index = num_subtables - ext_index - 1; // 10 - 5 - 1 = 4
start_object (ext, 4, c);
add_wide_offset (subtables[subtable_index], c);
extensions[i] = c->pop_pack (false);
unsigned ext_index = i - (num_lookups - num_extensions) * 2;
unsigned subtable_index = num_subtables - ext_index - 1;
extensions[i] = add_extension (subtables[subtable_index], 5, c);
}
for (int i = num_lookups - 1; i >= 0; i--)
{
bool is_ext = (i >= (num_lookups - num_extensions));
char lookup[] = {
0, is_ext ? (char) 7 : (char) 5, // type
0, 0, // flag
0, 2, // num subtables
};
start_lookup (is_ext ? (char) 7 : (char) 5,
2,
c);
start_object (lookup, 6, c);
if (is_ext) {
add_offset (extensions[i * 2], c);
add_offset (extensions[i * 2 + 1], c);
@ -939,30 +1046,64 @@ populate_serializer_with_extension_promotion (hb_serialize_context_t* c,
add_offset (subtables[i * 2 + 1], c);
}
char filter[] = {0, 0};
extend (filter, 2, c);
lookups[i] = finish_lookup (c);
}
lookups[i] = c->pop_pack (false);
unsigned lookup_list = add_lookup_list (lookups, num_lookups, c);
add_gsubgpos_header (lookup_list, c);
c->end_serialize();
}
/* Fills the serializer with a single lookup made of one 200 byte filler
 * subtable (pair_pos_2) plus num_pair_pos_1 pair pos format 1 subtables, each
 * referencing num_pair_set filler pair sets of 30000 + j bytes. When
 * as_extension is true every subtable is wrapped via add_extension () and the
 * lookup is started with type 9 instead of 2.
 *
 * NOTE(review): several statements below use names that are not declared in
 * this function (num_lookups, lookups) or declare lookup_list twice. They look
 * like leftover lines of a neighbouring function from a diff rendering --
 * confirm against the real file before relying on this listing. */
template<int num_pair_pos_1, int num_pair_set>
static void
populate_serializer_with_large_pair_pos_1 (hb_serialize_context_t* c,
bool as_extension = false)
{
// Source of filler bytes for the large pair set / pair_pos_2 objects.
std::string large_string(60000, 'a');
c->start_serialize<char> ();
constexpr int total_pair_set = num_pair_pos_1 * num_pair_set;
unsigned pair_set[total_pair_set];
unsigned coverage[num_pair_pos_1];
unsigned pair_pos_1[num_pair_pos_1];
// Objects are added from the last subtable to the first.
for (int i = num_pair_pos_1 - 1; i >= 0; i--)
{
// Each pair set gets a distinct length (30000 + j).
for (int j = (i + 1) * num_pair_set - 1; j >= i * num_pair_set; j--)
pair_set[j] = add_object (large_string.c_str (), 30000 + j, c);
// Subtable i covers glyph ids i * num_pair_set .. (i + 1) * num_pair_set - 1.
coverage[i] = add_coverage (i * num_pair_set,
(i + 1) * num_pair_set - 1, c);
pair_pos_1[i] = add_pair_pos_1 (&pair_set[i * num_pair_set],
num_pair_set,
coverage[i],
c);
}
char lookup_count[] = {0, num_lookups};
start_object ((char *) &lookup_count, 2, c);
unsigned pair_pos_2 = add_object (large_string.c_str(), 200, c);
for (int i = 0; i < num_lookups; i++)
add_offset (lookups[i], c);
if (as_extension) {
unsigned lookup_list = c->pop_pack (false);
// Replace each subtable id with the id of an extension object wrapping it.
for (int i = num_pair_pos_1 - 1; i >= 0; i--)
pair_pos_1[i] = add_extension (pair_pos_1[i], 2, c);
pair_pos_2 = add_extension (pair_pos_2, 2, c);
}
char gsub_header[] = {
0, 1, // major
0, 0, // minor
0, 0, // script list
0, 0, // feature list
};
// Lookup type 9 when subtables are wrapped in extensions, otherwise type 2.
start_lookup (as_extension ? 9 : 2, 1 + num_pair_pos_1, c);
start_object (gsub_header, 8, c);
add_offset (lookup_list, c);
c->pop_pack (false);
// pair_pos_2 is linked first, followed by the pair pos format 1 subtables.
add_offset (pair_pos_2, c);
for (int i = 0; i < num_pair_pos_1; i++)
add_offset (pair_pos_1[i], c);
unsigned lookup = finish_lookup (c);
unsigned lookup_list = add_lookup_list (&lookup, 1, c);
add_gsubgpos_header (lookup_list, c);
c->end_serialize();
}
@ -1336,6 +1477,53 @@ static void test_resolve_with_extension_promotion ()
free (expected_buffer);
}
static void test_resolve_with_basic_pair_pos_1_split ()
{
size_t buffer_size = 200000;
void* buffer = malloc (buffer_size);
assert (buffer);
hb_serialize_context_t c (buffer, buffer_size);
populate_serializer_with_large_pair_pos_1 <1, 4>(&c);
void* expected_buffer = malloc (buffer_size);
assert (expected_buffer);
hb_serialize_context_t e (expected_buffer, buffer_size);
populate_serializer_with_large_pair_pos_1 <2, 2>(&e, true);
run_resolve_overflow_test ("test_resolve_with_basic_pair_pos_1_split",
c,
e,
20,
true,
HB_TAG('G', 'P', 'O', 'S'));
free (buffer);
free (expected_buffer);
}
static void test_resolve_with_extension_pair_pos_1_split ()
{
size_t buffer_size = 200000;
void* buffer = malloc (buffer_size);
assert (buffer);
hb_serialize_context_t c (buffer, buffer_size);
populate_serializer_with_large_pair_pos_1 <1, 4>(&c, true);
void* expected_buffer = malloc (buffer_size);
assert (expected_buffer);
hb_serialize_context_t e (expected_buffer, buffer_size);
populate_serializer_with_large_pair_pos_1 <2, 2>(&e, true);
run_resolve_overflow_test ("test_resolve_with_extension_pair_pos_1_split",
c,
e,
20,
true,
HB_TAG('G', 'P', 'O', 'S'));
free (buffer);
free (expected_buffer);
}
static void test_resolve_overflows_via_splitting_spaces ()
{
size_t buffer_size = 160000;
@ -1483,6 +1671,9 @@ main (int argc, char **argv)
test_virtual_link ();
test_shared_node_with_virtual_links ();
test_resolve_with_extension_promotion ();
test_resolve_with_basic_pair_pos_1_split ();
test_resolve_with_extension_pair_pos_1_split ();
// TODO(grieger): test with extensions already mixed in as well.
// TODO(grieger): test two layer ext promotion setup.
// TODO(grieger): test sorting by subtables per byte in ext. promotion.

Loading…
Cancel
Save