commit
e600e5440b
144 changed files with 5196 additions and 1811 deletions
@ -0,0 +1,222 @@ |
||||
<?xml version="1.0"?> |
||||
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN" |
||||
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [ |
||||
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'"> |
||||
<!ENTITY version SYSTEM "version.xml"> |
||||
]> |
||||
<chapter id="getting-started"> |
||||
<title>Getting started with HarfBuzz</title> |
||||
<section> |
||||
<title>An overview of the HarfBuzz shaping API</title> |
||||
<para> |
||||
The core of the HarfBuzz shaping API is the function |
||||
<function>hb_shape()</function>. This function takes a font, a |
||||
buffer containing a string of Unicode codepoints and |
||||
(optionally) a list of font features as its input. It replaces |
||||
the codepoints in the buffer with the corresponding glyphs from |
||||
the font, correctly ordered and positioned, and with any of the |
||||
optional font features applied. |
||||
</para> |
||||
<para> |
||||
In addition to holding the pre-shaping input (the Unicode |
||||
codepoints that comprise the input string) and the post-shaping |
||||
output (the glyphs and positions), a HarfBuzz buffer has several |
||||
properties that affect shaping. The most important are the |
||||
text-flow direction (e.g., left-to-right, right-to-left, |
||||
top-to-bottom, or bottom-to-top), the script tag, and the |
||||
language tag. |
||||
</para> |
||||
|
||||
<para> |
||||
For input string buffers, flags are available to denote when the |
||||
buffer represents the beginning or end of a paragraph, to |
||||
indicate whether or not to visibly render Unicode <literal>Default |
||||
Ignorable</literal> codepoints, and to modify the cluster-merging |
||||
behavior for the buffer. For shaped output buffers, the |
||||
individual X and Y offsets and <literal>advances</literal> |
||||
(the logical dimensions) of each glyph are |
||||
accessible. HarfBuzz also flags glyphs as |
||||
<literal>UNSAFE_TO_BREAK</literal> if breaking the string at |
||||
that glyph (e.g., in a line-breaking or hyphenation process) |
||||
would require re-shaping the text. |
||||
</para> |
||||
|
||||
<para> |
||||
HarfBuzz also provides methods to compare the contents of |
||||
buffers, join buffers, normalize buffer contents, and handle |
||||
invalid codepoints, as well as to determine the state of a |
||||
buffer (e.g., input codepoints or output glyphs). Buffer |
||||
lifecycles are managed and all buffers are reference-counted. |
||||
</para> |
||||
|
||||
<para> |
||||
Although the default <function>hb_shape()</function> function is |
||||
sufficient for most use cases, a variant is also provided that |
||||
lets you specify which of HarfBuzz's shapers to use on a buffer. |
||||
</para> |
||||
|
||||
<para> |
||||
HarfBuzz can read TrueType fonts, TrueType collections, OpenType |
||||
fonts, and OpenType collections. Functions are provided to query |
||||
font objects about metrics, Unicode coverage, available tables and |
||||
features, and variation selectors. Individual glyphs can also be |
||||
queried for metrics, variations, and glyph names. OpenType |
||||
variable fonts are supported, and HarfBuzz allows you to set |
||||
variation-axis coordinates on font objects. |
||||
</para> |
||||
|
||||
<para> |
||||
HarfBuzz provides glue code to integrate with various other |
||||
libraries, including FreeType, GObject, and CoreText. Support |
||||
for integrating with Uniscribe and DirectWrite is experimental |
||||
at present. |
||||
</para> |
||||
</section> |
||||
|
||||
<section> |
||||
<title>Terminology</title> |
||||
<variablelist> |
||||
<varlistentry> |
||||
<term>shaper</term> |
||||
<listitem> |
||||
<para> |
||||
In HarfBuzz, a <emphasis>shaper</emphasis> is a |
||||
handler for a specific script shaping model. HarfBuzz |
||||
implements separate shapers for Indic, Arabic, Thai and |
||||
Lao, Khmer, Myanmar, Tibetan, Hangul, Hebrew, the |
||||
Universal Shaping Engine (USE), and a default shaper for |
||||
non-complex scripts. |
||||
</para> |
||||
</listitem> |
||||
</varlistentry> |
||||
|
||||
<varlistentry> |
||||
<term>cluster</term> |
||||
<listitem> |
||||
<para> |
||||
In text shaping, a <emphasis>cluster</emphasis> is a |
||||
sequence of codepoints that must be handled as an |
||||
indivisible unit. Clusters can include codepoint |
||||
sequences that form a ligature or base-and-mark |
||||
sequences. Tracking and preserving clusters is important |
||||
when shaping operations might separate or reorder |
||||
codepoints. |
||||
</para> |
||||
<para> |
||||
HarfBuzz provides three cluster |
||||
<emphasis>levels</emphasis> that implement different |
||||
approaches to the problem of preserving clusters during |
||||
shaping operations. |
||||
</para> |
||||
</listitem> |
||||
</varlistentry> |
||||
|
||||
|
||||
</variablelist> |
||||
|
||||
</section> |
||||
|
||||
|
||||
<section> |
||||
<title>A simple shaping example</title> |
||||
|
||||
<para> |
||||
Below is the simplest HarfBuzz shaping example possible. |
||||
</para> |
||||
<orderedlist numeration="arabic"> |
||||
<listitem> |
||||
<para> |
||||
Create a buffer and put your text in it. |
||||
</para> |
||||
</listitem> |
||||
</orderedlist> |
||||
<programlisting language="C"> |
||||
#include &lt;hb.h&gt; |
||||
hb_buffer_t *buf; |
||||
buf = hb_buffer_create(); |
||||
hb_buffer_add_utf8(buf, text, -1, 0, -1); |
||||
</programlisting> |
||||
<orderedlist numeration="arabic"> |
||||
<listitem override="2"> |
||||
<para> |
||||
Set the script, language and direction of the buffer. |
||||
</para> |
||||
</listitem> |
||||
</orderedlist> |
||||
<programlisting language="C"> |
||||
hb_buffer_set_direction(buf, HB_DIRECTION_LTR); |
||||
hb_buffer_set_script(buf, HB_SCRIPT_LATIN); |
||||
hb_buffer_set_language(buf, hb_language_from_string("en", -1)); |
||||
</programlisting> |
||||
<orderedlist numeration="arabic"> |
||||
<listitem override="3"> |
||||
<para> |
||||
Create a face and a font, using FreeType for now. |
||||
</para> |
||||
</listitem> |
||||
</orderedlist> |
||||
<programlisting language="C"> |
||||
#include &lt;hb-ft.h&gt; |
||||
FT_New_Face(ft_library, font_path, index, &amp;face); |
||||
hb_font_t *font = hb_ft_font_create(face); |
||||
</programlisting> |
||||
<orderedlist numeration="arabic"> |
||||
<listitem override="4"> |
||||
<para> |
||||
Shape! |
||||
</para> |
||||
</listitem> |
||||
</orderedlist> |
||||
<programlisting language="C"> |
||||
hb_shape(font, buf, NULL, 0); |
||||
</programlisting> |
||||
<orderedlist numeration="arabic"> |
||||
<listitem override="5"> |
||||
<para> |
||||
Get the glyph and position information. |
||||
</para> |
||||
</listitem> |
||||
</orderedlist> |
||||
<programlisting language="C"> |
||||
hb_glyph_info_t *glyph_info = hb_buffer_get_glyph_infos(buf, &amp;glyph_count); |
||||
hb_glyph_position_t *glyph_pos = hb_buffer_get_glyph_positions(buf, &amp;glyph_count); |
||||
</programlisting> |
||||
<orderedlist numeration="arabic"> |
||||
<listitem override="6"> |
||||
<para> |
||||
Iterate over each glyph. |
||||
</para> |
||||
</listitem> |
||||
</orderedlist> |
||||
<programlisting language="C"> |
||||
for (i = 0; i &lt; glyph_count; ++i) { |
||||
glyphid = glyph_info[i].codepoint; |
||||
x_offset = glyph_pos[i].x_offset / 64.0; |
||||
y_offset = glyph_pos[i].y_offset / 64.0; |
||||
x_advance = glyph_pos[i].x_advance / 64.0; |
||||
y_advance = glyph_pos[i].y_advance / 64.0; |
||||
draw_glyph(glyphid, cursor_x + x_offset, cursor_y + y_offset); |
||||
cursor_x += x_advance; |
||||
cursor_y += y_advance; |
||||
} |
||||
</programlisting> |
||||
<orderedlist numeration="arabic"> |
||||
<listitem override="7"> |
||||
<para> |
||||
Tidy up. |
||||
</para> |
||||
</listitem> |
||||
</orderedlist> |
||||
<programlisting language="C"> |
||||
hb_buffer_destroy(buf); |
||||
hb_font_destroy(font); |
||||
</programlisting> |
||||
|
||||
<para> |
||||
This example shows enough to get us started using HarfBuzz. In |
||||
the sections that follow, we will use the remainder of |
||||
HarfBuzz's API to refine and extend the example and improve its |
||||
text-shaping capabilities. |
||||
</para> |
||||
</section> |
||||
</chapter> |
@ -1,183 +0,0 @@ |
||||
<chapter id="hello-harfbuzz"> |
||||
<title>Hello, HarfBuzz</title> |
||||
<para> |
||||
Here's the simplest HarfBuzz that can possibly work. We will improve |
||||
it later. |
||||
</para> |
||||
<orderedlist numeration="arabic"> |
||||
<listitem> |
||||
<para> |
||||
Create a buffer and put your text in it. |
||||
</para> |
||||
</listitem> |
||||
</orderedlist> |
||||
<programlisting language="C"> |
||||
#include <hb.h> |
||||
hb_buffer_t *buf; |
||||
buf = hb_buffer_create(); |
||||
hb_buffer_add_utf8(buf, text, strlen(text), 0, strlen(text)); |
||||
</programlisting> |
||||
<orderedlist numeration="arabic"> |
||||
<listitem override="2"> |
||||
<para> |
||||
Guess the script, language and direction of the buffer. |
||||
</para> |
||||
</listitem> |
||||
</orderedlist> |
||||
<programlisting language="C"> |
||||
hb_buffer_guess_segment_properties(buf); |
||||
</programlisting> |
||||
<orderedlist numeration="arabic"> |
||||
<listitem override="3"> |
||||
<para> |
||||
Create a face and a font, using FreeType for now. |
||||
</para> |
||||
</listitem> |
||||
</orderedlist> |
||||
<programlisting language="C"> |
||||
#include <hb-ft.h> |
||||
FT_New_Face(ft_library, font_path, index, &face) |
||||
hb_font_t *font = hb_ft_font_create(face); |
||||
</programlisting> |
||||
<orderedlist numeration="arabic"> |
||||
<listitem override="4"> |
||||
<para> |
||||
Shape! |
||||
</para> |
||||
</listitem> |
||||
</orderedlist> |
||||
<programlisting> |
||||
hb_shape(font, buf, NULL, 0); |
||||
</programlisting> |
||||
<orderedlist numeration="arabic"> |
||||
<listitem override="5"> |
||||
<para> |
||||
Get the glyph and position information. |
||||
</para> |
||||
</listitem> |
||||
</orderedlist> |
||||
<programlisting language="C"> |
||||
hb_glyph_info_t *glyph_info = hb_buffer_get_glyph_infos(buf, &glyph_count); |
||||
hb_glyph_position_t *glyph_pos = hb_buffer_get_glyph_positions(buf, &glyph_count); |
||||
</programlisting> |
||||
<orderedlist numeration="arabic"> |
||||
<listitem override="6"> |
||||
<para> |
||||
Iterate over each glyph. |
||||
</para> |
||||
</listitem> |
||||
</orderedlist> |
||||
<programlisting language="C"> |
||||
for (i = 0; i < glyph_count; ++i) { |
||||
glyphid = glyph_info[i].codepoint; |
||||
x_offset = glyph_pos[i].x_offset / 64.0; |
||||
y_offset = glyph_pos[i].y_offset / 64.0; |
||||
x_advance = glyph_pos[i].x_advance / 64.0; |
||||
y_advance = glyph_pos[i].y_advance / 64.0; |
||||
draw_glyph(glyphid, cursor_x + x_offset, cursor_y + y_offset); |
||||
cursor_x += x_advance; |
||||
cursor_y += y_advance; |
||||
} |
||||
</programlisting> |
||||
<orderedlist numeration="arabic"> |
||||
<listitem override="7"> |
||||
<para> |
||||
Tidy up. |
||||
</para> |
||||
</listitem> |
||||
</orderedlist> |
||||
<programlisting language="C"> |
||||
hb_buffer_destroy(buf); |
||||
hb_font_destroy(hb_ft_font); |
||||
</programlisting> |
||||
<section id="what-harfbuzz-doesnt-do"> |
||||
<title>What HarfBuzz doesn't do</title> |
||||
<para> |
||||
The code above will take a UTF8 string, shape it, and give you the |
||||
information required to lay it out correctly on a single |
||||
horizontal (or vertical) line using the font provided. That is the |
||||
extent of HarfBuzz's responsibility. |
||||
</para> |
||||
<para> |
||||
If you are implementing a text layout engine you may have other |
||||
responsibilities, that HarfBuzz will not help you with: |
||||
</para> |
||||
<itemizedlist> |
||||
<listitem> |
||||
<para> |
||||
HarfBuzz won't help you with bidirectionality. If you want to |
||||
lay out text with mixed Hebrew and English, you will need to |
||||
ensure that the buffer provided to HarfBuzz has those |
||||
characters in the correct layout order. This will be different |
||||
from the logical order in which the Unicode text is stored. In |
||||
other words, the user will hit the keys in the following |
||||
sequence: |
||||
</para> |
||||
<programlisting> |
||||
A B C [space] ג ב א [space] D E F |
||||
</programlisting> |
||||
<para> |
||||
but will expect to see in the output: |
||||
</para> |
||||
<programlisting> |
||||
ABC אבג DEF |
||||
</programlisting> |
||||
<para> |
||||
This reordering is called <emphasis>bidi processing</emphasis> |
||||
("bidi" is short for bidirectional), and there's an |
||||
algorithm as an annex to the Unicode Standard which tells you how |
||||
to reorder a string from logical order into presentation order. |
||||
Before sending your string to HarfBuzz, you may need to apply the |
||||
bidi algorithm to it. Libraries such as ICU and fribidi can do |
||||
this for you. |
||||
</para> |
||||
</listitem> |
||||
<listitem> |
||||
<para> |
||||
HarfBuzz won't help you with text that contains different font |
||||
properties. For instance, if you have the string "a |
||||
<emphasis>huge</emphasis> breakfast", and you expect |
||||
"huge" to be italic, you will need to send three |
||||
strings to HarfBuzz: <literal>a</literal>, in your Roman font; |
||||
<literal>huge</literal> using your italic font; and |
||||
<literal>breakfast</literal> using your Roman font again. |
||||
Similarly if you change font, font size, script, language or |
||||
direction within your string, you will need to shape each run |
||||
independently and then output them independently. HarfBuzz |
||||
expects to shape a run of characters sharing the same |
||||
properties. |
||||
</para> |
||||
</listitem> |
||||
<listitem> |
||||
<para> |
||||
HarfBuzz won't help you with line breaking, hyphenation or |
||||
justification. As mentioned above, it lays out the string |
||||
along a <emphasis>single line</emphasis> of, notionally, |
||||
infinite length. If you want to find out where the potential |
||||
word, sentence and line break points are in your text, you |
||||
could use the ICU library's break iterator functions. |
||||
</para> |
||||
<para> |
||||
HarfBuzz can tell you how wide a shaped piece of text is, which is |
||||
useful input to a justification algorithm, but it knows nothing |
||||
about paragraphs, lines or line lengths. Nor will it adjust the |
||||
space between words to fit them proportionally into a line. If you |
||||
want to layout text in paragraphs, you will probably want to send |
||||
each word of your text to HarfBuzz to determine its shaped width |
||||
after glyph substitutions, then work out how many words will fit |
||||
on a line, and then finally output each word of the line separated |
||||
by a space of the correct size to fully justify the paragraph. |
||||
</para> |
||||
</listitem> |
||||
</itemizedlist> |
||||
<para> |
||||
As a layout engine implementor, HarfBuzz will help you with the |
||||
interface between your text and your font, and that's something |
||||
that you'll need - what you then do with the glyphs that your font |
||||
returns is up to you. The example we saw above enough to get us |
||||
started using HarfBuzz. Now we are going to use the remainder of |
||||
HarfBuzz's API to refine that example and improve our text shaping |
||||
capabilities. |
||||
</para> |
||||
</section> |
||||
</chapter> |
@ -1,70 +1,431 @@ |
||||
<?xml version="1.0"?> |
||||
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN" |
||||
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [ |
||||
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'"> |
||||
<!ENTITY version SYSTEM "version.xml"> |
||||
]> |
||||
<chapter id="install-harfbuzz"> |
||||
<title>Install HarfBuzz</title> |
||||
<title>Installing HarfBuzz</title> |
||||
|
||||
<section id="download"> |
||||
<title id="download.title">Download</title> |
||||
<title id="download.title">Downloading HarfBuzz</title> |
||||
<para> |
||||
For tarball releases of HarfBuzz, look |
||||
<ulink url="http://www.freedesktop.org/software/harfbuzz/release/">here</ulink>. |
||||
At the same place you will |
||||
also find Win32 binary bundles that include libharfbuzz DLL, hb-view.exe, |
||||
hb-shape.exe, and all dependencies. |
||||
The HarfBuzz source code is hosted at <ulink |
||||
url="https://github.com/harfbuzz/harfbuzz">github.com/harfbuzz/harfbuzz</ulink>. The |
||||
same source tree is also available at the |
||||
<ulink |
||||
url="http://cgit.freedesktop.org/harfbuzz/">Freedesktop.org</ulink> |
||||
site. |
||||
</para> |
||||
<para> |
||||
The canonical source tree is available |
||||
<ulink url="http://cgit.freedesktop.org/harfbuzz/">here</ulink>. |
||||
Also available on <ulink url="https://github.com/harfbuzz/harfbuzz">github</ulink>. |
||||
Tarball releases and Win32 binary bundles (which include the |
||||
libharfbuzz DLL, hb-view.exe, hb-shape.exe, and all |
||||
dependencies) of HarfBuzz can be downloaded from <ulink |
||||
url="https://github.com/harfbuzz/harfbuzz/releases">github.com/harfbuzz/harfbuzz/releases</ulink> |
||||
or from |
||||
<ulink url="http://www.freedesktop.org/software/harfbuzz/release/">Freedesktop.org</ulink>. |
||||
</para> |
||||
<para> |
||||
The API that comes with <filename class='headerfile'>hb.h</filename> will |
||||
not change incompatibly. Other, peripheral, headers are more likely to go |
||||
through minor modifications, but again, will do our best to never change |
||||
API in an incompatible way. We will never break the ABI. |
||||
Release notes are posted with each new release to provide an |
||||
overview of the changes. The project <ulink url="https://github.com/harfbuzz/harfbuzz/issues">tracks bug |
||||
reports and other issues</ulink> on GitHub. Discussion and |
||||
questions are welcome on the <ulink |
||||
url="http://freedesktop.org/mailman/listinfo/harfbuzz/">HarfBuzz |
||||
mailing list</ulink>. |
||||
</para> |
||||
<para> |
||||
If you are not sure whether Pango or HarfBuzz is right for you, read |
||||
<ulink url="http://mces.blogspot.in/2009/11/pango-vs-harfbuzz.html">this</ulink>. |
||||
The API included in the <filename |
||||
class='headerfile'>hb.h</filename> file will not change in a |
||||
compatibility-breaking way in any release. However, other, |
||||
peripheral headers are more likely to go through minor |
||||
modifications. We will do our best to never change APIs in an |
||||
incompatible way. We will <emphasis>never</emphasis> break the ABI. |
||||
</para> |
||||
</section> |
||||
|
||||
<section id="building"> |
||||
<title>Building</title> |
||||
<title>Building HarfBuzz</title> |
||||
|
||||
<section id="building.linux"> |
||||
<title>Building on Linux</title> |
||||
<para> |
||||
On Linux, install the development packages for FreeType, Cairo, and GLib. |
||||
For example, on Ubuntu / Debian, you would do: |
||||
<programlisting> |
||||
<command>sudo apt-get install</command> <package>gcc g++ libfreetype6-dev libglib2.0-dev libcairo2-dev</package> |
||||
</programlisting> |
||||
whereas on Fedora, RHEL, CentOS, and other Red Hat based systems you would do: |
||||
<emphasis>(1)</emphasis> To build HarfBuzz on Linux, you must first install the |
||||
development packages for FreeType, Cairo, and GLib. The exact |
||||
commands required for this step will vary depending on |
||||
the Linux distribution you use. |
||||
</para> |
||||
<para> |
||||
For example, on an Ubuntu or Debian system, you would run: |
||||
<programlisting> |
||||
<command>sudo yum install</command> <package>gcc gcc-c++ freetype-devel glib2-devel cairo-devel</package> |
||||
<command>sudo apt install</command> <package>gcc g++ |
||||
libfreetype6-dev libglib2.0-dev libcairo2-dev</package> |
||||
</programlisting> |
||||
or using MacPorts: |
||||
On Fedora, RHEL, CentOS, or other Red-Hat–based systems, you would run: |
||||
<programlisting> |
||||
<command>sudo port install</command> <package>freetype glib2 cairo</package> |
||||
<command>sudo yum install</command> <package>gcc gcc-c++ freetype-devel glib2-devel cairo-devel</package> |
||||
</programlisting> |
||||
|
||||
</para> |
||||
|
||||
<para> |
||||
<emphasis>(2)</emphasis> The next step depends on whether you |
||||
are building from the source in a downloaded release tarball or |
||||
from the source directly from the git repository. |
||||
</para> |
||||
<para> |
||||
<emphasis>(2)(a)</emphasis> If you downloaded the HarfBuzz |
||||
source code in a tarball, you can now extract the source. |
||||
</para> |
||||
<para> |
||||
From a shell in the top-level directory of the extracted source |
||||
code, you can run <command>./configure</command> followed by |
||||
<command>make</command> as with any other standard package. |
||||
</para> |
||||
<para> |
||||
This should leave you with a shared |
||||
library in the <filename>src/</filename> directory, and a few |
||||
utility programs including <command>hb-view</command> and |
||||
<command>hb-shape</command> under the <filename>util/</filename> |
||||
directory. |
||||
</para> |
||||
<para> |
||||
If you are using a tarball, you can now proceed to running |
||||
<command>configure</command> and <command>make</command> as with any |
||||
other standard package. That should leave you with a shared library in |
||||
<filename>src/</filename>, and a few utility programs including hb-view |
||||
and hb-shape under <filename>util/</filename>. |
||||
<emphasis>(2)(b)</emphasis> If you are building from the source in the HarfBuzz git |
||||
repository, rather than installing from a downloaded tarball |
||||
release, then you must install two more auxiliary tools before you |
||||
can build for the first time: <package>pkg-config</package> and |
||||
<ulink url="http://www.complang.org/ragel/">ragel</ulink>. |
||||
</para> |
||||
<para> |
||||
If you are bootstrapping from git, you need a few more tools before you |
||||
can run <filename>autogen.sh</filename> for the first time. Namely, |
||||
pkg-config and <ulink url="http://www.complang.org/ragel/">ragel</ulink>. |
||||
Again, on Ubuntu / Debian: |
||||
On Ubuntu or Debian, run: |
||||
<programlisting> |
||||
<command>sudo apt-get install</command> <package>autoconf automake libtool pkg-config ragel gtk-doc-tools</package> |
||||
<command>sudo apt-get install</command> <package>autoconf automake libtool pkg-config ragel gtk-doc-tools</package> |
||||
</programlisting> |
||||
and on Fedora, RHEL, CentOS: |
||||
On Fedora, RHEL, or CentOS, run: |
||||
<programlisting> |
||||
<command>sudo yum install</command> <package>autoconf automake libtool pkgconfig ragel gtk-doc</package> |
||||
<command>sudo yum install</command> <package>autoconf automake libtool pkgconfig ragel gtk-doc</package> |
||||
</programlisting> |
||||
or using MacPorts: |
||||
|
||||
</para> |
||||
<para> |
||||
With <package>pkg-config</package> and <package>ragel</package> |
||||
installed, you can now run <command>./autogen.sh</command>, |
||||
followed by <command>./configure</command> and |
||||
<command>make</command> to build HarfBuzz. |
||||
</para> |
||||
</section> |
||||
|
||||
|
||||
<section id="building.windows"> |
||||
<title>Building on Windows</title> |
||||
|
||||
<para> |
||||
On Windows, consider using Microsoft's free <ulink |
||||
url="https://github.com/Microsoft/vcpkg">vcpkg</ulink> utility |
||||
to build HarfBuzz, its dependencies, and other open-source |
||||
libraries. |
||||
</para> |
||||
<para> |
||||
If you need to build HarfBuzz from source, first put the |
||||
<command>ragel</command> binary on your |
||||
<literal>PATH</literal>, then follow the appveyor CI cmake |
||||
<ulink |
||||
url="https://github.com/harfbuzz/harfbuzz/blob/master/appveyor.yml">build |
||||
instructions</ulink>. |
||||
</para> |
||||
</section> |
||||
|
||||
|
||||
<section id="building.macos"> |
||||
<title>Building on macOS</title> |
||||
|
||||
<para> |
||||
There are two ways to build HarfBuzz on Mac systems: MacPorts |
||||
and Homebrew. The process is similar to the process used on a |
||||
Linux system. |
||||
</para> |
||||
<para> |
||||
<emphasis>(1)</emphasis> You must first install the |
||||
development packages for FreeType, Cairo, and GLib. If you are |
||||
using MacPorts, you should run: |
||||
<programlisting> |
||||
<command>sudo port install</command> <package>autoconf automake libtool pkgconfig ragel gtk-doc</package> |
||||
<command>sudo port install</command> <package>freetype glib2 cairo</package> |
||||
</programlisting> |
||||
</para> |
||||
</para> |
||||
<para> |
||||
If you are using Homebrew, you should run: |
||||
<programlisting> |
||||
<command>brew install</command> <package>freetype glib cairo</package> |
||||
</programlisting> |
||||
</para> |
||||
<para> |
||||
<emphasis>(2)</emphasis> The next step depends on whether you are building from the |
||||
source in a downloaded release tarball or from the source directly |
||||
from the git repository. |
||||
</para> |
||||
<para> |
||||
<emphasis>(2)(a)</emphasis> If you are installing HarfBuzz |
||||
from a downloaded tarball release, extract the tarball and |
||||
open a Terminal in the extracted source-code directory. Run: |
||||
<programlisting> |
||||
<command>./configure</command> |
||||
</programlisting> |
||||
followed by: |
||||
<programlisting> |
||||
<command>make</command> |
||||
</programlisting> |
||||
to build HarfBuzz. |
||||
</para> |
||||
<para> |
||||
<emphasis>(2)(b)</emphasis> Alternatively, if you are building |
||||
HarfBuzz from the source in the HarfBuzz git repository, then |
||||
you must install several build-time dependencies before |
||||
proceeding. |
||||
</para> |
||||
<para>If you are |
||||
using MacPorts, you should run: |
||||
<programlisting> |
||||
<command>sudo port install</command> <package>autoconf |
||||
automake libtool pkgconfig ragel gtk-doc</package> |
||||
</programlisting> |
||||
to install the build dependencies. |
||||
</para> |
||||
<para>If you are using Homebrew, you should run: |
||||
<programlisting> |
||||
<command>brew install</command> <package>autoconf automake libtool pkgconfig ragel gtk-doc</package> |
||||
</programlisting> |
||||
Finally, you can run: |
||||
<programlisting> |
||||
<command>./autogen.sh</command> |
||||
</programlisting> |
||||
</para> |
||||
<para> |
||||
<emphasis>(3)</emphasis> You can now build HarfBuzz (on either |
||||
a MacPorts or a Homebrew system) by running: |
||||
<programlisting> |
||||
<command>./configure</command> |
||||
</programlisting> |
||||
followed by: |
||||
<programlisting> |
||||
<command>make</command> |
||||
</programlisting> |
||||
</para> |
||||
<para> |
||||
This should leave you with a shared |
||||
library in the <filename>src/</filename> directory, and a few |
||||
utility programs including <command>hb-view</command> and |
||||
<command>hb-shape</command> under the <filename>util/</filename> |
||||
directory. |
||||
</para> |
||||
|
||||
</section> |
||||
|
||||
<section id="configuration"> |
||||
<title>Configuration options</title> |
||||
|
||||
<para> |
||||
The instructions in the "Building HarfBuzz" section will build |
||||
the source code under its default configuration. If needed, |
||||
the following additional configuration options are available. |
||||
</para> |
||||
|
||||
<variablelist> |
||||
<varlistentry> |
||||
<term>--with-libstdc++</term> |
||||
<listitem> |
||||
<para> |
||||
Allow linking with libstdc++. <emphasis>(Default = no)</emphasis> |
||||
</para> |
||||
<para> |
||||
This option enables or disables linking HarfBuzz to the |
||||
system's libstdc++ library. |
||||
</para> |
||||
</listitem> |
||||
</varlistentry> |
||||
|
||||
<varlistentry> |
||||
<term>--with-glib</term> |
||||
<listitem> |
||||
<para> |
||||
Use <ulink url="https://developer.gnome.org/glib/">GLib</ulink>. <emphasis>(Default = auto)</emphasis> |
||||
</para> |
||||
<para> |
||||
This option enables or disables usage of the GLib |
||||
library. The default setting is to check for the |
||||
presence of GLib and, if it is found, build with |
||||
GLib support. GLib is native to GNU/Linux systems but is |
||||
available on other operating systems as well. |
||||
</para> |
||||
</listitem> |
||||
</varlistentry> |
||||
|
||||
<varlistentry> |
||||
<term>--with-gobject</term> |
||||
<listitem> |
||||
<para> |
||||
Use <ulink url="https://developer.gnome.org/gobject/stable/">GObject</ulink>. <emphasis>(Default = no)</emphasis> |
||||
</para> |
||||
<para> |
||||
This option enables or disables usage of the GObject |
||||
library. The default setting is to build without |
||||
GObject support; pass this option to enable |
||||
it. GObject is native to GNU/Linux systems but is |
||||
available on other operating systems as well. |
||||
</para> |
||||
</listitem> |
||||
</varlistentry> |
||||
|
||||
<varlistentry> |
||||
<term>--with-cairo</term> |
||||
<listitem> |
||||
<para> |
||||
Use <ulink url="https://cairographics.org/">Cairo</ulink>. <emphasis>(Default = auto)</emphasis> |
||||
</para> |
||||
<para> |
||||
This option enables or disables usage of the Cairo |
||||
graphics-rendering library. The default setting is to |
||||
check for the presence of Cairo and, if it is found, |
||||
build with Cairo support. |
||||
</para> |
||||
<para> |
||||
Note: Cairo is used only by the HarfBuzz |
||||
command-line utilities, and not by the HarfBuzz library. |
||||
</para> |
||||
</listitem> |
||||
</varlistentry> |
||||
|
||||
<varlistentry> |
||||
<term>--with-fontconfig</term> |
||||
<listitem> |
||||
<para> |
||||
Use <ulink url="https://www.freedesktop.org/wiki/Software/fontconfig/">Fontconfig</ulink>. <emphasis>(Default = auto)</emphasis> |
||||
</para> |
||||
<para> |
||||
This option enables or disables usage of the Fontconfig |
||||
library, which provides font-matching functions and |
||||
provides access to font properties. The default setting |
||||
is to check for the presence of Fontconfig and, if it is |
||||
found, build with Fontconfig support. |
||||
</para> |
||||
<para> |
||||
Note: Fontconfig is used only by the HarfBuzz |
||||
command-line utilities, and not by the HarfBuzz library. |
||||
</para> |
||||
</listitem> |
||||
</varlistentry> |
||||
|
||||
<varlistentry> |
||||
<term>--with-icu</term> |
||||
<listitem> |
||||
<para> |
||||
Use the <ulink url="http://site.icu-project.org/home">ICU</ulink> library. <emphasis>(Default = auto)</emphasis> |
||||
</para> |
||||
<para> |
||||
This option enables or disables usage of the |
||||
<emphasis>International Components for |
||||
Unicode</emphasis> (ICU) library, which provides access |
||||
to Unicode Character Database (UCD) properties as well |
||||
as normalization and conversion functions. The default |
||||
setting is to check for the presence of ICU and, if it |
||||
is found, build with ICU support. |
||||
</para> |
||||
</listitem> |
||||
</varlistentry> |
||||
|
||||
<varlistentry> |
||||
<term>--with-ucdn</term> |
||||
<listitem> |
||||
<para> |
||||
Use HarfBuzz's <ulink url="https://github.com/harfbuzz/harfbuzz/tree/master/src/hb-ucdn">built-in UCDN library</ulink>. <emphasis>(Default = auto)</emphasis> |
||||
</para> |
||||
<para> |
||||
The HarfBuzz source tree includes a <emphasis>Unicode |
||||
Database and Normalization</emphasis> (UCDN) library |
||||
that provides access to basic character properties in |
||||
the Unicode Character Database (UCD) as well as low-level |
||||
normalization functions. HarfBuzz can be built without |
||||
this UCDN support if the usage of a different UCDN |
||||
library is desired. |
||||
</para> |
||||
</listitem> |
||||
</varlistentry> |
||||
|
||||
<varlistentry> |
||||
<term>--with-graphite2</term> |
||||
<listitem> |
||||
<para> |
||||
Use the <ulink url="http://graphite.sil.org/">Graphite2</ulink> library. <emphasis>(Default = no)</emphasis> |
||||
</para> |
||||
<para> |
||||
This option enables or disables usage of the Graphite2 |
||||
library, which provides support for the Graphite shaping |
||||
model. |
||||
</para> |
||||
</listitem> |
||||
</varlistentry> |
||||
|
||||
<varlistentry> |
||||
<term>--with-freetype</term> |
||||
<listitem> |
||||
<para> |
||||
Use the <ulink url="https://www.freetype.org/">FreeType</ulink> library. <emphasis>(Default = auto)</emphasis> |
||||
</para> |
||||
<para> |
||||
This option enables or disables usage of the FreeType |
||||
font-rendering library. The default setting is to check for the |
||||
presence of FreeType and, if it is found, build with |
||||
FreeType support. |
||||
</para> |
||||
</listitem> |
||||
</varlistentry> |
||||
|
||||
<varlistentry> |
||||
<term>--with-uniscribe</term> |
||||
<listitem> |
||||
<para> |
||||
Use the <ulink |
||||
url="https://docs.microsoft.com/en-us/windows/desktop/intl/uniscribe">Uniscribe</ulink> |
||||
library (experimental). <emphasis>(Default = no)</emphasis> |
||||
</para> |
||||
<para> |
||||
This option enables or disables usage of the Uniscribe |
||||
font-rendering library. Uniscribe is available on |
||||
Windows systems. Uniscribe support is used only for |
||||
testing purposes and does not need to be enabled for |
||||
HarfBuzz to run on Windows systems. |
||||
</para> |
||||
</listitem> |
||||
</varlistentry> |
||||
|
||||
<varlistentry> |
||||
<term>--with-directwrite</term> |
||||
<listitem> |
||||
<para> |
||||
Use the <ulink url="https://docs.microsoft.com/en-us/windows/desktop/directwrite/direct-write-portal">DirectWrite</ulink> library (experimental). <emphasis>(Default = no)</emphasis> |
||||
</para> |
||||
<para> |
||||
This option enables or disables usage of the DirectWrite |
||||
font-rendering library. DirectWrite is available on |
||||
Windows systems. DirectWrite support is used only for |
||||
testing purposes and does not need to be enabled for |
||||
HarfBuzz to run on Windows systems. |
||||
</para> |
||||
</listitem> |
||||
</varlistentry> |
||||
|
||||
<varlistentry> |
||||
<term>--with-coretext</term> |
||||
<listitem> |
||||
<para> |
||||
Use the <ulink url="https://developer.apple.com/documentation/coretext">CoreText</ulink> library. <emphasis>(Default = no)</emphasis> |
||||
</para> |
||||
<para> |
||||
This option enables or disables usage of the CoreText |
||||
library. CoreText is available on macOS and iOS systems. |
||||
</para> |
||||
</listitem> |
||||
</varlistentry> |
||||
</variablelist> |
||||
</section> |
||||
|
||||
</section> |
||||
</chapter> |
||||
|
@ -0,0 +1,374 @@ |
||||
<?xml version="1.0"?> |
||||
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN" |
||||
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [ |
||||
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'"> |
||||
<!ENTITY version SYSTEM "version.xml"> |
||||
]> |
||||
<chapter id="shaping-concepts"> |
||||
<title>Shaping concepts</title> |
||||
<section id="text-shaping-concepts"> |
||||
<title>Text shaping</title> |
||||
<para> |
||||
Text shaping is the process of transforming a sequence of Unicode |
||||
codepoints that represent individual characters (letters, |
||||
diacritics, tone marks, numbers, symbols, etc.) into the |
||||
orthographically and linguistically correct two-dimensional layout |
||||
of glyph shapes taken from a specified font. |
||||
</para> |
||||
<para> |
||||
For some writing systems (or <emphasis>scripts</emphasis>) and |
||||
languages, the process is simple, requiring the shaper to do |
||||
little more than advance the horizontal position forward by the |
||||
correct amount for each successive glyph. |
||||
</para> |
||||
<para> |
||||
But, for <emphasis>complex scripts</emphasis>, any combination of |
||||
several shaping operations may be required, and the rules for how |
||||
and when they are applied vary from script to script. HarfBuzz and |
||||
other shaping engines implement these rules. |
||||
</para> |
||||
<para> |
||||
The exact rules and necessary operations for a particular script |
||||
constitute a shaping <emphasis>model</emphasis>. OpenType |
||||
specifies a set of shaping models that covers all of |
||||
Unicode. Other shaping models are available, however, including |
||||
Graphite and Apple Advanced Typography (AAT). |
||||
</para> |
||||
</section> |
||||
|
||||
<section id="complex-scripts"> |
||||
<title>Complex scripts</title> |
||||
<para> |
||||
In text-shaping terminology, scripts are generally classified as |
||||
either <emphasis>complex</emphasis> or <emphasis>non-complex</emphasis>. |
||||
</para> |
||||
<para> |
||||
Complex scripts are those for which transforming the input |
||||
sequence into the final layout requires some combination of |
||||
operations—such as context-dependent substitutions, |
||||
context-dependent mark positioning, glyph-to-glyph joining, |
||||
glyph reordering, or glyph stacking. |
||||
</para> |
||||
<para> |
||||
In some complex scripts, the shaping rules require that a text |
||||
run be divided into syllables before the operations can be |
||||
applied. Other complex scripts may apply shaping operations over |
||||
entire words or over the entire text run, with no subdivision |
||||
required. |
||||
</para> |
||||
<para> |
||||
Non-complex scripts, by definition, do not require these |
||||
operations. However, correctly shaping a text run in a |
||||
non-complex script may still involve Unicode normalization, |
||||
ligature substitutions, mark positioning, kerning, and applying |
||||
other font features. The key difference is that a text run in a |
||||
non-complex script can be processed sequentially and in the same |
||||
order as the input sequence of Unicode codepoints, without |
||||
requiring an analysis stage. |
||||
</para> |
||||
</section> |
||||
|
||||
<section id="shaping-operations"> |
||||
<title>Shaping operations</title> |
||||
<para> |
||||
Shaping a complex-script text run involves transforming the |
||||
input sequence of Unicode codepoints with some combination of |
||||
operations that is specified in the shaping model for the |
||||
script. |
||||
</para> |
||||
<para> |
||||
The specific conditions that trigger a given operation for a |
||||
text run vary from script to script, as do the order in which the |
||||
operations are performed and which codepoints are |
||||
affected. However, the same general set of shaping operations is |
||||
common to all of the complex-script shaping models. |
||||
</para> |
||||
|
||||
<itemizedlist> |
||||
<listitem> |
||||
<para> |
||||
A <emphasis>reordering</emphasis> operation moves a glyph |
||||
from its original ("logical") position in the sequence to |
||||
some other ("visual") position. |
||||
</para> |
||||
<para> |
||||
The shaping model for a given complex script might involve |
||||
more than one reordering step. |
||||
</para> |
||||
</listitem> |
||||
|
||||
<listitem> |
||||
<para> |
||||
A <emphasis>joining</emphasis> operation replaces a glyph |
||||
with an alternate form that is designed to connect with one |
||||
or more of the adjacent glyphs in the sequence. |
||||
</para> |
||||
</listitem> |
||||
|
||||
<listitem> |
||||
<para> |
||||
A contextual <emphasis>substitution</emphasis> operation |
||||
replaces either a single glyph or a subsequence of several |
||||
glyphs with an alternate glyph. This substitution is |
||||
performed when the original glyph or subsequence of glyphs |
||||
occurs in a specified position with respect to the |
||||
surrounding sequence. For example, one substitution might be |
||||
performed only when the target glyph is the first glyph in |
||||
the sequence, while another substitution is performed only |
||||
when a different target glyph occurs immediately after a |
||||
particular string pattern. |
||||
</para> |
||||
<para> |
||||
The shaping model for a given complex script might involve |
||||
multiple contextual-substitution operations, each applying |
||||
to different target glyphs and patterns, and which are |
||||
performed in separate steps. |
||||
</para> |
||||
</listitem> |
||||
|
||||
<listitem> |
||||
<para> |
||||
A contextual <emphasis>positioning</emphasis> operation |
||||
moves the horizontal and/or vertical position of a |
||||
glyph. This positioning move is performed when the glyph |
||||
occurs in a specified position with respect to the |
||||
surrounding sequence. |
||||
</para> |
||||
<para> |
||||
Many contextual positioning operations are used to place |
||||
<emphasis>mark</emphasis> glyphs (such as diacritics, vowel |
||||
signs, and tone markers) with respect to |
||||
<emphasis>base</emphasis> glyphs. However, some complex |
||||
scripts may use contextual positioning operations to |
||||
correctly place base glyphs as well, such as |
||||
when the script uses <emphasis>stacking</emphasis> characters. |
||||
</para> |
||||
</listitem> |
||||
|
||||
</itemizedlist> |
||||
</section> |
||||
|
||||
<section id="unicode-character-categories"> |
||||
<title>Unicode character categories</title> |
||||
<para> |
||||
Shaping models are typically specified with respect to how |
||||
scripts are defined in the Unicode standard. |
||||
</para> |
||||
<para> |
||||
Every codepoint in the Unicode Character Database (UCD) is |
||||
assigned a <emphasis>Unicode General Category</emphasis> (UGC), |
||||
which provides the most fundamental information about the |
||||
codepoint: whether the codepoint represents a |
||||
<emphasis>Letter</emphasis>, a <emphasis>Mark</emphasis>, a |
||||
<emphasis>Number</emphasis>, <emphasis>Punctuation</emphasis>, a |
||||
<emphasis>Symbol</emphasis>, a <emphasis>Separator</emphasis>, |
||||
or something else (<emphasis>Other</emphasis>). |
||||
</para> |
||||
<para> |
||||
These UGC properties are "Major" categories. Each codepoint is |
||||
further assigned to a "minor" category within its Major |
||||
category, such as "Letter, uppercase" (<literal>Lu</literal>) or |
||||
"Letter, modifier" (<literal>Lm</literal>). |
||||
</para> |
||||
<para> |
||||
Shaping models are concerned primarily with Letter and Mark |
||||
codepoints. The minor categories of Mark codepoints are |
||||
particularly important for shaping. Marks can be nonspacing |
||||
(<literal>Mn</literal>), spacing combining |
||||
(<literal>Mc</literal>), or enclosing (<literal>Me</literal>). |
||||
</para> |
||||
<para> |
||||
In addition to the UGC property, codepoints in the Indic and |
||||
Southeast Asian scripts are also assigned |
||||
<emphasis>Unicode Indic Syllabic Category</emphasis> (UISC) and |
||||
<emphasis>Unicode Indic Positional Category</emphasis> (UIPC) |
||||
properties that provide more detailed information needed for |
||||
shaping. |
||||
</para> |
||||
<para> |
||||
The UISC property sub-categorizes Letters and Marks according to |
||||
common script-shaping behaviors. For example, UISC distinguishes |
||||
between consonant letters, vowel letters, and vowel marks. The |
||||
UIPC property sub-categorizes Mark codepoints by the visual |
||||
position that they occupy (above, below, right, left, or in |
||||
multiple positions). |
||||
</para> |
||||
<para> |
||||
Some complex scripts require that the text run be split into |
||||
syllables, and what constitutes a valid syllable in these |
||||
scripts is specified in regular expressions of the Letter and |
||||
Mark codepoints that take the UISC and UIPC properties into account. |
||||
</para> |
||||
|
||||
</section> |
||||
|
||||
<section id="text-runs"> |
||||
<title>Text runs</title> |
||||
<para> |
||||
Real-world text usually contains codepoints from a mixture of |
||||
different Unicode scripts (including punctuation, numbers, symbols, |
||||
white-space characters, and other codepoints that do not belong |
||||
to any script). Real-world text may also be marked up with |
||||
formatting that changes font properties (including the font, |
||||
font style, and font size). |
||||
</para> |
||||
<para> |
||||
For shaping purposes, all real-world text streams must be first |
||||
segmented into runs that have a uniform set of properties. |
||||
</para> |
||||
<para> |
||||
In particular, shaping models always assume that every codepoint |
||||
in a text run has the same <emphasis>direction</emphasis>, |
||||
<emphasis>script</emphasis> tag, and |
||||
<emphasis>language</emphasis> tag. |
||||
</para> |
||||
</section> |
||||
|
||||
<section id="opentype-shaping-models"> |
||||
<title>OpenType shaping models</title> |
||||
<para> |
||||
OpenType provides the following shaping models: |
||||
</para> |
||||
|
||||
<itemizedlist> |
||||
<listitem> |
||||
<para> |
||||
The <emphasis>default</emphasis> shaping model handles all |
||||
non-complex scripts, and may also be used as a fallback for |
||||
handling unrecognized scripts. |
||||
</para> |
||||
</listitem> |
||||
|
||||
<listitem> |
||||
<para> |
||||
The <emphasis>Indic</emphasis> shaping model handles the Indic |
||||
scripts Bengali, Devanagari, Gujarati, Gurmukhi, Kannada, |
||||
Malayalam, Oriya, Tamil, Telugu, and Sinhala. |
||||
</para> |
||||
<para> |
||||
The Indic shaping model was revised significantly in |
||||
2005. To denote the change, a new set of <emphasis>script |
||||
tags</emphasis> was assigned for Bengali, Devanagari, |
||||
Gujarati, Gurmukhi, Kannada, Malayalam, Oriya, Tamil, and |
||||
Telugu. For the sake of clarity, the term "Indic2" is |
||||
sometimes used to refer to the current, revised shaping |
||||
model. |
||||
</para> |
||||
</listitem> |
||||
|
||||
<listitem> |
||||
<para> |
||||
The <emphasis>Arabic</emphasis> shaping model supports |
||||
Arabic, Mongolian, N'Ko, Syriac, and several other connected |
||||
or cursive scripts. |
||||
</para> |
||||
</listitem> |
||||
|
||||
<listitem> |
||||
<para> |
||||
The <emphasis>Thai/Lao</emphasis> shaping model supports |
||||
the Thai and Lao scripts. |
||||
</para> |
||||
</listitem> |
||||
|
||||
<listitem> |
||||
<para> |
||||
The <emphasis>Khmer</emphasis> shaping model supports the |
||||
Khmer script. |
||||
</para> |
||||
</listitem> |
||||
|
||||
<listitem> |
||||
<para> |
||||
The <emphasis>Myanmar</emphasis> shaping model supports the |
||||
Myanmar (or Burmese) script. |
||||
</para> |
||||
</listitem> |
||||
|
||||
<listitem> |
||||
<para> |
||||
The <emphasis>Tibetan</emphasis> shaping model supports the |
||||
Tibetan script. |
||||
</para> |
||||
</listitem> |
||||
|
||||
<listitem> |
||||
<para> |
||||
The <emphasis>Hangul</emphasis> shaping model supports the |
||||
Hangul script. |
||||
</para> |
||||
</listitem> |
||||
|
||||
<listitem> |
||||
<para> |
||||
The <emphasis>Hebrew</emphasis> shaping model supports the |
||||
Hebrew script. |
||||
</para> |
||||
</listitem> |
||||
|
||||
<listitem> |
||||
<para> |
||||
The <emphasis>Universal Shaping Engine</emphasis> (USE) |
||||
shaping model supports complex scripts not covered by one of |
||||
the above, script-specific shaping models, including |
||||
Javanese, Balinese, Buginese, Batak, Chakma, Lepcha, Modi, |
||||
Phags-pa, Tagalog, Siddham, Sundanese, Tai Le, Tai Tham, Tai |
||||
Viet, and many others. |
||||
</para> |
||||
</listitem> |
||||
|
||||
<listitem> |
||||
<para> |
||||
Text runs that do not fall under one of the above shaping |
||||
models may still require processing by a shaping engine. Of |
||||
particular note is <emphasis>Emoji</emphasis> shaping, which |
||||
may involve variation-selector sequences and glyph |
||||
substitution. Emoji shaping is handled by the default |
||||
shaping model. |
||||
</para> |
||||
</listitem> |
||||
|
||||
</itemizedlist> |
||||
|
||||
</section> |
||||
|
||||
<section id="graphite-shaping"> |
||||
<title>Graphite shaping</title> |
||||
<para> |
||||
In contrast to OpenType shaping, Graphite shaping does not |
||||
specify a predefined set of shaping models or a set of supported |
||||
scripts. |
||||
</para> |
||||
<para> |
||||
Instead, each Graphite font contains a complete set of rules that |
||||
implement the required shaping model for the intended |
||||
script. These rules include finite-state machines to match |
||||
sequences of codepoints to the shaping operations to perform. |
||||
</para> |
||||
<para> |
||||
Graphite shaping can perform the same shaping operations used in |
||||
OpenType shaping, as well as other functions that have not been |
||||
defined for OpenType shaping. |
||||
</para> |
||||
</section> |
||||
|
||||
<section id="aat-shaping"> |
||||
<title>AAT shaping</title> |
||||
<para> |
||||
In contrast to OpenType shaping, AAT shaping does not specify a |
||||
predefined set of shaping models or a set of supported scripts. |
||||
</para> |
||||
<para> |
||||
Instead, each AAT font includes a complete set of rules that |
||||
implement the desired shaping model for the intended |
||||
script. These rules include finite-state machines to match glyph |
||||
sequences to the shaping operations to perform. |
||||
</para> |
||||
<para> |
||||
Notably, AAT shaping rules are expressed for glyphs in the font, |
||||
not for Unicode codepoints. AAT shaping can perform the same |
||||
shaping operations used in OpenType shaping, as well as other |
||||
functions that have not been defined for OpenType shaping. |
||||
</para> |
||||
</section> |
||||
</chapter> |
@ -0,0 +1,457 @@ |
||||
/*
|
||||
* Copyright © 2018 Google, Inc. |
||||
* |
||||
* This is part of HarfBuzz, a text shaping library. |
||||
* |
||||
* Permission is hereby granted, without written agreement and without |
||||
* license or royalty fees, to use, copy, modify, and distribute this |
||||
* software and its documentation for any purpose, provided that the |
||||
* above copyright notice and the following two paragraphs appear in |
||||
* all copies of this software. |
||||
* |
||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
||||
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
||||
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
||||
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
||||
* DAMAGE. |
||||
* |
||||
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
||||
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
||||
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
||||
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
||||
* |
||||
* Google Author(s): Behdad Esfahbod |
||||
*/ |
||||
|
||||
#include "hb-ot-name-language.hh" |
||||
|
||||
/* The following two tables were generated by joining the FreeType,
 |
||||
 * FontConfig, and OpenType specification language lists, then filling |
||||
 * in missing entries using: |
||||
* https://docs.microsoft.com/en-us/windows/desktop/intl/language-identifier-constants-and-strings
|
||||
*/ |
||||
|
||||
/* One row of a platform-language-ID -> language-tag lookup table. */
struct hb_ot_language_map_t
{
  /* bsearch()-style comparator.
   *
   * key:  points at the unsigned int language code being looked up.
   * item: points at the hb_ot_language_map_t row to compare against.
   *
   * Returns -1, 0 or +1 when the key is respectively smaller than,
   * equal to, or greater than the row's code. */
  static int cmp (const void *key, const void *item)
  {
    const unsigned int wanted = * (const unsigned int *) key;
    const unsigned int have = ((const hb_ot_language_map_t *) item)->code;

    if (wanted < have) return -1;
    if (wanted > have) return +1;
    return 0;
  }

  uint16_t	code;		/* Platform-specific numeric language ID. */
  char		lang[6];	/* NUL-terminated tag; room for 5 characters (e.g. "zh-tw"). */
};
||||
|
||||
/* Microsoft language ID -> language tag.
 *
 * Rows MUST stay sorted by ascending code: _hb_ot_name_language_for()
 * looks entries up with hb_bsearch().  Each tag has to fit in
 * hb_ot_language_map_t::lang (at most 5 characters plus the NUL).
 *
 * Rows marked "???" have no description in the source lists; rows that
 * are commented out are language IDs for which no tag has been filled
 * in yet. */
static const hb_ot_language_map_t
hb_ms_language_map[] =
{
  {0x0001, "ar"},	/* ??? */
  {0x0004, "zh"},	/* ??? */
  {0x0009, "en"},	/* ??? */
  {0x0401, "ar"},	/* Arabic (Saudi Arabia) */
  {0x0402, "bg"},	/* Bulgarian (Bulgaria) */
  {0x0403, "ca"},	/* Catalan (Catalan) */
  {0x0404, "zh-tw"},	/* Chinese (Taiwan) */
  {0x0405, "cs"},	/* Czech (Czech Republic) */
  {0x0406, "da"},	/* Danish (Denmark) */
  {0x0407, "de"},	/* German (Germany) */
  {0x0408, "el"},	/* Greek (Greece) */
  {0x0409, "en"},	/* English (United States) */
  {0x040A, "es"},	/* Spanish (Traditional Sort) (Spain) */
  {0x040B, "fi"},	/* Finnish (Finland) */
  {0x040C, "fr"},	/* French (France) */
  {0x040D, "he"},	/* Hebrew (Israel) */
  {0x040E, "hu"},	/* Hungarian (Hungary) */
  {0x040F, "is"},	/* Icelandic (Iceland) */
  {0x0410, "it"},	/* Italian (Italy) */
  {0x0411, "ja"},	/* Japanese (Japan) */
  {0x0412, "ko"},	/* Korean (Korea) */
  {0x0413, "nl"},	/* Dutch (Netherlands) */
  {0x0414, "no"},	/* Norwegian (Bokmal) (Norway) */
  {0x0415, "pl"},	/* Polish (Poland) */
  {0x0416, "pt"},	/* Portuguese (Brazil) */
  {0x0417, "rm"},	/* Romansh (Switzerland) */
  {0x0418, "ro"},	/* Romanian (Romania) */
  {0x0419, "ru"},	/* Russian (Russia) */
  {0x041A, "hr"},	/* Croatian (Croatia) */
  {0x041B, "sk"},	/* Slovak (Slovakia) */
  {0x041C, "sq"},	/* Albanian (Albania) */
  {0x041D, "sv"},	/* Swedish (Sweden) */
  {0x041E, "th"},	/* Thai (Thailand) */
  {0x041F, "tr"},	/* Turkish (Turkey) */
  {0x0420, "ur"},	/* Urdu (Islamic Republic of Pakistan) */
  {0x0421, "id"},	/* Indonesian (Indonesia) */
  {0x0422, "uk"},	/* Ukrainian (Ukraine) */
  {0x0423, "be"},	/* Belarusian (Belarus) */
  {0x0424, "sl"},	/* Slovenian (Slovenia) */
  {0x0425, "et"},	/* Estonian (Estonia) */
  {0x0426, "lv"},	/* Latvian (Latvia) */
  {0x0427, "lt"},	/* Lithuanian (Lithuania) */
  {0x0428, "tg"},	/* Tajik (Cyrillic) (Tajikistan) */
  {0x0429, "fa"},	/* Persian (Iran) */
  {0x042A, "vi"},	/* Vietnamese (Vietnam) */
  {0x042B, "hy"},	/* Armenian (Armenia) */
  {0x042C, "az"},	/* Azeri (Latin) (Azerbaijan) */
  {0x042D, "eu"},	/* Basque (Basque) */
  {0x042E, "hsb"},	/* Upper Sorbian (Germany) */
  {0x042F, "mk"},	/* Macedonian (FYROM) (Former Yugoslav Republic of Macedonia) */
  {0x0430, "st"},	/* ??? */
  {0x0431, "ts"},	/* ??? */
  {0x0432, "tn"},	/* Setswana (South Africa) */
  {0x0433, "ven"},	/* ??? */
  {0x0434, "xh"},	/* isiXhosa (South Africa) */
  {0x0435, "zu"},	/* isiZulu (South Africa) */
  {0x0436, "af"},	/* Afrikaans (South Africa) */
  {0x0437, "ka"},	/* Georgian (Georgia) */
  {0x0438, "fo"},	/* Faroese (Faroe Islands) */
  {0x0439, "hi"},	/* Hindi (India) */
  {0x043A, "mt"},	/* Maltese (Malta) */
  {0x043B, "se"},	/* Sami (Northern) (Norway) */
  {0x043C, "ga"},	/* ??? */
  {0x043D, "yi"},	/* ??? */
  {0x043E, "ms"},	/* Malay (Malaysia) */
  {0x043F, "kk"},	/* Kazakh (Kazakhstan) */
  {0x0440, "ky"},	/* Kyrgyz (Kyrgyzstan) */
  {0x0441, "sw"},	/* Kiswahili (Kenya) */
  {0x0442, "tk"},	/* Turkmen (Turkmenistan) */
  {0x0443, "uz"},	/* Uzbek (Latin) (Uzbekistan) */
  {0x0444, "tt"},	/* Tatar (Russia) */
  {0x0445, "bn"},	/* Bengali (India) */
  {0x0446, "pa"},	/* Punjabi (India) */
  {0x0447, "gu"},	/* Gujarati (India) */
  {0x0448, "or"},	/* Odia (formerly Oriya) (India) */
  {0x0449, "ta"},	/* Tamil (India) */
  {0x044A, "te"},	/* Telugu (India) */
  {0x044B, "kn"},	/* Kannada (India) */
  {0x044C, "ml"},	/* Malayalam (India) */
  {0x044D, "as"},	/* Assamese (India) */
  {0x044E, "mr"},	/* Marathi (India) */
  {0x044F, "sa"},	/* Sanskrit (India) */
  {0x0450, "mn"},	/* Mongolian (Cyrillic) (Mongolia) */
  {0x0451, "bo"},	/* Tibetan (PRC) */
  {0x0452, "cy"},	/* Welsh (United Kingdom) */
  {0x0453, "km"},	/* Khmer (Cambodia) */
  {0x0454, "lo"},	/* Lao (Lao P.D.R.) */
  {0x0455, "my"},	/* ??? */
  {0x0456, "gl"},	/* Galician (Galician) */
  {0x0457, "kok"},	/* Konkani (India) */
  {0x0458, "mni"},	/* ??? */
  {0x0459, "sd"},	/* ??? */
  {0x045A, "syr"},	/* Syriac (Syria) */
  {0x045B, "si"},	/* Sinhala (Sri Lanka) */
  {0x045C, "chr"},	/* ??? */
  {0x045D, "iu"},	/* Inuktitut (Canada) */
  {0x045E, "am"},	/* Amharic (Ethiopia) */
  {0x0460, "ks"},	/* ??? */
  {0x0461, "ne"},	/* Nepali (Nepal) */
  {0x0462, "fy"},	/* Frisian (Netherlands) */
  {0x0463, "ps"},	/* Pashto (Afghanistan) */
  {0x0464, "phi"},	/* Filipino (Philippines) */
  {0x0465, "div"},	/* Divehi (Maldives) */
  {0x0468, "ha"},	/* Hausa (Latin) (Nigeria) */
  {0x046A, "yo"},	/* Yoruba (Nigeria) */
  {0x046B, "quz"},	/* Quechua (Bolivia) */
  {0x046C, "nso"},	/* Sesotho sa Leboa (South Africa) */
  {0x046D, "ba"},	/* Bashkir (Russia) */
  {0x046E, "lb"},	/* Luxembourgish (Luxembourg) */
  {0x046F, "kl"},	/* Greenlandic (Greenland) */
  {0x0470, "ibo"},	/* Igbo (Nigeria) */
  {0x0471, "kau"},	/* ??? */
  {0x0472, "om"},	/* ??? */
  {0x0473, "ti"},	/* ??? */
  {0x0474, "gn"},	/* ??? */
  {0x0475, "haw"},	/* ??? */
  {0x0476, "la"},	/* ??? */
  {0x0477, "so"},	/* ??? */
  {0x0478, "ii"},	/* Yi (PRC) */
  {0x0479, "pap"},	/* ??? */
  {0x047A, "arn"},	/* Mapudungun (Chile) */
  {0x047C, "moh"},	/* Mohawk (Mohawk) */
  {0x047E, "br"},	/* Breton (France) */
  {0x0480, "ug"},	/* Uighur (PRC) */
  {0x0481, "mi"},	/* Maori (New Zealand) */
  {0x0482, "oc"},	/* Occitan (France) */
  {0x0483, "co"},	/* Corsican (France) */
  {0x0484, "gsw"},	/* Alsatian (France) */
  {0x0485, "sah"},	/* Yakut (Russia) */
  {0x0486, "qut"},	/* K'iche (Guatemala) */
  {0x0487, "rw"},	/* Kinyarwanda (Rwanda) */
  {0x0488, "wo"},	/* Wolof (Senegal) */
  {0x048C, "fa"},	/* Dari (Afghanistan) */
  {0x0801, "ar"},	/* Arabic (Iraq) */
  {0x0804, "zh-cn"},	/* Chinese (People’s Republic of China) */
  {0x0807, "de"},	/* German (Switzerland) */
  {0x0809, "en"},	/* English (United Kingdom) */
  {0x080A, "es"},	/* Spanish (Mexico) */
  {0x080C, "fr"},	/* French (Belgium) */
  {0x0810, "it"},	/* Italian (Switzerland) */
  {0x0812, "ko"},	/* ??? */
  {0x0813, "nl"},	/* Dutch (Belgium) */
  {0x0814, "nn"},	/* Norwegian (Nynorsk) (Norway) */
  {0x0816, "pt"},	/* Portuguese (Portugal) */
  {0x0818, "mo"},	/* ??? */
  {0x0819, "ru"},	/* ??? */
  {0x081A, "sr"},	/* Serbian (Latin) (Serbia) */
  {0x081D, "sv"},	/* Swedish (Finland) */
  {0x0820, "ur"},	/* ??? */
  {0x0827, "lt"},	/* ??? */
  {0x082C, "az"},	/* Azeri (Cyrillic) (Azerbaijan) */
  {0x082E, "dsb"},	/* Lower Sorbian (Germany) */
//{0x083B, ""},	/* Sami (Northern) (Sweden) */
  {0x083C, "ga"},	/* Irish (Ireland) */
  {0x083E, "ms"},	/* Malay (Brunei Darussalam) */
  {0x0843, "uz"},	/* Uzbek (Cyrillic) (Uzbekistan) */
  {0x0845, "bn"},	/* Bengali (Bangladesh) */
  {0x0846, "ar"},	/* ??? */
  {0x0850, "mn"},	/* Mongolian (Traditional) (People’s Republic of China) */
  {0x0851, "dz"},	/* ??? */
  {0x085D, "iu"},	/* Inuktitut (Latin) (Canada) */
  {0x085F, "tzm"},	/* Tamazight (Latin) (Algeria) */
  {0x0861, "ne"},	/* ??? */
//{0x086B, ""},	/* Quechua (Ecuador) */
  {0x0873, "ti"},	/* ??? */
  {0x0C01, "ar"},	/* Arabic (Egypt) */
  {0x0C04, "zh-hk"},	/* Chinese (Hong Kong S.A.R.) */
  {0x0C07, "de"},	/* German (Austria) */
  {0x0C09, "en"},	/* English (Australia) */
  {0x0C0A, "es"},	/* Spanish (Modern Sort) (Spain) */
  {0x0C0C, "fr"},	/* French (Canada) */
  {0x0C1A, "sr"},	/* Serbian (Cyrillic) (Serbia) */
  {0x0C3B, "se"},	/* Sami (Northern) (Finland) */
//{0x0C6B, ""},	/* Quechua (Peru) */
  {0x1001, "ar"},	/* Arabic (Libya) */
  {0x1004, "zh-sg"},	/* Chinese (Singapore) */
  {0x1007, "de"},	/* German (Luxembourg) */
  {0x1009, "en"},	/* English (Canada) */
  {0x100A, "es"},	/* Spanish (Guatemala) */
  {0x100C, "fr"},	/* French (Switzerland) */
  {0x101A, "hr"},	/* Croatian (Latin) (Bosnia and Herzegovina) */
  {0x103B, "smj"},	/* Sami (Lule) (Norway) */
  {0x1401, "ar"},	/* Arabic (Algeria) */
//{0x1404, ""},	/* Chinese (Macao S.A.R.) */
  {0x1407, "de"},	/* German (Liechtenstein) */
  {0x1409, "en"},	/* English (New Zealand) */
  {0x140A, "es"},	/* Spanish (Costa Rica) */
  {0x140C, "fr"},	/* French (Luxembourg) */
  {0x141A, "bs"},	/* Bosnian (Latin) (Bosnia and Herzegovina) */
//{0x143B, ""},	/* Sami (Lule) (Sweden) */
  {0x1801, "ar"},	/* Arabic (Morocco) */
  {0x1809, "en"},	/* English (Ireland) */
  {0x180A, "es"},	/* Spanish (Panama) */
  {0x180C, "fr"},	/* French (Principality of Monaco) */
//{0x181A, ""},	/* Serbian (Latin) (Bosnia and Herzegovina) */
  {0x183B, "sma"},	/* Sami (Southern) (Norway) */
  {0x1C01, "ar"},	/* Arabic (Tunisia) */
  {0x1C09, "en"},	/* English (South Africa) */
  {0x1C0A, "es"},	/* Spanish (Dominican Republic) */
  {0x1C0C, "fr"},	/* ??? */
//{0x1C1A, ""},	/* Serbian (Cyrillic) (Bosnia and Herzegovina) */
//{0x1C3B, ""},	/* Sami (Southern) (Sweden) */
  {0x2001, "ar"},	/* Arabic (Oman) */
  {0x2009, "en"},	/* English (Jamaica) */
  {0x200A, "es"},	/* Spanish (Venezuela) */
  {0x200C, "fr"},	/* ??? */
  {0x201A, "bs"},	/* Bosnian (Cyrillic) (Bosnia and Herzegovina) */
  {0x203B, "sms"},	/* Sami (Skolt) (Finland) */
  {0x2401, "ar"},	/* Arabic (Yemen) */
  {0x2409, "en"},	/* English (Caribbean) */
  {0x240A, "es"},	/* Spanish (Colombia) */
  {0x240C, "fr"},	/* ??? */
  {0x243B, "smn"},	/* Sami (Inari) (Finland) */
  {0x2801, "ar"},	/* Arabic (Syria) */
  {0x2809, "en"},	/* English (Belize) */
  {0x280A, "es"},	/* Spanish (Peru) */
  {0x280C, "fr"},	/* ??? */
  {0x2C01, "ar"},	/* Arabic (Jordan) */
  {0x2C09, "en"},	/* English (Trinidad and Tobago) */
  {0x2C0A, "es"},	/* Spanish (Argentina) */
  {0x2C0C, "fr"},	/* ??? */
  {0x3001, "ar"},	/* Arabic (Lebanon) */
  {0x3009, "en"},	/* English (Zimbabwe) */
  {0x300A, "es"},	/* Spanish (Ecuador) */
  {0x300C, "fr"},	/* ??? */
  {0x3401, "ar"},	/* Arabic (Kuwait) */
  {0x3409, "en"},	/* English (Republic of the Philippines) */
  {0x340A, "es"},	/* Spanish (Chile) */
  {0x340C, "fr"},	/* ??? */
  {0x3801, "ar"},	/* Arabic (U.A.E.) */
  {0x380A, "es"},	/* Spanish (Uruguay) */
  {0x380C, "fr"},	/* ??? */
  {0x3C01, "ar"},	/* Arabic (Bahrain) */
  {0x3C09, "en"},	/* ??? */
  {0x3C0A, "es"},	/* Spanish (Paraguay) */
  {0x3C0C, "fr"},	/* ??? */
  {0x4001, "ar"},	/* Arabic (Qatar) */
  {0x4009, "en"},	/* English (India) */
  {0x400A, "es"},	/* Spanish (Bolivia) */
  {0x4409, "en"},	/* English (Malaysia) */
  {0x440A, "es"},	/* Spanish (El Salvador) */
  {0x4809, "en"},	/* English (Singapore) */
  {0x480A, "es"},	/* Spanish (Honduras) */
  {0x4C0A, "es"},	/* Spanish (Nicaragua) */
  {0x500A, "es"},	/* Spanish (Puerto Rico) */
  {0x540A, "es"},	/* Spanish (United States) */
  {0xE40A, "es"},	/* ??? */
  {0xE40C, "fr"},	/* ??? */
};
||||
|
||||
/* Macintosh language ID -> language tag.
 *
 * Rows MUST stay sorted by ascending code: _hb_ot_name_language_for()
 * looks entries up with hb_bsearch().  Each tag has to fit in
 * hb_ot_language_map_t::lang (at most 5 characters plus the NUL).
 *
 * Rows that are commented out are language IDs for which no tag has
 * been filled in yet. */
static const hb_ot_language_map_t
hb_mac_language_map[] =
{
  {  0, "en"},	/* English */
  {  1, "fr"},	/* French */
  {  2, "de"},	/* German */
  {  3, "it"},	/* Italian */
  {  4, "nl"},	/* Dutch */
  {  5, "sv"},	/* Swedish */
  {  6, "es"},	/* Spanish */
  {  7, "da"},	/* Danish */
  {  8, "pt"},	/* Portuguese */
  {  9, "no"},	/* Norwegian */
  { 10, "he"},	/* Hebrew */
  { 11, "ja"},	/* Japanese */
  { 12, "ar"},	/* Arabic */
  { 13, "fi"},	/* Finnish */
  { 14, "el"},	/* Greek */
  { 15, "is"},	/* Icelandic */
  { 16, "mt"},	/* Maltese */
  { 17, "tr"},	/* Turkish */
  { 18, "hr"},	/* Croatian */
  { 19, "zh-tw"},	/* Chinese (Traditional) */
  { 20, "ur"},	/* Urdu */
  { 21, "hi"},	/* Hindi */
  { 22, "th"},	/* Thai */
  { 23, "ko"},	/* Korean */
  { 24, "lt"},	/* Lithuanian */
  { 25, "pl"},	/* Polish */
  { 26, "hu"},	/* Hungarian */
  { 27, "et"},	/* Estonian */
  { 28, "lv"},	/* Latvian */
//{ 29, ""},	/* Sami */
  { 30, "fo"},	/* Faroese */
  { 31, "fa"},	/* Farsi/Persian */
  { 32, "ru"},	/* Russian */
  { 33, "zh-cn"},	/* Chinese (Simplified) */
  { 34, "nl"},	/* Flemish */
  { 35, "ga"},	/* Irish Gaelic */
  { 36, "sq"},	/* Albanian */
  { 37, "ro"},	/* Romanian */
  { 38, "cs"},	/* Czech */
  { 39, "sk"},	/* Slovak */
  { 40, "sl"},	/* Slovenian */
  { 41, "yi"},	/* Yiddish */
  { 42, "sr"},	/* Serbian */
  { 43, "mk"},	/* Macedonian */
  { 44, "bg"},	/* Bulgarian */
  { 45, "uk"},	/* Ukrainian */
  { 46, "be"},	/* Byelorussian */
  { 47, "uz"},	/* Uzbek */
  { 48, "kk"},	/* Kazakh */
  { 49, "az"},	/* Azerbaijani (Cyrillic script) */
  { 50, "az"},	/* Azerbaijani (Arabic script) */
  { 51, "hy"},	/* Armenian */
  { 52, "ka"},	/* Georgian */
  { 53, "mo"},	/* Moldavian */
  { 54, "ky"},	/* Kirghiz */
  { 55, "tg"},	/* Tajiki */
  { 56, "tk"},	/* Turkmen */
  { 57, "mn"},	/* Mongolian (Mongolian script) */
  { 58, "mn"},	/* Mongolian (Cyrillic script) */
  { 59, "ps"},	/* Pashto */
  { 60, "ku"},	/* Kurdish */
  { 61, "ks"},	/* Kashmiri */
  { 62, "sd"},	/* Sindhi */
  { 63, "bo"},	/* Tibetan */
  { 64, "ne"},	/* Nepali */
  { 65, "sa"},	/* Sanskrit */
  { 66, "mr"},	/* Marathi */
  { 67, "bn"},	/* Bengali */
  { 68, "as"},	/* Assamese */
  { 69, "gu"},	/* Gujarati */
  { 70, "pa"},	/* Punjabi */
  { 71, "or"},	/* Oriya */
  { 72, "ml"},	/* Malayalam */
  { 73, "kn"},	/* Kannada */
  { 74, "ta"},	/* Tamil */
  { 75, "te"},	/* Telugu */
  { 76, "si"},	/* Sinhalese */
  { 77, "my"},	/* Burmese */
  { 78, "km"},	/* Khmer */
  { 79, "lo"},	/* Lao */
  { 80, "vi"},	/* Vietnamese */
  { 81, "id"},	/* Indonesian */
  { 82, "tl"},	/* Tagalog */
  { 83, "ms"},	/* Malay (Roman script) */
  { 84, "ms"},	/* Malay (Arabic script) */
  { 85, "am"},	/* Amharic */
  { 86, "ti"},	/* Tigrinya */
  { 87, "om"},	/* Galla */
  { 88, "so"},	/* Somali */
  { 89, "sw"},	/* Swahili */
  { 90, "rw"},	/* Kinyarwanda/Ruanda */
  { 91, "rn"},	/* Rundi */
  { 92, "ny"},	/* Nyanja/Chewa */
  { 93, "mg"},	/* Malagasy */
  { 94, "eo"},	/* Esperanto */
  {128, "cy"},	/* Welsh */
  {129, "eu"},	/* Basque */
  {130, "ca"},	/* Catalan */
  {131, "la"},	/* Latin */
  {132, "qu"},	/* Quechua */
  {133, "gn"},	/* Guarani */
  {134, "ay"},	/* Aymara */
  {135, "tt"},	/* Tatar */
  {136, "ug"},	/* Uighur */
  {137, "dz"},	/* Dzongkha */
  {138, "jw"},	/* Javanese (Roman script) */
  {139, "su"},	/* Sundanese (Roman script) */
  {140, "gl"},	/* Galician */
  {141, "af"},	/* Afrikaans */
  {142, "br"},	/* Breton */
  {143, "iu"},	/* Inuktitut */
  {144, "gd"},	/* Scottish Gaelic */
  {145, "gv"},	/* Manx Gaelic */
  {146, "ga"},	/* Irish Gaelic (with dot above) */
  {147, "to"},	/* Tongan */
  {148, "el"},	/* Greek (polytonic) */
  {149, "kl"},	/* Greenlandic */
  {150, "az"},	/* Azerbaijani (Roman script) */
};
||||
|
||||
|
||||
/*
 * Looks up @code in the @len-entry table @array and converts the matching
 * entry's language string to an hb_language_t.
 *
 * The table is searched with hb_bsearch() using hb_ot_language_map_t::cmp,
 * so it is expected to be ordered by code.
 *
 * Returns HB_LANGUAGE_INVALID when @code has no entry in the table.
 */
static hb_language_t
_hb_ot_name_language_for (unsigned int code,
                          const hb_ot_language_map_t *array,
                          unsigned int len)
{
  const hb_ot_language_map_t *match =
    (const hb_ot_language_map_t *) hb_bsearch (&code,
                                               array,
                                               len,
                                               sizeof (*array),
                                               hb_ot_language_map_t::cmp);

  /* Unknown code: nothing to translate. */
  if (!match)
    return HB_LANGUAGE_INVALID;

  return hb_language_from_string (match->lang, -1);
}
||||
|
||||
/*
 * Maps a language code to an hb_language_t using the hb_ms_language_map
 * table.  Returns whatever _hb_ot_name_language_for() yields for that table
 * (HB_LANGUAGE_INVALID when the code is not listed).
 */
hb_language_t
_hb_ot_name_language_for_ms_code (unsigned int code)
{
  const unsigned int n_entries = ARRAY_LENGTH (hb_ms_language_map);

  return _hb_ot_name_language_for (code, hb_ms_language_map, n_entries);
}
||||
|
||||
/*
 * Maps a language code to an hb_language_t using the hb_mac_language_map
 * table.  Returns whatever _hb_ot_name_language_for() yields for that table
 * (HB_LANGUAGE_INVALID when the code is not listed).
 */
hb_language_t
_hb_ot_name_language_for_mac_code (unsigned int code)
{
  const unsigned int n_entries = ARRAY_LENGTH (hb_mac_language_map);

  return _hb_ot_name_language_for (code, hb_mac_language_map, n_entries);
}
@ -0,0 +1,40 @@ |
||||
/*
|
||||
* Copyright © 2018 Google, Inc. |
||||
* |
||||
* This is part of HarfBuzz, a text shaping library. |
||||
* |
||||
* Permission is hereby granted, without written agreement and without |
||||
* license or royalty fees, to use, copy, modify, and distribute this |
||||
* software and its documentation for any purpose, provided that the |
||||
* above copyright notice and the following two paragraphs appear in |
||||
* all copies of this software. |
||||
* |
||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
||||
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
||||
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
||||
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
||||
* DAMAGE. |
||||
* |
||||
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
||||
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
||||
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
||||
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
||||
* |
||||
* Google Author(s): Behdad Esfahbod |
||||
*/ |
||||
|
||||
#ifndef HB_OT_NAME_LANGUAGE_HH |
||||
#define HB_OT_NAME_LANGUAGE_HH |
||||
|
||||
#include "hb.hh" |
||||
|
||||
|
||||
HB_INTERNAL hb_language_t |
||||
_hb_ot_name_language_for_ms_code (unsigned int code); |
||||
|
||||
HB_INTERNAL hb_language_t |
||||
_hb_ot_name_language_for_mac_code (unsigned int code); |
||||
|
||||
|
||||
#endif /* HB_OT_NAME_LANGUAGE_HH */ |
@ -0,0 +1,231 @@ |
||||
/*
|
||||
* Copyright © 2018 Google, Inc. |
||||
* |
||||
* This is part of HarfBuzz, a text shaping library. |
||||
* |
||||
* Permission is hereby granted, without written agreement and without |
||||
* license or royalty fees, to use, copy, modify, and distribute this |
||||
* software and its documentation for any purpose, provided that the |
||||
* above copyright notice and the following two paragraphs appear in |
||||
* all copies of this software. |
||||
* |
||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
||||
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
||||
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
||||
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
||||
* DAMAGE. |
||||
* |
||||
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
||||
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
||||
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
||||
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
||||
* |
||||
* Google Author(s): Behdad Esfahbod |
||||
*/ |
||||
|
||||
#include "hb.hh" |
||||
|
||||
#include "hb-ot-name-table.hh" |
||||
|
||||
#include "hb-ot-face.hh" |
||||
#include "hb-utf.hh" |
||||
|
||||
|
||||
/**
|
||||
* SECTION:hb-ot-name |
||||
* @title: hb-ot-name |
||||
* @short_description: OpenType font name information |
||||
* @include: hb-ot.h |
||||
* |
||||
* Functions for fetching name strings from OpenType fonts. |
||||
**/ |
||||
|
||||
|
||||
/*
 * Returns the 'name' accelerator stored in @face's OT face data.
 * When the OT face data cannot be ensured, the shared Null accelerator
 * object is returned instead, so callers always get a valid reference.
 */
static inline const OT::name_accelerator_t&
_get_name (hb_face_t *face)
{
  if (unlikely (!hb_ot_shaper_face_data_ensure (face)))
  {
    return Null(OT::name_accelerator_t);
  }

  const OT::name_accelerator_t *accel = hb_ot_face_data (face)->name.get ();
  return *accel;
}
||||
|
||||
/**
|
||||
* hb_ot_name_list_names: |
||||
* @face: font face. |
||||
* @num_entries: (out): number of returned entries. |
||||
* |
||||
* Enumerates all available name IDs and language combinations. Returned |
||||
* array is owned by the @face and should not be modified. It can be |
||||
* used as long as @face is alive. |
||||
* |
||||
* Returns: (out) (transfer none) (array length=num_entries): Array of available name entries. |
||||
* Since: 2.1.0 |
||||
**/ |
||||
const hb_ot_name_entry_t * |
||||
hb_ot_name_list_names (hb_face_t *face, |
||||
unsigned int *num_entries /* OUT */) |
||||
{ |
||||
const OT::name_accelerator_t &name = _get_name (face); |
||||
*num_entries = name.names.len; |
||||
return name.names.arrayZ(); |
||||
} |
||||
|
||||
|
||||
template <typename in_utf_t, typename out_utf_t> |
||||
static inline unsigned int |
||||
hb_ot_name_convert_utf (const hb_bytes_t *bytes, |
||||
unsigned int *text_size /* IN/OUT */, |
||||
typename out_utf_t::codepoint_t *text /* OUT */) |
||||
{ |
||||
unsigned int src_len = bytes->len / sizeof (typename in_utf_t::codepoint_t); |
||||
const typename in_utf_t::codepoint_t *src = (const typename in_utf_t::codepoint_t *) bytes->arrayZ; |
||||
const typename in_utf_t::codepoint_t *src_end = src + src_len; |
||||
|
||||
typename out_utf_t::codepoint_t *dst = text; |
||||
|
||||
hb_codepoint_t unicode; |
||||
const hb_codepoint_t replacement = HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT; |
||||
|
||||
if (text_size && *text_size) |
||||
{ |
||||
(*text_size)--; /* Same room for NUL-termination. */ |
||||
const typename out_utf_t::codepoint_t *dst_end = text + *text_size; |
||||
|
||||
while (src < src_end && dst < dst_end) |
||||
{ |
||||
const typename in_utf_t::codepoint_t *src_next = in_utf_t::next (src, src_end, &unicode, replacement); |
||||
typename out_utf_t::codepoint_t *dst_next = out_utf_t::encode (dst, dst_end, unicode); |
||||
if (dst_next == dst) |
||||
break; /* Out-of-room. */ |
||||
|
||||
dst = dst_next; |
||||
src = src_next; |
||||
}; |
||||
|
||||
*text_size = dst - text; |
||||
*dst = 0; /* NUL-terminate. */ |
||||
} |
||||
|
||||
/* Accumulate length of rest. */ |
||||
unsigned int dst_len = dst - text; |
||||
while (src < src_end) |
||||
{ |
||||
src = in_utf_t::next (src, src_end, &unicode, replacement); |
||||
dst_len += out_utf_t::encode_len (unicode); |
||||
}; |
||||
return dst_len; |
||||
} |
||||
|
||||
template <typename utf_t> |
||||
static inline unsigned int |
||||
hb_ot_name_get_utf (hb_face_t *face, |
||||
hb_ot_name_id_t name_id, |
||||
hb_language_t language, |
||||
unsigned int *text_size /* IN/OUT */, |
||||
typename utf_t::codepoint_t *text /* OUT */) |
||||
{ |
||||
const OT::name_accelerator_t &name = _get_name (face); |
||||
|
||||
if (!language) |
||||
language = hb_language_from_string ("en", 2); |
||||
|
||||
unsigned int width; |
||||
int idx = name.get_index (name_id, language, &width); |
||||
if (idx != -1) |
||||
{ |
||||
hb_bytes_t bytes = name.get_name (idx); |
||||
|
||||
if (width == 2) /* UTF16-BE */ |
||||
return hb_ot_name_convert_utf<hb_utf16_be_t, utf_t> (&bytes, text_size, text); |
||||
|
||||
if (width == 1) /* ASCII */ |
||||
return hb_ot_name_convert_utf<hb_ascii_t, utf_t> (&bytes, text_size, text); |
||||
} |
||||
|
||||
if (text_size) |
||||
{ |
||||
if (*text_size) |
||||
*text = 0; |
||||
*text_size = 0; |
||||
} |
||||
return 0; |
||||
} |
||||
|
||||
/**
|
||||
* hb_ot_name_get_utf8: |
||||
* @face: font face. |
||||
* @name_id: OpenType name identifier to fetch. |
||||
* @language: language to fetch the name for. |
||||
* @text_size: (inout) (allow-none): input size of @text buffer, and output size of |
||||
* text written to buffer. |
||||
* @text: (out caller-allocates) (array length=text_size): buffer to write fetched name into. |
||||
* |
||||
* Fetches a font name from the OpenType 'name' table. |
||||
* If @language is #HB_LANGUAGE_INVALID, English ("en") is assumed. |
||||
* Returns string in UTF-8 encoding. |
||||
* |
||||
* Returns: full length of the requested string, or 0 if not found. |
||||
* Since: 2.1.0 |
||||
**/ |
||||
unsigned int |
||||
hb_ot_name_get_utf8 (hb_face_t *face, |
||||
hb_ot_name_id_t name_id, |
||||
hb_language_t language, |
||||
unsigned int *text_size /* IN/OUT */, |
||||
char *text /* OUT */) |
||||
{ |
||||
return hb_ot_name_get_utf<hb_utf8_t> (face, name_id, language, text_size, |
||||
(hb_utf8_t::codepoint_t *) text); |
||||
} |
||||
|
||||
/**
|
||||
* hb_ot_name_get_utf16: |
||||
* @face: font face. |
||||
* @name_id: OpenType name identifier to fetch. |
||||
* @language: language to fetch the name for. |
||||
* @text_size: (inout) (allow-none): input size of @text buffer, and output size of |
||||
* text written to buffer. |
||||
* @text: (out caller-allocates) (array length=text_size): buffer to write fetched name into. |
||||
* |
||||
* Fetches a font name from the OpenType 'name' table. |
||||
* If @language is #HB_LANGUAGE_INVALID, English ("en") is assumed. |
||||
* Returns string in UTF-16 encoding. |
||||
* |
||||
* Returns: full length of the requested string, or 0 if not found. |
||||
* Since: 2.1.0 |
||||
**/ |
||||
unsigned int |
||||
hb_ot_name_get_utf16 (hb_face_t *face, |
||||
hb_ot_name_id_t name_id, |
||||
hb_language_t language, |
||||
unsigned int *text_size /* IN/OUT */, |
||||
uint16_t *text /* OUT */) |
||||
{ |
||||
return hb_ot_name_get_utf<hb_utf16_t> (face, name_id, language, text_size, text); |
||||
} |
||||
|
||||
/**
|
||||
* hb_ot_name_get_utf32: |
||||
* @face: font face. |
||||
* @name_id: OpenType name identifier to fetch. |
||||
* @language: language to fetch the name for. |
||||
* @text_size: (inout) (allow-none): input size of @text buffer, and output size of |
||||
* text written to buffer. |
||||
* @text: (out caller-allocates) (array length=text_size): buffer to write fetched name into. |
||||
* |
||||
* Fetches a font name from the OpenType 'name' table. |
||||
* If @language is #HB_LANGUAGE_INVALID, English ("en") is assumed. |
||||
* Returns string in UTF-32 encoding. |
||||
* |
||||
* Returns: full length of the requested string, or 0 if not found. |
||||
* Since: 2.1.0 |
||||
**/ |
||||
unsigned int |
||||
hb_ot_name_get_utf32 (hb_face_t *face, |
||||
hb_ot_name_id_t name_id, |
||||
hb_language_t language, |
||||
unsigned int *text_size /* IN/OUT */, |
||||
uint32_t *text /* OUT */) |
||||
{ |
||||
return hb_ot_name_get_utf<hb_utf32_t> (face, name_id, language, text_size, text); |
||||
} |
@ -1,78 +0,0 @@ |
||||
/*
|
||||
* Copyright © 2009 Red Hat, Inc. |
||||
* |
||||
* This is part of HarfBuzz, a text shaping library. |
||||
* |
||||
* Permission is hereby granted, without written agreement and without |
||||
* license or royalty fees, to use, copy, modify, and distribute this |
||||
* software and its documentation for any purpose, provided that the |
||||
* above copyright notice and the following two paragraphs appear in |
||||
* all copies of this software. |
||||
* |
||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
||||
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
||||
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
||||
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
||||
* DAMAGE. |
||||
* |
||||
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
||||
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
||||
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
||||
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
||||
* |
||||
* Red Hat Author(s): Behdad Esfahbod |
||||
*/ |
||||
|
||||
#ifndef HB_OT_H_IN |
||||
#error "Include <hb-ot.h> instead." |
||||
#endif |
||||
|
||||
#ifndef HB_OT_TAG_H |
||||
#define HB_OT_TAG_H |
||||
|
||||
#include "hb.h" |
||||
|
||||
HB_BEGIN_DECLS |
||||
|
||||
|
||||
#define HB_OT_TAG_DEFAULT_SCRIPT HB_TAG ('D', 'F', 'L', 'T') |
||||
#define HB_OT_TAG_DEFAULT_LANGUAGE HB_TAG ('d', 'f', 'l', 't') |
||||
|
||||
/**
|
||||
* HB_OT_MAX_TAGS_PER_SCRIPT: |
||||
* |
||||
* Since: 2.0.0 |
||||
**/ |
||||
#define HB_OT_MAX_TAGS_PER_SCRIPT 3u |
||||
/**
|
||||
* HB_OT_MAX_TAGS_PER_LANGUAGE: |
||||
* |
||||
* Since: 2.0.0 |
||||
**/ |
||||
#define HB_OT_MAX_TAGS_PER_LANGUAGE 3u |
||||
|
||||
HB_EXTERN void |
||||
hb_ot_tags_from_script_and_language (hb_script_t script, |
||||
hb_language_t language, |
||||
unsigned int *script_count /* IN/OUT */, |
||||
hb_tag_t *script_tags /* OUT */, |
||||
unsigned int *language_count /* IN/OUT */, |
||||
hb_tag_t *language_tags /* OUT */); |
||||
|
||||
HB_EXTERN hb_script_t |
||||
hb_ot_tag_to_script (hb_tag_t tag); |
||||
|
||||
HB_EXTERN hb_language_t |
||||
hb_ot_tag_to_language (hb_tag_t tag); |
||||
|
||||
HB_EXTERN void |
||||
hb_ot_tags_to_script_and_language (hb_tag_t script_tag, |
||||
hb_tag_t language_tag, |
||||
hb_script_t *script /* OUT */, |
||||
hb_language_t *language /* OUT */); |
||||
|
||||
|
||||
HB_END_DECLS |
||||
|
||||
#endif /* HB_OT_TAG_H */ |
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue