From 7f362196c5b7ed2856e310f41999092e07d82281 Mon Sep 17 00:00:00 2001
From: Khaled Hosny <khaled@aliftype.com>
Date: Sat, 18 Jun 2022 20:28:43 +0200
Subject: [PATCH] [arabic] Split ligature array

Move mark ligatures out of ligature_table into a separate
ligature_mark_table, and generate 3-component ligatures into a new
ligature_3_table. The new arrays are currently unused.
---
 src/gen-arabic-table.py          | 102 +++++++++++++++++++++++++------
 src/hb-ot-shaper-arabic-table.hh |  42 ++++++++++---
 2 files changed, 117 insertions(+), 27 deletions(-)

diff --git a/src/gen-arabic-table.py b/src/gen-arabic-table.py
index 7bb49693b..0be00faa7 100755
--- a/src/gen-arabic-table.py
+++ b/src/gen-arabic-table.py
@@ -195,7 +195,7 @@ def print_shaping_table(f):
 				items = items[:0:-1]
 				shape = None
 			# We only care about a subset of ligatures
-			if c not in LIGATURES or len (items) != 2:
+			if c not in LIGATURES:
 				continue
 
 			# Save ligature
@@ -231,24 +231,45 @@ def print_shaping_table(f):
 	print ("#define SHAPING_TABLE_LAST	0x%04Xu" % max_u)
 	print ()
 
-	ligas = {}
-	for pair in ligatures.keys ():
-		for shape in ligatures[pair]:
-			c = ligatures[pair][shape]
-			if shape is None:
-				liga = pair
-			elif shape == 'isolated':
-				liga = (shapes[pair[0]]['initial'], shapes[pair[1]]['final'])
-			elif shape == 'final':
-				liga = (shapes[pair[0]]['medial'], shapes[pair[1]]['final'])
-			elif shape == 'initial':
-				liga = (shapes[pair[0]]['initial'], shapes[pair[1]]['medial'])
+	ligas_2 = {}
+	ligas_3 = {}
+	ligas_mark_2 = {}
+	for key in ligatures.keys ():
+		for shape in ligatures[key]:
+			c = ligatures[key][shape]
+			if len(key) == 3:
+				if shape == 'isolated':
+					liga = (shapes[key[0]]['initial'], shapes[key[1]]['medial'], shapes[key[2]]['final'])
+				elif shape == 'final':
+					liga = (shapes[key[0]]['medial'], shapes[key[1]]['medial'], shapes[key[2]]['final'])
+				elif shape == 'initial':
+					liga = (shapes[key[0]]['initial'], shapes[key[1]]['medial'], shapes[key[2]]['medial'])
+				else:
+					raise Exception ("Unexpected shape", shape)
+				if liga[0] not in ligas_3:
+					ligas_3[liga[0]] = []
+				ligas_3[liga[0]].append ((liga[1], liga[2], c))
+			elif len(key) == 2:
+				if shape is None:
+					liga = key
+					if liga[0] not in ligas_mark_2:
+						ligas_mark_2[liga[0]] = []
+					ligas_mark_2[liga[0]].append ((liga[1], c))
+					continue
+				elif shape == 'isolated':
+					liga = (shapes[key[0]]['initial'], shapes[key[1]]['final'])
+				elif shape == 'final':
+					liga = (shapes[key[0]]['medial'], shapes[key[1]]['final'])
+				elif shape == 'initial':
+					liga = (shapes[key[0]]['initial'], shapes[key[1]]['medial'])
+				else:
+					raise Exception ("Unexpected shape", shape)
+				if liga[0] not in ligas_2:
+					ligas_2[liga[0]] = []
+				ligas_2[liga[0]].append ((liga[1], c))
 			else:
-				raise Exception ("Unexpected shape", shape)
-			if liga[0] not in ligas:
-				ligas[liga[0]] = []
-			ligas[liga[0]].append ((liga[1], c))
-	max_i = max (len (ligas[l]) for l in ligas)
+				raise Exception ("Unexpected number of ligature components", key)
+	max_i = max (len (ligas_2[l]) for l in ligas_2)
 	print ()
 	print ("static const struct ligature_set_t {")
 	print (" uint16_t first;")
@@ -258,16 +279,57 @@ def print_shaping_table(f):
 	print (" } ligatures[%d];" % max_i)
 	print ("} ligature_table[] =")
 	print ("{")
-	for first in sorted (ligas.keys ()):
+	for first in sorted (ligas_2.keys ()):
+
+		print ("  { 0x%04Xu, {" % (first))
+		for liga in ligas_2[first]:
+			print ("    { 0x%04Xu, 0x%04Xu }, /* %s */" % (liga[0], liga[1], names[liga[1]]))
+		print ("  }},")
+
+	print ("};")
+	print ()
+
+	max_i = max (len (ligas_mark_2[l]) for l in ligas_mark_2)
+	print ()
+	print ("static const struct ligature_mark_set_t {")
+	print (" uint16_t first;")
+	print (" struct ligature_pairs_t {")
+	print ("   uint16_t second;")
+	print ("   uint16_t ligature;")
+	print (" } ligatures[%d];" % max_i)
+	print ("} ligature_mark_table[] =")
+	print ("{")
+	for first in sorted (ligas_mark_2.keys ()):
 
 		print ("  { 0x%04Xu, {" % (first))
-		for liga in ligas[first]:
+		for liga in ligas_mark_2[first]:
 			print ("    { 0x%04Xu, 0x%04Xu }, /* %s */" % (liga[0], liga[1], names[liga[1]]))
 		print ("  }},")
 
 	print ("};")
 	print ()
 
+	max_i = max (len (ligas_3[l]) for l in ligas_3)
+	print ()
+	print ("static const struct ligature_3_set_t {")
+	print (" uint16_t first;")
+	print (" struct ligature_triplets_t {")
+	print ("   uint16_t second;")
+	print ("   uint16_t third;")
+	print ("   uint16_t ligature;")
+	print (" } ligatures[%d];" % max_i)
+	print ("} ligature_3_table[] =")
+	print ("{")
+	for first in sorted (ligas_3.keys ()):
+
+		print ("  { 0x%04Xu, {" % (first))
+		for liga in ligas_3[first]:
+			print ("    { 0x%04Xu, 0x%04Xu, 0x%04Xu}, /* %s */" % (liga[0], liga[1], liga[2], names[liga[2]]))
+		print ("  }},")
+
+	print ("};")
+	print ()
+
 
 
 print ("/* == Start of generated table == */")
diff --git a/src/hb-ot-shaper-arabic-table.hh b/src/hb-ot-shaper-arabic-table.hh
index b0b65e0a3..a817e4123 100644
--- a/src/hb-ot-shaper-arabic-table.hh
+++ b/src/hb-ot-shaper-arabic-table.hh
@@ -421,13 +421,6 @@ static const struct ligature_set_t {
  } ligatures[14];
 } ligature_table[] =
 {
-  { 0x0651u, {
-    { 0x064Cu, 0xFC5Eu }, /* ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM */
-    { 0x064Eu, 0xFC60u }, /* ARABIC LIGATURE SHADDA WITH FATHA ISOLATED FORM */
-    { 0x064Fu, 0xFC61u }, /* ARABIC LIGATURE SHADDA WITH DAMMA ISOLATED FORM */
-    { 0x0650u, 0xFC62u }, /* ARABIC LIGATURE SHADDA WITH KASRA ISOLATED FORM */
-    { 0x064Bu, 0xF2EEu }, /* PUA ARABIC LIGATURE SHADDA WITH FATHATAN ISOLATED FORM */
-  }},
   { 0xFE91u, {
     { 0xFEE2u, 0xFC08u }, /* ARABIC LIGATURE BEH WITH MEEM ISOLATED FORM */
     { 0xFEE4u, 0xFC9Fu }, /* ARABIC LIGATURE BEH WITH MEEM INITIAL FORM */
@@ -524,6 +517,41 @@ static const struct ligature_set_t {
 };
 
 
+static const struct ligature_mark_set_t {
+ uint16_t first;
+ struct ligature_pairs_t {
+   uint16_t second;
+   uint16_t ligature;
+ } ligatures[5];
+} ligature_mark_table[] =
+{
+  { 0x0651u, {
+    { 0x064Cu, 0xFC5Eu }, /* ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM */
+    { 0x064Eu, 0xFC60u }, /* ARABIC LIGATURE SHADDA WITH FATHA ISOLATED FORM */
+    { 0x064Fu, 0xFC61u }, /* ARABIC LIGATURE SHADDA WITH DAMMA ISOLATED FORM */
+    { 0x0650u, 0xFC62u }, /* ARABIC LIGATURE SHADDA WITH KASRA ISOLATED FORM */
+    { 0x064Bu, 0xF2EEu }, /* PUA ARABIC LIGATURE SHADDA WITH FATHATAN ISOLATED FORM */
+  }},
+};
+
+
+static const struct ligature_3_set_t {
+ uint16_t first;
+ struct ligature_triplets_t {
+   uint16_t second;
+   uint16_t third;
+   uint16_t ligature;
+ } ligatures[3];
+} ligature_3_table[] =
+{
+  { 0xFEDFu, {
+    { 0xFEE4u, 0xFEA4u, 0xFD88u}, /* ARABIC LIGATURE LAM WITH MEEM WITH HAH INITIAL FORM */
+    { 0xFEE0u, 0xFEEAu, 0xF201u}, /* PUA ARABIC LIGATURE LELLAH ISOLATED FORM */
+    { 0xFEE4u, 0xFEA0u, 0xF211u}, /* PUA ARABIC LIGATURE LAM WITH MEEM WITH JEEM INITIAL FORM */
+  }},
+};
+
+
 #endif /* HB_OT_SHAPER_ARABIC_TABLE_HH */
 
 /* == End of generated table == */