From 8708b9e081192786c027bb7f5f23d76dbe5c19e8 Mon Sep 17 00:00:00 2001
From: Behdad Esfahbod <behdad@behdad.org>
Date: Mon, 6 Feb 2023 14:51:25 -0700
Subject: [PATCH] [GPOS] Avoid O(n^2) behavior in mark-attachment

Upstream-Status: Backport from [https://github.com/harfbuzz/harfbuzz/commit/8708b9e081192786c027bb7f5f23d76dbe5c19e8]
Comment1: The Original Patch [https://github.com/harfbuzz/harfbuzz/commit/85be877925ddbf34f74a1229f3ca1716bb6170dc] causes regression and was reverted. This Patch completes the fix.
Comment2: The Patch contained files MarkBasePosFormat1.hh and MarkLigPosFormat1.hh which were moved from hb-ot-layout-gpos-table.hh as per https://github.com/harfbuzz/harfbuzz/commit/197d9a5c994eb41c8c89b7b958b26b1eacfeeb00
CVE: CVE-2023-25193
Signed-off-by: Siddharth Doshi <sdoshi@mvista.com>
---
 src/hb-ot-layout-gpos-table.hh | 98 ++++++++++++++++++++++------------
 src/hb-ot-layout-gsubgpos.hh   |  5 +-
 2 files changed, 68 insertions(+), 35 deletions(-)

diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh
index 2f9186a..46b09d0 100644
--- a/src/hb-ot-layout-gpos-table.hh
+++ b/src/hb-ot-layout-gpos-table.hh
@@ -2150,6 +2150,25 @@ struct MarkBasePosFormat1
 
   const Coverage &get_coverage () const { return this+markCoverage; }
 
+  static inline bool accept (hb_buffer_t *buffer, unsigned idx)
+  {
+    /* We only want to attach to the first of a MultipleSubst sequence.
+     * https://github.com/harfbuzz/harfbuzz/issues/740
+     * Reject others...
+     * ...but stop if we find a mark in the MultipleSubst sequence:
+     * https://github.com/harfbuzz/harfbuzz/issues/1020 */
+    return !_hb_glyph_info_multiplied (&buffer->info[idx]) ||
+	   0 == _hb_glyph_info_get_lig_comp (&buffer->info[idx]) ||
+	   (idx == 0 ||
+	    _hb_glyph_info_is_mark (&buffer->info[idx - 1]) ||
+	    !_hb_glyph_info_multiplied (&buffer->info[idx - 1]) ||
+	    _hb_glyph_info_get_lig_id (&buffer->info[idx]) !=
+	    _hb_glyph_info_get_lig_id (&buffer->info[idx - 1]) ||
+	    _hb_glyph_info_get_lig_comp (&buffer->info[idx]) !=
+	    _hb_glyph_info_get_lig_comp (&buffer->info[idx - 1]) + 1
+	    );
+  }
+
   bool apply (hb_ot_apply_context_t *c) const
   {
     TRACE_APPLY (this);
@@ -2157,47 +2176,46 @@ struct MarkBasePosFormat1
     unsigned int mark_index = (this+markCoverage).get_coverage  (buffer->cur().codepoint);
     if (likely (mark_index == NOT_COVERED)) return_trace (false);
 
-    /* Now we search backwards for a non-mark glyph */
+    /* Now we search backwards for a non-mark glyph.
+     * We don't use skippy_iter.prev() to avoid O(n^2) behavior. */
+
     hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
-    skippy_iter.reset (buffer->idx, 1);
     skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks);
-    do {
-      unsigned unsafe_from;
-      if (!skippy_iter.prev (&unsafe_from))
+    unsigned j;
+    for (j = buffer->idx; j > c->last_base_until; j--)
+    {
+      auto match = skippy_iter.match (buffer->info[j - 1]);
+      if (match == skippy_iter.MATCH)
       {
-	buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1);
-	return_trace (false);
+       if (!accept (buffer, j - 1))
+         match = skippy_iter.SKIP;
       }
+      if (match == skippy_iter.MATCH)
+      {
+       c->last_base = (signed) j - 1;
+       break;
+      }
+    }
+    c->last_base_until = buffer->idx;
+    if (c->last_base == -1)
+    {
+      buffer->unsafe_to_concat_from_outbuffer (0, buffer->idx + 1);
+      return_trace (false);
+    }
 
-      /* We only want to attach to the first of a MultipleSubst sequence.
-       * https://github.com/harfbuzz/harfbuzz/issues/740
-       * Reject others...
-       * ...but stop if we find a mark in the MultipleSubst sequence:
-       * https://github.com/harfbuzz/harfbuzz/issues/1020 */
-      if (!_hb_glyph_info_multiplied (&buffer->info[skippy_iter.idx]) ||
-	  0 == _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]) ||
-	  (skippy_iter.idx == 0 ||
-	   _hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx - 1]) ||
-	   _hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx]) !=
-	   _hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx - 1]) ||
-	   _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]) !=
-	   _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx - 1]) + 1
-	   ))
-	break;
-      skippy_iter.reject ();
-    } while (true);
+    unsigned idx = (unsigned) c->last_base;
 
     /* Checking that matched glyph is actually a base glyph by GDEF is too strong; disabled */
-    //if (!_hb_glyph_info_is_base_glyph (&buffer->info[skippy_iter.idx])) { return_trace (false); }
+    //if (!_hb_glyph_info_is_base_glyph (&buffer->info[idx])) { return_trace (false); }
 
-    unsigned int base_index = (this+baseCoverage).get_coverage  (buffer->info[skippy_iter.idx].codepoint);
+    unsigned int base_index = (this+baseCoverage).get_coverage  (buffer->info[idx].codepoint);
     if (base_index == NOT_COVERED)
     {
-      buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
+      buffer->unsafe_to_concat_from_outbuffer (idx, buffer->idx + 1);
       return_trace (false);
     }
 
-    return_trace ((this+markArray).apply (c, mark_index, base_index, this+baseArray, classCount, skippy_iter.idx));
+    return_trace ((this+markArray).apply (c, mark_index, base_index, this+baseArray, classCount, idx));
   }
 
   bool subset (hb_subset_context_t *c) const
@@ -2423,20 +2441,32 @@ struct MarkLigPosFormat1
     if (likely (mark_index == NOT_COVERED)) return_trace (false);
 
     /* Now we search backwards for a non-mark glyph */
+
     hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
-    skippy_iter.reset (buffer->idx, 1);
     skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks);
-    unsigned unsafe_from;
-    if (!skippy_iter.prev (&unsafe_from))
+
+    unsigned j;
+    for (j = buffer->idx; j > c->last_base_until; j--)
     {
-      buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1);
+      auto match = skippy_iter.match (buffer->info[j - 1]);
+      if (match == skippy_iter.MATCH)
+      {
+	c->last_base = (signed) j - 1;
+	break;
+      }
+    }
+    c->last_base_until = buffer->idx;
+    if (c->last_base == -1)
+    {
+      buffer->unsafe_to_concat_from_outbuffer (0, buffer->idx + 1);
       return_trace (false);
     }
 
+    j = (unsigned) c->last_base;
+
     /* Checking that matched glyph is actually a ligature by GDEF is too strong; disabled */
-    //if (!_hb_glyph_info_is_ligature (&buffer->info[skippy_iter.idx])) { return_trace (false); }
+    //if (!_hb_glyph_info_is_ligature (&buffer->info[j])) { return_trace (false); }
 
-    unsigned int j = skippy_iter.idx;
     unsigned int lig_index = (this+ligatureCoverage).get_coverage  (buffer->info[j].codepoint);
     if (lig_index == NOT_COVERED)
     {
diff --git a/src/hb-ot-layout-gsubgpos.hh b/src/hb-ot-layout-gsubgpos.hh
index 65de131..d9a068c 100644
--- a/src/hb-ot-layout-gsubgpos.hh
+++ b/src/hb-ot-layout-gsubgpos.hh
@@ -641,6 +641,9 @@ struct hb_ot_apply_context_t :
   uint32_t random_state;
 
 
+  signed last_base = -1; // GPOS uses
+  unsigned last_base_until = 0; // GPOS uses
+
   hb_ot_apply_context_t (unsigned int table_index_,
 			 hb_font_t *font_,
 			 hb_buffer_t *buffer_) :
@@ -673,7 +676,7 @@ struct hb_ot_apply_context_t :
     iter_context.init (this, true);
   }
 
-  void set_lookup_mask (hb_mask_t mask) { lookup_mask = mask; init_iters (); }
+  void set_lookup_mask (hb_mask_t mask) { lookup_mask = mask; last_base = -1; last_base_until = 0; init_iters (); }
   void set_auto_zwj (bool auto_zwj_) { auto_zwj = auto_zwj_; init_iters (); }
   void set_auto_zwnj (bool auto_zwnj_) { auto_zwnj = auto_zwnj_; init_iters (); }
   void set_random (bool random_) { random = random_; }
-- 
2.25.1

