Googletest export

Breaks Windows builds PiperOrigin-RevId: 210434120
7 years ago · 52f8183e7f
parent 167c5e8188
commit 52f8183e7f
5 changed files with 265 additions and 580 deletions
--- a/googletest/include/gtest/internal/gtest-internal.h
+++ b/googletest/include/gtest/internal/gtest-internal.h
@ -159,15 +159,15 @@ GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4275

 #endif  // GTEST_HAS_EXCEPTIONS

+namespace edit_distance {
 // Returns the optimal edits to go from 'left' to 'right'.
 // All edits cost the same, with replace having lower priority than
-// add/remove. Returns an approximation of the maximum memory used in
-// 'memory_usage' if non-null.
-// Uses a Dijkstra search, with a couple of simple bells and whistles added on.
-enum EditType { kEditMatch, kEditAdd, kEditRemove, kEditReplace };
+// add/remove.
+// Simple implementation of the Wagner-Fischer algorithm.
+// See http://en.wikipedia.org/wiki/Wagner-Fischer_algorithm
+enum EditType { kMatch, kAdd, kRemove, kReplace };
 GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
-    const std::vector<size_t>& left, const std::vector<size_t>& right,
-    size_t* memory_usage = NULL);
+    const std::vector<size_t>& left, const std::vector<size_t>& right);

 // Same as above, but the input is represented as strings.
 GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
@ -179,6 +179,8 @@ GTEST_API_ std::string CreateUnifiedDiff(const std::vector<std::string>& left,
                                         const std::vector<std::string>& right,
                                         size_t context = 2);

+}  // namespace edit_distance
+
 // Calculate the diff between 'left' and 'right' and return it in unified diff
 // format.
 // If not null, stores in 'total_line_count' the total number of lines found
--- a/googletest/src/gtest-all.cc
+++ b/googletest/src/gtest-all.cc
@ -38,11 +38,10 @@
 #include "gtest/gtest.h"

 // The following lines pull in the real gtest *.cc files.
+#include "src/gtest.cc"
 #include "src/gtest-death-test.cc"
-#include "src/gtest-edit-distance.cc"
 #include "src/gtest-filepath.cc"
 #include "src/gtest-port.cc"
 #include "src/gtest-printers.cc"
 #include "src/gtest-test-part.cc"
 #include "src/gtest-typed-test.cc"
-#include "src/gtest.cc"
--- a/googletest/src/gtest-edit-distance.cc
+++ b/googletest/src/gtest-edit-distance.cc
@ -1,507 +0,0 @@
-// Copyright 2018, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-//
-// Internal helper functions for finding optimal edit transformations
-// between strings.
-
-#include "gtest/gtest.h"
-
-#include <functional>
-#include <list>
-#include <ostream>  // NOLINT
-#include <queue>
-#include <vector>
-
-namespace testing {
-namespace internal {
-namespace {
-
-// The following implement data structures and code for a Dijkstra-search
-// based implementation of optimal edit distance.
-
-// Posible states a node can be in. Either a node is unsettled (it hasn't been
-// drawn from the priority queue yet), or it is settled and a back-link to its
-// parent node is fixed.
-enum EditSearchState {
-  kUnsettled,
-  kMatchParent,
-  kAddParent,
-  kRemoveParent,
-  kReplaceParent
-};
-
-// Custom container for search states. This is smaller and faster than a hash
-// map, because the used states are dense along diagonals.
-// Specifically, each state requires only 1 byte, whereas a hash_map would
-// require storing the key, which would come to at least 8 bytes. std::map has
-// an extra 32 bytes per node (3 pointers + 1 byte, padded), so even though
-// there are circumstances where this class can have kBlockSize overhead per
-// state, on average it does better than 40 bytes of overhead per state.
-// In addition, in unopt builds (the usual way tests are run) the fewer
-// allocations + better locality has this method running 10-50x faster than
-// std::map for inputs that are large enough to measure.
-class EditSearchMap {
- public:
-  EditSearchMap(size_t left_size, size_t right_size)
-      : left_size_(left_size), right_size_(right_size) {
-    GTEST_CHECK_(left_size_ == left_size && right_size_ == right_size)
-        << "Overflow in size: Arguments too large";
-  }
-
-  // Gets a mutable reference to a state - this is actually of type
-  // EditSearchState - inserting if it does not exist.
-  unsigned char& insert(UInt32 left, UInt32 right) {
-    std::vector<UInt32>* vec;
-    size_t index1;
-    size_t index2;
-    if (left > right) {
-      vec = &left_nodes_;
-      index1 = left - right - 1;
-      index2 = right;
-    } else {
-      vec = &right_nodes_;
-      index1 = right - left;
-      index2 = left;
-    }
-    if (vec->size() <= index1) {
-      GTEST_CHECK_(vec->size() == index1)
-          << "Array diagonals should only grow by one " << vec->size() << " vs "
-          << index1;
-      vec->push_back(block_indices_.size());
-      // Round up
-      block_indices_.resize(
-          block_indices_.size() +
-              (DiagonalLength(left, right) + kBlockSize - 1) / kBlockSize,
-          kUnallocatedBlock);
-    }
-    const size_t bucket = index2 / kBlockSize;
-    const size_t pos_in_bucket = index2 % kBlockSize;
-    UInt32& level2 = block_indices_[(*vec)[index1] + bucket];
-    if (level2 == kUnallocatedBlock) {
-      level2 = nodes_.size();
-      size_t diagonal_length = DiagonalLength(left, right);
-      GTEST_CHECK_(diagonal_length > index2)
-          << diagonal_length << " " << index2;
-      size_t block_size = kBlockSize;
-      if (diagonal_length / kBlockSize == bucket) {
-        // We can never get here if diagonal_length is a multiple of
-        // kBlockSize, which is what we want, since this would evaluate to 0.
-        block_size = diagonal_length % kBlockSize;
-      }
-      nodes_.resize(nodes_.size() + block_size);
-    }
-    return nodes_[level2 + pos_in_bucket];
-  }
-
-  size_t MemoryUsage() const {
-    return nodes_.capacity() +
-           sizeof(UInt32) * (left_nodes_.capacity() + right_nodes_.capacity() +
-                             block_indices_.capacity());
-  }
-
- private:
-  enum { kBlockSize = 1024, kUnallocatedBlock = 0xFFFFFFFFul };
-
-  size_t DiagonalLength(UInt32 left, UInt32 right) const {
-    return std::min(left_size_ - left, right_size_ - right) +
-           (left < right ? left : right);
-  }
-
-  // The state space is conceptually a left_size_ by right_size_ sparse matrix
-  // of EditSearchStates. However, due to the access pattern of the search, it
-  // is much better to store the nodes per diagonal rather than per row.
-  UInt32 left_size_;
-  UInt32 right_size_;
-  // The nodes are stored by diagonals, split in two: Those to the left of the
-  // main diagonal are in left_nodes_, and everything else is in right_nodes_.
-  // The values are indices into block_indices_.
-  std::vector<UInt32> left_nodes_;
-  std::vector<UInt32> right_nodes_;
-  // Every entry here is an offset into the beginning of a kBlockSize-sized
-  // block in nodes_. An entire diagonal is allocated together here; for a
-  // diagonal of length <= kBlockSize, that's just a single entry, but for
-  // longer diagonals multiple contiguous index entries will be reserved at
-  // once. Unused entries will be assigned kUnallocatedBlock; this
-  // double-indirect scheme is used to save memory in the cases when an entire
-  // diagonal isn't needed.
-  std::vector<UInt32> block_indices_;
-  // This stores the actual EditSearchState data, pointed to by block_indices_.
-  std::vector<unsigned char> nodes_;
-};
-
-struct EditHeapEntry {
-  EditHeapEntry(UInt32 l, UInt32 r, UInt64 c, EditSearchState s)
-      : left(l), right(r), cost(c), state(s) {}
-
-  UInt32 left;
-  UInt32 right;
-  UInt64 cost : 61;
-  // The state that the node will get when this entry is settled. Therefore,
-  // this can never be kUnsettled.
-  UInt64 state : 3;
-
-  bool operator>(const EditHeapEntry& other) const { return cost > other.cost; }
-};
-
-// Need a min-queue, so invert the comparator.
-typedef std::priority_queue<EditHeapEntry, std::vector<EditHeapEntry>,
-                            std::greater<EditHeapEntry>>
-    EditHeap;
-
-}  // namespace
-
-std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t>& left,
-                                            const std::vector<size_t>& right,
-                                            size_t* memory_usage) {
-  const UInt64 kBaseCost = 100000;
-  // We make replace a little more expensive than add/remove to lower
-  // their priority.
-  const UInt64 kReplaceCost = 100001;
-  // In the common case where the vectors are the same (or almost the same)
-  // size, we know that an add will have to be followed by some later remove
-  // (or vice versa) in order to get the lengths to balance. We "borrow" some
-  // of the cost of the later operation and bring it forward into the earlier
-  // operation, to increase the cost of exploring (usually fruitlessly) around
-  // the beginning of the graph.
-  // However, there is a trade-off: This cheapens the cost of exploring around
-  // the beginning of the graph (in one direction) when the vectors are
-  // unequal in length. So we don't steal *all* the cost.
-  // You can view this as a form of A*, using an admissable heuristic that has
-  // been re-cast as a cost function that can be used in Dijkstra.
-  const UInt64 kTowardsGoalCost = 50003;
-  const UInt64 kAwayFromGoalCost = 2 * kBaseCost - kTowardsGoalCost;
-
-  EditSearchMap node_map(left.size() + 1, right.size() + 1);
-  EditHeap heap;
-  heap.push(EditHeapEntry(0, 0, 0, kReplaceParent));
-
-  while (!heap.empty()) {
-    const EditHeapEntry current_entry = heap.top();
-    heap.pop();
-
-    UInt32 left_pos = current_entry.left;
-    UInt32 right_pos = current_entry.right;
-    unsigned char& current_state = node_map.insert(left_pos, right_pos);
-    if (current_state != kUnsettled) {
-      // Node was already settled by a previous entry in the priority queue,
-      // this is a suboptimal path that should be ignored.
-      continue;
-    }
-    current_state = current_entry.state;
-
-    if (left_pos == left.size() && right_pos == right.size()) {
-      // This is the normal exit point; if we terminate due to the heap being
-      // empty, we'll fail a check later.
-      break;
-    }
-
-    // Special case: Since the cost of a match is zero, we can immediately
-    // settle the new node without putting it in the queue, since nothing can
-    // have a smaller cost than it. Furthermore, we don't need to relax the
-    // other two edges, since we know we don't need them: Any path from this
-    // node that would use them has an path via the match that is at least as
-    // cheap. Together, this means we can loop here until we stop matching.
-    while (left_pos < left.size() && right_pos < right.size() &&
-           left[left_pos] == right[right_pos]) {
-      left_pos++;
-      right_pos++;
-      unsigned char& fast_forward_state = node_map.insert(left_pos, right_pos);
-      if (fast_forward_state != kUnsettled) {
-        // The search reached around and settled this node before settling the
-        // base node. This means we're completely done with this iteration;
-        // abort to the outer loop.
-        goto outer_loop_bottom;
-        // Otherwise, when can settle this node, even if it was created from
-        // another state - we know the cost of settling it now is optimal.
-      }
-      fast_forward_state = kMatchParent;
-    }
-
-    // Relax adjacent nodes. We have no way to find or lower the cost of
-    // existing entries in the heap, so we just push new entries and throw
-    // them out at the top if the node is already settled. We *could* check to
-    // see if they're already settled before pushing, but it turns out to be
-    // ~not any faster, and more complicated to do so.
-    //
-    // If we're at an edge, there's only one node to relax.
-    if (left_pos >= left.size()) {
-      if (right_pos >= right.size()) {
-        break;  // Can happen due to the fast-path loop above.
-      }
-      heap.push(EditHeapEntry(left_pos, right_pos + 1,
-                              current_entry.cost + kTowardsGoalCost,
-                              kAddParent));
-      continue;
-    }
-    if (right_pos >= right.size()) {
-      heap.push(EditHeapEntry(left_pos + 1, right_pos,
-                              current_entry.cost + kTowardsGoalCost,
-                              kRemoveParent));
-      continue;
-    }
-    // General case: Relax 3 edges.
-    heap.push(EditHeapEntry(
-        left_pos, right_pos + 1,
-        current_entry.cost + (right.size() + left_pos > right_pos + left.size()
-                                  ? kTowardsGoalCost
-                                  : kAwayFromGoalCost),
-        kAddParent));
-    heap.push(EditHeapEntry(
-        left_pos + 1, right_pos,
-        current_entry.cost + (right.size() + left_pos < right_pos + left.size()
-                                  ? kTowardsGoalCost
-                                  : kAwayFromGoalCost),
-        kRemoveParent));
-    heap.push(EditHeapEntry(left_pos + 1, right_pos + 1,
-                            current_entry.cost + kReplaceCost, kReplaceParent));
-  outer_loop_bottom : {}  // Need the curlies to form a statement.
-  }
-
-  // Reconstruct the best path. We do it in reverse order.
-  std::vector<EditType> best_path;
-  UInt32 left_pos = left.size();
-  UInt32 right_pos = right.size();
-  while (left_pos != 0 || right_pos != 0) {
-    GTEST_CHECK_(left_pos <= left.size() && right_pos <= right.size());
-    // The node must already exist, but if it somehow doesn't, it will be
-    // added as kUnsettled, which will crash below.
-    const unsigned char state = node_map.insert(left_pos, right_pos);
-    switch (state) {
-      case kAddParent:
-        right_pos--;
-        break;
-      case kRemoveParent:
-        left_pos--;
-        break;
-      case kMatchParent:
-      case kReplaceParent:
-        left_pos--;
-        right_pos--;
-        break;
-      default:
-        GTEST_LOG_(FATAL) << "Unsettled node at " << left_pos << ","
-                          << right_pos;
-    }
-    best_path.push_back(static_cast<EditType>(state - 1));
-  }
-  std::reverse(best_path.begin(), best_path.end());
-  if (memory_usage != NULL) {
-    *memory_usage = node_map.MemoryUsage();
-  }
-  return best_path;
-}
-
-namespace {
-
-// Helper class to convert string into ids with deduplication.
-class InternalStrings {
- public:
-  size_t GetId(const std::string* str) {
-    IdMap::iterator it = ids_.find(str);
-    if (it != ids_.end()) return it->second;
-    size_t id = ids_.size();
-    return ids_[str] = id;
-  }
-
- private:
-  struct IdMapCmp {
-    bool operator()(const std::string* first, const std::string* second) const {
-      return *first < *second;
-    }
-  };
-  typedef std::map<const std::string*, size_t, IdMapCmp> IdMap;
-  IdMap ids_;
-};
-
-}  // namespace
-
-std::vector<EditType> CalculateOptimalEdits(
-    const std::vector<std::string>& left,
-    const std::vector<std::string>& right) {
-  std::vector<size_t> left_ids, right_ids;
-  {
-    InternalStrings intern_table;
-    for (size_t i = 0; i < left.size(); ++i) {
-      left_ids.push_back(intern_table.GetId(&left[i]));
-    }
-    for (size_t i = 0; i < right.size(); ++i) {
-      right_ids.push_back(intern_table.GetId(&right[i]));
-    }
-  }
-  return CalculateOptimalEdits(left_ids, right_ids);
-}
-
-namespace {
-
-// Helper class that holds the state for one hunk and prints it out to the
-// stream.
-// It reorders adds/removes when possible to group all removes before all
-// adds. It also adds the hunk header before printing into the stream.
-class Hunk {
- public:
-  Hunk(size_t left_start, size_t right_start)
-      : left_start_(left_start),
-        right_start_(right_start),
-        adds_(),
-        removes_(),
-        common_() {}
-
-  void PushLine(char edit, const char* line) {
-    switch (edit) {
-      case ' ':
-        ++common_;
-        FlushEdits();
-        hunk_.push_back(std::make_pair(' ', line));
-        break;
-      case '-':
-        ++removes_;
-        hunk_removes_.push_back(std::make_pair('-', line));
-        break;
-      case '+':
-        ++adds_;
-        hunk_adds_.push_back(std::make_pair('+', line));
-        break;
-    }
-  }
-
-  void PrintTo(std::ostream* os) {
-    PrintHeader(os);
-    FlushEdits();
-    for (std::list<std::pair<char, const char*> >::const_iterator it =
-             hunk_.begin();
-         it != hunk_.end(); ++it) {
-      *os << it->first << it->second << "\n";
-    }
-  }
-
-  bool has_edits() const { return adds_ || removes_; }
-
- private:
-  void FlushEdits() {
-    hunk_.splice(hunk_.end(), hunk_removes_);
-    hunk_.splice(hunk_.end(), hunk_adds_);
-  }
-
-  // Print a unified diff header for one hunk.
-  // The format is
-  //   "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@"
-  // where the left/right parts are omitted if unnecessary.
-  void PrintHeader(std::ostream* ss) const {
-    *ss << "@@ ";
-    if (removes_) {
-      *ss << "-" << left_start_ << "," << (removes_ + common_);
-    }
-    if (removes_ && adds_) {
-      *ss << " ";
-    }
-    if (adds_) {
-      *ss << "+" << right_start_ << "," << (adds_ + common_);
-    }
-    *ss << " @@\n";
-  }
-
-  size_t left_start_, right_start_;
-  size_t adds_, removes_, common_;
-  std::list<std::pair<char, const char*> > hunk_, hunk_adds_, hunk_removes_;
-};
-
-}  // namespace
-
-// Create a list of diff hunks in Unified diff format.
-// Each hunk has a header generated by PrintHeader above plus a body with
-// lines prefixed with ' ' for no change, '-' for deletion and '+' for
-// addition.
-// 'context' represents the desired unchanged prefix/suffix around the diff.
-// If two hunks are close enough that their contexts overlap, then they are
-// joined into one hunk.
-std::string CreateUnifiedDiff(const std::vector<std::string>& left,
-                              const std::vector<std::string>& right,
-                              size_t context) {
-  const std::vector<EditType> edits = CalculateOptimalEdits(left, right);
-
-  size_t l_i = 0, r_i = 0, edit_i = 0;
-  std::stringstream ss;
-  while (edit_i < edits.size()) {
-    // Find first edit.
-    while (edit_i < edits.size() && edits[edit_i] == kEditMatch) {
-      ++l_i;
-      ++r_i;
-      ++edit_i;
-    }
-
-    // Find the first line to include in the hunk.
-    const size_t prefix_context = std::min(l_i, context);
-    Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1);
-    for (size_t i = prefix_context; i > 0; --i) {
-      hunk.PushLine(' ', left[l_i - i].c_str());
-    }
-
-    // Iterate the edits until we found enough suffix for the hunk or the input
-    // is over.
-    size_t n_suffix = 0;
-    for (; edit_i < edits.size(); ++edit_i) {
-      if (n_suffix >= context) {
-        // Continue only if the next hunk is very close.
-        std::vector<EditType>::const_iterator it = edits.begin() + edit_i;
-        while (it != edits.end() && *it == kEditMatch) ++it;
-        if (it == edits.end() || (it - edits.begin()) - edit_i >= context) {
-          // There is no next edit or it is too far away.
-          break;
-        }
-      }
-
-      EditType edit = edits[edit_i];
-      // Reset count when a non match is found.
-      n_suffix = edit == kEditMatch ? n_suffix + 1 : 0;
-
-      if (edit == kEditMatch || edit == kEditRemove || edit == kEditReplace) {
-        hunk.PushLine(edit == kEditMatch ? ' ' : '-', left[l_i].c_str());
-      }
-      if (edit == kEditAdd || edit == kEditReplace) {
-        hunk.PushLine('+', right[r_i].c_str());
-      }
-
-      // Advance indices, depending on edit type.
-      l_i += edit != kEditAdd;
-      r_i += edit != kEditRemove;
-    }
-
-    if (!hunk.has_edits()) {
-      // We are done. We don't want this hunk.
-      break;
-    }
-
-    hunk.PrintTo(&ss);
-  }
-  return ss.str();
-}
-
-}  // namespace internal
-}  // namespace testing
--- a/googletest/src/gtest.cc
+++ b/googletest/src/gtest.cc
@ -46,6 +46,7 @@
 #include <algorithm>
 #include <iomanip>
 #include <limits>
+#include <list>
 #include <map>
 #include <ostream>  // NOLINT
 #include <sstream>
@ -1067,6 +1068,246 @@ AssertionResult AssertionFailure(const Message& message) {

 namespace internal {

+namespace edit_distance {
+std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t>& left,
+                                            const std::vector<size_t>& right) {
+  std::vector<std::vector<double> > costs(
+      left.size() + 1, std::vector<double>(right.size() + 1));
+  std::vector<std::vector<EditType> > best_move(
+      left.size() + 1, std::vector<EditType>(right.size() + 1));
+
+  // Populate for empty right.
+  for (size_t l_i = 0; l_i < costs.size(); ++l_i) {
+    costs[l_i][0] = static_cast<double>(l_i);
+    best_move[l_i][0] = kRemove;
+  }
+  // Populate for empty left.
+  for (size_t r_i = 1; r_i < costs[0].size(); ++r_i) {
+    costs[0][r_i] = static_cast<double>(r_i);
+    best_move[0][r_i] = kAdd;
+  }
+
+  for (size_t l_i = 0; l_i < left.size(); ++l_i) {
+    for (size_t r_i = 0; r_i < right.size(); ++r_i) {
+      if (left[l_i] == right[r_i]) {
+        // Found a match. Consume it.
+        costs[l_i + 1][r_i + 1] = costs[l_i][r_i];
+        best_move[l_i + 1][r_i + 1] = kMatch;
+        continue;
+      }
+
+      const double add = costs[l_i + 1][r_i];
+      const double remove = costs[l_i][r_i + 1];
+      const double replace = costs[l_i][r_i];
+      if (add < remove && add < replace) {
+        costs[l_i + 1][r_i + 1] = add + 1;
+        best_move[l_i + 1][r_i + 1] = kAdd;
+      } else if (remove < add && remove < replace) {
+        costs[l_i + 1][r_i + 1] = remove + 1;
+        best_move[l_i + 1][r_i + 1] = kRemove;
+      } else {
+        // We make replace a little more expensive than add/remove to lower
+        // their priority.
+        costs[l_i + 1][r_i + 1] = replace + 1.00001;
+        best_move[l_i + 1][r_i + 1] = kReplace;
+      }
+    }
+  }
+
+  // Reconstruct the best path. We do it in reverse order.
+  std::vector<EditType> best_path;
+  for (size_t l_i = left.size(), r_i = right.size(); l_i > 0 || r_i > 0;) {
+    EditType move = best_move[l_i][r_i];
+    best_path.push_back(move);
+    l_i -= move != kAdd;
+    r_i -= move != kRemove;
+  }
+  std::reverse(best_path.begin(), best_path.end());
+  return best_path;
+}
+
+namespace {
+
+// Helper class to convert string into ids with deduplication.
+class InternalStrings {
+ public:
+  size_t GetId(const std::string& str) {
+    IdMap::iterator it = ids_.find(str);
+    if (it != ids_.end()) return it->second;
+    size_t id = ids_.size();
+    return ids_[str] = id;
+  }
+
+ private:
+  typedef std::map<std::string, size_t> IdMap;
+  IdMap ids_;
+};
+
+}  // namespace
+
+std::vector<EditType> CalculateOptimalEdits(
+    const std::vector<std::string>& left,
+    const std::vector<std::string>& right) {
+  std::vector<size_t> left_ids, right_ids;
+  {
+    InternalStrings intern_table;
+    for (size_t i = 0; i < left.size(); ++i) {
+      left_ids.push_back(intern_table.GetId(left[i]));
+    }
+    for (size_t i = 0; i < right.size(); ++i) {
+      right_ids.push_back(intern_table.GetId(right[i]));
+    }
+  }
+  return CalculateOptimalEdits(left_ids, right_ids);
+}
+
+namespace {
+
+// Helper class that holds the state for one hunk and prints it out to the
+// stream.
+// It reorders adds/removes when possible to group all removes before all
+// adds. It also adds the hunk header before printint into the stream.
+class Hunk {
+ public:
+  Hunk(size_t left_start, size_t right_start)
+      : left_start_(left_start),
+        right_start_(right_start),
+        adds_(),
+        removes_(),
+        common_() {}
+
+  void PushLine(char edit, const char* line) {
+    switch (edit) {
+      case ' ':
+        ++common_;
+        FlushEdits();
+        hunk_.push_back(std::make_pair(' ', line));
+        break;
+      case '-':
+        ++removes_;
+        hunk_removes_.push_back(std::make_pair('-', line));
+        break;
+      case '+':
+        ++adds_;
+        hunk_adds_.push_back(std::make_pair('+', line));
+        break;
+    }
+  }
+
+  void PrintTo(std::ostream* os) {
+    PrintHeader(os);
+    FlushEdits();
+    for (std::list<std::pair<char, const char*> >::const_iterator it =
+             hunk_.begin();
+         it != hunk_.end(); ++it) {
+      *os << it->first << it->second << "\n";
+    }
+  }
+
+  bool has_edits() const { return adds_ || removes_; }
+
+ private:
+  void FlushEdits() {
+    hunk_.splice(hunk_.end(), hunk_removes_);
+    hunk_.splice(hunk_.end(), hunk_adds_);
+  }
+
+  // Print a unified diff header for one hunk.
+  // The format is
+  //   "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@"
+  // where the left/right parts are omitted if unnecessary.
+  void PrintHeader(std::ostream* ss) const {
+    *ss << "@@ ";
+    if (removes_) {
+      *ss << "-" << left_start_ << "," << (removes_ + common_);
+    }
+    if (removes_ && adds_) {
+      *ss << " ";
+    }
+    if (adds_) {
+      *ss << "+" << right_start_ << "," << (adds_ + common_);
+    }
+    *ss << " @@\n";
+  }
+
+  size_t left_start_, right_start_;
+  size_t adds_, removes_, common_;
+  std::list<std::pair<char, const char*> > hunk_, hunk_adds_, hunk_removes_;
+};
+
+}  // namespace
+
+// Create a list of diff hunks in Unified diff format.
+// Each hunk has a header generated by PrintHeader above plus a body with
+// lines prefixed with ' ' for no change, '-' for deletion and '+' for
+// addition.
+// 'context' represents the desired unchanged prefix/suffix around the diff.
+// If two hunks are close enough that their contexts overlap, then they are
+// joined into one hunk.
+std::string CreateUnifiedDiff(const std::vector<std::string>& left,
+                              const std::vector<std::string>& right,
+                              size_t context) {
+  const std::vector<EditType> edits = CalculateOptimalEdits(left, right);
+
+  size_t l_i = 0, r_i = 0, edit_i = 0;
+  std::stringstream ss;
+  while (edit_i < edits.size()) {
+    // Find first edit.
+    while (edit_i < edits.size() && edits[edit_i] == kMatch) {
+      ++l_i;
+      ++r_i;
+      ++edit_i;
+    }
+
+    // Find the first line to include in the hunk.
+    const size_t prefix_context = std::min(l_i, context);
+    Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1);
+    for (size_t i = prefix_context; i > 0; --i) {
+      hunk.PushLine(' ', left[l_i - i].c_str());
+    }
+
+    // Iterate the edits until we found enough suffix for the hunk or the input
+    // is over.
+    size_t n_suffix = 0;
+    for (; edit_i < edits.size(); ++edit_i) {
+      if (n_suffix >= context) {
+        // Continue only if the next hunk is very close.
+        std::vector<EditType>::const_iterator it = edits.begin() + edit_i;
+        while (it != edits.end() && *it == kMatch) ++it;
+        if (it == edits.end() || (it - edits.begin()) - edit_i >= context) {
+          // There is no next edit or it is too far away.
+          break;
+        }
+      }
+
+      EditType edit = edits[edit_i];
+      // Reset count when a non match is found.
+      n_suffix = edit == kMatch ? n_suffix + 1 : 0;
+
+      if (edit == kMatch || edit == kRemove || edit == kReplace) {
+        hunk.PushLine(edit == kMatch ? ' ' : '-', left[l_i].c_str());
+      }
+      if (edit == kAdd || edit == kReplace) {
+        hunk.PushLine('+', right[r_i].c_str());
+      }
+
+      // Advance indices, depending on edit type.
+      l_i += edit != kAdd;
+      r_i += edit != kRemove;
+    }
+
+    if (!hunk.has_edits()) {
+      // We are done. We don't want this hunk.
+      break;
+    }
+
+    hunk.PrintTo(&ss);
+  }
+  return ss.str();
+}
+
+}  // namespace edit_distance
+
 namespace {

 // The string representation of the values received in EqFailure() are already
@ -1138,7 +1379,8 @@ AssertionResult EqFailure(const char* lhs_expression,
    const std::vector<std::string> rhs_lines =
        SplitEscapedString(rhs_value);
    if (lhs_lines.size() > 1 || rhs_lines.size() > 1) {
-      msg << "\nWith diff:\n" << CreateUnifiedDiff(lhs_lines, rhs_lines);
+      msg << "\nWith diff:\n"
+          << edit_distance::CreateUnifiedDiff(lhs_lines, rhs_lines);
    }
  }

--- a/googletest/test/gtest_unittest.cc
+++ b/googletest/test/gtest_unittest.cc
@ -215,7 +215,6 @@ using testing::GTEST_FLAG(stream_result_to);
 using testing::GTEST_FLAG(throw_on_failure);
 using testing::IsNotSubstring;
 using testing::IsSubstring;
-using testing::kMaxStackTraceDepth;
 using testing::Message;
 using testing::ScopedFakeTestPartResultReporter;
 using testing::StaticAssertTypeEq;
@ -235,18 +234,16 @@ using testing::internal::AlwaysTrue;
 using testing::internal::AppendUserMessage;
 using testing::internal::ArrayAwareFind;
 using testing::internal::ArrayEq;
-using testing::internal::CalculateOptimalEdits;
 using testing::internal::CodePointToUtf8;
 using testing::internal::CompileAssertTypesEqual;
 using testing::internal::CopyArray;
 using testing::internal::CountIf;
-using testing::internal::CreateUnifiedDiff;
-using testing::internal::EditType;
 using testing::internal::EqFailure;
 using testing::internal::FloatingPoint;
 using testing::internal::ForEach;
 using testing::internal::FormatEpochTimeInMillisAsIso8601;
 using testing::internal::FormatTimeInMillisAsSeconds;
+using testing::internal::GTestFlagSaver;
 using testing::internal::GetCurrentOsStackTraceExceptTop;
 using testing::internal::GetElementOr;
 using testing::internal::GetNextRandomSeed;
@ -255,7 +252,6 @@ using testing::internal::GetTestTypeId;
 using testing::internal::GetTimeInMillis;
 using testing::internal::GetTypeId;
 using testing::internal::GetUnitTestImpl;
-using testing::internal::GTestFlagSaver;
 using testing::internal::ImplicitlyConvertible;
 using testing::internal::Int32;
 using testing::internal::Int32FromEnvOrDie;
@ -263,8 +259,6 @@ using testing::internal::IsAProtocolMessage;
 using testing::internal::IsContainer;
 using testing::internal::IsContainerTest;
 using testing::internal::IsNotContainer;
-using testing::internal::kMaxRandomSeed;
-using testing::internal::kTestTypeIdInGoogleTest;
 using testing::internal::NativeArray;
 using testing::internal::OsStackTraceGetter;
 using testing::internal::OsStackTraceGetterInterface;
@ -286,6 +280,12 @@ using testing::internal::TestResultAccessor;
 using testing::internal::UInt32;
 using testing::internal::UnitTestImpl;
 using testing::internal::WideStringToUtf8;
+using testing::internal::edit_distance::CalculateOptimalEdits;
+using testing::internal::edit_distance::CreateUnifiedDiff;
+using testing::internal::edit_distance::EditType;
+using testing::internal::kMaxRandomSeed;
+using testing::internal::kTestTypeIdInGoogleTest;
+using testing::kMaxStackTraceDepth;

 #if GTEST_HAS_STREAM_REDIRECTION
 using testing::internal::CaptureStdout;
@ -3517,14 +3517,14 @@ TEST(EditDistance, TestCases) {
      {__LINE__, "ABCD", "abcd", "////",
       "@@ -1,4 +1,4 @@\n-A\n-B\n-C\n-D\n+a\n+b\n+c\n+d\n"},
      // Path finding.
-      {__LINE__, "ABCDEFGH", "ABXEGH1", "  /- -  +",
+      {__LINE__, "ABCDEFGH", "ABXEGH1", "  -/ -  +",
       "@@ -1,8 +1,7 @@\n A\n B\n-C\n-D\n+X\n E\n-F\n G\n H\n+1\n"},
-      {__LINE__, "AAAABCCCC", "ABABCDCDC", " -/   + / ",
-       "@@ -1,9 +1,9 @@\n A\n-A\n-A\n+B\n A\n B\n C\n+D\n C\n-C\n+D\n C\n"},
-      {__LINE__, "ABCDE", "BCDCD", "-   /+",
+      {__LINE__, "AAAABCCCC", "ABABCDCDC", "- /   + / ",
+       "@@ -1,9 +1,9 @@\n-A\n A\n-A\n+B\n A\n B\n C\n+D\n C\n-C\n+D\n C\n"},
+      {__LINE__, "ABCDE", "BCDCD", "-   +/",
       "@@ -1,5 +1,5 @@\n-A\n B\n C\n D\n-E\n+C\n+D\n"},
-      {__LINE__, "ABCDEFGHIJKL", "BGDCDEFGJKLJK", "- ++     --   ++",
-       "@@ -1,4 +1,5 @@\n-A\n B\n+G\n+D\n C\n D\n"
+      {__LINE__, "ABCDEFGHIJKL", "BCDCDEFGJKLJK", "- ++     --   ++",
+       "@@ -1,4 +1,5 @@\n-A\n B\n+C\n+D\n C\n D\n"
       "@@ -6,7 +7,7 @@\n F\n G\n-H\n-I\n J\n K\n L\n+J\n+K\n"},
      {}};
  for (const Case* c = kCases; c->left; ++c) {
@ -3542,57 +3542,6 @@ TEST(EditDistance, TestCases) {
  }
 }

-// Tests that we can run CalculateOptimalEdits for a large vector, i.e. we can
-// compute diffs for large strings.
-TEST(EditDistance, LargeVectorWithDiffs) {
-  const int kSize = 300000;
-  std::vector<size_t> left;
-  std::vector<size_t> right;
-  std::vector<EditType> expected(kSize, testing::internal::kEditMatch);
-
-  left.reserve(kSize);
-  right.reserve(kSize);
-  for (int i = 0; i < kSize; ++i) {
-    // Make the contents of the vectors unique. This greatly speeds up
-    // the algorithm, since it doesn't spend time finding matches for
-    // different alignments.
-    left.push_back(i);
-    right.push_back(i);
-  }
-
-  for (int i = 0; i < 10; ++i) {
-    right[i] = kSize + i;
-    expected[i] = testing::internal::kEditReplace;
-    right[kSize - i - 1] = kSize * 2 + i;
-    expected[kSize - i - 1] = testing::internal::kEditReplace;
-  }
-  size_t memory_usage;
-  EXPECT_EQ(CalculateOptimalEdits(left, right, &memory_usage), expected);
-  EXPECT_GT(memory_usage, kSize);
-  EXPECT_LT(memory_usage, kSize * 2);
-}
-
-// Tests that we can run CalculateOptimalEdits for two vectors N and M, where
-// M = N plus additional junk at the end. The current algorithm only does O(M)
-// "real" work in this case, but allocates some extra memory. We test that this
-// is still fast enough for common cases, and we aren't allocating an
-// excessive amount of extra memory.
-TEST(EditDistance, LargeVectorWithTrailingJunk) {
-  const int kSize = 200000;
-  const int kAdditionalSize = 2000;
-  std::vector<size_t> left(kSize, 0);
-  std::vector<size_t> right(kSize + kAdditionalSize, 0);
-  std::vector<EditType> expected(kSize + kAdditionalSize,
-                                 testing::internal::kEditMatch);
-  for (int i = 0; i < kAdditionalSize; ++i) {
-    expected[i + kSize] = testing::internal::kEditAdd;
-  }
-  size_t memory_usage;
-  EXPECT_EQ(CalculateOptimalEdits(left, right, &memory_usage), expected);
-  EXPECT_GT(memory_usage, kSize);
-  EXPECT_LT(memory_usage, 6000000);
-}
-
 // Tests EqFailure(), used for implementing *EQ* assertions.
 TEST(AssertionTest, EqFailure) {
  const std::string foo_val("5"), bar_val("6");