libabigail/apidoc/abg-diff-utils_8h_source.html

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 // -*- Mode: C++ -*-

 //

 // Copyright (C) 2013-2023 Red Hat, Inc.


 /// @file

 ///

 /// This file declares types and operations implementing the "O(ND)

 /// Difference Algorithm" (aka diff2) from Eugene W. Myers, to compute

 /// the difference between two sequences.

 ///

 /// To understand what is going on here, one must read the paper at

 /// http://www.xmailserver.org/diff2.pdf.  Throughout this file, that

 /// paper is referred to as "the paper".

 ///

 /// The implementations goes as far as calculating the shortest edit

 /// script (the set of insertions and deletions) for transforming a

 /// sequence into another.  The main entry point for that is the

 /// compute_diff() function.


 #ifndef __ABG_DIFF_UTILS_H__

 #define __ABG_DIFF_UTILS_H__


 #include <cassert>

 #include <cstdlib>

 #include <memory>

 #include <ostream>

 #include <sstream>

 #include <stdexcept>

 #include <string>

 #include <vector>

 #include "abg-fwd.h"


 namespace abigail

 {


 /// @brief Libabigail's core diffing algorithms

 ///

 /// This is the namespace defining the core diffing algorithm used by

 /// libabigail @ref comparison tools.  This based on the diff

 /// algorithm from Eugene Myers.

 namespace diff_utils

 {


 using std::shared_ptr;


 // Inject the names from std:: below into this namespace

 using std::string;

 using std::ostream;

 using std::vector;

 using std::abs;

 using std::ostringstream;


 /// A class representing a vertex in an edit graph, as explained in

 /// the paper.  A vertex is a basically a pair of coordinates

 /// (abscissa and ordinate).

 class point

 {

   int x_;

   int y_;

   bool empty_;


 public:


   point()

     : x_(-1), y_(-1),empty_(true)

   {}


   point(int x, int y)

     : x_(x), y_(y), empty_(false)

   {}


   point(const point& p)

     : x_(p.x()), y_(p.y()), empty_(p.is_empty())

   {}


   int

   x() const

   {return x_;}


   void

   x(int x)

   {

     x_ = x;

     empty_ = false;

   }


   int

   y() const

   {return y_;}


   void

   y(int y)

   {

     y_ = y;

     empty_ = false;

   }


   void

   set(int x, int y)

   {

     x_ = x;

     y_ = y;

     empty_ = false;

   }


   void

   set(int x, int y, bool empty)

   {

     x_ = x;

     y_ = y;

     empty_ = empty;

   }


   void

   add(int ax, int ay)

   {set (x() + ax, y() + ay);}


   bool

   operator!=(const point& o) const

   {return (x() != o.x() || y() != o.y() || is_empty() != o.is_empty());}


   bool

   operator==(const point& o) const

   {return !(operator!=(o));}


   bool

   operator<(const point& o) const

   {return (x() < o.x() && y() < o.y());}


   bool

   operator>(const point& o) const

   {return (x() > o.x() && y() > o.y());}


   bool

   operator<=(const point& o) const

   {return (x() <= o.x() && y() <= o.y());}


   bool

   operator>=(const point& o) const

   {return (x() >= o.x() && y() >= o.y());}


   point

   operator+(int val) const

   {return point(x() + val, y() + val);}


   point

   operator-(int val) const

   {return point(x() - val, y() - val);}


   point&

   operator+= (int val)

   {

     set(x_ + val, y_ + val);

     return *this;

   }


   point&

   operator-= (int val)

   {return (*this) += (-val);}


   point&

   operator--()

   {return (*this) -= 1;}


   point&

   operator++()

   {return (*this) += 1;}


   point

   operator--(int)

   {

     point tmp(*this);

     --(*this);

     return tmp;

   }


   point

   operator++(int)

   {

     point tmp(*this);

     ++(*this);

     return tmp;

   }


   point&

   operator=(int val)

   {

     set(val, val);

     return *this;

   }


   point&

   operator=(const point& p)

   {

     set(p.x(), p.y(), p.is_empty());

     return *this;

   }


   bool

   is_empty() const

   {return empty_;}


   operator bool () const

   {return !is_empty();}


   bool

   operator!() const

   {return is_empty();}


   void

   clear()

   {

     x_ = -1;

     y_ = -1;

     empty_ = true;

   }

 };// end point


 /// The abstraction of the Snake concept, from the paper.

 ///

 /// In a given path from the edit graph, a snake is a non-diagonal

 /// edge followed by zero or more diagonal edges.

 ///

 /// The starting poing of the non-diagonal edge is the beginning of

 /// the snake.  This is given by the snake::begin() method.  This point

 /// is not explicitely referenced in the paper, but we need it for

 /// some grunt implementation details of the algorithm.

 ///

 /// The end point of the non-diagonal edge is the intermediate point

 /// of the snake; it's given by the snake::intermediate() method.

 /// This point is what is referred to as "the begining of the snake"

 /// in the paper.

 ///

 /// The end point of the first diagonal edge is given by the

 /// snake::diagonal_start() method.

 ///

 /// The end point of the last diagonal edge is given by the

 /// snake::end() method.  Note that when the snake contains no

 /// diagonal edge, snake::intermediate(), and snake::end() return the

 /// same point; snake::diagonal_start() contains an empty point (i.e,

 /// a point for which point::is_empty() returns true).

 class snake

 {

   point begin_, intermediate_, diagonal_start_, end_;

   bool forward_;


 public:


   /// Default constructor for snake.

   snake()

     : forward_(false)

   {}


   /// Constructor from the beginning, intermediate and end points.

   ///

   /// @param b the beginning point of the snake.  That is, the

   /// starting point of the non-diagonal edge.

   ///

   /// @param i the intermediate point of the snake.  That is, the end

   /// point of the non-diagonal edge.

   ///

   /// @param e the end point of the snake.  That is the end point of

   /// the last diagonal edge.

   snake(const point& b,

     const point& i,

     const point& e)

     : begin_(b), intermediate_(i),

       end_(e), forward_(false)

   {}


   /// Constructor from the beginning, intermediate and end points.

   ///

   /// @param b the beginning point of the snake.  That is, the

   /// starting point of the non-diagonal edge.

   ///

   /// @param i the intermediate point of the snake.  That is, the end

   /// point of the non-diagonal edge.

   ///

   /// @param d the beginning of the diagonal edge.  That is the end of

   /// the first diagonal edge of the snake.

   ///

   /// @param e the end point of the snake.  That is the end point of

   /// the last diagonal edge.

   snake(const point& b,

     const point& i,

     const point& d,

     const point& e)

     : begin_(b), intermediate_(i),

       diagonal_start_(d), end_(e),

       forward_(false)

   {}


   /// Getter for the starting point of the non-diagonal edge of the

   /// snake.

   ///

   /// @return the starting point of the non-diagonal edge of the snake

   const point&

   begin() const

   {return begin_;}


   /// Getter for the starting point of the non-diagonal edge of the

   /// snake, aka begin point.

   ///

   ///@param p the new begin point.

   void

   begin(const point& p)

   {begin_ = p;}


   /// Getter for the end point of the non-diagonal edge of the snake.

   ///

   /// @return the end point of the non-diagonal edge of the snake

   const point&

   intermediate() const

   {return intermediate_;}


   /// Setter for the end point of the non-diagonal edge of the snake,

   /// aka intermediate point.

   ///

   /// @param p the new intermediate point.

   void

   intermediate(const point& p)

   {intermediate_ = p;}


   /// Getter for the end point of the first diagonal edge, aka

   /// diagonal start point.  Note that if the snake has no diagonal

   /// edge, this point is empty.

   ///

   /// @return the end point of the first diagonal edge.

   const point&

   diagonal_start() const

   {return diagonal_start_;}


   /// Setter for the end point of the first diagonal edge, aka

   /// diagonal start point.

   ///

   /// @param p the new diagonal start.d

   void

   diagonal_start(const point& p)

   {diagonal_start_ = p;}


   /// Getter for the end point of the last diagonal edge, aka snake

   /// end point.  Note that if the snake has no diagonal edge, this

   /// point is equal to the intermediate point.

   ///

   /// @return the end point of the last diagonal edge

   const point&

   end() const

   {return end_;}


   /// Setter for the end point of the last diagonal edge, aka snake

   /// end point.  Note that if the snake has no diagonal edge, this

   /// point is equal to the intermediate point.

   void

   end(const point& p)

   {end_ = p;}


   /// Setter for the begin, intermediate and end points of the snake.

   ///

   /// @param b the new snake begin point

   ///

   /// @param i the new snake intermediate point

   ///

   /// @param e the new snake end point

   void

   set(const point& b, const point&i, const point&e)

   {

     begin(b);

     intermediate(i);

     end(e);

   }


   /// Setter for the begin, intermediate, diagonal start and end points

   /// of the snake.

   ///

   /// @param b the new snake begin point

   ///

   /// @param i the new snake intermediate point

   ///

   /// @param d the new diagonal start point

   ///

   /// @param e the new snake end point

   void

   set(const point& b, const point&i, const point& d, const point&e)

   {

     begin(b);

     intermediate(i);

     diagonal_start(d);

     end(e);

   }


   /// @return true iff the snake is a forward snake.  That is, if it

   /// was built while walking the edit graph going forward (from the

   /// top left corner to the right bottom corner.

   bool

   is_forward() const

   {return forward_;}


   /// Set to true if the snake is a forward snake; that is, if it was

   /// built while walking the edit graph going forward (from the top

   /// left corner to the right bottom corner.  Set to false otherwise.

   ///

   /// @param f whether the snake is a forward snake or not.

   void

   set_forward(bool f)

   {forward_ = f;}


   /// Add an offset to the abscissas of the points of the snake, and

   /// add another offset to the ordinates of these same points.

   ///

   /// @param x_offset the offset to add to the abscissas of all the

   /// points of the snake.

   ///

   /// @param y_offset the offset to add to the ordinates of all the

   /// points of the snake.

   void

   add(int x_offset, int y_offset)

   {

     if (is_empty())

       return;


     begin_.add(x_offset, y_offset);

     intermediate_.add(x_offset, y_offset);

     if (diagonal_start_)

       diagonal_start_.add(x_offset, y_offset);

     end_.add(x_offset, y_offset);

   }


   /// @return true iff the snake has at least one diagonal edge.

   bool

   has_diagonal_edge() const

   {return !diagonal_start().is_empty();}


   /// @return true iff the non-diagonal edge is horizontal.

   bool

   has_horizontal_edge() const

   {return (begin().y() == intermediate().y());}


   /// @return true iff the non-diagonal edge is vertical.

   bool

   has_vertical_edge() const

   {return (begin().x() == intermediate().x());}


   /// @return true iff the snake is empty, that is, if all the points

   /// it contains are empty.

   bool is_empty() const

   {return begin().is_empty() && intermediate().is_empty() && end().is_empty();}

 };// end class snake


 /// The array containing the furthest D-path end-points, for each value

 /// of K.  MAX_D is the maximum value of the D-Path.  That is, M+N if

 /// M is the size of the first input string, and N is the size of the

 /// second.

 class d_path_vec : public std::vector<int>

 {

 private:


   unsigned a_size_;

   unsigned b_size_;


   /// Forbid vector size modifications

   void

   push_back(const vector<int>::value_type&);


   /// Forbid default constructor.

   d_path_vec();


   bool

   over_bounds(long long index) const

   {return  (index + offset()) >= (long long) size();}


   void

   check_index_against_bound(int index) const

   {

     if (over_bounds(index))

       {

     ostringstream o;

     o << "index '" << index

       << "' out of range [-" << max_d() << ", " << max_d() << "]";

     throw std::out_of_range(o.str());

       }

   }


 public:


   /// Constructor of the d_path_vec.

   ///

   /// For forward vectors, the underlying vector allocates 2 *

   /// [MAX_D+1].

   /// space, so that one can address elements in the index range

   /// [-MAX_D, MAX_D].  And MAX_D is the sum of the two sequence

   /// sizes. delta is the difference.

   ///

   /// For reverse vectors, note that we need to be able to address

   /// [-MAX_D - delta, MAX_D + delta], with delta being the (signed)

   /// difference between the size of the two sequences.  We consider

   /// delta being bounded by MAX_D itself; so we say we need to be

   /// able to address [-2MAX_D, 2MAX_D].

   ///

   /// @param size1 the size of the first sequence we are interested

   /// in.

   ///

   /// @param size2 the size of the second sequence we are interested

   /// in.

   d_path_vec(unsigned size1, unsigned size2)

     : vector<int>(2 * (size1 + size2 + 1 + (size1 + size2)) + 1, 0),

       a_size_(size1), b_size_(size2)

   {

   }


   std::vector<int>::const_reference

   operator[](int index) const

   {return at(index);}


   std::vector<int>::reference

   operator[](int index)

   {return at(index);}


   std::vector<int>::reference

   at(long long index)

   {

     //check_index_against_bound(index);

     long long i = index + offset();

     return vector<int>::operator[](i);

   }


   std::vector<int>::const_reference

   at(long long index) const

   {

     check_index_against_bound(index);

     long long i = offset() + index;

     return static_cast<const vector<int>* >(this)->at(i);

   }


   unsigned

   a_size() const

   {return a_size_;}


   unsigned

   b_size() const

   {return b_size_;}


   unsigned

   max_d() const

   {return a_size_ + b_size_;}


   unsigned

   offset() const

   {return max_d() + abs((long long) a_size() - (long long) b_size());}

 }; // end class d_path_vec


 /// The abstration of an insertion of elements of a sequence B into a

 /// sequence A.  This is used to represent the edit script for

 /// transforming a sequence A into a sequence B.

 ///

 /// And insertion mainly encapsulates two components:

 ///

 ///   - An insertion point: this is the index (starting at 0) of the

 ///     element of the sequence A after which the insertion occurs.

 ///

 ///   - Inserted elements: this is a vector of indexes of elements of

 ///     sequence B (starting at 0) that got inserted into sequence A,

 ///     after the insertion point.

 class insertion

 {

   int           insertion_point_;

   vector<unsigned>  inserted_;


 public:


   insertion(int insertion_point,

         const vector<unsigned>& inserted_indexes)

     : insertion_point_(insertion_point),

       inserted_(inserted_indexes)

   {}


     insertion(int insertion_point = 0)

       : insertion_point_(insertion_point)

   {}


   int

   insertion_point_index() const

   {return insertion_point_;}


   void

   insertion_point_index(int i)

   {insertion_point_ = i;}


   const vector<unsigned>&

   inserted_indexes() const

   {return inserted_;}


   vector<unsigned>&

   inserted_indexes()

   {return inserted_;}

 };// end class insertion


 /// The abstraction of the deletion of one element of a sequence A.

 ///

 /// This encapsulates the index of the element A that got deleted.

 class deletion

 {

   int index_;


 public:


   deletion(int i)

     : index_(i)

   {}


   int

   index() const

   {return index_;}


   void

   index(int i)

   {index_ = i;}

 };// end class deletion


 /// The abstraction of an edit script for transforming a sequence A

 /// into a sequence B.

 ///

 /// It encapsulates the insertions and deletions for transforming A

 /// into B.

 class edit_script

 {

   vector<insertion> insertions_;

   vector<deletion>  deletions_;


 public:


   edit_script()

   {}


   const vector<insertion>&

   insertions() const

   {return insertions_;}


   vector<insertion>&

   insertions()

   {return insertions_;}


   const vector<deletion>&

   deletions() const

   {return deletions_;}


   vector<deletion>&

   deletions()

   {return deletions_;}


   void

   append(const edit_script& es)

   {

     insertions().insert(insertions().end(),

             es.insertions().begin(),

             es.insertions().end());

     deletions().insert(deletions().end(),

                es.deletions().begin(),

                es.deletions().end());

   }


   void

   prepend(const edit_script& es)

   {

     insertions().insert(insertions().begin(),

             es.insertions().begin(),

             es.insertions().end());

     deletions().insert(deletions().begin(),

                es.deletions().begin(),

                es.deletions().end());

   }


   void

   clear()

   {

     insertions().clear();

     deletions().clear();

   }


   bool

   is_empty() const

   {return insertions().empty() && deletions().empty();}


   operator bool() const

   {return !is_empty();}


   int

   num_insertions() const

   {

     int l = 0;

     for (vector<insertion>::const_iterator i = insertions().begin();

      i != insertions().end();

      ++i)

       l += i->inserted_indexes().size();

     return l;

   }


   int

   num_deletions() const

   {return deletions().size();}


   int

   length() const

   {return num_insertions() + num_deletions();}

 };//end class edit_script


 bool

 point_is_valid_in_graph(point& p,

             unsigned a_size,

             unsigned b_size);


 bool

 ends_of_furthest_d_paths_overlap(const point& forward_d_path_end,

                  const point& reverse_d_path_end);


 /// The default equality functor used by the core diffing algorithms.

 struct default_eq_functor

 {

   /// This equality operator uses the default "==" to compare its

   /// arguments.

   ///

   /// @param a the first comparison argument.

   ///

   /// @param b the second comparison argument.

   ///

   /// @return true if the two arguments are equal, false otherwise.

   template<typename T>

   bool

   operator()(const T a, const T b) const

   {return a == b;}

 };


 /// An equality functor to deeply compare pointers.

 struct deep_ptr_eq_functor

 {

   /// This equality operator compares pointers by comparing the

   /// pointed-to objects.

   ///

   /// @param first the first comparison argument.

   ///

   /// @param second the second comparison argument.

   ///

   /// @return true if the objects pointed to by the pointers are

   /// equal, false otherwise.

   template<typename T>

   bool

   operator()(const T* first,

          const T* second) const

   {

     if (!!first != !!second)

       return false;


     if (!first)

       return true;


     return *first == *second;

   }


   /// This equality operator compares pointers by comparing the

   /// pointed-to objects.

   ///

   /// @param first the first comparison argument.

   ///

   /// @param second the second comparison argument.

   ///

   /// @return true if the objects pointed to by the pointers are

   /// equal, false otherwise.

   template<typename T>

   bool

   operator()(const shared_ptr<T> first,

          const shared_ptr<T> second) const

   {return operator()(first.get(), second.get());}


   /// This equality operator compares pointers by comparing the

   /// pointed-to objects.

   ///

   /// @param first the first comparison argument.

   ///

   /// @param second the second comparison argument.

   ///

   /// @return true if the objects pointed to by the pointers are

   /// equal, false otherwise.

   template<typename T>

   bool

   operator()(const weak_ptr<T> first,

          const weak_ptr<T> second) const

   {return operator()(shared_ptr<T>(first), shared_ptr<T>(second));}

 }; // end struct deep_ptr_eq_functor


 /// Find the end of the furthest reaching d-path on diagonal k, for

 /// two sequences.  In the paper This is referred to as "the basic

 /// algorithm".

 ///

 /// Unlike in the paper, the coordinates of the edit graph start at

 /// (-1,-1), rather than (0,0), and they end at (M-1, N-1), rather

 /// than (M,N).

 ///

 /// @tparm RandomAccessOutputIterator the type of iterators passed to

 /// this function.  It must be a random access output iterator kind.

 ///

 /// @tparm EqualityFunctor this must be a class that declares a public

 /// call operator member returning a boolean and taking two arguments

 /// that must be of the same type as the one pointed to by the @ref

 /// RandomAccessOutputIterator template parameter. This functor is

 /// used to compare the elements referred to by the iterators passed in

 /// argument to this function.

 ///

 /// @param k the number of the diagonal on which we want to find the

 /// end of the furthest reaching D-path.

 ///

 /// @param d the D in D-Path.  That's the number of insertions/deletions

 /// (the number of changes, in other words) in the changeset.  That is

 /// also the number of non-diagonals in the D-Path.

 ///

 /// @param a_begin an iterator to the beginning of the first sequence

 ///

 /// @param a_end an iterator that points right after the last element

 /// of the second sequence to consider.

 ///

 /// @param b_begin an iterator to the beginning of the second sequence.

 ///

 /// @param b_end an iterator that points right after the last element

 /// of the second sequence to consider.

 ///

 /// @param v the vector of furthest end points of d_paths, at (d-1).

 /// It contains the abscissas of the furthest end points for different

 /// values of k, at (d-1).  That is, for k in [-D + 1, -D + 3, -D + 5,

 /// ..., D - 1], v[k] is the abscissa of the end of the furthest

 /// reaching (D-1)-path on diagonal k.

 ///

 /// @param snak the last snake of the furthest path found.  The end

 /// point of the snake is the end point of the furthest path.

 ///

 /// @return true if the end of the furthest reaching path that was

 /// found was inside the boundaries of the edit graph, false

 /// otherwise.

 template<typename RandomAccessOutputIterator,

      typename EqualityFunctor>

 bool

 end_of_fr_d_path_in_k(int k, int d,

               RandomAccessOutputIterator a_begin,

               RandomAccessOutputIterator a_end,

               RandomAccessOutputIterator b_start,

               RandomAccessOutputIterator b_end,

               d_path_vec& v, snake& snak)

 {

   int x = -1, y = -1;

   point begin, intermediate, diag_start, end;

   snake s;

   EqualityFunctor eq;


   // Let's pick the end point of the furthest reaching

   // (D-1)-path.  It's either v[k-1] or v[k+1]; the word

   // "furthest" means we choose the one which abscissa is the

   // greatest (that is, furthest from abscissa zero).

   if (k == -d || ((k != d) && (v[k-1] < v[k + 1])))

     // So, the abscissa of the end point of the furthest

     // reaching (D-1)-path is v[k+1].  That is a diagonal that

     // is above the current (k) diagonal, and on the right.

     // To move to the current k diagonal, one has to move

     // "down" from the diagonal k+1.  So the abscissa won't

     // change.  Only the ordinate will.  It will be given by y

     // = x - k (a bit below); as k has changed from k - 1 (it

     // has increased), y is going to be the new y that is

     // 'down' from the previous y in k - 1.

     {

       x = v[k+1];

       begin.set(x, x - (k + 1));

     }

   else

     {

       // So the abscissa of the end point of the furthest

       // (D-1)-path is v[k-1].  That is on the left of the

       // current k diagonal.  To move to the current k diagonal,

       // one has to move "right" from diagonal k - 1.  That is,

       // the y stays constant and x is incremented.

       x = v[k-1];

       begin.set(x, x - (k - 1));

       ++x;

     }


   // Now get the value of y from the equation k = x -y.

   // This is the point where we first touch K, when we move

   // from the end of the furthest reaching (D-1)-path.

   y = x - k;


   intermediate.x(x);

   intermediate.y(y);


   int last_x_index = a_end - a_begin - 1;

   int last_y_index = b_end - b_start - 1;

   // Now, follow the snake (aka, zero or more consecutive

   // diagonals).  Note that we stay on the k diagonal when we

   // do this.

   while ((x < last_x_index) && (y < last_y_index))

     if (eq(a_begin[x + 1], b_start[y + 1]))

       {

     x = x + 1;

     y = y + 1;

     if (!diag_start)

       diag_start.set(x, y);

       }

     else

       break;


   end.x(x);

   end.y(y);


   // Note the point that we store in v here might be outside the

   // bounds of the edit graph.  But we store it at this step (for a

   // given D) anyway, because out of bound or not, we need this value

   // at this step to be able to compute the value of the point on the

   // "next" diagonal for the next D.

   v[k] = x;


   if (x >= (int) v.a_size()

       || y >= (int) v.b_size()

       || x < -1 || y < -1)

     return false;


   s.set(begin, intermediate, diag_start, end);

   s.set_forward(true);

   snak = s;


   return true;

 }


 /// Find the end of the furthest reaching reverse d-path on diagonal k

 /// + delta.  Delta is abs(M - N), with M being the size of a and N

 /// being the size of b.  This is the "basic algorithm", run backward.

 /// That is, starting from the point (M,N) of the edit graph.

 ///

 /// Unlike in the paper, the coordinates of the edit graph start at

 /// (-1,-1), rather than (0,0), and they end at (M-1, N-1), rather

 /// than (M,N).

 ///

 /// @tparm RandomAccessOutputIterator the type of iterators passed to

 /// this function.  It must be a random access output iterator kind.

 ///

 /// @tparm EqualityFunctor this must be a class that declares a public

 /// call operator member returning a boolean and taking two arguments

 /// that must be of the same type as the one pointed to by the @ref

 /// RandomAccessOutputIterator template parameter. This functor is

 /// used to compare the elements referred to by the iterators passed in

 /// argument to this function.

 ///

 /// @param k the number of the diagonal on which we want to find the

 /// end of the furthest reaching reverse D-path.  Actually, we want to

 /// find the end of the furthest reaching reverse D-path on diagonal (k

 /// - delta).

 ///

 /// @param d the D in D-path.  That's the number of insertions/deletions

 /// (the number of changes, in other words) in the changeset.  That is

 /// also the number of non-diagonals in the D-Path.

 ///

 /// @param a_begin an iterator to the beginning of the first sequence

 ///

 /// @param a_end an iterator that points right after the last element

 /// of the second sequence to consider.

 ///

 /// @param b_begin an iterator to the beginning of the second sequence.

 ///

 /// @param b_end an iterator that points right after the last element

 /// of the second sequence to consider.

 ///

 /// @param v the vector of furthest end points of d_paths, at (d-1).

 /// It contains the abscissae of the furthest end points for different

 /// values of k - delta, at (d-1).  That is, for k in [-D + 1, -D + 3,

 /// -D + 5, ..., D - 1], v[k - delta] is the abscissa of the end of the

 /// furthest reaching (D-1)-path on diagonal k - delta.

 ///

 /// @param snak the last snake of the furthest path found.  The end

 /// point of the snake is the end point of the furthest path.

 ///

 /// @return true iff the end of the furthest reaching path that was

 /// found was inside the boundaries of the edit graph, false

 /// otherwise.

 template<typename RandomAccessOutputIterator,

      typename EqualityFunctor>

 bool

 end_of_frr_d_path_in_k_plus_delta (int k, int d,

                    RandomAccessOutputIterator a_begin,

                    RandomAccessOutputIterator a_end,

                    RandomAccessOutputIterator b_begin,

                    RandomAccessOutputIterator b_end,

                    d_path_vec& v, snake& snak)

 {

   int a_size = a_end - a_begin;

   int b_size = b_end - b_begin;

   int delta = a_size - b_size;

   int k_plus_delta = k + delta;

   int x = -1, y = -1;

   point begin, intermediate, diag_start, end;

   snake s;

   EqualityFunctor eq;


   // Let's pick the end point of the furthest reaching (D-1)-path and

   // move from there to reach the current k_plus_delta-line.  That end

   // point of the furthest reaching (D-1)-path is either on

   // v[k_plus_delta-1] or on v[k_plus_delta+1]; the word "furthest"

   // means we choose the one which abscissa is the lowest (that is,

   // furthest from abscissa M).

   if (k_plus_delta == -d + delta

       || ((k_plus_delta != d + delta)

       && (v[k_plus_delta + 1] <= v[k_plus_delta - 1])))

     {

       // We move left, that means ordinate won't change ...

       x = v[k_plus_delta + 1];

       y = x - (k_plus_delta + 1);

       begin.set(x, y);

       // ... and abscissa decreases.

       x = x - 1;

     }

   else

     {

       // So the furthest end point is on the k_plus_delta - 1

       // diagonal.  That is a diagonal that is 'below' the

       // k_plus_delta current diagonal.  So to join the current

       // diagonal from the k_plus_delta - 1 one, we need to move up.


       // So moving up means abscissa won't change ...

       x = v[k_plus_delta - 1];

       begin.set(x, x - (k_plus_delta - 1));

       // ... and that ordinate decreases.

       y = x - (k_plus_delta - 1) - 1;

     }


   intermediate.set(x, y);


   // Now, follow the snake.  Note that we stay on the k_plus_delta

   // diagonal when we do this.

   while (x >= 0 && y >= 0)

     if (eq(a_begin[x], b_begin[y]))

       {

     if (!diag_start)

       diag_start.set(x, y);

     x = x - 1;

     y = y - 1;

       }

     else

       break;


   end.set(x, y);


   // Note the point that we store in v here might be outside the

   // bounds of the edit graph.  But we store it at this step (for a

   // given D) anyway, because out of bound or not, we need this value

   // at this step to be able to compute the value of the point on the

   // "next" diagonal for the next D.

   v[k_plus_delta] = x;


   if (x == -1 && y == -1)

     ;

   else if (x < -1 || y < -1)

     return false;


   s.set(begin, intermediate, diag_start, end);

   s.set_forward(false);

   snak = s;


   return true;

 }


 /// Tests if a given point is a match point in an edit graph.

 ///

 /// @param a_begin the begin iterator of the first input sequence of

 /// the edit graph.

 ///

 /// @param a_end the end iterator of the first input sequence of the

 /// edit graph.  This points to one element passed the end of the

 /// sequence.

 ///

 /// @param b_begin the begin iterator of the second input sequence of

 /// the edit graph.

 ///

 /// @param b_end the end iterator of the second input sequence of the

 /// edit graph.  This points the one element passed the end of the

 /// sequence.

 ///

 /// @param point the point to test for being a match point.

 ///

 /// @return true iff \a point is a match point.

 template<typename RandomAccessOutputIterator>

 bool

 is_match_point(RandomAccessOutputIterator a_begin,

            RandomAccessOutputIterator a_end,

            RandomAccessOutputIterator b_begin,

            RandomAccessOutputIterator b_end,

            const point& point)

 {

   int a_size = a_end - a_begin, b_size = b_end - b_begin;


   if (point.x() < 0

       || point.x () >= a_size

       || point.y() < 0

       || point.y() >= b_size)

     return false;


   return (a_begin[point.x()] == b_begin[point.y()]);

 }


 /// Returns the middle snake of two sequences A and B, as well as the

 /// length of their shortest editing script.

 ///

 ///  This uses the "linear space refinement" algorithm presented in

 /// section 4b in the paper.  As the paper says, "The idea for doing

 /// so is to simultaneously run the basic algorithm in both the

 /// forward and reverse directions until furthest reaching forward and

 /// reverse paths starting at opposing corners 'overlap'."

 ///

 /// @tparm RandomAccessOutputIterator the type of iterators passed to

 /// this function.  It must be a random access output iterator kind.

 ///

 /// @tparm EqualityFunctor this must be a class that declares a public

 /// call operator member returning a boolean and taking two arguments

 /// that must be of the same type as the one pointed to by the @ref

 /// RandomAccessOutputIterator template parameter. This functor is

 /// used to compare the elements referred to by the iterators passed in

 /// argument to this function.

 ///

 /// @param a_begin an iterator pointing to the begining of sequence A.

 ///

 /// @param a_end an iterator pointing to the end of sequence A.  Note

 /// that this points right /after/ the end of vector A.

 ///

 /// @param b_begin an iterator pointing to the begining of sequence B.

 ///

 /// @param b_end an iterator pointing to the end of sequence B.  Note

 /// that this points right /after/ the end of vector B

 ///

 /// @param snak out parameter.  This is the snake current when the two

 /// paths overlapped.  This is set iff the function returns true;

 /// otherwise, this is not touched.

 ///

 /// @return true is the snake was found, false otherwise.

 template<typename RandomAccessOutputIterator,

      typename EqualityFunctor>

 bool

 compute_middle_snake(RandomAccessOutputIterator a_begin,

              RandomAccessOutputIterator a_end,

              RandomAccessOutputIterator b_begin,

              RandomAccessOutputIterator b_end,

              snake& snak, int& ses_len)

 {

   int a_size = a_end - a_begin;

   int N = a_size;

   int b_size = b_end - b_begin;

   int M = b_size;

   int delta = N - M;

   d_path_vec forward_d_paths(a_size, b_size);

   d_path_vec reverse_d_paths(a_size, b_size);

   // These points below are the top leftmost point and bottom

   // right-most points of the edit graph.

   point first_point(-1, -1), last_point(a_size -1, b_size -1), point_zero(0, 0);


   // We want the initial step (D = 0, k = 0 in the paper) to find a

   // furthest reaching point on diagonal k == 0; For that, we need the

   // value of x for k == 1; So let's set that value to -1; that is for

   // k == 1 (diagonal 1), the point in the edit graph is (-1,-2).

   // That way, to get the furthest reaching point on diagonal 0 (k ==

   // 0), we go down from (-1,-2) on diagonal 1 and we hit diagonal 0

   // on (-1,-1); that is the starting value that the algorithm expects

   // for k == 0.

   forward_d_paths[1] = -1;


   // Similarly for the reverse paths, for diagonal delta + 1 (note

   // that diagonals are centered on delta, unlike for forward paths

   // where they are centered on zero), we set the initial point to

   // (a_size, b_size - 1).  That way, at step D == 0 and k == delta,

   // to reach diagonal delta from the point (a_size, b_size - 1) on

   // diagonal delta + 1, we just have to move left, and we hit

   // diagonal delta on (a_size - 1, b_size -1); that is the starting

   // point value the algorithm expects for k == 0 in the reverse case.

   reverse_d_paths[delta + 1] = a_size;


   int d_max = (M + N) / 2 + 1;

   for (int d = 0; d <= d_max; ++d)

     {

       // First build forward paths.

       for (int k = -d; k <= d; k += 2)

     {

       snake s;

       bool found =

         end_of_fr_d_path_in_k<RandomAccessOutputIterator,

                   EqualityFunctor>(k, d,

                            a_begin, a_end,

                            b_begin, b_end,

                            forward_d_paths, s);

       if (!found)

         continue;


       // As the paper says in 4b while explaining the middle snake

       // algorithm:

       //

       // "Thus when delta is odd, check for overlap only while

       //  extending forward paths ..."

       if ((delta % 2)

           && (k >= (delta - (d - 1))) && (k <= (delta + (d - 1))))

         {

           point reverse_end;

           reverse_end.x(reverse_d_paths[k]);

           reverse_end.y(reverse_end.x() - k);

           if (ends_of_furthest_d_paths_overlap(s.end(), reverse_end))

         {

           ses_len = 2 * d - 1;

           snak = s;

           return true;

         }

         }

     }


       // Now build reverse paths.

       for (int k = -d; k <= d; k += 2)

     {

       snake s;

       bool found =

         end_of_frr_d_path_in_k_plus_delta<RandomAccessOutputIterator,

                           EqualityFunctor>(k, d,

                                    a_begin, a_end,

                                    b_begin, b_end,

                                    reverse_d_paths,

                                    s);


       if (!found)

         continue;


       // And the paper continues by saying:

       //

       // "... and when delta is even, check for overlap only while

       // extending reverse paths."

       int k_plus_delta = k + delta;

       if (!(delta % 2)

           && (k_plus_delta >= -d) && (k_plus_delta <= d))

         {

           point forward_end;

           forward_end.x(forward_d_paths[k_plus_delta]);

           forward_end.y(forward_end.x() - k_plus_delta);

           if (ends_of_furthest_d_paths_overlap(forward_end, s.end()))

         {

             ses_len = 2 * d;

             snak = s;

             return true;

         }

         }

     }

     }

   return false;

 }


 bool

 compute_middle_snake(const char* str1, const char* str2,

              snake& s, int& ses_len);


 /// This prints the middle snake of two strings.

 ///

 /// @param a_begin the beginning of the first string.

 ///

 /// @param b_begin the beginning of the second string.

 ///

 /// @param s the snake to print.

 ///

 /// @param out the output stream to print the snake to.

 template<typename RandomAccessOutputIterator>

 void

 print_snake(RandomAccessOutputIterator a_begin,

         RandomAccessOutputIterator b_begin,

         const snake &s, ostream& out)

 {

   if (s.is_empty())

     return;


    out << "snake start: ";

    out << "(" << s.begin().x() << ", " << s.end().y() << ")\n";


    out << "snake intermediate: ";

    out << "(" << s.intermediate().x() << ", " << s.intermediate().y() << ")\n";


    out << "diagonal point(s): ";

    if (s.has_diagonal_edge())

      for (int x = s.intermediate().x(), y = s.intermediate().y();

       x <= s.end().x() && y <= s.end().y();

       ++x, ++y)

        {

      ABG_ASSERT(a_begin[x] == b_begin[y]);

      out << "(" << x << "," << y << ") ";

        }

    out << "\n";


    out << "snake end: ";

    out << "(" << s.end().x() << ", " << s.end().y() << ")\n";

 }


 /// Compute the length of the shortest edit script for two sequences a

 /// and b.  This is done using the "Greedy LCS/SES" of figure 2 in the

 /// paper.  It can walk the edit graph either foward (when reverse is

 /// false) or backward starting from the end (when reverse is true).

 ///

 /// Here, note that the real content of a and b should start at index

 /// 1, for this implementatikon algorithm to match the paper's

 /// algorithm in a straightforward manner.  So pleast make sure that

 /// at index 0, we just get some non-used value.

 ///

 /// @tparm RandomAccessOutputIterator the type of iterators passed to

 /// this function.  It must be a random access output iterator kind.

 ///

 /// @tparm EqualityFunctor this must be a class that declares a public

 /// call operator member returning a boolean and taking two arguments

 /// that must be of the same type as the one pointed to by the @ref

 /// RandomAccessOutputIterator template parameter. This functor is

 /// used to compare the elements referred to by the iterators passed in

 /// argument to this function.

 ///

 /// @param a the first sequence we care about.

 ///

 /// @param b the second sequence we care about.

 ///

 /// @param v the vector that contains the end points of the furthest

 /// reaching d-path and (d-1)-path.

 template<typename RandomAccessOutputIterator,

      typename EqualityFunctor>

 int

 ses_len(RandomAccessOutputIterator a_begin,

     RandomAccessOutputIterator a_end,

     RandomAccessOutputIterator b_begin,

     RandomAccessOutputIterator b_end,

     d_path_vec& v, bool reverse)

 {

   unsigned a_size = a_end - a_begin;

   unsigned b_size = b_end - b_begin;

   snake snak;


   ABG_ASSERT(v.max_d() == a_size + b_size);


   int delta = a_size - b_size;


   if (reverse)

     // Set a fictitious (M, N-1) into v[1], to find the furthest

     // reaching reverse 0-path (i.e, when we are at d == 0 and k == 0).

     v[delta + 1] = a_size - 1;

   else

     // Set a fictitious (-1,-2) point into v[1], to find the furthest

     // reaching forward 0-path (i.e, when we are at d == 0 and k == 0).

     v[1] = -1;


   for (unsigned d = 0; d <= v.max_d(); ++d)

     {

       for (int k = -d; k <= (int) d; k += 2)

     {

       point end;

       if (reverse)

         {

           bool found =

         end_of_frr_d_path_in_k_plus_delta<RandomAccessOutputIterator,

                           EqualityFunctor>(k, d,

                                    a_begin, a_end,

                                    b_begin, b_end,

                                    v, snak);

           // If we reached the upper left corner of the edit graph then

           // we are done.

           if (found && snak.end().x() == -1 && snak.end().y() == -1)

         return d;

         }

       else

         {

           end_of_fr_d_path_in_k<RandomAccessOutputIterator,

                     EqualityFunctor>(k, d,

                              a_begin, a_end,

                              b_begin, b_end,

                              v, snak);

           // If we reached the lower right corner of the edit

           // graph then we are done.

           if ((snak.end().x() == (int) a_size - 1)

           && (snak.end().y() == (int) b_size - 1))

         return d;

         }

     }

     }

   return 0;

 }


 /// Compute the length of the shortest edit script for two sequences a

 /// and b.  This is done using the "Greedy LCS/SES" of figure 2 in the

 /// paper.  It can walk the edit graph either foward (when reverse is

 /// false) or backward starting from the end (when reverse is true).

 ///

 /// Here, note that the real content of a and b should start at index

 /// 1, for this implementatikon algorithm to match the paper's

 /// algorithm in a straightforward manner.  So pleast make sure that

 /// at index 0, we just get some non-used value.

 ///

 /// Note that the equality operator used to compare the elements

 /// passed in argument to this function is the default "==" operator.

 ///

 /// @tparm RandomAccessOutputIterator the type of iterators passed to

 /// this function.  It must be a random access output iterator kind.

 ///

 /// @param a the first sequence we care about.

 ///

 /// @param b the second sequence we care about.

 ///

 /// @param v the vector that contains the end points of the furthest

 /// reaching d-path and (d-1)-path.

 template<typename RandomAccessOutputIterator>

 int

 ses_len(RandomAccessOutputIterator a_begin,

     RandomAccessOutputIterator a_end,

     RandomAccessOutputIterator b_begin,

     RandomAccessOutputIterator b_end,

     d_path_vec& v, bool reverse)

 {

   return ses_len<RandomAccessOutputIterator, default_eq_functor>(a_begin, a_end,

                                  b_begin, b_end,

                                  v, reverse);

 }


 int

 ses_len(const char* str1,

     const char* str2,

     bool reverse = false);


 bool

 snake_end_points(const snake& s, point&, point&);


 /// Compute the longest common subsequence of two (sub-regions of)

 /// sequences as well as the shortest edit script from transforming

 /// the first (sub-region of) sequence into the second (sub-region of)

 /// sequence.

 ///

 /// A sequence is determined by a base, a beginning offset and an end

 /// offset.  The base always points to the container that contains the

 /// sequence to consider.  The beginning offset is an iterator that

 /// points the beginning of the sub-region of the sequence that we

 /// actually want to consider.  The end offset is an iterator that

 /// points to the end of the sub-region of the sequence that we

 /// actually want to consider.

 ///

 /// This uses the LCS algorithm of the paper at section 4b.

 ///

 /// @tparm RandomAccessOutputIterator the type of iterators passed to

 /// this function.  It must be a random access output iterator kind.

 ///

 /// @tparm EqualityFunctor this must be a class that declares a public

 /// call operator member returning a boolean and taking two arguments

 /// that must be of the same type as the one pointed to by the @ref

 /// RandomAccessOutputIterator template parameter. This functor is

 /// used to compare the elements referred to by the iterators passed in

 /// argument to this function.

 ///

 /// @param a_base the iterator to the base of the first sequence.

 ///

 /// @param a_start an iterator to the beginning of the sub-region

 /// of the first sequence to actually consider.

 ///

 /// @param a_end an iterator to the end of the sub-region of the first

 /// sequence to consider.

 ///

 ///@param b_base an iterator to the base of the second sequence to

 ///consider.

 ///

 /// @param b_start an iterator to the beginning of the sub-region

 /// of the second sequence to actually consider.

 ///

 /// @param b_end an iterator to the end of the sub-region of the

 /// second sequence to actually consider.

 ///

 /// @param lcs the resulting lcs.  This is set iff the function

 /// returns true.

 ///

 /// @param ses the resulting shortest editing script.

 ///

 /// @param ses_len the length of the ses above.  Normally this can be

 /// retrieved from ses.length(), but this parameter is here for sanity

 /// check purposes.  The function computes the length of the ses in

 /// two redundant ways and ensures that both methods lead to the same

 /// result.

 ///

 /// @return true upon successful completion, false otherwise.

 template<typename RandomAccessOutputIterator,

      typename EqualityFunctor>

 void

 compute_diff(RandomAccessOutputIterator a_base,

          RandomAccessOutputIterator a_begin,

          RandomAccessOutputIterator a_end,

          RandomAccessOutputIterator b_base,

          RandomAccessOutputIterator b_begin,

          RandomAccessOutputIterator b_end,

          vector<point>& lcs,

          edit_script& ses,

          int& ses_len)

 {

   int a_size = a_end - a_begin;

   int b_size = b_end - b_begin;

   unsigned a_offset = a_begin - a_base, b_offset = b_begin - b_base;


   if (a_size == 0 || b_size == 0)

     {

       if (a_size > 0 && b_size == 0)

     // All elements of the first sequences have been deleted.  So add

     // the relevant deletions to the edit script.

     for (RandomAccessOutputIterator i = a_begin; i < a_end; ++i)

       ses.deletions().push_back(deletion(i - a_base));


       if (b_size > 0 && a_size == 0)

     {

       // All elements present in the second sequence are part of

       // an insertion into the first sequence at a_end.  So add

       // that insertion to the edit script.

       int a_full_size = a_end - a_base;

       int insertion_index = a_full_size ? a_full_size - 1 : -1;

       insertion ins(insertion_index);

       for (RandomAccessOutputIterator i = b_begin; i < b_end; ++i)

         ins.inserted_indexes().push_back(i - b_base);


       ses.insertions().push_back(ins);

     }


       ses_len =  a_size + b_size;

       return;

     }


   int d = 0;

   snake snak;

   vector<point> trace; // the trace of the edit graph.  Read the paper

                // to understand what a trace is.

   bool has_snake =

     compute_middle_snake<RandomAccessOutputIterator,

              EqualityFunctor>(a_begin, a_end,

                       b_begin, b_end,

                       snak, d);

   if (has_snake)

     {

       // So middle_{begin,end} are expressed wrt a_begin and b_begin.

       // Let's express them wrt a_base and b_base.

       snak.add(a_offset, b_offset);

       ses_len = d;

     }


   if (has_snake)

     {

       if ( snak.has_diagonal_edge())

     for (int x = snak.diagonal_start().x(), y = snak.diagonal_start().y();

          x <= snak.end().x() && y <= snak.end().y();

          ++x, ++y)

       {

         point p(x, y);

         trace.push_back(p);

       }

     }

   else

     {

       // So there is no middle snake.  That means there is no lcs, so

       // the two sequences are different.


       // In other words, all the elements of the first sequence have

       // been deleted ...

       for (RandomAccessOutputIterator i = a_begin; i < a_end; ++i)

     ses.deletions().push_back(deletion(i - a_base));


       // ... and all the elements of the second sequence are insertions

       // that happen at the beginning of the first sequence.

       insertion ins(a_begin - a_base);

       for (RandomAccessOutputIterator i = b_begin; i < b_end; ++i)

     ins.inserted_indexes().push_back(i - b_base);

       ses.insertions().push_back(ins);


       ses_len = a_size + b_size;

       ABG_ASSERT(ses_len == ses.length());

       return;

     }


   if (d > 1)

     {

       int tmp_ses_len0 = 0;

       edit_script tmp_ses0;

       point px, pu;

       snake_end_points(snak, px, pu);

       compute_diff<RandomAccessOutputIterator,

            EqualityFunctor>(a_base, a_begin, a_base + (px.x() + 1),

                     b_base, b_begin, b_base + (px.y() + 1),

                     lcs, tmp_ses0, tmp_ses_len0);


       lcs.insert(lcs.end(), trace.begin(), trace.end());


       int tmp_ses_len1 = 0;

       edit_script tmp_ses1;

       compute_diff<RandomAccessOutputIterator,

            EqualityFunctor>(a_base, a_base + (pu.x() + 1), a_end,

                     b_base, b_base + (pu.y() + 1), b_end,

                     lcs, tmp_ses1, tmp_ses_len1);

       ABG_ASSERT(tmp_ses0.length() + tmp_ses1.length() == d);

       ABG_ASSERT(tmp_ses_len0 + tmp_ses_len1 == d);

       ses.append(tmp_ses0);

       ses.append(tmp_ses1);

     }

   else if (d == 1)

     {

       if (snak.has_diagonal_edge())

     {

       for (int x = snak.diagonal_start().x(), y = snak.diagonal_start().y();

            x <= snak.end().x() && y <= snak.end().y();

            ++x, ++y)

         {

           point p(x, y);

           trace.push_back(p);

         }

     }


       if (snak.has_vertical_edge())

     {

       point p = snak.intermediate();

       insertion ins(p.x());

       ins.inserted_indexes().push_back(p.y());

       ses.insertions().push_back(ins);

     }

       else if (snak.has_horizontal_edge())

     {

       if (snak.is_forward())

         {

           deletion del(snak.intermediate().x());

           ses.deletions().push_back(del);

         }

       else

         {

           deletion del(snak.begin().x());

           ses.deletions().push_back(del);

         }

     }

     }

   else if (d == 0)

     {

       // Obviously on the middle snake is part of the solution, as

       // there is no edit script; iow, the two sequences are

       // identical.

       lcs.insert(lcs.end(), trace.begin(), trace.end());

       ses_len = 0;

     }


   ABG_ASSERT(ses_len == ses.length());

 }


 /// Compute the longest common subsequence of two (sub-regions of)

 /// sequences as well as the shortest edit script from transforming

 /// the first (sub-region of) sequence into the second (sub-region of)

 /// sequence.

 ///

 /// This uses the LCS algorithm of the paper at section 4b.

 ///

 /// @tparm RandomAccessOutputIterator the type of iterators passed to

 /// this function.  It must be a random access output iterator kind.

 ///

 /// @tparm EqualityFunctor this must be a class that declares a public

 /// call operator member returning a boolean and taking two arguments

 /// that must be of the same type as the one pointed to by the @ref

 /// RandomAccessOutputIterator template parameter. This functor is

 /// used to compare the elements referred to by the iterators passed in

 /// argument to this function.

 ///

 /// @param a_start an iterator to the beginning of the first sequence

 /// to consider.

 ///

 /// @param a_end an iterator to the end of the first sequence to

 /// consider.

 ///

 /// @param b_start an iterator to the beginning of the second sequence

 /// to consider.

 ///

 /// @param b_end an iterator to the end of the second sequence to

 /// consider.

 ///

 /// @param lcs the resulting lcs.  This is set iff the function

 /// returns true.

 ///

 /// @param ses the resulting shortest editing script.

 ///

 /// @param ses_len the length of the ses above.  Normally this can be

 /// retrieved from ses.length(), but this parameter is here for sanity

 /// check purposes.  The function computes the length of the ses in

 /// two redundant ways and ensures that both methods lead to the same

 /// result.

 ///

 /// @return true upon successful completion, false otherwise.

 template<typename RandomAccessOutputIterator,

      typename EqualityFunctor>

 void

 compute_diff(RandomAccessOutputIterator a_begin,

          RandomAccessOutputIterator a_end,

          RandomAccessOutputIterator b_begin,

          RandomAccessOutputIterator b_end,

          vector<point>& lcs,

          edit_script& ses,

          int& ses_len)

 {

   compute_diff<RandomAccessOutputIterator,

            EqualityFunctor>(a_begin, a_begin, a_end,

                 b_begin, b_begin, b_end,

                 lcs, ses, ses_len);

 }


 /// Compute the longest common subsequence of two (sub-regions of)

 /// sequences as well as the shortest edit script from transforming

 /// the first (sub-region of) sequence into the second (sub-region of)

 /// sequence.

 ///

 /// A sequence is determined by a base, a beginning offset and an end

 /// offset.  The base always points to the container that contains the

 /// sequence to consider.  The beginning offset is an iterator that

 /// points the beginning of the sub-region of the sequence that we

 /// actually want to consider.  The end offset is an iterator that

 /// points to the end of the sub-region of the sequence that we

 /// actually want to consider.

 ///

 /// This uses the LCS algorithm of the paper at section 4b.

 ///

 /// @tparm RandomAccessOutputIterator the type of iterators passed to

 /// this function.  It must be a random access output iterator kind.

 ///

 /// @tparm EqualityFunctor this must be a class that declares a public

 /// call operator member returning a boolean and taking two arguments

 /// that must be of the same type as the one pointed to by the @ref

 /// RandomAccessOutputIterator template parameter. This functor is

 /// used to compare the elements referred to by the iterators passed in

 /// argument to this function.

 ///

 /// @param a_base the iterator to the base of the first sequence.

 ///

 /// @param a_start an iterator to the beginning of the sub-region

 /// of the first sequence to actually consider.

 ///

 /// @param a_end an iterator to the end of the sub-region of the first

 /// sequence to consider.

 ///

 ///@param b_base an iterator to the base of the second sequence to

 ///consider.

 ///

 /// @param b_start an iterator to the beginning of the sub-region

 /// of the second sequence to actually consider.

 ///

 /// @param b_end an iterator to the end of the sub-region of the

 /// second sequence to actually consider.

 ///

 /// @param lcs the resulting lcs.  This is set iff the function

 /// returns true.

 ///

 /// @param ses the resulting shortest editing script.

 ///

 /// @return true upon successful completion, false otherwise.

 template<typename RandomAccessOutputIterator,

      typename EqualityFunctor>

 void

 compute_diff(RandomAccessOutputIterator a_base,

          RandomAccessOutputIterator a_begin,

          RandomAccessOutputIterator a_end,

          RandomAccessOutputIterator b_base,

          RandomAccessOutputIterator b_begin,

          RandomAccessOutputIterator b_end,

          vector<point>& lcs,

          edit_script& ses)

 {

   int ses_len = 0;


   compute_diff<RandomAccessOutputIterator,

            EqualityFunctor>(a_base, a_begin, a_end,

                 b_base, b_begin, b_end,

                 lcs, ses, ses_len);

 }


 /// Compute the longest common subsequence of two (sub-regions of)

 /// sequences as well as the shortest edit script from transforming

 /// the first (sub-region of) sequence into the second (sub-region of)

 /// sequence.

 ///

 /// This uses the LCS algorithm of the paper at section 4b.

 ///

 /// @tparm RandomAccessOutputIterator the type of iterators passed to

 /// this function.  It must be a random access output iterator kind.

 ///

 /// @tparm EqualityFunctor this must be a class that declares a public

 /// call operator member returning a boolean and taking two arguments

 /// that must be of the same type as the one pointed to by the @ref

 /// RandomAccessOutputIterator template parameter. This functor is

 /// used to compare the elements referred to by the iterators passed in

 /// argument to this function.

 ///

 /// @param a_start an iterator to the beginning of the first sequence

 /// to consider.

 ///

 /// @param a_end an iterator to the end of the first sequence to

 /// consider.

 ///

 /// @param b_start an iterator to the beginning of the sequence to

 /// actually consider.

 ///

 /// @param b_end an iterator to the end of second sequence to

 /// consider.

 ///

 /// @param lcs the resulting lcs.  This is set iff the function

 /// returns true.

 ///

 /// @param ses the resulting shortest editing script.

 ///

 /// @return true upon successful completion, false otherwise.

 template<typename RandomAccessOutputIterator,

      typename EqualityFunctor>

 void

 compute_diff(RandomAccessOutputIterator a_begin,

          RandomAccessOutputIterator a_end,

          RandomAccessOutputIterator b_begin,

          RandomAccessOutputIterator b_end,

          vector<point>& lcs,

          edit_script& ses)

 {

   compute_diff<RandomAccessOutputIterator,

            EqualityFunctor>(a_begin, a_begin, a_end,

                 b_begin, b_begin, b_end,

                 lcs, ses);

 }


 /// Compute the longest common subsequence of two (sub-regions of)

 /// sequences as well as the shortest edit script from transforming

 /// the first (sub-region of) sequence into the second (sub-region of)

 /// sequence.

 ///

 /// This uses the LCS algorithm of the paper at section 4b.

 ///

 /// @tparm RandomAccessOutputIterator the type of iterators passed to

 /// this function.  It must be a random access output iterator kind.

 ///

 /// @param a_start an iterator to the beginning of the first sequence

 /// to consider.

 ///

 /// @param a_end an iterator to the end of the first sequence to

 /// consider.

 ///

 /// @param b_start an iterator to the beginning of the sequence to

 /// actually consider.

 ///

 /// @param b_end an iterator to the end of second sequence to

 /// consider.

 ///

 /// @param lcs the resulting lcs.  This is set iff the function

 /// returns true.

 ///

 /// @param ses the resulting shortest editing script.

 ///

 /// @return true upon successful completion, false otherwise.

 template<typename RandomAccessOutputIterator>

 void

 compute_diff(RandomAccessOutputIterator a_begin,

          RandomAccessOutputIterator a_end,

          RandomAccessOutputIterator b_begin,

          RandomAccessOutputIterator b_end,

          vector<point>& lcs,

          edit_script& ses)

 {

   compute_diff<RandomAccessOutputIterator,

            default_eq_functor>(a_begin, a_end, b_begin, b_end, lcs, ses);

 }


 /// Compute the longest common subsequence of two (sub-regions of)

 /// sequences as well as the shortest edit script from transforming

 /// the first (sub-region of) sequence into the second (sub-region of)

 /// sequence.

 ///

 /// A sequence is determined by a base, a beginning offset and an end

 /// offset.  The base always points to the container that contains the

 /// sequence to consider.  The beginning offset is an iterator that

 /// points the beginning of the sub-region of the sequence that we

 /// actually want to consider.  The end offset is an iterator that

 /// points to the end of the sub-region of the sequence that we

 /// actually want to consider.

 ///

 /// This uses the LCS algorithm of the paper at section 4b.

 ///

 /// @tparm RandomAccessOutputIterator the type of iterators passed to

 /// this function.  It must be a random access output iterator kind.

 ///

 /// @tparm EqualityFunctor this must be a class that declares a public

 /// call operator member returning a boolean and taking two arguments

 /// that must be of the same type as the one pointed to by the @ref

 /// RandomAccessOutputIterator template parameter. This functor is

 /// used to compare the elements referred to by the iterators passed in

 /// argument to this function.

 ///

 /// @param a_base the iterator to the base of the first sequence.

 ///

 /// @param a_start an iterator to the beginning of the sub-region

 /// of the first sequence to actually consider.

 ///

 /// @param a_end an iterator to the end of the sub-region of the first

 /// sequence to consider.

 ///

 /// @param b_base an iterator to the base of the second sequence to

 /// consider.

 ///

 /// @param b_start an iterator to the beginning of the sub-region

 /// of the second sequence to actually consider.

 ///

 /// @param b_end an iterator to the end of the sub-region of the

 /// second sequence to actually consider.

 ///

 /// @param ses the resulting shortest editing script.

 ///

 /// @return true upon successful completion, false otherwise.

 template<typename RandomAccessOutputIterator,

      typename EqualityFunctor>

 void

 compute_diff(RandomAccessOutputIterator a_base,

          RandomAccessOutputIterator a_begin,

          RandomAccessOutputIterator a_end,

          RandomAccessOutputIterator b_base,

          RandomAccessOutputIterator b_begin,

          RandomAccessOutputIterator b_end,

          edit_script& ses)

 {

   vector<point> lcs;


   compute_diff<RandomAccessOutputIterator,

            EqualityFunctor>(a_base, a_begin, a_end,

                 b_base, b_begin, b_end,

                 lcs, ses);

 }


 /// Compute the longest common subsequence of two (sub-regions of)

 /// sequences as well as the shortest edit script from transforming

 /// the first (sub-region of) sequence into the second (sub-region of)

 /// sequence.

 ///

 /// This uses the LCS algorithm of the paper at section 4b.

 ///

 /// @tparm RandomAccessOutputIterator the type of iterators passed to

 /// this function.  It must be a random access output iterator kind.

 ///

 /// @tparm EqualityFunctor this must be a class that declares a public

 /// call operator member returning a boolean and taking two arguments

 /// that must be of the same type as the one pointed to by the @ref

 /// RandomAccessOutputIterator template parameter. This functor is

 /// used to compare the elements referred to by the iterators passed in

 /// argument to this function.

 ///

 /// @param a_start an iterator to the beginning of the first sequence

 /// to consider.

 ///

 /// @param a_end an iterator to the end of the first sequence to

 /// consider.

 ///

 /// @param b_start an iterator to the beginning of the second sequence

 /// to consider.

 ///

 /// @param b_end an iterator to the end of the second sequence to

 /// consider.

 ///

 /// @param ses the resulting shortest editing script.

 ///

 /// @return true upon successful completion, false otherwise.

 template<typename RandomAccessOutputIterator,

      typename EqualityFunctor>

 void

 compute_diff(RandomAccessOutputIterator a_begin,

          RandomAccessOutputIterator a_end,

          RandomAccessOutputIterator b_begin,

          RandomAccessOutputIterator b_end,

          edit_script& ses)

 {

   compute_diff<RandomAccessOutputIterator,

            EqualityFunctor>(a_begin, a_begin, a_end,

                 b_begin, b_begin, b_end,

                 ses);

 }


 /// Compute the longest common subsequence of two (sub-regions of)

 /// sequences as well as the shortest edit script from transforming

 /// the first (sub-region of) sequence into the second (sub-region of)

 /// sequence.

 ///

 /// This uses the LCS algorithm of the paper at section 4b.

 ///

 /// @tparm RandomAccessOutputIterator the type of iterators passed to

 /// this function.  It must be a random access output iterator kind.

 ///

 /// @param a_start an iterator to the beginning of the first sequence

 /// to consider.

 ///

 /// @param a_end an iterator to the end of the first sequence to

 /// consider.

 ///

 /// @param b_start an iterator to the beginning of the second sequence

 /// to consider.

 ///

 /// @param b_end an iterator to the end of the second sequence to

 /// consider.

 ///

 /// @param ses the resulting shortest editing script.

 ///

 /// @return true upon successful completion, false otherwise.

 template<typename RandomAccessOutputIterator>

 void

 compute_diff(RandomAccessOutputIterator a_begin,

          RandomAccessOutputIterator a_end,

          RandomAccessOutputIterator b_begin,

          RandomAccessOutputIterator b_end,

          edit_script& ses)

 {

   compute_diff<RandomAccessOutputIterator, default_eq_functor>(a_begin, a_end,

                                    b_begin, b_end,

                                    ses);

 }


 void

 compute_lcs(const char* str1, const char* str2, int &ses_len, string& lcs);


 void

 compute_ses(const char* str1, const char* str2, edit_script& ses);


 /// Display an edit script on standard output.

 ///

 /// @param es the edit script to display

 ///

 /// @param str1_base the first string the edit script is about.

 ///

 /// @pram str2_base the second string the edit script is about.

 template<typename RandomAccessOutputIterator>

 void

 display_edit_script(const edit_script& es,

             const RandomAccessOutputIterator str1_base,

             const RandomAccessOutputIterator str2_base,

             ostream& out)

 {

   if (es.num_deletions() == 0)

     out << "no deletion:\n";

   else if (es.num_deletions() == 1)

     {

       out << "1 deletion:\n"

       << "\t happened at index: ";

     }

   else

     {

       out << es.num_deletions() << " deletions:\n"

        << "\t happened at indexes: ";

     }


   for (vector<deletion>::const_iterator i = es.deletions().begin();

        i != es.deletions().end();

        ++i)

     {

       if (i != es.deletions().begin())

     out << ", ";

       out << i->index() << " (" << str1_base[i->index()] << ")";

     }

   out << "\n\n";


   if (es.num_insertions() == 0)

     out << "no insertion\n";

   else if (es.num_insertions() == 1)

     out << "1 insertion\n";

   else

       out << es.num_insertions() << " insertions:\n";

   for (vector<insertion>::const_iterator i = es.insertions().begin();

        i != es.insertions().end();

        ++i)

     {

       int idx = i->insertion_point_index();

       if (idx < 0)

     out << "\t before index of first sequence: " << idx + 1

         << " (" << str1_base[idx + 1] << ")\n";

       else

     out << "\t after index of first sequence: " << idx

         << " (" << str1_base[idx] << ")\n";


       if (!i->inserted_indexes().empty())

     out << "\t\t inserted indexes from second sequence: ";


       for (vector<unsigned>::const_iterator j = i->inserted_indexes().begin();

        j != i->inserted_indexes().end();

        ++j)

     {

       if (j != i->inserted_indexes().begin())

         out << ", ";

       out << *j << " (" << str2_base[*j] << ")";

     }

       out << "\n";

     }

   out << "\n\n";

 }


 }//end namespace diff_utils


 }//end namespace abigail

 #endif // __ABG_DIFF_UTILS_H__

abg-fwd.h

ABG_ASSERT
#define ABG_ASSERT(cond)
This is a wrapper around the 'assert' glibc call. It allows for its argument to have side effects,...
Definition: abg-fwd.h:1714

abigail::diff_utils::d_path_vec
The array containing the furthest D-path end-points, for each value of K. MAX_D is the maximum value ...
Definition: abg-diff-utils.h:455

abigail::diff_utils::d_path_vec::d_path_vec
d_path_vec(unsigned size1, unsigned size2)
Constructor of the d_path_vec.
Definition: abg-diff-utils.h:505

abigail::diff_utils::deletion
The abstraction of the deletion of one element of a sequence A.
Definition: abg-diff-utils.h:602

abigail::diff_utils::edit_script
The abstraction of an edit script for transforming a sequence A into a sequence B.
Definition: abg-diff-utils.h:626

abigail::diff_utils::insertion
The abstration of an insertion of elements of a sequence B into a sequence A. This is used to represe...
Definition: abg-diff-utils.h:565

abigail::diff_utils::point
A class representing a vertex in an edit graph, as explained in the paper. A vertex is a basically a ...
Definition: abg-diff-utils.h:58

abigail::diff_utils::snake
The abstraction of the Snake concept, from the paper.
Definition: abg-diff-utils.h:244

abigail::diff_utils::snake::snake
snake()
Default constructor for snake.
Definition: abg-diff-utils.h:251

abigail::diff_utils::snake::has_vertical_edge
bool has_vertical_edge() const
Definition: abg-diff-utils.h:441

abigail::diff_utils::snake::intermediate
void intermediate(const point &p)
Setter for the end point of the non-diagonal edge of the snake, aka intermediate point.
Definition: abg-diff-utils.h:322

abigail::diff_utils::snake::is_empty
bool is_empty() const
Definition: abg-diff-utils.h:446

abigail::diff_utils::snake::diagonal_start
const point & diagonal_start() const
Getter for the end point of the first diagonal edge, aka diagonal start point. Note that if the snake...
Definition: abg-diff-utils.h:331

abigail::diff_utils::snake::end
void end(const point &p)
Setter for the end point of the last diagonal edge, aka snake end point. Note that if the snake has n...
Definition: abg-diff-utils.h:355

abigail::diff_utils::snake::begin
void begin(const point &p)
Getter for the starting point of the non-diagonal edge of the snake, aka begin point.
Definition: abg-diff-utils.h:307

abigail::diff_utils::snake::end
const point & end() const
Getter for the end point of the last diagonal edge, aka snake end point. Note that if the snake has n...
Definition: abg-diff-utils.h:348

abigail::diff_utils::snake::snake
snake(const point &b, const point &i, const point &e)
Constructor from the beginning, intermediate and end points.
Definition: abg-diff-utils.h:265

abigail::diff_utils::snake::diagonal_start
void diagonal_start(const point &p)
Setter for the end point of the first diagonal edge, aka diagonal start point.
Definition: abg-diff-utils.h:339

abigail::diff_utils::snake::set_forward
void set_forward(bool f)
Set to true if the snake is a forward snake; that is, if it was built while walking the edit graph go...
Definition: abg-diff-utils.h:405

abigail::diff_utils::snake::intermediate
const point & intermediate() const
Getter for the end point of the non-diagonal edge of the snake.
Definition: abg-diff-utils.h:314

abigail::diff_utils::snake::add
void add(int x_offset, int y_offset)
Add an offset to the abscissas of the points of the snake, and add another offset to the ordinates of...
Definition: abg-diff-utils.h:417

abigail::diff_utils::snake::begin
const point & begin() const
Getter for the starting point of the non-diagonal edge of the snake.
Definition: abg-diff-utils.h:299

abigail::diff_utils::snake::set
void set(const point &b, const point &i, const point &d, const point &e)
Setter for the begin, intermediate, diagonal start and end points of the snake.
Definition: abg-diff-utils.h:384

abigail::diff_utils::snake::is_forward
bool is_forward() const
Definition: abg-diff-utils.h:396

abigail::diff_utils::snake::has_diagonal_edge
bool has_diagonal_edge() const
Definition: abg-diff-utils.h:431

abigail::diff_utils::snake::set
void set(const point &b, const point &i, const point &e)
Setter for the begin, intermediate and end points of the snake.
Definition: abg-diff-utils.h:366

abigail::diff_utils::snake::snake
snake(const point &b, const point &i, const point &d, const point &e)
Constructor from the beginning, intermediate and end points.
Definition: abg-diff-utils.h:285

abigail::diff_utils::snake::has_horizontal_edge
bool has_horizontal_edge() const
Definition: abg-diff-utils.h:436

abigail::diff_utils::compute_middle_snake
bool compute_middle_snake(const char *str1, const char *str2, snake &s, int &ses_len)
Returns the middle snake of two strings, as well as the length of their shortest editing script.
Definition: abg-diff-utils.cc:97

abigail::diff_utils::snake_end_points
bool snake_end_points(const snake &s, point &x, point &u)
Get the end points of the snake, as intended by the paper.
Definition: abg-diff-utils.cc:70

abigail::diff_utils::ends_of_furthest_d_paths_overlap
bool ends_of_furthest_d_paths_overlap(const point &forward_d_path_end, const point &reverse_d_path_end)
Definition: abg-diff-utils.cc:32

abigail::diff_utils::is_match_point
bool is_match_point(RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, const point &point)
Tests if a given point is a match point in an edit graph.
Definition: abg-diff-utils.h:1086

abigail::diff_utils::display_edit_script
void display_edit_script(const edit_script &es, const RandomAccessOutputIterator str1_base, const RandomAccessOutputIterator str2_base, ostream &out)
Display an edit script on standard output.
Definition: abg-diff-utils.h:2024

abigail::diff_utils::compute_lcs
void compute_lcs(const char *str1, const char *str2, int &ses_len, string &lcs)
Compute the longest common subsequence of two strings and return the length of the shortest edit scri...
Definition: abg-diff-utils.cc:157

abigail::diff_utils::end_of_fr_d_path_in_k
bool end_of_fr_d_path_in_k(int k, int d, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_start, RandomAccessOutputIterator b_end, d_path_vec &v, snake &snak)
Find the end of the furthest reaching d-path on diagonal k, for two sequences. In the paper This is r...
Definition: abg-diff-utils.h:841

abigail::diff_utils::end_of_frr_d_path_in_k_plus_delta
bool end_of_frr_d_path_in_k_plus_delta(int k, int d, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, d_path_vec &v, snake &snak)
Find the end of the furthest reaching reverse d-path on diagonal k + delta. Delta is abs(M - N),...
Definition: abg-diff-utils.h:982

abigail::diff_utils::compute_ses
void compute_ses(const char *str1, const char *str2, edit_script &ses)
Compute the shortest edit script for transforming a string into another.
Definition: abg-diff-utils.cc:187

abigail::diff_utils::compute_diff
void compute_diff(RandomAccessOutputIterator a_base, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_base, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, vector< point > &lcs, edit_script &ses, int &ses_len)
Compute the longest common subsequence of two (sub-regions of) sequences as well as the shortest edit...
Definition: abg-diff-utils.h:1482

abigail::diff_utils::print_snake
void print_snake(RandomAccessOutputIterator a_begin, RandomAccessOutputIterator b_begin, const snake &s, ostream &out)
This prints the middle snake of two strings.
Definition: abg-diff-utils.h:1266

abigail::diff_utils::ses_len
int ses_len(const char *str1, const char *str2, bool reverse)
Compute the length of the shortest edit script for two strings. This is done using the "Greedy LCS/SE...
Definition: abg-diff-utils.cc:128

abigail::diff_utils::point_is_valid_in_graph
bool point_is_valid_in_graph(point &p, unsigned a_size, unsigned b_size)
Definition: abg-diff-utils.cc:52

abigail
Toplevel namespace for libabigail.
Definition: abg-comp-filter.cc:25

abigail::diff_utils::deep_ptr_eq_functor
An equality functor to deeply compare pointers.
Definition: abg-diff-utils.h:736

abigail::diff_utils::deep_ptr_eq_functor::operator()
bool operator()(const shared_ptr< T > first, const shared_ptr< T > second) const
This equality operator compares pointers by comparing the pointed-to objects.
Definition: abg-diff-utils.h:771

abigail::diff_utils::deep_ptr_eq_functor::operator()
bool operator()(const T *first, const T *second) const
This equality operator compares pointers by comparing the pointed-to objects.
Definition: abg-diff-utils.h:748

abigail::diff_utils::deep_ptr_eq_functor::operator()
bool operator()(const weak_ptr< T > first, const weak_ptr< T > second) const
This equality operator compares pointers by comparing the pointed-to objects.
Definition: abg-diff-utils.h:786

abigail::diff_utils::default_eq_functor
The default equality functor used by the core diffing algorithms.
Definition: abg-diff-utils.h:718

abigail::diff_utils::default_eq_functor::operator()
bool operator()(const T a, const T b) const
This equality operator uses the default "==" to compare its arguments.
Definition: abg-diff-utils.h:729