21#ifndef __ABG_DIFF_UTILS_H__
22#define __ABG_DIFF_UTILS_H__
52using std::ostringstream;
66 : x_(-1), y_(-1),empty_(
true)
70 : x_(x), y_(y), empty_(
false)
74 : x_(p.x()), y_(p.y()), empty_(p.is_empty())
108 set(
int x,
int y,
bool empty)
117 {set (x() + ax, y() + ay);}
120 operator!=(
const point& o)
const
121 {
return (x() != o.x() || y() != o.y() || is_empty() != o.is_empty());}
124 operator==(
const point& o)
const
125 {
return !(operator!=(o));}
128 operator<(
const point& o)
const
129 {
return (x() < o.x() && y() < o.y());}
132 operator>(
const point& o)
const
133 {
return (x() > o.x() && y() > o.y());}
136 operator<=(
const point& o)
const
137 {
return (x() <= o.x() && y() <= o.y());}
140 operator>=(
const point& o)
const
141 {
return (x() >= o.x() && y() >= o.y());}
144 operator+(
int val)
const
145 {
return point(x() + val, y() + val);}
148 operator-(
int val)
const
149 {
return point(x() - val, y() - val);}
154 set(x_ + val, y_ + val);
160 {
return (*
this) += (-val);}
164 {
return (*
this) -= 1;}
168 {
return (*
this) += 1;}
194 operator=(
const point& p)
196 set(p.x(), p.y(), p.is_empty());
204 operator bool ()
const
205 {
return !is_empty();}
245 point begin_, intermediate_, diagonal_start_, end_;
268 : begin_(b), intermediate_(i),
269 end_(e), forward_(false)
289 : begin_(b), intermediate_(i),
290 diagonal_start_(d), end_(e),
315 {
return intermediate_;}
332 {
return diagonal_start_;}
340 {diagonal_start_ = p;}
417 add(
int x_offset,
int y_offset)
422 begin_.add(x_offset, y_offset);
423 intermediate_.add(x_offset, y_offset);
425 diagonal_start_.add(x_offset, y_offset);
426 end_.add(x_offset, y_offset);
463 push_back(
const vector<int>::value_type&);
469 over_bounds(
long long index)
const
470 {
return (index + offset()) >= (
long long) size();}
473 check_index_against_bound(
int index)
const
475 if (over_bounds(index))
478 o <<
"index '" << index
479 <<
"' out of range [-" << max_d() <<
", " << max_d() <<
"]";
480 throw std::out_of_range(o.str());
506 : vector<int>(2 * (size1 + size2 + 1 + (size1 + size2)) + 1, 0),
507 a_size_(size1), b_size_(size2)
511 std::vector<int>::const_reference
512 operator[](
int index)
const
515 std::vector<int>::reference
516 operator[](
int index)
519 std::vector<int>::reference
523 long long i = index + offset();
524 return vector<int>::operator[](i);
527 std::vector<int>::const_reference
528 at(
long long index)
const
530 check_index_against_bound(index);
531 long long i = offset() + index;
532 return static_cast<const vector<int>*
>(
this)->at(i);
545 {
return a_size_ + b_size_;}
549 {
return max_d() + abs((
long long) a_size() - (
long long) b_size());}
566 int insertion_point_;
567 vector<unsigned> inserted_;
572 const vector<unsigned>& inserted_indexes)
573 : insertion_point_(insertion_point),
574 inserted_(inserted_indexes)
578 : insertion_point_(insertion_point)
582 insertion_point_index()
const
583 {
return insertion_point_;}
586 insertion_point_index(
int i)
587 {insertion_point_ = i;}
589 const vector<unsigned>&
590 inserted_indexes()
const
627 vector<insertion> insertions_;
628 vector<deletion> deletions_;
635 const vector<insertion>&
637 {
return insertions_;}
641 {
return insertions_;}
643 const vector<deletion>&
654 insertions().insert(insertions().end(),
655 es.insertions().begin(),
656 es.insertions().end());
657 deletions().insert(deletions().end(),
658 es.deletions().begin(),
659 es.deletions().end());
665 insertions().insert(insertions().begin(),
666 es.insertions().begin(),
667 es.insertions().end());
668 deletions().insert(deletions().begin(),
669 es.deletions().begin(),
670 es.deletions().end());
676 insertions().clear();
682 {
return insertions().empty() && deletions().empty();}
684 operator bool()
const
685 {
return !is_empty();}
688 num_insertions()
const
691 for (vector<insertion>::const_iterator i = insertions().begin();
692 i != insertions().end();
694 l += i->inserted_indexes().size();
699 num_deletions()
const
700 {
return deletions().size();}
704 {
return num_insertions() + num_deletions();}
714 const point& reverse_d_path_end);
749 const T* second)
const
751 if (!!first != !!second)
757 return *first == *second;
772 const shared_ptr<T> second)
const
773 {
return operator()(first.get(), second.get());}
787 const weak_ptr<T> second)
const
788 {
return operator()(shared_ptr<T>(first), shared_ptr<T>(second));}
838template<
typename RandomAccessOutputIterator,
839 typename EqualityFunctor>
842 RandomAccessOutputIterator a_begin,
843 RandomAccessOutputIterator a_end,
844 RandomAccessOutputIterator b_start,
845 RandomAccessOutputIterator b_end,
849 point begin, intermediate, diag_start, end;
857 if (k == -d || ((k != d) && (v[k-1] < v[k + 1])))
869 begin.set(x, x - (k + 1));
879 begin.set(x, x - (k - 1));
891 int last_x_index = a_end - a_begin - 1;
892 int last_y_index = b_end - b_start - 1;
896 while ((x < last_x_index) && (y < last_y_index))
897 if (eq(a_begin[x + 1], b_start[y + 1]))
902 diag_start.set(x, y);
917 if (x >= (
int) v.a_size()
918 || y >= (
int) v.b_size()
922 s.
set(begin, intermediate, diag_start, end);
979template<
typename RandomAccessOutputIterator,
980 typename EqualityFunctor>
983 RandomAccessOutputIterator a_begin,
984 RandomAccessOutputIterator a_end,
985 RandomAccessOutputIterator b_begin,
986 RandomAccessOutputIterator b_end,
989 int a_size = a_end - a_begin;
990 int b_size = b_end - b_begin;
991 int delta = a_size - b_size;
992 int k_plus_delta = k + delta;
994 point begin, intermediate, diag_start, end;
1004 if (k_plus_delta == -d + delta
1005 || ((k_plus_delta != d + delta)
1006 && (v[k_plus_delta + 1] <= v[k_plus_delta - 1])))
1009 x = v[k_plus_delta + 1];
1010 y = x - (k_plus_delta + 1);
1023 x = v[k_plus_delta - 1];
1024 begin.set(x, x - (k_plus_delta - 1));
1026 y = x - (k_plus_delta - 1) - 1;
1029 intermediate.set(x, y);
1033 while (x >= 0 && y >= 0)
1034 if (eq(a_begin[x], b_begin[y]))
1037 diag_start.set(x, y);
1051 v[k_plus_delta] = x;
1053 if (x == -1 && y == -1)
1055 else if (x < -1 || y < -1)
1058 s.
set(begin, intermediate, diag_start, end);
1084template<
typename RandomAccessOutputIterator>
1087 RandomAccessOutputIterator a_end,
1088 RandomAccessOutputIterator b_begin,
1089 RandomAccessOutputIterator b_end,
1092 int a_size = a_end - a_begin, b_size = b_end - b_begin;
1095 ||
point.x () >= a_size
1097 ||
point.y() >= b_size)
1100 return (a_begin[
point.x()] == b_begin[
point.y()]);
1137template<
typename RandomAccessOutputIterator,
1138 typename EqualityFunctor>
1141 RandomAccessOutputIterator a_end,
1142 RandomAccessOutputIterator b_begin,
1143 RandomAccessOutputIterator b_end,
1146 int a_size = a_end - a_begin;
1148 int b_size = b_end - b_begin;
1155 point first_point(-1, -1), last_point(a_size -1, b_size -1), point_zero(0, 0);
1165 forward_d_paths[1] = -1;
1175 reverse_d_paths[delta + 1] = a_size;
1177 int d_max = (M + N) / 2 + 1;
1178 for (
int d = 0; d <= d_max; ++d)
1181 for (
int k = -d; k <= d; k += 2)
1186 EqualityFunctor>(k, d,
1189 forward_d_paths, s);
1199 && (k >= (delta - (d - 1))) && (k <= (delta + (d - 1))))
1202 reverse_end.x(reverse_d_paths[k]);
1203 reverse_end.y(reverse_end.x() - k);
1214 for (
int k = -d; k <= d; k += 2)
1219 EqualityFunctor>(k, d,
1232 int k_plus_delta = k + delta;
1234 && (k_plus_delta >= -d) && (k_plus_delta <= d))
1237 forward_end.x(forward_d_paths[k_plus_delta]);
1238 forward_end.y(forward_end.x() - k_plus_delta);
1264template<
typename RandomAccessOutputIterator>
1267 RandomAccessOutputIterator b_begin,
1268 const snake &s, ostream& out)
1273 out <<
"snake start: ";
1274 out <<
"(" << s.
begin().x() <<
", " << s.
end().y() <<
")\n";
1276 out <<
"snake intermediate: ";
1279 out <<
"diagonal point(s): ";
1282 x <= s.
end().x() && y <= s.
end().y();
1286 out <<
"(" << x <<
"," << y <<
") ";
1290 out <<
"snake end: ";
1291 out <<
"(" << s.
end().x() <<
", " << s.
end().y() <<
")\n";
1320template<
typename RandomAccessOutputIterator,
1321 typename EqualityFunctor>
1324 RandomAccessOutputIterator a_end,
1325 RandomAccessOutputIterator b_begin,
1326 RandomAccessOutputIterator b_end,
1329 unsigned a_size = a_end - a_begin;
1330 unsigned b_size = b_end - b_begin;
1335 int delta = a_size - b_size;
1340 v[delta + 1] = a_size - 1;
1346 for (
unsigned d = 0; d <= v.max_d(); ++d)
1348 for (
int k = -d; k <= (int) d; k += 2)
1355 EqualityFunctor>(k, d,
1361 if (found && snak.
end().x() == -1 && snak.
end().y() == -1)
1367 EqualityFunctor>(k, d,
1373 if ((snak.
end().x() == (
int) a_size - 1)
1374 && (snak.
end().y() == (
int) b_size - 1))
1404template<
typename RandomAccessOutputIterator>
1407 RandomAccessOutputIterator a_end,
1408 RandomAccessOutputIterator b_begin,
1409 RandomAccessOutputIterator b_end,
1420 bool reverse =
false);
1479template<
typename RandomAccessOutputIterator,
1480 typename EqualityFunctor>
1483 RandomAccessOutputIterator a_begin,
1484 RandomAccessOutputIterator a_end,
1485 RandomAccessOutputIterator b_base,
1486 RandomAccessOutputIterator b_begin,
1487 RandomAccessOutputIterator b_end,
1492 int a_size = a_end - a_begin;
1493 int b_size = b_end - b_begin;
1494 unsigned a_offset = a_begin - a_base, b_offset = b_begin - b_base;
1496 if (a_size == 0 || b_size == 0)
1498 if (a_size > 0 && b_size == 0)
1501 for (RandomAccessOutputIterator i = a_begin; i < a_end; ++i)
1502 ses.deletions().push_back(
deletion(i - a_base));
1504 if (b_size > 0 && a_size == 0)
1509 int a_full_size = a_end - a_base;
1510 int insertion_index = a_full_size ? a_full_size - 1 : -1;
1512 for (RandomAccessOutputIterator i = b_begin; i < b_end; ++i)
1513 ins.inserted_indexes().push_back(i - b_base);
1515 ses.insertions().push_back(ins);
1524 vector<point> trace;
1528 EqualityFunctor>(a_begin, a_end,
1535 snak.
add(a_offset, b_offset);
1543 x <= snak.
end().x() && y <= snak.
end().y();
1557 for (RandomAccessOutputIterator i = a_begin; i < a_end; ++i)
1558 ses.deletions().push_back(
deletion(i - a_base));
1563 for (RandomAccessOutputIterator i = b_begin; i < b_end; ++i)
1564 ins.inserted_indexes().push_back(i - b_base);
1565 ses.insertions().push_back(ins);
1574 int tmp_ses_len0 = 0;
1579 EqualityFunctor>(a_base, a_begin, a_base + (px.x() + 1),
1580 b_base, b_begin, b_base + (px.y() + 1),
1581 lcs, tmp_ses0, tmp_ses_len0);
1583 lcs.insert(lcs.end(), trace.begin(), trace.end());
1585 int tmp_ses_len1 = 0;
1588 EqualityFunctor>(a_base, a_base + (pu.x() + 1), a_end,
1589 b_base, b_base + (pu.y() + 1), b_end,
1590 lcs, tmp_ses1, tmp_ses_len1);
1591 ABG_ASSERT(tmp_ses0.length() + tmp_ses1.length() == d);
1592 ABG_ASSERT(tmp_ses_len0 + tmp_ses_len1 == d);
1593 ses.append(tmp_ses0);
1594 ses.append(tmp_ses1);
1601 x <= snak.
end().x() && y <= snak.
end().y();
1613 ins.inserted_indexes().push_back(p.y());
1614 ses.insertions().push_back(ins);
1621 ses.deletions().push_back(del);
1626 ses.deletions().push_back(del);
1635 lcs.insert(lcs.end(), trace.begin(), trace.end());
1683template<
typename RandomAccessOutputIterator,
1684 typename EqualityFunctor>
1687 RandomAccessOutputIterator a_end,
1688 RandomAccessOutputIterator b_begin,
1689 RandomAccessOutputIterator b_end,
1695 EqualityFunctor>(a_begin, a_begin, a_end,
1696 b_begin, b_begin, b_end,
1748template<
typename RandomAccessOutputIterator,
1749 typename EqualityFunctor>
1752 RandomAccessOutputIterator a_begin,
1753 RandomAccessOutputIterator a_end,
1754 RandomAccessOutputIterator b_base,
1755 RandomAccessOutputIterator b_begin,
1756 RandomAccessOutputIterator b_end,
1763 EqualityFunctor>(a_base, a_begin, a_end,
1764 b_base, b_begin, b_end,
1803template<
typename RandomAccessOutputIterator,
1804 typename EqualityFunctor>
1807 RandomAccessOutputIterator a_end,
1808 RandomAccessOutputIterator b_begin,
1809 RandomAccessOutputIterator b_end,
1814 EqualityFunctor>(a_begin, a_begin, a_end,
1815 b_begin, b_begin, b_end,
1847template<
typename RandomAccessOutputIterator>
1850 RandomAccessOutputIterator a_end,
1851 RandomAccessOutputIterator b_begin,
1852 RandomAccessOutputIterator b_end,
1905template<
typename RandomAccessOutputIterator,
1906 typename EqualityFunctor>
1909 RandomAccessOutputIterator a_begin,
1910 RandomAccessOutputIterator a_end,
1911 RandomAccessOutputIterator b_base,
1912 RandomAccessOutputIterator b_begin,
1913 RandomAccessOutputIterator b_end,
1919 EqualityFunctor>(a_base, a_begin, a_end,
1920 b_base, b_begin, b_end,
1956template<
typename RandomAccessOutputIterator,
1957 typename EqualityFunctor>
1960 RandomAccessOutputIterator a_end,
1961 RandomAccessOutputIterator b_begin,
1962 RandomAccessOutputIterator b_end,
1966 EqualityFunctor>(a_begin, a_begin, a_end,
1967 b_begin, b_begin, b_end,
1996template<
typename RandomAccessOutputIterator>
1999 RandomAccessOutputIterator a_end,
2000 RandomAccessOutputIterator b_begin,
2001 RandomAccessOutputIterator b_end,
2022template<
typename RandomAccessOutputIterator>
2025 const RandomAccessOutputIterator str1_base,
2026 const RandomAccessOutputIterator str2_base,
2029 if (es.num_deletions() == 0)
2030 out <<
"no deletion:\n";
2031 else if (es.num_deletions() == 1)
2033 out <<
"1 deletion:\n"
2034 <<
"\t happened at index: ";
2038 out << es.num_deletions() <<
" deletions:\n"
2039 <<
"\t happened at indexes: ";
2042 for (vector<deletion>::const_iterator i = es.deletions().begin();
2043 i != es.deletions().end();
2046 if (i != es.deletions().begin())
2048 out << i->index() <<
" (" << str1_base[i->index()] <<
")";
2052 if (es.num_insertions() == 0)
2053 out <<
"no insertion\n";
2054 else if (es.num_insertions() == 1)
2055 out <<
"1 insertion\n";
2057 out << es.num_insertions() <<
" insertions:\n";
2058 for (vector<insertion>::const_iterator i = es.insertions().begin();
2059 i != es.insertions().end();
2062 int idx = i->insertion_point_index();
2064 out <<
"\t before index of first sequence: " << idx + 1
2065 <<
" (" << str1_base[idx + 1] <<
")\n";
2067 out <<
"\t after index of first sequence: " << idx
2068 <<
" (" << str1_base[idx] <<
")\n";
2070 if (!i->inserted_indexes().empty())
2071 out <<
"\t\t inserted indexes from second sequence: ";
2073 for (vector<unsigned>::const_iterator j = i->inserted_indexes().begin();
2074 j != i->inserted_indexes().end();
2077 if (j != i->inserted_indexes().begin())
2079 out << *j <<
" (" << str2_base[*j] <<
")";
#define ABG_ASSERT(cond)
This is a wrapper around the 'assert' glibc call. It allows for its argument to have side effects,...
The array containing the furthest D-path end-points, for each value of K. MAX_D is the maximum value ...
d_path_vec(unsigned size1, unsigned size2)
Constructor of the d_path_vec.
The abstraction of the deletion of one element of a sequence A.
The abstraction of an edit script for transforming a sequence A into a sequence B.
The abstration of an insertion of elements of a sequence B into a sequence A. This is used to represe...
A class representing a vertex in an edit graph, as explained in the paper. A vertex is a basically a ...
The abstraction of the Snake concept, from the paper.
snake()
Default constructor for snake.
bool has_vertical_edge() const
const point & intermediate() const
Getter for the end point of the non-diagonal edge of the snake.
void intermediate(const point &p)
Setter for the end point of the non-diagonal edge of the snake, aka intermediate point.
void end(const point &p)
Setter for the end point of the last diagonal edge, aka snake end point. Note that if the snake has n...
void begin(const point &p)
Getter for the starting point of the non-diagonal edge of the snake, aka begin point.
const point & diagonal_start() const
Getter for the end point of the first diagonal edge, aka diagonal start point. Note that if the snake...
const point & end() const
Getter for the end point of the last diagonal edge, aka snake end point. Note that if the snake has n...
snake(const point &b, const point &i, const point &e)
Constructor from the beginning, intermediate and end points.
void diagonal_start(const point &p)
Setter for the end point of the first diagonal edge, aka diagonal start point.
void set_forward(bool f)
Set to true if the snake is a forward snake; that is, if it was built while walking the edit graph go...
void add(int x_offset, int y_offset)
Add an offset to the abscissas of the points of the snake, and add another offset to the ordinates of...
void set(const point &b, const point &i, const point &d, const point &e)
Setter for the begin, intermediate, diagonal start and end points of the snake.
bool has_diagonal_edge() const
void set(const point &b, const point &i, const point &e)
Setter for the begin, intermediate and end points of the snake.
snake(const point &b, const point &i, const point &d, const point &e)
Constructor from the beginning, intermediate and end points.
const point & begin() const
Getter for the starting point of the non-diagonal edge of the snake.
bool has_horizontal_edge() const
bool is_match_point(RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, const point &point)
Tests if a given point is a match point in an edit graph.
bool compute_middle_snake(const char *str1, const char *str2, snake &s, int &ses_len)
Returns the middle snake of two strings, as well as the length of their shortest editing script.
bool snake_end_points(const snake &s, point &x, point &u)
Get the end points of the snake, as intended by the paper.
void compute_diff(RandomAccessOutputIterator a_base, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_base, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, vector< point > &lcs, edit_script &ses, int &ses_len)
Compute the longest common subsequence of two (sub-regions of) sequences as well as the shortest edit...
bool ends_of_furthest_d_paths_overlap(const point &forward_d_path_end, const point &reverse_d_path_end)
void display_edit_script(const edit_script &es, const RandomAccessOutputIterator str1_base, const RandomAccessOutputIterator str2_base, ostream &out)
Display an edit script on standard output.
bool end_of_frr_d_path_in_k_plus_delta(int k, int d, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, d_path_vec &v, snake &snak)
Find the end of the furthest reaching reverse d-path on diagonal k + delta. Delta is abs(M - N),...
void compute_lcs(const char *str1, const char *str2, int &ses_len, string &lcs)
Compute the longest common subsequence of two strings and return the length of the shortest edit scri...
void print_snake(RandomAccessOutputIterator a_begin, RandomAccessOutputIterator b_begin, const snake &s, ostream &out)
This prints the middle snake of two strings.
void compute_ses(const char *str1, const char *str2, edit_script &ses)
Compute the shortest edit script for transforming a string into another.
int ses_len(const char *str1, const char *str2, bool reverse)
Compute the length of the shortest edit script for two strings. This is done using the "Greedy LCS/SE...
bool point_is_valid_in_graph(point &p, unsigned a_size, unsigned b_size)
bool end_of_fr_d_path_in_k(int k, int d, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_start, RandomAccessOutputIterator b_end, d_path_vec &v, snake &snak)
Find the end of the furthest reaching d-path on diagonal k, for two sequences. In the paper This is r...
Toplevel namespace for libabigail.
An equality functor to deeply compare pointers.
bool operator()(const shared_ptr< T > first, const shared_ptr< T > second) const
This equality operator compares pointers by comparing the pointed-to objects.
bool operator()(const T *first, const T *second) const
This equality operator compares pointers by comparing the pointed-to objects.
bool operator()(const weak_ptr< T > first, const weak_ptr< T > second) const
This equality operator compares pointers by comparing the pointed-to objects.
The default equality functor used by the core diffing algorithms.
bool operator()(const T a, const T b) const
This equality operator uses the default "==" to compare its arguments.