libabigail
abg-diff-utils.cc
Go to the documentation of this file.
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2013-2023 Red Hat, Inc.
5 
6 #include <cstring>
7 
8 #include "abg-fwd.h"
9 #include "abg-internal.h"
10 // <headers defining libabigail's API go under here>
11 ABG_BEGIN_EXPORT_DECLARATIONS
12 
13 #include "abg-diff-utils.h"
14 
15 ABG_END_EXPORT_DECLARATIONS
16 // </headers defining libabigail's API>
17 
18 /// @file
19 ///
20 /// This file defines the declarations found in abg-diff-utils.h
21 namespace abigail
22 {
23 
24 namespace diff_utils
25 {
26 /// @return true iff the end of a furthest forward d_path overlaps the
27 /// end of a furtherst reverse d_path on the same diagonal. This is
28 /// defined by the lemma 3 of the paper.
29 ///
30 /// @param forward_d_path_end
31 bool
32 ends_of_furthest_d_paths_overlap(const point& forward_d_path_end,
33  const point& reverse_d_path_end)
34 {
35  return ((forward_d_path_end.x() - forward_d_path_end.y())
36  == (reverse_d_path_end.x() - reverse_d_path_end.y())
37  && forward_d_path_end.x() >= reverse_d_path_end.x());
38 }
39 //// Test if a point is within the boundaries of the edit graph.
40 ///
41 /// @param p the point to test.
42 ///
43 /// @param a_size the size of abscissas. That is, the size of the
44 /// first input to consider.
45 ///
46 /// @param b_size the of ordinates. That is, the size of the second
47 /// input to consider.
48 ///
49 /// @return true iff a point is within the boundaries of the edit
50 /// graph.
51 bool
53  unsigned a_size,
54  unsigned b_size)
55 {
56  return (p.x() > -1
57  && p.x() < (int) a_size
58  && p.y() > -1
59  && p.y() < (int) b_size);
60 }
61 
62 /// Get the end points of the snake, as intended by the paper.
63 ///
64 /// @param s the snake to consider.
65 ///
66 /// @param x the starting point of the snake.
67 ///
68 /// @param u the end point of the snake.
69 bool
70 snake_end_points(const snake& s, point& x, point& u)
71 {
72  if (s.is_empty())
73  return false;
74 
75  if (s.is_forward())
76  {
77  x = s.intermediate();
78  u = s.end();
79  }
80  else
81  {
82  x = s.end();
83  u = s.intermediate();
84  }
85  return true;
86 }
87 
88 /// Returns the middle snake of two strings, as well as the length of
89 /// their shortest editing script.
90 ///
91 /// @param str1 the first string to consider.
92 ///
93 /// @param str2 the second string to consider.
94 ///
95 /// @param snake_begin the point representing the beginning
96 bool
97 compute_middle_snake(const char* str1, const char* str2,
98  snake& s, int& ses_len)
99 {
100  bool has_snake = false;
101  int str1_size = strlen(str1), str2_size = strlen(str2);
102 
103  if (compute_middle_snake<const char*,
104  default_eq_functor>(str1, str1 + str1_size,
105  str2 , str2 + str2_size,
106  s, ses_len))
107  has_snake = true;
108 
109  return has_snake;
110 }
111 
112 /// Compute the length of the shortest edit script for two strings.
113 /// This is done using the "Greedy LCS/SES" of figure 2 in the paper.
114 /// It can walk the edit graph either foward (when reverse is false)
115 /// or backward starting from the end (when reverse is true).
116 ///
117 /// @param str1 the first string to consider.
118 ///
119 /// @param str2 the second string to consider.
120 ///
121 /// @param reverse when true, walk the edit graph backward, starting
122 /// from the point (M,N) (with M and N being the length of str1 and
123 /// str2). When false, walk the edit graph forward, starting from the
124 /// point (0,0).
125 ///
126 /// @return the length of the shortest edit script.
127 int
128 ses_len(const char* str1,
129  const char* str2,
130  bool reverse)
131 {
132  int str1_size = strlen(str1), str2_size = strlen(str2);
133 
134  d_path_vec v(str1_size, str2_size);
135  return ses_len(str1, str1 + str1_size,
136  str2, str2 + str2_size,
137  v, reverse);
138 }
139 
140 /// Compute the longest common subsequence of two strings and return
141 /// the length of the shortest edit script for transforming the first
142 /// string into the second.
143 ///
144 /// @param str1 the first string to consider.
145 ///
146 /// @param str2 the second string to consider.
147 ///
148 /// @param ses_len the length of the shortest edit script. This is
149 /// set by the function iff it returns true.
150 ///
151 /// @param the longest common subsequence of the two strings. This is
152 /// set the function iff it returns true.
153 ///
154 /// @return true if the function could compute an lcs, false
155 /// otherwise.
156 void
157 compute_lcs(const char* str1, const char* str2, int &ses_len, string& lcs)
158 {
159  vector<point> result;
160  edit_script ses;
161 
162  compute_diff(str1, str1 + strlen(str1),
163  str2, str2 + strlen(str2),
164  result, ses);
165 
166  ses_len = ses.length();
167 
168  for (unsigned i = 0; i < result.size(); ++i)
169  {
170  int x = result[i].x(), y = result[i].y();
171  ABG_ASSERT(str1[x] == str2[y]);
172  lcs.push_back(str1[x]);
173  }
174 }
175 
176 /// Compute the shortest edit script for transforming a string into
177 /// another.
178 ///
179 /// @param str1 the first string to consider.
180 ///
181 /// @param str2 the second string to consider.
182 ///
183 /// @param ses the resulting edit script.
184 ///
185 /// @pram ses_len the length of the edit script.
186 void
187 compute_ses(const char* str1, const char* str2, edit_script& ses)
188 {
189  vector<point> lcs;
190  compute_diff(str1, str1 + strlen(str1),
191  str2, str2 + strlen(str2),
192  lcs, ses);
193 }
194 
195 }//end namespace diff_utils
196 
197 }//end namespace abigail
This file declares types and operations implementing the "O(ND) Difference Algorithm" (aka diff2) fro...
#define ABG_ASSERT(cond)
This is a wrapper around the 'assert' glibc call. It allows for its argument to have side effects,...
Definition: abg-fwd.h:1714
The array containing the furthest D-path end-points, for each value of K. MAX_D is the maximum value ...
The abstraction of an edit script for transforming a sequence A into a sequence B.
A class representing a vertex in an edit graph, as explained in the paper. A vertex is a basically a ...
The abstraction of the Snake concept, from the paper.
const point & end() const
Getter for the end point of the last diagonal edge, aka snake end point. Note that if the snake has n...
const point & intermediate() const
Getter for the end point of the non-diagonal edge of the snake.
bool compute_middle_snake(const char *str1, const char *str2, snake &s, int &ses_len)
Returns the middle snake of two strings, as well as the length of their shortest editing script.
bool snake_end_points(const snake &s, point &x, point &u)
Get the end points of the snake, as intended by the paper.
bool ends_of_furthest_d_paths_overlap(const point &forward_d_path_end, const point &reverse_d_path_end)
void compute_lcs(const char *str1, const char *str2, int &ses_len, string &lcs)
Compute the longest common subsequence of two strings and return the length of the shortest edit scri...
void compute_ses(const char *str1, const char *str2, edit_script &ses)
Compute the shortest edit script for transforming a string into another.
void compute_diff(RandomAccessOutputIterator a_base, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_base, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, vector< point > &lcs, edit_script &ses, int &ses_len)
Compute the longest common subsequence of two (sub-regions of) sequences as well as the shortest edit...
int ses_len(const char *str1, const char *str2, bool reverse)
Compute the length of the shortest edit script for two strings. This is done using the "Greedy LCS/SE...
bool point_is_valid_in_graph(point &p, unsigned a_size, unsigned b_size)
Toplevel namespace for libabigail.
The default equality functor used by the core diffing algorithms.