libabigail
abg-symtab-reader.h
Go to the documentation of this file.
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2020-2023 Google, Inc.
5 //
6 // Author: Matthias Maennich
7 
8 /// @file
9 ///
10 /// This contains the declarations for the symtab reader.
11 
12 #ifndef __ABG_SYMTAB_READER_H__
13 #define __ABG_SYMTAB_READER_H__
14 
15 #include <gelf.h>
16 
17 #include <functional>
18 #include <iterator>
19 #include <memory>
20 #include <unordered_map>
21 #include <vector>
22 
23 #include "abg-cxx-compat.h" // for abg_compat::optional
24 #include "abg-ir.h"
25 
26 namespace abigail
27 {
28 namespace symtab_reader
29 {
30 
31 /// The symtab filter is the object passed to the symtab object in order to
32 /// iterate over the symbols in the symtab while applying filters.
33 ///
34 /// The general idea is that it consists of a set of optionally enforced flags,
35 /// such as 'functions' or 'variables'. If not set, those are not filtered for,
36 /// neither inclusive nor exclusive. If set they are all ANDed together.
38 {
39 public:
40  // Default constructor: leaves every criterion unset. Per the class comment, unset criteria are not filtered for.
41  symtab_filter() {}
42 
/// Test a symbol against this filter (defined out of line).
///
/// Per the class comment, the criteria that have been set are ANDed together
/// and unset criteria are ignored.
///
/// @param symbol the ELF symbol to test
///
/// @return presumably true iff @p symbol satisfies every set criterion --
/// confirm against the implementation.
43  bool
44  matches(const elf_symbol& symbol) const;
45 
46  /// Enable or disable function filtering
47  ///
48  /// @param new_value whether to filter for functions
    ///
    /// NOTE(review): functions_ is an abg_compat::optional<bool>, so passing
    /// false presumably stores an engaged `false` (a state distinct from never
    /// calling this setter) rather than clearing the criterion -- confirm the
    /// intended meaning against matches().
49  void
50  set_functions(bool new_value = true)
51  {functions_ = new_value;};
52 
53  /// Enable or disable variable filtering
54  ///
55  /// @param new_value whether to filter for variables
    ///
    /// NOTE(review): variables_ is an abg_compat::optional<bool>, so passing
    /// false presumably stores an engaged `false` (a state distinct from never
    /// calling this setter) rather than clearing the criterion -- confirm the
    /// intended meaning against matches().
56  void
57  set_variables(bool new_value = true)
58  {variables_ = new_value;};
59 
60  /// Enable or disable public symbol filtering
61  ///
62  /// @param new_value whether to filter for public symbols
    ///
    /// NOTE(review): public_symbols_ is an abg_compat::optional<bool>, so
    /// passing false presumably stores an engaged `false` (a state distinct
    /// from never calling this setter) rather than clearing the criterion --
    /// confirm the intended meaning against matches().
63  void
64  set_public_symbols(bool new_value = true)
65  {public_symbols_ = new_value;};
66 
67  /// Enable or disable undefined symbol filtering
68  ///
69  /// @param new_value whether to filter for undefined symbols
    ///
    /// NOTE(review): undefined_symbols_ is an abg_compat::optional<bool>, so
    /// passing false presumably stores an engaged `false` (a state distinct
    /// from never calling this setter) rather than clearing the criterion --
    /// confirm the intended meaning against matches().
70  void
71  set_undefined_symbols(bool new_value = true)
72  {undefined_symbols_ = new_value;};
73 
74  /// Enable or disable kernel symbol filtering
75  ///
76  /// @param new_value whether to filter for kernel symbols
    ///
    /// NOTE(review): kernel_symbols_ is an abg_compat::optional<bool>, so
    /// passing false presumably stores an engaged `false` (a state distinct
    /// from never calling this setter) rather than clearing the criterion --
    /// confirm the intended meaning against matches().
77  void
78  set_kernel_symbols(bool new_value = true)
79  {kernel_symbols_ = new_value;};
80 
81 private:
82  // The symbol is a function (FUNC)
83  abg_compat::optional<bool> functions_;
84 
85  // The symbol is a variable (OBJECT)
86  abg_compat::optional<bool> variables_;
87 
88  // The symbol is publicly accessible (global/weak with default/protected
89  // visibility)
90  abg_compat::optional<bool> public_symbols_;
91 
92  // The symbol is not defined (only declared)
93  abg_compat::optional<bool> undefined_symbols_;
94 
95  // The symbol is listed in the ksymtab (for Linux Kernel binaries).
96  abg_compat::optional<bool> kernel_symbols_;
97 };
98 
99 /// Base iterator for our custom iterator based on whatever the const_iterator
100 /// is for a vector of symbols.
101 /// As of writing this, std::vector<elf_symbol_sptr>::const_iterator.
102 using base_iterator = elf_symbols::const_iterator;
103 
104 /// An iterator to walk a vector of elf_symbols filtered by symtab_filter.
105 ///
106 /// The implementation inherits all properties from the vector's
107 /// const_iterator, but intercepts where necessary to allow effective
108 /// filtering. This makes it a STL compatible iterator for general purpose
109 /// usage.
111 {
112 public:
113  using value_type = base_iterator::value_type;
114  using reference = base_iterator::reference;
115  using pointer = base_iterator::pointer;
116  using difference_type = base_iterator::difference_type;
117  using iterator_category = std::forward_iterator_tag;
118  using iterator_concept = std::forward_iterator_tag;
119 
120  /// Construct the iterator based on a pair of underlying iterators and a
121  /// symtab_filter object. Immediately fast forward to the next element that
122  /// matches the criteria (if any).
123  ///
124  /// @param begin the underlying begin iterator
125  ///
126  /// @param end the underlying end iterator
127  ///
128  /// @param filter the symtab_filter to apply
130  base_iterator end,
131  const symtab_filter& filter = symtab_filter())
132  : base_iterator(begin), end_(end), filter_(filter)
133  {skip_to_next();}
134 
135  /// Pre-increment operator to advance to the next matching element.
136  ///
137  /// @return itself after incrementing
140  {
141  base_iterator::operator++();
142  skip_to_next();
143  return *this;
144  }
145 
146  /// Post-increment operator to advance to the next matching element.
147  ///
148  /// @return a copy of the iterator before incrementing
151  {
152  symtab_iterator result(*this);
153  ++(*this);
154  return result;
155  }
156 
157 private:
158  /// The end of the underlying iterator.
159  const base_iterator end_;
160 
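161  /// The symtab_filter used to determine when to advance. It is held by
     /// reference, so the filter object must stay alive for as long as this
     /// iterator may still advance.
     ///
     /// NOTE(review): the constructor's defaulted filter argument is a
     /// temporary, so an iterator built without an explicit filter is left
     /// with a dangling reference once construction finishes and must not be
     /// advanced.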
162  const symtab_filter& filter_;
163 
164  /// Skip to the next element that matches the filter criteria (if any). Hold
165  /// off when reaching the end of the underlying iterator.
     ///
     /// The underlying iterator is advanced directly through
     /// base_iterator::operator++(). Going through this class's own
     /// pre-increment would call skip_to_next() again for every skipped
     /// element, nesting one pair of calls per non-matching symbol and
     /// re-running the filter on each of them while unwinding. The element the
     /// iterator ends up on is the same either way.
166  void
167  skip_to_next()
168  {
169  while (*this != end_ && !filter_.matches(***this))
170  base_iterator::operator++();
171  }
172 };
173 
174 /// Convenience declaration of a unique_ptr<symtab>
175 class symtab;
176 using symtab_ptr = std::unique_ptr<symtab>;
177 
178 /// symtab is the actual data container of the symtab_reader implementation.
179 ///
180 /// The symtab is instantiated either via an Elf handle (from binary) or from a
181 /// set of existing symbol maps (usually when instantiated from XML). It will
182 /// then discover the symtab, possibly the ksymtab (for Linux Kernel binaries)
183 /// and setup the data containers and lookup maps for later perusal.
184 ///
185 /// The symtab is supposed to be used in a const context as all information is
186 /// already computed at construction time. Symbols are stored sorted to allow
187 /// deterministic reading of the entries.
188 ///
189 /// An example use of the symtab class is
190 ///
191 /// const auto symtab = symtab::load(elf_handle, env);
192 /// symtab_filter filter = symtab->make_filter();
193 /// filter.set_public_symbols();
194 /// filter.set_functions();
195 ///
196 /// for (const auto& symbol : filtered_symtab(*symtab, filter))
197 /// {
198 /// std::cout << symbol->get_name() << "\n";
199 /// }
200 ///
201 /// This uses the filtered_symtab proxy object to capture the filter.
202 class symtab
203 {
204 public:
205  using symbol_predicate = std::function<bool(const elf_symbol_sptr&)>;
206 
207  /// Indicate whether any (kernel) symbols have been seen at construction.
208  ///
209  /// @return for a Linux Kernel binary, whether any ksymtab entries were seen;
     /// for any other binary, whether the vector of discovered symbols is
     /// non-empty.
210  bool
211  has_symbols() const
212  {return is_kernel_binary_ ? has_ksymtab_entries_ : !symbols_.empty();}
213 
215  make_filter() const;
216 
217  /// The (only) iterator type we offer is a const_iterator implemented by the
218  /// symtab_iterator.
220 
221  /// Obtain an iterator to the beginning of the symtab according to the filter
222  /// criteria. Whenever this iterator advances, it skips elements that do not
223  /// match the filter criteria.
224  ///
225  /// @param filter the symtab_filter to match symbols against
226  ///
227  /// @return a filtering const_iterator of the underlying type
229  begin(const symtab_filter& filter) const
230  {return symtab_iterator(symbols_.begin(), symbols_.end(), filter);}
231 
232  /// Obtain an iterator to the end of the symtab.
233  ///
234  /// @return an end iterator
236  end() const
237  {return symtab_iterator(symbols_.end(), symbols_.end());}
238 
239  const elf_symbols&
240  lookup_symbol(const std::string& name) const;
241 
242  const elf_symbol_sptr&
243  lookup_symbol(GElf_Addr symbol_addr) const;
244 
245  const elf_symbol_sptr
246  lookup_undefined_function_symbol(const std::string& name);
247 
248  const elf_symbol_sptr
249  lookup_undefined_variable_symbol(const std::string& name);
250 
252  function_symbol_is_exported(const string&);
253 
255  function_symbol_is_exported(const GElf_Addr symbol_address);
256 
258  variable_symbol_is_exported(const string&);
259 
261  variable_symbol_is_exported(const GElf_Addr symbol_address);
262 
264  function_symbol_is_undefined(const string&);
265 
267  variable_symbol_is_undefined(const string&);
268 
/// Factory: construct a symtab and populate it from an ELF handle (defined
/// out of line).
///
/// @param elf_handle the ELF handle to read the symbol table from
///
/// @param env the ir::environment to operate in
///
/// @param is_suppressed optional predicate over symbols; it defaults to an
/// empty std::function. Presumably symbols for which it returns true are
/// left out of the symtab -- confirm against the implementation.
///
/// @return a unique_ptr owning the new symtab. NOTE(review): whether it can
/// be null on failure is not visible in this header.
269  static symtab_ptr
270  load(Elf* elf_handle,
271  const ir::environment& env,
272  symbol_predicate is_suppressed = nullptr);
273 
274  static symtab_ptr
275  load(string_elf_symbols_map_sptr function_symbol_map,
276  string_elf_symbols_map_sptr variables_symbol_map);
277 
278  void
279  update_main_symbol(GElf_Addr addr, const std::string& name);
280 
281 private:
282  /// Default constructor. Private to enforce creation by factory methods.
283  symtab();
284 
285  /// The vector of symbols we discovered.
286  elf_symbols symbols_;
287 
288  /// Whether this is a Linux Kernel binary
289  bool is_kernel_binary_;
290 
291  /// Whether this kernel_binary has ksymtab entries
292  ///
293  /// A kernel module might not have a ksymtab if it does not export any
294  /// symbols. In order to quickly decide whether the symbol table is empty, we
295  /// remember whether we ever saw ksymtab entries.
296  bool has_ksymtab_entries_;
297 
298  /// Lookup map name->symbol(s)
299  using name_symbol_map_type =
300  std::unordered_map<std::string, std::vector<elf_symbol_sptr>>;
301  name_symbol_map_type name_symbol_map_;
302 
303  /// Lookup map addr->symbol
304  using addr_symbol_map_type = std::unordered_map<GElf_Addr, elf_symbol_sptr>;
305  addr_symbol_map_type addr_symbol_map_;
306 
307  /// Lookup map function entry address -> symbol
308  addr_symbol_map_type entry_addr_symbol_map_;
309 
310  /// Set of undefined function symbol names
311  std::unordered_set<std::string> undefined_function_linkage_names_;
312 
313  /// Set of undefined variable symbol names
314  std::unordered_set<std::string> undefined_variable_linkage_names_;
315 
316  bool cached_undefined_symbol_names_;
317 
318  bool
319  load_(Elf* elf_handle,
320  const ir::environment& env,
321  symbol_predicate is_suppressed);
322 
323  bool
324  load_(string_elf_symbols_map_sptr function_symbol_map,
325  string_elf_symbols_map_sptr variables_symbol_map);
326 
327  GElf_Addr
328  setup_symbol_lookup_tables(Elf* elf_handle,
329  GElf_Sym* elf_symbol,
330  const elf_symbol_sptr& symbol_sptr);
331 
332  void
333  update_function_entry_address_symbol_map(Elf* elf_handle,
334  GElf_Sym* native_symbol,
335  const elf_symbol_sptr& symbol_sptr);
336 
337  void
338  add_alternative_address_lookups(Elf* elf_handle);
339 
340  void
341  collect_undefined_fns_and_vars_linkage_names();
342 };
343 
344 /// Helper class to allow range-for loops on symtabs for C++11 and later code.
345 /// It serves as a proxy for the symtab iterator and provides a begin() method
346 /// without arguments, as required for range-for loops (and possibly other
347 /// iterator based transformations).
348 ///
349 /// Example usage:
350 ///
351 /// for (const auto& symbol : filtered_symtab(tab, filter))
352 /// {
353 /// std::cout << symbol->get_name() << "\n";
354 /// }
355 ///
357 {
358  const symtab& tab_;
359  const symtab_filter filter_;
360 
361 public:
362  /// Construct the proxy object, keeping a reference to the underlying symtab
363  /// and a copy of the filter object. The symtab must outlive the proxy.
      ///
      /// @param tab the symtab to iterate over (stored by reference)
      ///
      /// @param filter the filter to apply (copied into the proxy)
364  filtered_symtab(const symtab& tab, const symtab_filter& filter)
365  : tab_(tab), filter_(filter)
366  {}
367 
368  /// Pass through symtab.begin(), but also pass on the filter.
370  begin() const
371  {return tab_.begin(filter_);}
372 
373  /// Pass through symtab.end().
375  end() const
376  {return tab_.end();}
377 };
378 
379 } // end namespace symtab_reader
380 } // end namespace abigail
381 
382 #endif // __ABG_SYMTAB_READER_H__
Types of the main internal representation of libabigail.
Abstraction of an elf symbol.
Definition: abg-ir.h:923
This is an abstraction of the set of resources necessary to manage several aspects of the internal re...
Definition: abg-ir.h:140
Helper class to allow range-for loops on symtabs for C++11 and later code. It serves as a proxy for t...
symtab::const_iterator begin() const
Pass through symtab.begin(), but also pass on the filter.
symtab::const_iterator end() const
Pass through symtab.end().
filtered_symtab(const symtab &tab, const symtab_filter &filter)
Construct the proxy object, keeping a reference to the underlying symtab and a copy of the filter object.
The symtab filter is the object passed to the symtab object in order to iterate over the symbols in t...
void set_public_symbols(bool new_value=true)
Enable or disable public symbol filtering.
bool matches(const elf_symbol &symbol) const
symtab_filter implementations
void set_functions(bool new_value=true)
Enable or disable function filtering.
void set_kernel_symbols(bool new_value=true)
Enable or disable kernel symbol filtering.
void set_variables(bool new_value=true)
Enable or disable variable filtering.
void set_undefined_symbols(bool new_value=true)
Enable or disable undefined symbol filtering.
An iterator to walk a vector of elf_symbols filtered by symtab_filter.
symtab_iterator(base_iterator begin, base_iterator end, const symtab_filter &filter=symtab_filter())
Construct the iterator based on a pair of underlying iterators and a symtab_filter object....
symtab_iterator operator++(int)
Post-increment operator to advance to the next matching element.
symtab_iterator & operator++()
Pre-increment operator to advance to the next matching element.
symtab is the actual data container of the symtab_reader implementation.
const elf_symbol_sptr lookup_undefined_variable_symbol(const std::string &name)
Lookup an undefined variable symbol with a given name.
const_iterator begin(const symtab_filter &filter) const
Obtain an iterator to the beginning of the symtab according to the filter criteria....
const elf_symbols & lookup_symbol(const std::string &name) const
Get a vector of symbols that are associated with a certain name.
bool has_symbols() const
Indicate whether any (kernel) symbols have been seen at construction.
symtab_filter make_filter() const
symtab implementations
static symtab_ptr load(Elf *elf_handle, const ir::environment &env, symbol_predicate is_suppressed=NULL)
Construct a symtab object and instantiate it from an ELF handle. Also pass in the ir::environment we ...
elf_symbol_sptr function_symbol_is_undefined(const string &)
Test if a name is the name of an undefined function symbol.
elf_symbol_sptr variable_symbol_is_undefined(const string &)
Test if a name is the name of an undefined variable symbol.
elf_symbol_sptr function_symbol_is_exported(const string &)
Test if a given function symbol has been exported.
const_iterator end() const
Obtain an iterator to the end of the symtab.
elf_symbol_sptr variable_symbol_is_exported(const string &)
Test if a given variable symbol has been exported.
const elf_symbol_sptr lookup_undefined_function_symbol(const std::string &name)
Lookup an undefined function symbol with a given name.
void update_main_symbol(GElf_Addr addr, const std::string &name)
Notify the symtab about the name of the main symbol at a given address.
symtab_iterator const_iterator
The (only) iterator type we offer is a const_iterator implemented by the symtab_iterator.
shared_ptr< elf_symbol > elf_symbol_sptr
A convenience typedef for a shared pointer to elf_symbol.
Definition: abg-ir.h:886
std::vector< elf_symbol_sptr > elf_symbols
Convenience typedef for a vector of elf_symbol.
Definition: abg-ir.h:904
shared_ptr< string_elf_symbols_map_type > string_elf_symbols_map_sptr
Convenience typedef for a shared pointer to string_elf_symbols_map_type.
Definition: abg-ir.h:913
Toplevel namespace for libabigail.