Take the following program: :::::::::::::: header.hxx :::::::::::::: #include <malloc.h> class Heap { private: char *_memory; size_t _size; public: Heap(); void* allocate (size_t size); static size_t header_size(); }; :::::::::::::: heap.cxx :::::::::::::: #include "header.hxx" size_t Heap::header_size () { return 42; } Heap::Heap() { _size = header_size () + 32; _memory = (char *) malloc (_size); } void* Heap::allocate(size_t size) { _size += size; return _memory + header_size(); } :::::::::::::: main.cxx :::::::::::::: #include "header.hxx" int size (int resize) { return (int)Heap::header_size() - resize; } int main (int argc, char **argv) { return size (argc); } Compile it with g++ 4.7.1: $ g++ -g -O2 -c main.cxx $ g++ -g -O2 -c heap.cxx $ g++ -g main.o heap.o -o prog Now try inspecting it with stap a couple of times and notice it occasionally fails. In this example we are interested in the offset of one of the field members of the Heap class and use @cast on address zero to get it: $ stap -p2 -v -e 'probe process("./prog").function("main") { printf("_size member offset: %d\n", &@cast(0, "Heap")->_size); }' > /dev/null Pass 1: parsed user script and 96 library script(s) using 212776virt/34756res/3024shr/32128data kb, in 110usr/10sys/127real ms. Pass 2: analyzed script: 1 probe(s), 1 function(s), 0 embed(s), 0 global(s) using 215824virt/36464res/3588shr/33056data kb, in 10usr/0sys/7real ms. $ stap -p2 -v -e 'probe process("./prog").function("main") { printf("_size member offset: %d\n", &@cast(0, "Heap")->_size); }' > /dev/null Pass 1: parsed user script and 96 library script(s) using 212776virt/34756res/3024shr/32128data kb, in 110usr/10sys/129real ms. Pass 2: analyzed script: 1 probe(s), 1 function(s), 0 embed(s), 0 global(s) using 215824virt/36464res/3588shr/33056data kb, in 10usr/0sys/7real ms. 
$ stap -p2 -v -e 'probe process("./prog").function("main") { printf("_size member offset: %d\n", &@cast(0, "Heap")->_size); }' > /dev/null Pass 1: parsed user script and 96 library script(s) using 212776virt/34756res/3024shr/32128data kb, in 120usr/10sys/128real ms. semantic error: unable to find member '_size' for class Heap: operator '->' at <input>:1:97 source: probe process("./prog").function("main") { printf("_size member offset: %d\n", &@cast(0, "Heap")->_size); } ^ Pass 2: analyzed script: 1 probe(s), 0 function(s), 0 embed(s), 0 global(s) using 213716virt/36340res/3476shr/33052data kb, in 10usr/0sys/7real ms. Pass 2: analysis failed. Try again with another '--vp 01' option. Isn't that weird! It worked 2 out of 3 times, but then it suddenly failed.
The reason it sometimes fails is that dwflpp::global_alias_caching_callback puts types in an unordered_map<std::string, Dwarf_Die> (cu_type_cache_t) and, depending on the order in which this map is filled, it might pick the "wrong" class_type for Heap. It can pick the wrong one because there are two class_type DIEs named Heap in this example, one per compile unit: $ eu-readelf --debug-dump=info prog [...] [ b] compile_unit producer (strp) "GNU C++ 4.7.1 20120629 (Red Hat 4.7.1-1) -fpreprocessed -mtune=generic -march=x86-64 -g -O2" language (data1) C++ (4) name (strp) "main.cxx" comp_dir (strp) "/home/mark/src/tests/cxx-decl" ranges (sec_offset) range list [ 0] low_pc (addr) 000000000000000000 stmt_list (sec_offset) 0 [...] [ 289] class_type name (strp) "Heap" byte_size (data1) 16 decl_file (data1) 6 decl_line (data1) 3 sibling (ref4) [ 2a6] [ 295] subprogram external (flag_present) Yes name (strp) "header_size" decl_file (data1) 6 decl_line (data1) 11 linkage_name (strp) "_ZN4Heap11header_sizeEv" type (ref4) [ 30] accessibility (data1) public (1) declaration (flag_present) Yes [ 38e] compile_unit producer (strp) "GNU C++ 4.7.1 20120629 (Red Hat 4.7.1-1) -fpreprocessed -mtune=generic -march=x86-64 -g -O2" language (data1) C++ (4) name (strp) "heap.cxx" comp_dir (strp) "/home/mark/src/tests/cxx-decl" low_pc (addr) 0x00000000004005b0 <_ZN4Heap11header_sizeEv> high_pc (addr) 0x00000000004005ec stmt_list (sec_offset) 220 [...] 
[ 610] class_type name (strp) "Heap" byte_size (data1) 16 decl_file (data1) 5 decl_line (data1) 3 sibling (ref4) [ 67e] [ 61c] member name (strp) "_memory" decl_file (data1) 5 decl_line (data1) 6 type (ref4) [ 412] data_member_location (data1) 0 [ 628] member name (strp) "_size" decl_file (data1) 5 decl_line (data1) 7 type (ref4) [ 3b7] data_member_location (data1) 8 [ 634] subprogram external (flag_present) Yes name (strp) "Heap" decl_file (data1) 5 decl_line (data1) 9 accessibility (data1) public (1) declaration (flag_present) Yes object_pointer (ref4) [ 644] sibling (ref4) [ 64a] [ 644] formal_parameter type (ref4) [ 67e] artificial (flag_present) Yes [ 64a] subprogram external (flag_present) Yes name (strp) "allocate" decl_file (data1) 5 decl_line (data1) 10 linkage_name (strp) "_ZN4Heap8allocateEm" type (ref4) [ 410] accessibility (data1) public (1) declaration (flag_present) Yes object_pointer (ref4) [ 662] sibling (ref4) [ 66d] [ 662] formal_parameter type (ref4) [ 67e] artificial (flag_present) Yes [ 667] formal_parameter type (ref4) [ 3b7] [ 66d] subprogram external (flag_present) Yes name (strp) "header_size" decl_file (data1) 5 decl_line (data1) 11 linkage_name (strp) "_ZN4Heap11header_sizeEv" type (ref4) [ 3b7] accessibility (data1) public (1) declaration (flag_present) Yes Note how the first one, in the main.cxx compile_unit, is incomplete: it only contains the header_size subprogram member (since that is the only member used in main.cxx). By just looking at the class_type DIE itself we cannot tell which one we need, though, since [ 289] and [ 610] look identical: both have a byte_size attribute and neither is marked with DW_AT_declaration. I think this is a gcc DWARF generation bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54181 "partial DW_TAG_class_type generated with DW_AT_byte_size and without DW_AT_declaration"
Proposed patch/workaround: diff --git a/dwflpp.cxx b/dwflpp.cxx index 3fb50b2..ea93469 100644 --- a/dwflpp.cxx +++ b/dwflpp.cxx @@ -791,6 +791,32 @@ cache_type_prefix(Dwarf_Die* type) return ""; } +/* GCC might generate a struct/class without DW_AT_declaration, + but that only contains members which have DW_AT_declaration + set. We aren't interested in those. PR14434 (GCC bug #54181). */ +static bool +has_only_decl_members (Dwarf_Die *die) +{ + Dwarf_Die child; + if (dwarf_child(die, &child) != 0) + return false; /* no members */ + + do + { + if (! dwarf_hasattr(&child, DW_AT_declaration)) + return false; /* real member found. */ + int tag = dwarf_tag(&child); + if ((tag == DW_TAG_namespace + || tag == DW_TAG_structure_type + || tag == DW_TAG_class_type) + && ! has_only_decl_members (&child)) + return false; /* real grand child member found. */ + } + while (dwarf_siblingof(&child, &child) == 0); + + return true; /* Tried all children and grandchildren. */ +} + int dwflpp::global_alias_caching_callback(Dwarf_Die *die, bool has_inner_types, const string& prefix, void *arg) @@ -798,7 +824,8 @@ dwflpp::global_alias_caching_callback(Dwarf_Die *die, bool has_inner_types, cu_type_cache_t *cache = static_cast<cu_type_cache_t*>(arg); const char *name = dwarf_diename(die); - if (!name || dwarf_hasattr(die, DW_AT_declaration)) + if (!name || dwarf_hasattr(die, DW_AT_declaration) + || has_only_decl_members(die)) return DWARF_CB_OK; int tag = dwarf_tag(die);
commit 87eeec9407f8afa5af1037600272eebf0561e1d1 Author: Mark Wielaard <mjw@redhat.com> Date: Mon Aug 6 20:31:20 2012 +0200 PR14434 Filter out partial structs/classes. GCC might generate a struct/class without DW_AT_declaration, but that only contains members which have DW_AT_declaration set. We aren't interested in those. PR14434 (GCC bug #54181). Filter them out with a new function has_only_decl_members in dwflpp.cxx. Add new testcase testsuite/systemtap.base/partial-class-type.exp.