[5/5] RFC: add the checker scripts

Tom Tromey tromey@redhat.com
Thu Feb 7 21:07:00 GMT 2013

This adds the exception checker to contrib/.

I think that future changes to the Python code, at the very least,
should be run through this checker.  As you can see from this series,
and from earlier runs, it catches real bugs; and in its default mode it
doesn't seem to be particularly prone to false reports.

Also, the GCC Python plugin is easy to build, and I believe most
developers are using GCC anyhow.

I'm happy to make allowances and/or test patches for anybody not using
GCC.


	* contrib/excheck.py: New file.
	* contrib/exsummary.py: New file.
	* contrib/gcc-with-excheck: New file.
 gdb/contrib/excheck.py       |  294 ++++++++++++++++++++++++++++++++++++++++++
 gdb/contrib/exsummary.py     |  185 ++++++++++++++++++++++++++
 gdb/contrib/gcc-with-excheck |   20 +++
 3 files changed, 499 insertions(+), 0 deletions(-)
 create mode 100644 gdb/contrib/excheck.py
 create mode 100644 gdb/contrib/exsummary.py
 create mode 100755 gdb/contrib/gcc-with-excheck

diff --git a/gdb/contrib/excheck.py b/gdb/contrib/excheck.py
new file mode 100644
index 0000000..4955c9e
--- /dev/null
+++ b/gdb/contrib/excheck.py
@@ -0,0 +1,294 @@
+#   Copyright 2011, 2013 Free Software Foundation, Inc.
+#   This is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see
+#   <http://www.gnu.org/licenses/>.
+# This is a GCC plugin that computes some exception-handling data for
+# gdb.  This data can then be summarized and checked by the
+# exsummary.py script.
+# To use:
+# * First, install the GCC Python plugin.  See
+#   https://fedorahosted.org/gcc-python-plugin/
+# * export PYTHON_PLUGIN=/full/path/to/plugin/directory
+#   This should be the directory holding "python.so".
+# * cd build/gdb; make mostlyclean
+# * make CC=.../gcc-with-excheck
+#   This will write a number of .py files in the build directory.
+# * python .../exsummary.py
+#   This will show the violations.
+import gcc
+import gccutils
+import sys
+# Where our output goes.
+output_file = None
+# Cleanup functions require special treatment, because they take a
+# function argument, but in theory the function must be nothrow.
+cleanup_functions = {
+    'make_cleanup': 1,
+    'make_cleanup_dtor': 1,
+    'make_final_cleanup': 1,
+    'make_my_cleanup2': 1,
+    'make_my_cleanup': 1
+# Functions which may throw but which we want to ignore.
+ignore_functions = {
+    # This one is super special.
+    'exceptions_state_mc': 1,
+    # gdb generally pretends that internal_error cannot throw, even
+    # though it can.
+    'internal_error': 1,
+    # do_cleanups and friends are supposedly nothrow but we don't want
+    # to run afoul of the indirect function call logic.
+    'do_cleanups': 1,
+    'do_final_cleanups': 1
+# Functions which take a function argument, but which are not
+# interesting, usually because the argument is not called in the
+# current context.
+non_passthrough_functions = {
+    'signal': 1,
+    'add_internal_function': 1
+# Return True if the type is from Python.
+def type_is_pythonic(t):
+    if isinstance(t, gcc.ArrayType):
+        t = t.type
+    if not isinstance(t, gcc.RecordType):
+        return False
+    # Hack.
+    return str(t).find('struct Py') == 0
+# Examine all the fields of a struct.  We don't currently need any
+# sort of recursion, so this is simple for now.
+def examine_struct_fields(initializer):
+    global output_file
+    for idx2, value2 in initializer.elements:
+        if isinstance(idx2, gcc.Declaration):
+            if isinstance(value2, gcc.AddrExpr):
+                value2 = value2.operand
+                if isinstance(value2, gcc.FunctionDecl):
+                    output_file.write("declare_nothrow(%s)\n"
+                                      % repr(str(value2.name)))
+# Examine all global variables looking for pointers to functions in
+# structures whose types were defined by Python.
+def examine_globals():
+    global output_file
+    vars = gcc.get_variables()
+    for var in vars:
+        if not isinstance(var.decl, gcc.VarDecl):
+            continue
+        output_file.write("################\n")
+        output_file.write("# Analysis for %s\n" % var.decl.name)
+        if not var.decl.initial:
+            continue
+        if not type_is_pythonic(var.decl.type):
+            continue
+        if isinstance(var.decl.type, gcc.ArrayType):
+            for idx, value in var.decl.initial.elements:
+                examine_struct_fields(value)
+        else:
+            gccutils.check_isinstance(var.decl.type, gcc.RecordType)
+            examine_struct_fields(var.decl.initial)
+# Called at the end of compilation to write out some data derived from
+# globals and to close the output.
+def close_output(*args):
+    global output_file
+    examine_globals()
+    output_file.close()
+# The pass which derives some exception-checking information.  We take
+# a two-step approach: first we get a call graph from the compiler.
+# This is emitted by the plugin as Python code.  Then, we run a second
+# program that reads all the generated Python and uses it to get a
+# global view of exception routes in gdb.
+#
+# The records written here are define_function(), function_call(),
+# has_indirect_call() and declare_cleanup() calls, plus '#' comments.
+class GdbExceptionChecker(gcc.GimplePass):
+    # OUTPUT_FILE is the open file object that receives the generated
+    # Python code.
+    def __init__(self, output_file):
+        gcc.GimplePass.__init__(self, 'gdb_exception_checker')
+        self.output_file = output_file
+    # Write str(OBJ) to the output as a '#' comment line.
+    def log(self, obj):
+        self.output_file.write("# %s\n" % str(obj))
+    # Return true if FN is a call to a method on a Python object.
+    # We know these cannot throw in the gdb sense.
+    # The shape recognized is an SSA name whose defining statement is
+    # an assignment from a component reference to a field named
+    # 'tp_dealloc' or 'tp_free'.
+    def fn_is_python_ignorable(self, fn):
+        if not isinstance(fn, gcc.SsaName):
+            return False
+        stmt = fn.def_stmt
+        if not isinstance(stmt, gcc.GimpleAssign):
+            return False
+        if stmt.exprcode is not gcc.ComponentRef:
+            return False
+        rhs = stmt.rhs[0]
+        if not isinstance(rhs, gcc.ComponentRef):
+            return False
+        if not isinstance(rhs.field, gcc.FieldDecl):
+            return False
+        return rhs.field.name == 'tp_dealloc' or rhs.field.name == 'tp_free'
+    # Decode a function call and write something to the output.
+    # THIS_FUN is the enclosing function that we are processing.
+    # FNDECL is the call to process; it might not actually be a DECL
+    # node.
+    # LOC is the location of the call.
+    # Returns the callee's name for a call to a named function, and
+    # the empty string in every other case.
+    def handle_one_fndecl(self, this_fun, fndecl, loc):
+        callee_name = ''
+        # Look through an address-of expression to the thing it names.
+        if isinstance(fndecl, gcc.AddrExpr):
+            fndecl = fndecl.operand
+        if isinstance(fndecl, gcc.FunctionDecl):
+            # Ordinary call to a named function.
+            callee_name = str(fndecl.name)
+            self.output_file.write("function_call(%s, %s, %s)\n"
+                                   % (repr(callee_name),
+                                      repr(this_fun.decl.name),
+                                      repr(str(loc))))
+        elif self.fn_is_python_ignorable(fndecl):
+            # Call to tp_dealloc or tp_free.
+            pass
+        elif (isinstance(fndecl, gcc.SsaName)
+              and isinstance(fndecl.var, gcc.ParmDecl)):
+            # We can ignore an indirect call via a parameter to the
+            # current function, because this is handled via the rule
+            # for passthrough functions.
+            pass
+        else:
+            # Any other indirect call.
+            self.output_file.write("has_indirect_call(%s, %s)\n"
+                                   % (repr(this_fun.decl.name),
+                                      repr(str(loc))))
+        return callee_name
+    # This does most of the work for examine_one_bb.
+    # THIS_FUN is the enclosing function.
+    # BB is the basic block to process.
+    # Returns True if this block is the header of a TRY_CATCH, False
+    # otherwise.
+    def examine_one_bb_inner(self, this_fun, bb):
+        if not bb.gimple:
+            return False
+        try_catch = False
+        for stmt in bb.gimple:
+            # Fall back to the enclosing function's location when the
+            # statement carries none.
+            loc = stmt.loc
+            if not loc:
+                loc = this_fun.decl.location
+            if not isinstance(stmt, gcc.GimpleCall):
+                continue
+            callee_name = self.handle_one_fndecl(this_fun, stmt.fn, loc)
+            # A call to this function is what identifies the header
+            # block of a TRY_CATCH.
+            if callee_name == 'exceptions_state_mc_action_iter':
+                try_catch = True
+            global non_passthrough_functions
+            # For these callees the function arguments are not
+            # examined at all.
+            if callee_name in non_passthrough_functions:
+                continue
+            # We have to specially handle calls where an argument to
+            # the call is itself a function, e.g., qsort.  In general
+            # we model these as "passthrough" -- we assume that in
+            # addition to the call the qsort there is also a call to
+            # the argument function.
+            for arg in stmt.args:
+                # We are only interested in arguments which are functions.
+                t = arg.type
+                if isinstance(t, gcc.PointerType):
+                    t = t.dereference
+                if not isinstance(t, gcc.FunctionType):
+                    continue
+                if isinstance(arg, gcc.AddrExpr):
+                    arg = arg.operand
+                global cleanup_functions
+                if callee_name in cleanup_functions:
+                    if not isinstance(arg, gcc.FunctionDecl):
+                        gcc.inform(loc, 'cleanup argument not a DECL: %s' % repr(arg))
+                    else:
+                        # Cleanups must be nothrow.
+                        self.output_file.write("declare_cleanup(%s)\n"
+                                               % repr(str(arg.name)))
+                else:
+                    # Assume we have a passthrough function, like
+                    # qsort or an iterator.  We model this by
+                    # pretending there is an ordinary call at this
+                    # point.
+                    self.handle_one_fndecl(this_fun, arg, loc)
+        return try_catch
+    # Examine all the calls in a basic block and generate output for
+    # them.
+    # THIS_FUN is the enclosing function.
+    # BB is the basic block to examine.
+    # BB_WORKLIST is a list of basic blocks to work on; we add the
+    # appropriate successor blocks to this.
+    # SEEN_BBS is a map whose keys are basic blocks we have already
+    # processed.  We use this to ensure that we only visit a given
+    # block once.
+    def examine_one_bb(self, this_fun, bb, bb_worklist, seen_bbs):
+        try_catch = self.examine_one_bb_inner(this_fun, bb)
+        for edge in bb.succs:
+            if edge.dest in seen_bbs:
+                continue
+            # The successor is marked as seen whether or not it is
+            # queued below, so a skipped block is never reconsidered.
+            seen_bbs[edge.dest] = 1
+            if try_catch:
+                # This is bogus, but we magically know the right
+                # answer.
+                # Only the successor reached through the false edge is
+                # queued after a TRY_CATCH header.
+                if edge.false_value:
+                    bb_worklist.append(edge.dest)
+            else:
+                bb_worklist.append(edge.dest)
+    # Iterate over all basic blocks in THIS_FUN.
+    def iterate_bbs(self, this_fun):
+        # Iteration must be in control-flow order, because if we see a
+        # TRY_CATCH construct we need to drop all the contained blocks.
+        bb_worklist = [this_fun.cfg.entry]
+        seen_bbs = {}
+        seen_bbs[this_fun.cfg.entry] = 1
+        # The list grows while we walk it: examine_one_bb appends the
+        # successors that still need a visit.
+        for bb in bb_worklist:
+            self.examine_one_bb(this_fun, bb, bb_worklist, seen_bbs)
+    # Pass entry point, called with the function FUN to analyze.
+    # Every function with a CFG and a decl gets a define_function
+    # record; the body is scanned only when the function's name is
+    # not in ignore_functions.
+    def execute(self, fun):
+        if fun and fun.cfg and fun.decl:
+            self.output_file.write("################\n")
+            self.output_file.write("# Analysis for %s\n" % fun.decl.name)
+            self.output_file.write("define_function(%s, %s)\n"
+                                   % (repr(fun.decl.name),
+                                      repr(str(fun.decl.location))))
+            global ignore_functions
+            if fun.decl.name not in ignore_functions:
+                self.iterate_bbs(fun)
+def main(**kwargs):
+    global output_file
+    output_file = open(gcc.get_dump_base_name() + '.gdb_exc.py', 'w')
+    # We used to use attributes here, but there didn't seem to be a
+    # big benefit over hard-coding.
+    output_file.write('declare_throw("throw_exception")\n')
+    output_file.write('declare_throw("throw_verror")\n')
+    output_file.write('declare_throw("throw_vfatal")\n')
+    output_file.write('declare_throw("throw_error")\n')
+    gcc.register_callback(gcc.PLUGIN_FINISH_UNIT, close_output)
+    ps = GdbExceptionChecker(output_file)
+    ps.register_after('ssa')
diff --git a/gdb/contrib/exsummary.py b/gdb/contrib/exsummary.py
new file mode 100644
index 0000000..5c9d8c4
--- /dev/null
+++ b/gdb/contrib/exsummary.py
@@ -0,0 +1,185 @@
+#   Copyright 2011, 2013 Free Software Foundation, Inc.
+#   This is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see
+#   <http://www.gnu.org/licenses/>.
+import sys
+import glob
+# Compute the summary information from the files created by
+# excheck.py.  Run in the build directory where you used the
+# excheck.py plugin.
+class Function:
+    def __init__(self, name):
+        self.name = name
+        self.location = None
+        self.callers = []
+        self.can_throw = False
+        self.marked_nothrow = False
+        self.reason = None
+    def log(self, message):
+        print "%s: note: %s" % (self.location, message)
+    def set_location(self, location):
+        self.location = location
+    # CALLER is an Edge.
+    def add_caller(self, caller):
+        # self.log("adding call from %s" % caller.from_fn.name)
+        self.callers.append(caller)
+        # self.log("len = %d" % len(self.callers))
+    def consistency_check(self):
+        if self.marked_nothrow and self.can_throw:
+            print ("%s: error: %s marked as both 'throw' and 'nothrow'"
+                   % (self.location, self.name))
+    def declare_nothrow(self):
+        self.marked_nothrow = True
+        self.consistency_check()
+    def declare_throw(self):
+        result = not self.can_throw # Return True the first time
+        self.can_throw = True
+        self.consistency_check()
+        return result
+    def print_stack(self, is_indirect):
+        if is_indirect:
+            print ("%s: error: function %s is marked nothrow but is assumed to throw due to indirect call"
+                   % (self.location, self.name))
+        else:
+            print ("%s: error: function %s is marked nothrow but can throw"
+                   % (self.location, self.name))
+        edge = self.reason
+        while edge is not None:
+            print ("%s: info: via call to %s"
+                   % (edge.location, edge.to_fn.name))
+            edge = edge.to_fn.reason
+    def mark_throw(self, edge, work_list, is_indirect):
+        if not self.can_throw:
+            # self.log("can throw")
+            self.can_throw = True
+            self.reason = edge
+            if self.marked_nothrow:
+                self.print_stack(is_indirect)
+            else:
+                # Do this in the 'else' to avoid extra error
+                # propagation.
+                work_list.append(self)
+class Edge:
+    def __init__(self, from_fn, to_fn, location):
+        self.from_fn = from_fn
+        self.to_fn = to_fn
+        self.location = location
+# Work list of known-throwing functions.
+work_list = []
+# Map from function name to Function object.
+function_map = {}
+# Work list of indirect calls.
+indirect_functions = []
+# Whether we should process cleanup functions as well.
+process_cleanups = False
+# Whether we should process indirect function calls.
+process_indirect = False
+def declare(fn_name):
+    global function_map
+    if fn_name not in function_map:
+        function_map[fn_name] = Function(fn_name)
+    return function_map[fn_name]
+def define_function(fn_name, location):
+    fn = declare(fn_name)
+    fn.set_location(location)
+def declare_throw(fn_name):
+    global work_list
+    fn = declare(fn_name)
+    if fn.declare_throw():
+        work_list.append(fn)
+def declare_nothrow(fn_name):
+    fn = declare(fn_name)
+    fn.declare_nothrow()
+def declare_cleanup(fn_name):
+    global process_cleanups
+    fn = declare(fn_name)
+    if process_cleanups:
+        fn.declare_nothrow()
+def function_call(to, frm, location):
+    to_fn = declare(to)
+    frm_fn = declare(frm)
+    to_fn.add_caller(Edge(frm_fn, to_fn, location))
+def has_indirect_call(fn_name, location):
+    global indirect_functions
+    fn = declare(fn_name)
+    phony = Function("<indirect call>")
+    phony.add_caller(Edge(fn, phony, location))
+    indirect_functions.append(phony)
+def mark_functions(worklist, is_indirect):
+    for callee in worklist:
+        for edge in callee.callers:
+            edge.from_fn.mark_throw(edge, worklist, is_indirect)
+def help_and_exit():
+    print "Usage: exsummary [OPTION]..."
+    print ""
+    print "Read the .py files from the exception checker plugin and"
+    print "generate an error summary."
+    print ""
+    print "  --cleanups     Include invalid behavior in cleanups"
+    print "  --indirect     Include assumed errors due to indirect function calls"
+    sys.exit(0)
+def main():
+    global work_list
+    global indirect_functions
+    global process_cleanups
+    global process_indirect
+    for arg in sys.argv:
+        if arg == '--cleanups':
+            process_cleanups = True
+        elif arg == '--indirect':
+            process_indirect = True
+        elif arg == '--help':
+            help_and_exit()
+    for fname in sorted(glob.glob('*.c.gdb_exc.py')):
+        execfile(fname)
+    print "================"
+    print "= Ordinary marking"
+    print "================"
+    mark_functions(work_list, False)
+    if process_indirect:
+        print "================"
+        print "= Indirect marking"
+        print "================"
+        mark_functions(indirect_functions, True)
+    return 0
+if __name__ == '__main__':
+    status = main()
+    sys.exit(status)
diff --git a/gdb/contrib/gcc-with-excheck b/gdb/contrib/gcc-with-excheck
new file mode 100755
index 0000000..2a4a9c7
--- /dev/null
+++ b/gdb/contrib/gcc-with-excheck
@@ -0,0 +1,20 @@
+#   Copyright 2011, 2013 Free Software Foundation, Inc.
+#   This is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see
+#   <http://www.gnu.org/licenses/>.
+# You must set PYTHON_PLUGIN in the environment.
+# It should be the directory holding the "python.so" file.
+# All command-line arguments are passed to gcc unchanged.
+
+# Stop with a clear message if PYTHON_PLUGIN is unset or empty.
+: "${PYTHON_PLUGIN:?must be set to the directory holding python.so}"
+
+# The plugin path is quoted so that a directory name containing
+# spaces still reaches gcc as a single argument.
+gcc -fplugin="$PYTHON_PLUGIN/python.so" -fplugin-arg-python-command='from excheck import main; main()' "$@"

More information about the Gdb-patches mailing list