This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[glibc/zack/no-nested-includes] Add check-obsolete-constructs checker for nested includes.


https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=fdedb484e4e5f86fafb77e143e05857db78de071

commit fdedb484e4e5f86fafb77e143e05857db78de071
Author: Zack Weinberg <zackw@panix.com>
Date:   Thu Mar 14 21:03:23 2019 -0400

    Add check-obsolete-constructs checker for nested includes.
    
    As a first step toward minimizing the number of public headers that
    include other public headers, add a checker to check-obsolete-constructs
    that will error out on any such inclusion thatâ??s not on a whitelist.
    The whitelist is initialized to all of the nested inclusions that
    already exist; subsequent patches will remove nested inclusions and
    shrink the whitelist, hopefully to the point where we only have
    nested inclusions as mandated by the relevant standards.
    
    	* scripts/check-obsolete-constructs.py
    	(UNIVERSAL_ALLOWED_INCLUDES, HEADER_ALLOWED_INCLUDES)
    	(SYSDEP_ALLOWED_INCLUDES, NESTED_INCLUDES_EXEMPT_RE)
    	(get_allowed_nested, NestedIncludeCheckerWhitelistOnly)
            (NestedIncludeChecker): New.
    	(HeaderChecker): Instantiate and run a NestedIncludeChecker
    	for each header.

Diff:
---
 scripts/check-obsolete-constructs.py | 409 +++++++++++++++++++++++++++++++++++
 1 file changed, 409 insertions(+)

diff --git a/scripts/check-obsolete-constructs.py b/scripts/check-obsolete-constructs.py
index 49338c0..3edb1af 100755
--- a/scripts/check-obsolete-constructs.py
+++ b/scripts/check-obsolete-constructs.py
@@ -17,6 +17,7 @@
 # <http://www.gnu.org/licenses/>.
 
 """Verifies that installed headers do not use any obsolete constructs:
+
  * legacy BSD typedefs superseded by <stdint.h>:
       ushort uint ulong
       u_char u_short u_int u_long
@@ -24,6 +25,9 @@
       caddr_t daddr_t loff_t register_t
    (sys/types.h is allowed to _define_ these types, but not to use them
     to define anything else).
+
+ * nested includes of other top-level headers, except as required by
+   the relevant standards
 """
 
 import argparse
@@ -479,6 +483,409 @@ def ObsoleteTypedefChecker(reporter, fname):
     return ObsoleteNotAllowed(reporter)
 
 #
+# Nested includes
+#
+
+# All header files are allowed to include these headers.
+# TODO: Every header file _should_ include features.h as its first inclusion,
+# but we are not ready to enforce that yet.
+UNIVERSAL_ALLOWED_INCLUDES = {
+    "features.h",
+
+    # Technically these should only ever be included with __need
+    # macros active, but some headers deliberately break this rule
+    # when they think they're dealing with freestanding headers from a
+    # non-GNU compiler, so enforcing it would be more trouble than
+    # it's worth.
+    "stddef.h",
+    "stdarg.h",
+}
+
+# Specific headers are allowed to include specific other headers.
+# Each key in this dictionary should be the installed name of a
+# header, and its value is a list of installed names that are allowed
+# to be included.  (We do not currently enforce any rules about <> vs
+# "" inclusion.)
+HEADER_ALLOWED_INCLUDES = {
+    # ISO C standard headers
+    # mandated: inttypes.h -> stdint.h
+    #           tgmath.h   -> complex.h, math.h
+    #           threads.h  -> time.h
+    "ctype.h":                     [ "endian.h" ],
+    "inttypes.h":                  [ "stdint.h" ],
+    "signal.h":                    [ "sys/ucontext.h" ],
+    "stdlib.h":                    [ "alloca.h", "sys/types.h" ],
+    "string.h":                    [ "strings.h" ],
+    "tgmath.h":                    [ "complex.h", "math.h" ],
+    "threads.h":                   [ "time.h" ],
+
+    # POSIX top-level headers
+    # mandated: pthread.h -> sched.h, time.h
+    # allowed:  ftw.h -> sys/stat.h
+    #           mqueue.h -> fcntl.h
+    #           sched.h -> time.h
+    #           spawn.h -> sched.h
+    "aio.h":                       [ "sys/types.h" ],
+    "ftw.h":                       [ "sys/stat.h", "sys/types.h" ],
+    "glob.h":                      [ "sys/cdefs.h" ],
+    "langinfo.h":                  [ "nl_types.h" ],
+    "mqueue.h":                    [ "fcntl.h", "sys/types.h" ],
+    "poll.h":                      [ "sys/poll.h" ],
+    "pthread.h":                   [ "endian.h", "sched.h", "time.h",
+                                     "sys/cdefs.h" ],
+    "regex.h":                     [ "limits.h", "sys/types.h" ],
+    "sched.h":                     [ "time.h" ],
+    "semaphore.h":                 [ "sys/types.h" ],
+    "spawn.h":                     [ "sched.h", "sys/types.h" ],
+    "syslog.h":                    [ "sys/syslog.h" ],
+    "termios.h":                   [ "sys/ttydefaults.h" ],
+
+    # POSIX sys/ headers
+    # mandated: sys/msg.h -> sys/ipc.h
+    #           sys/sem.h -> sys/ipc.h
+    #           sys/shm.h -> sys/ipc.h
+    # allowed:  sys/time.h -> sys/select.h
+    #           sys/wait.h -> signal.h
+    "sys/msg.h":                   [ "sys/ipc.h" ],
+    "sys/sem.h":                   [ "sys/ipc.h" ],
+    "sys/shm.h":                   [ "sys/ipc.h" ],
+    "sys/time.h":                  [ "sys/select.h" ],
+    "sys/types.h":                 [ "endian.h", "sys/select.h" ],
+    "sys/uio.h":                   [ "sys/types.h" ],
+    "sys/un.h":                    [ "string.h", "sys/cdefs.h" ],
+    "sys/wait.h":                  [ "signal.h" ],
+
+    # POSIX networking headers
+    # allowed: netdb.h -> netinet/in.h
+    #          arpa/inet.h -> netinet/in.h
+    "netdb.h":                     [ "netinet/in.h", "rpc/netdb.h" ],
+    "arpa/inet.h":                 [ "netinet/in.h" ],
+    "net/if.h":                    [ "sys/socket.h", "sys/types.h" ],
+    "netinet/in.h":                [ "endian.h", "sys/socket.h" ],
+    "netinet/tcp.h":               [ "stdint.h", "sys/socket.h",
+                                     "sys/types.h" ],
+
+    # Nonstandardized top-level headers
+    "aliases.h":                   [ "sys/types.h" ],
+    "ar.h":                        [ "sys/cdefs.h" ],
+    "argp.h":                      [ "ctype.h", "errno.h", "getopt.h",
+                                     "limits.h", "stdio.h" ],
+    "argz.h":                      [ "errno.h", "string.h" ],
+    "elf.h":                       [ "stdint.h" ],
+    "envz.h":                      [ "argz.h", "errno.h" ],
+    "fpregdef.h":                  [ "sys/fpregdef.h" ],
+    "fts.h":                       [ "sys/types.h" ],
+    "gshadow.h":                   [ "paths.h" ],
+    "ieee754.h":                   [ "endian.h", "float.h" ],
+    "lastlog.h":                   [ "utmp.h" ],
+    "libintl.h":                   [ "locale.h" ],
+    "link.h":                      [ "dlfcn.h", "elf.h", "sys/types.h" ],
+    "malloc.h":                    [ "stdio.h" ],
+    "memory.h":                    [ "string.h" ],
+    "mntent.h":                    [ "paths.h" ],
+    "nss.h":                       [ "stdint.h" ],
+    "obstack.h":                   [ "string.h" ],
+    "proc_service.h":              [ "sys/procfs.h" ],
+    "pty.h":                       [ "sys/ioctl.h", "termios.h" ],
+    "re_comp.h":                   [ "regex.h" ],
+    "regdef.h":                    [ "sys/fpregdef.h", "sys/regdef.h" ],
+    "sgtty.h":                     [ "sys/ioctl.h" ],
+    "shadow.h":                    [ "paths.h" ],
+    "stdio_ext.h":                 [ "stdio.h" ],
+    "syscall.h":                   [ "sys/syscall.h" ],
+    "termio.h":                    [ "sys/ioctl.h", "termios.h" ],
+    "thread_db.h":                 [ "pthread.h", "stdint.h", "sys/procfs.h",
+                                     "sys/types.h" ],
+    "ucontext.h":                  [ "sys/ucontext.h" ],
+    "utmp.h":                      [ "sys/types.h" ],
+    "utmpx.h":                     [ "sys/time.h" ],
+    "values.h":                    [ "float.h", "limits.h" ],
+    "wait.h":                      [ "sys/wait.h" ],
+
+    # Nonstandardized sys/ headers
+    "sys/acct.h":                  [ "endian.h", "stdint.h", "sys/types.h" ],
+    "sys/auxv.h":                  [ "elf.h", "sys/cdefs.h" ],
+    "sys/bitypes.h":               [ "sys/types.h" ],
+    "sys/dir.h":                   [ "dirent.h" ],
+    "sys/elf.h":                   [ "sys/procfs.h" ],
+    "sys/epoll.h":                 [ "stdint.h", "sys/types.h" ],
+    "sys/errno.h":                 [ "errno.h" ],
+    "sys/eventfd.h":               [ "stdint.h" ],
+    "sys/fanotify.h":              [ "stdint.h" ],
+    "sys/fcntl.h":                 [ "fcntl.h" ],
+    "sys/file.h":                  [ "fcntl.h" ],
+    "sys/fsuid.h":                 [ "sys/types.h" ],
+    "sys/gmon.h":                  [ "sys/types.h" ],
+    "sys/inotify.h":               [ "stdint.h" ],
+    "sys/ioctl.h":                 [ "sys/ttydefaults.h" ],
+    "sys/mount.h":                 [ "sys/ioctl.h" ],
+    "sys/mtio.h":                  [ "sys/ioctl.h", "sys/types.h" ],
+    "sys/param.h":                 [ "endian.h", "limits.h", "signal.h",
+                                     "sys/types.h" ],
+    "sys/platform/ppc.h":          [ "stdint.h" ],
+    "sys/procfs.h":                [ "sys/time.h", "sys/types.h",
+                                     "sys/user.h" ],
+    "sys/profil.h":                [ "sys/time.h", "sys/types.h" ],
+    "sys/ptrace.h":                [ "sys/ucontext.h" ],
+    "sys/quota.h":                 [ "sys/types.h" ],
+    "sys/random.h":                [ "sys/types.h" ],
+    "sys/raw.h":                   [ "stdint.h", "sys/ioctl.h" ],
+    "sys/sendfile.h":              [ "sys/types.h" ],
+    "sys/signal.h":                [ "signal.h" ],
+    "sys/signalfd.h":              [ "stdint.h" ],
+    "sys/socketvar.h":             [ "sys/socket.h" ],
+    "sys/termios.h":               [ "termios.h" ],
+    "sys/timerfd.h":               [ "time.h" ],
+    "sys/timex.h":                 [ "sys/time.h" ],
+    "sys/ttychars.h":              [ "sys/ttydefaults.h" ],
+    "sys/ucontext.h":              [ "sys/procfs.h" ],
+    "sys/unistd.h":                [ "unistd.h" ],
+    "sys/user.h":                  [ "sys/types.h" ],
+    "sys/vfs.h":                   [ "sys/statfs.h" ],
+    "sys/xattr.h":                 [ "sys/types.h" ],
+
+    # Nonstandardized networking headers
+    "ifaddrs.h":                   [ "sys/socket.h" ],
+    "resolv.h":                    [ "arpa/nameser.h", "netinet/in.h",
+                                     "stdio.h", "sys/cdefs.h", "sys/param.h",\
+                                     "sys/types.h" ],
+
+    "arpa/nameser.h":              [ "arpa/nameser_compat.h", "stdint.h",
+                                     "sys/param.h", "sys/types.h" ],
+    "arpa/nameser_compat.h":       [ "endian.h" ],
+    "net/ethernet.h":              [ "stdint.h", "sys/types.h", "sys/cdefs.h",
+                                     "net/if_ether.h" ],
+    "net/if_arp.h":                [ "stdint.h", "sys/socket.h",
+                                     "sys/types.h", "sys/cdefs.h" ],
+    "net/if_ppp.h":                [ "net/if.h", "net/ppp_defs.h", "stdint.h",
+                                     "sys/ioctl.h", "sys/types.h" ],
+    "net/if_shaper.h":             [ "net/if.h", "stdint.h", "sys/ioctl.h",
+                                     "sys/types.h" ],
+    "net/route.h":                 [ "netinet/in.h", "sys/socket.h",
+                                     "sys/types.h" ],
+    "netatalk/at.h":               [ "sys/socket.h" ],
+    "netinet/ether.h":             [ "netinet/if_ether.h" ],
+    "netinet/icmp6.h":             [ "inttypes.h", "netinet/in.h", "string.h",
+                                     "sys/types.h" ],
+    "netinet/if_ether.h":          [ "net/ethernet.h", "net/if_arp.h",
+                                     "sys/types.h", "stdint.h" ],
+    "netinet/if_fddi.h":           [ "stdint.h", "sys/types.h" ],
+    "netinet/if_tr.h":             [ "stdint.h", "sys/types.h" ],
+    "netinet/igmp.h":              [ "netinet/in.h", "sys/cdefs.h",
+                                     "sys/types.h" ],
+    "netinet/in_systm.h":          [ "stdint.h", "sys/types.h" ],
+    "netinet/ip.h":                [ "netinet/in.h", "sys/types.h" ],
+    "netinet/ip6.h":               [ "inttypes.h", "netinet/in.h" ],
+    "netinet/ip_icmp.h":           [ "netinet/in.h", "netinet/ip.h",
+                                     "stdint.h", "sys/types.h" ],
+    "netinet/udp.h":               [ "stdint.h", "sys/types.h" ],
+    "netipx/ipx.h":                [ "stdint.h", "sys/types.h" ],
+    "netrom/netrom.h":             [ "netax25/ax25.h" ],
+    "netrose/rose.h":              [ "netax25/ax25.h", "sys/socket.h" ],
+    "protocols/routed.h":          [ "sys/socket.h" ],
+    "protocols/rwhod.h":           [ "paths.h", "sys/types.h" ],
+    "protocols/talkd.h":           [ "stdint.h", "sys/socket.h",
+                                     "sys/types.h" ],
+    "protocols/timed.h":           [ "sys/time.h", "sys/types.h" ],
+
+    # Internal headers
+    "features.h":                  [ "gnu/stubs.h", "stdc-predef.h",
+                                     "sys/cdefs.h" ],
+
+    "bits/fcntl.h":                [ "sys/types.h" ],
+    "bits/ipc.h":                  [ "sys/types.h" ],
+    "bits/procfs.h":               [ "signal.h", "sys/ucontext.h" ],
+    "bits/pthreadtypes-arch.h":    [ "endian.h" ],
+    "bits/sem.h":                  [ "sys/types.h" ],
+    "bits/socket.h":               [ "sys/types.h" ],
+    "bits/stat.h":                 [ "endian.h" ],
+    "bits/statfs.h":               [ "endian.h" ],
+    "bits/types/res_state.h":      [ "netinet/in.h", "sys/types.h" ],
+    "bits/utmp.h":                 [ "paths.h", "sys/time.h", "sys/types.h" ],
+    "bits/utmpx.h":                [ "paths.h", "sys/time.h" ],
+    "bits/wctype-wchar.h":         [ "endian.h" ],
+}
+
+# As above, but each group of whitelist entries is only used for
+# headers whose pathname includes a sysdeps directory with that name.
+# This allows us, for instance, to restrict the use of Linux kernel
+# headers to the Linux-specific variants of glibc headers.
+SYSDEP_ALLOWED_INCLUDES = {
+    'linux': {
+        # Nonstandardized sys/ headers
+        "sys/cachectl.h":          [ "asm/cachectl.h" ],
+        "sys/fanotify.h":          [ "linux/fanotify.h" ],
+        "sys/kd.h":                [ "linux/kd.h" ],
+        "sys/pci.h":               [ "linux/pci.h" ],
+        "sys/prctl.h":             [ "linux/prctl.h" ],
+        "sys/quota.h":             [ "linux/quota.h" ],
+        "sys/soundcard.h":         [ "linux/soundcard.h" ],
+        "sys/syscall.h":           [ "asm/unistd.h" ],
+        "sys/sysctl.h":            [ "linux/sysctl.h" ],
+        "sys/sysinfo.h":           [ "linux/kernel.h" ],
+        "sys/user.h":              [ "asm/ptrace.h", "asm/reg.h" ],
+        "sys/vm86.h":              [ "asm/vm86.h" ],
+        "sys/vt.h":                [ "linux/vt.h" ],
+
+        # Nonstandardized networking headers
+        "net/ethernet.h":          [ "linux/if_ether.h" ],
+        "net/if_slip.h":           [ "linux/if_slip.h" ],
+        "net/ppp-comp.h":          [ "linux/ppp-comp.h" ],
+        "net/ppp_defs.h":          [ "asm/types.h", "linux/ppp_defs.h" ],
+        "netatalk/at.h":           [ "asm/types.h", "linux/atalk.h" ],
+        "netinet/if_ether.h":      [ "linux/if_ether.h" ],
+        "netinet/if_fddi.h":       [ "linux/if_fddi.h" ],
+        "nfs/nfs.h":               [ "linux/nfs.h" ],
+
+        # Internal headers
+        "bits/errno.h":            [ "linux/errno.h" ],
+        "bits/fcntl-linux.h":      [ "linux/falloc.h" ],
+        "bits/ioctl-types.h":      [ "asm/ioctls.h" ],
+        "bits/ioctls.h":           [ "asm/ioctls.h", "linux/sockios.h" ],
+        "bits/local_lim.h":        [ "linux/limits.h" ],
+        "bits/param.h":            [ "linux/limits.h", "linux/param.h" ],
+        "bits/sigcontext.h":       [ "asm/sigcontext.h" ],
+        "bits/socket.h":           [ "asm/socket.h" ],
+    },
+
+    'mach': {
+        "bits/spin-lock-inline.h": [ "errno.h", "lock-intern.h" ],
+        "bits/sigcontext.h":       [ "mach/machine/fp_reg.h" ],
+    },
+
+    'mips': {
+        "sys/asm.h":               [ "sgidefs.h" ],
+        "sys/fpregdef.h":          [ "sgidefs.h" ],
+        "sys/regdef.h":            [ "sgidefs.h" ],
+        "sys/tas.h":               [ "sgidefs.h" ],
+        "sys/ucontext.h":          [ "sgidefs.h" ],
+        "sys/user.h":              [ "sgidefs.h" ],
+
+        "bits/fcntl.h":            [ "sgidefs.h" ],
+        "bits/link.h":             [ "sgidefs.h" ],
+        "bits/long-double.h":      [ "sgidefs.h" ],
+        "bits/procfs.h":           [ "sgidefs.h" ],
+        "bits/setjmp.h":           [ "sgidefs.h" ],
+        "bits/sigcontext.h":       [ "sgidefs.h" ],
+        "bits/stat.h":             [ "sgidefs.h" ],
+        "bits/wordsize.h":         [ "sgidefs.h" ],
+    },
+}
+
+# Headers that are exempt from the nested includes check.  This is a
+# giant regex because fnmatch.fnmatch doesn't treat / specially and
+# glob.glob can't be applied to anything but the file system.
+#
+# Hurd-specific headers are exempt for now, because for them, nested
+# include minimization is not a pure cleanup project, the way it is
+# for the standard headers.  The Hurd maintainers first need to make
+# some design decisions about which headers _should_ include which
+# other headers.
+#
+# Sun RPC headers are exempt because most of them are obsolete within
+# glibc; cleanups should be done within the TIRPC project instead.
+# (Same rationale as for exempting them from the obsolete-types checks.)
+NESTED_INCLUDES_EXEMPT_RE = re.compile(r"""
+    (?: \A | / ) (?:
+
+    # Hurd-specific headers
+        faultexc_server\.h
+      | hurd\.h
+      | lock-intern\.h
+      | mach\.h
+      | mach_error\.h
+      | mach_init\.h
+      | mach-shortcuts\.h
+      | spin-lock\.h
+      | device/[^/]+?\.h
+      | mach/[^/]+?\.h
+      | mach/i386/[^/]+?\.h
+      | hurd/[^/]+?\.h
+
+    # Sun RPC headers
+      | rpc/[^/]+?\.h
+      | rpcsvc/[^/]+?\.h
+
+    ) \Z
+""", re.VERBOSE)
+
+
+def get_allowed_nested(fname):
+    """Retrieve the set of allowed nested includes for a header whose
+       filename is FNAME."""
+    HEADER_ALLOWED = HEADER_ALLOWED_INCLUDES
+    SYSDEP_ALLOWED = SYSDEP_ALLOWED_INCLUDES
+
+    allowed = UNIVERSAL_ALLOWED_INCLUDES.copy()
+    sysdirs = {}
+
+    # HEADER_ALLOWED is keyed by the name to be used in an #include,
+    # which is some suffix of the filename; we don't know how many
+    # directory components to chop off, so we go one at a time until
+    # we find a match.  os.path does not include a utility function to
+    # chop off the _first_ component of a pathname.
+    fname = os.path.normpath(fname)
+    inc = fname
+    while True:
+        try:
+            allowed.update(HEADER_ALLOWED[inc])
+            break
+
+        except KeyError:
+            pos = inc.find('/')
+            if pos == -1:
+                break
+            dirname = inc[:pos]
+            inc = inc[(pos+1):]
+            if not inc:
+                break
+            if dirname in SYSDEP_ALLOWED and dirname not in sysdirs:
+                sysdirs[dirname] = SYSDEP_ALLOWED[dirname]
+
+    for hgroup in sysdirs.values():
+        inc = fname
+        while True:
+            try:
+                allowed.update(hgroup[inc])
+                break
+            except KeyError:
+                pos = inc.find('/')
+                if pos == -1:
+                    break
+                dirname = inc[:pos]
+                inc = inc[(pos+1):]
+                if not inc:
+                    break
+
+    return frozenset(allowed)
+
+class NestedIncludeWhitelistOnly(ConstructChecker):
+    def __init__(self, reporter, fname):
+        super().__init__(reporter)
+        self.allowed_nested = get_allowed_nested(fname)
+
+    def examine(self, tok):
+        if tok.kind == "HEADER_NAME":
+            included = tok.text[1:-1] # chop off "" or <>
+            if included in self.allowed_nested:
+                pass
+            # We allow any public header to include any bits header.
+            # More specific rules about which bits headers should be
+            # used by which public headers are enforced by the bits
+            # headers themselves.
+            elif included.startswith("bits/"):
+                pass
+            else:
+                self.reporter.error(tok, "inappropriate inclusion of {!r}")
+
+
+def NestedIncludeChecker(reporter, fname):
+    if NESTED_INCLUDES_EXEMPT_RE.search(fname):
+        return NoCheck(reporter)
+    else:
+        return NestedIncludeWhitelistOnly(reporter, fname)
+
+#
 # Master control
 #
 
@@ -508,9 +915,11 @@ class HeaderChecker:
             return
 
         typedef_checker = ObsoleteTypedefChecker(self, self.fname)
+        nested_checker = NestedIncludeChecker(self, self.fname)
 
         for tok in tokenize_c(contents, self):
             typedef_checker.examine(tok)
+            nested_checker.examine(tok)
 
 def main():
     ap = argparse.ArgumentParser(description=__doc__)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]