]> sourceware.org Git - libabigail.git/commitdiff
abidb: Introduce a tool to manage the ABI of a Linux distribution
authorFrank Ch. Eigler <fche@redhat.com>
Tue, 5 Dec 2023 15:42:44 +0000 (10:42 -0500)
committerDodji Seketeli <dodji@redhat.com>
Fri, 22 Mar 2024 16:18:17 +0000 (17:18 +0100)
This patch introduces a new tool named abidb.  It manages a Git
repository of the Application Binary Interfaces of a set of shared
libraries.  Those ABIs are stored in the Git repository in the form of
ABIXML files.

The tool then supports the verification of the ABI compatibility of a
given binary against the stored ABIs of shared libraries.

* configure.ac: Condition building abidb on the presence of python
and the required modules.
* doc/manuals/Makefile.am: Add the abidb.rst documentation to
source distribution.  Distribute the abidb.1 manpage file as well.
* doc/manuals/abidb.rst: New documentation file.
* doc/manuals/conf.py: Configure the generation of the abidb.1
manpage from the abidb.rst file above.
* doc/manuals/libabigail-tools.rst: Add a reference to the new
abidb tool.
* tests/Makefile.am: Register runtestabidb1.sh and runtestabidb2.sh as
tests for abidb.  Register runtestabidb1.sh.in and runtestabidb2.sh.in as
input files for autoconf generated runtestabidb1.sh and runtestabidb2.sh.
* tests/data/Makefile.am: Add abidb2client.c, abidb2so.c and
abidb2soBAD.c to source distribution.
* tests/data/abidb2client.c: New source file for test input binaries.
* tests/data/abidb2so.c: Likewise.
* tests/data/abidb2soBAD.c: Likewise.
* tests/runtestabidb1.sh.in: New test script input for autoconf generation.
* tests/runtestabidb2.sh.in: Likewise.
* tools/Makefile.am: Add the new abidb tool to the set of tools.
* tools/abidb: The New Tool, ladies and gentlemen!

Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
Signed-off-by: Dodji Seketeli <dodji@redhat.com>
14 files changed:
configure.ac
doc/manuals/Makefile.am
doc/manuals/abidb.rst [new file with mode: 0644]
doc/manuals/conf.py
doc/manuals/libabigail-tools.rst
tests/Makefile.am
tests/data/Makefile.am
tests/data/test-abidb/abidb2client.c [new file with mode: 0644]
tests/data/test-abidb/abidb2so.c [new file with mode: 0644]
tests/data/test-abidb/abidb2soBAD.c [new file with mode: 0644]
tests/runtestabidb1.sh.in [new file with mode: 0755]
tests/runtestabidb2.sh.in [new file with mode: 0755]
tools/Makefile.am
tools/abidb [new file with mode: 0755]

index a94255a19a209951c1b248bc5cb82013f1175d0c..4b115e6569ccce783d1b876bc312277d1a379ca4 100644 (file)
@@ -816,6 +816,23 @@ except koji.ConfigurationError:
   fi
 fi
 
+dnl abidb checks
+ENABLE_ABIDB=no
+if test x$PYTHON3_INTERPRETER != xno; then
+  AX_CHECK_PYTHON_MODULES([git libarchive],
+                         [$PYTHON],
+                         [FOUND_ALL_PYTHON_MODULES=yes],
+                         [FOUND_ALL_PYTHON_MODULES=no])
+
+  if test x$FOUND_ALL_PYTHON_MODULES = xno; then
+    AC_MSG_NOTICE([missing python modules: $MISSING_PYTHON_MODULES])
+    AC_MSG_NOTICE([disabling abidb as a result])
+  else
+    ENABLE_ABIDB=yes
+  fi
+fi
+AM_CONDITIONAL(ENABLE_ABIDB, test x$ENABLE_ABIDB = xyes)
+
 AM_CONDITIONAL(ENABLE_FEDABIPKGDIFF, test x$ENABLE_FEDABIPKGDIFF = xyes)
 AM_CONDITIONAL(ENABLE_RUNNING_TESTS_WITH_PY3, test x$RUN_TESTS_WITH_PY3 = xyes)
 AM_CONDITIONAL(ENABLE_PYTHON3_INTERPRETER, test x$PYTHON3_INTERPRETER != xno)
@@ -1194,6 +1211,10 @@ AC_CONFIG_FILES([tests/runtestdefaultsupprs.py],
                [chmod +x tests/runtestdefaultsupprs.py])
 AC_CONFIG_FILES([tests/runtestdefaultsupprspy3.sh],
                [chmod +x tests/runtestdefaultsupprspy3.sh])
+AC_CONFIG_FILES([tests/runtestabidb1.sh],
+               [chmod +x tests/runtestabidb1.sh])
+AC_CONFIG_FILES([tests/runtestabidb2.sh],
+               [chmod +x tests/runtestabidb2.sh])
 
 AC_OUTPUT
 
@@ -1225,6 +1246,7 @@ AC_MSG_NOTICE([
     Enable GNU tar archive support in abipkgdiff   : ${ENABLE_TAR}
     Enable bash completion                        : ${ENABLE_BASH_COMPLETION}
     Enable fedabipkgdiff                           : ${ENABLE_FEDABIPKGDIFF}
+    Enable abidb                                   : ${ENABLE_ABIDB}    
     Enable python 3                               : ${ENABLE_PYTHON3}
     Enable CTF front-end                           : ${ENABLE_CTF}
     Enable BTF front-end                           : ${ENABLE_BTF}
index e281378561be8f11bcd9b98aa43c181b43585580..3a94f7c2b3ed70345fe6e23ac599d32baa2f6a1b 100644 (file)
@@ -7,6 +7,7 @@ abidiff.rst \
 abipkgdiff.rst \
 abicompat.rst \
 abidw.rst \
+abidb.rst \
 abilint.rst \
 conf.py \
 index.rst \
@@ -173,7 +174,7 @@ texinfodocs =
 
 if ENABLE_MANUAL
 
-section1_manpages += abipkgdiff.1 abidiff.1 abidw.1 abilint.1 abicompat.1
+section1_manpages += abipkgdiff.1 abidiff.1 abidw.1 abidb.1 abilint.1 abicompat.1
 section7_manpages += libabigail.7
 manpages +=  $(section1_manpages) $(section7_manpages)
 texinfodocs += abigail.info
diff --git a/doc/manuals/abidb.rst b/doc/manuals/abidb.rst
new file mode 100644 (file)
index 0000000..52eb464
--- /dev/null
@@ -0,0 +1,174 @@
+
+======
+abidb
+======
+
+``abidb`` manages a git repository of abixml files describing shared
+libraries, and checks binaries against them.  ``elfutils`` and
+``libabigail`` programs are used to query and process the binaries.
+``abidb`` works well with ``debuginfod`` to fetch needed DWARF content
+automatically.
+
+
+.. _abidb_invocation_label:
+
+Invocation
+==========
+
+::
+
+  abidb [OPTIONS] [--submit PATH1 PATH2 ...] [--check PATH1 PATH2 ...]
+
+Common Options
+==============
+
+  * ``--abicompat PATH``
+
+    Specify the path to the ``abicompat`` program to use.  By default,
+    in the absence of this option, the ``abicompat`` program found in
+    directories listed in the $PATH environment is used.
+
+  * ``--abidw PATH``
+
+    Specify the path to the ``abidw`` program to use.  By default,
+    in the absence of this option, the ``abidw`` program found in
+    directories listed in the $PATH environment is used.
+
+  * ``--distrobranch BRANCH``
+
+    Specify the git branch for the abixml files in the git repo.  The
+    default is a string like DISTRO/VERSION/ARCHITECTURE, computed
+    from the running environment.
+
+  * ``--git REPO``
+
+    Specify the preexisting git working tree for abidb to submit to or
+    check against.  The default is the current working directory.  It
+    may be used concurrently by multiple "check" operations, but only
+    one "submit" operation.
+
+  * ``--help | -h``
+
+    Display a short help about the command and exit.
+
+  * ``--loglevel LOGLEVEL``
+
+    Specify the diagnostic level for messages to stderr.  One of
+    ``debug``, ``info``, ``warning``, ``error``, or ``critical``;
+    case-insensitive.  The default is ``info``.
+
+  * ``--timeout SECONDS``
+
+    Specify a maximum limit to the execution time (in seconds) allowed
+    for the ``abidw`` and ``abicompat`` programs that are executed.
+    By default, no limit is set for the execution time of these
+    programs.
+
+Submit Options
+==============
+
+  * ``--archive | -Z .EXT[=CMD]``
+
+    Designate PATH names with a ``.EXT`` suffix to be treated as
+    archives.  If ``CMD`` is present, pipe the PATH through the given
+    shell command, otherwise pass as if through ``cat``.  The
+    resulting stream is then opened by ``libarchive``, to enumerate
+    the contents of a wide variety of possible archive file formats.
+    Process each file in the archive individually into abixml.
+
+    For example, ``-Z .zip`` will process each file in a zip file, and
+    ``-Z .deb='dpkg-deb --fsys-tarfile'`` will process each payload file
+    in a Debian archive.
+
+
+  * ``--filter REGEX``
+
+    Limit files selected for abixml extraction to those that match the
+    given regular expression.  The default is ``/lib.*\.so``, as a
+    heuristic to identify shared libraries.
+
+
+  * ``--submit PATH1 PATH2 ...``
+
+    Using ``abidw``, extract abixml for each of the listed files,
+    generally shared libraries, subject to the filename filter and the
+    archive decoding options.  Save the output of each ``abidw`` run
+    into the selected distrobranch of the selected git repo.  If
+    ``--submit`` and ``--check`` are both given, do submit operations
+    first.
+
+
+  * ``--sysroot PREFIX``
+
+    Specify a prefix path that is to be removed from submitted
+    file names.
+
+Check Options
+=============
+
+  * ``--check PATH1 PATH2 ...``
+
+    Using ``abicompat``, compare each of the listed files, generally
+    executables, against abixml documents for selected versions for
+    all shared libraries needed by the executable.  These are listed
+    by enumerating the dynamic segment tags ``DT_NEEDED`` of the
+    executable.
+
+  * ``--ld-library-path DIR1:DIR2:DIR3...``
+
+    Select the search paths for abixml documents used to locate any
+    particular ``SONAME``.  The first given directory wins.  However,
+    all versions of the same ``SONAME`` in that directory are selected
+    for comparison.  The default is unspecified, which means to search
+    for all matching ``SONAME`` entries in the distrobranch,
+    regardless of specific directory.
+
+Exit Code
+=========
+
+In case of successful submission and/or checking of all paths, the
+exit code is 0.
+
+In case of error, the exit code of ``abidb`` is nonzero, and a brief
+listing of the binaries unable to be submitted and/or checked is
+printed.
+
+
+Git Repository Schema
+=====================
+
+``abidb`` stores abixml documents in a git repo with the following
+naming schema within the distrobranch:
+
+1. The directory path leading to the shared library file
+
+2. The SONAME of the shared library file, as a subdirectory name
+
+3. A file named BUILDID.xml, where ``BUILDID`` is the hexadecimal ELF
+   build-id note of the shared library.
+
+For example:
+
++---------------------------+-------------------------------------------------------------------+
+|shared library file name   |abixml path in git                                                 |
++---------------------------+-------------------------------------------------------------------+
+| /usr/lib64/libc.so.6.2.32 | /usr/lib64/libc.so.6/788cdd41a15985bf8e0a48d213a46e07d58822df.xml |
+| /usr/lib64/libc.so.6.2.33 | /usr/lib64/libc.so.6/e2ca832f1c2112aea9d7b9bc639e97e873a6b516.xml |
+| /lib/ld-linux.so.2        | /lib/ld-linux.so.2/b65f3c15b129f33f44f504da1719926aec03c07d.xml   |
++---------------------------+-------------------------------------------------------------------+
+
+The intent of including the buildid in the name is so that as a distro
+is updated with multiple versions of a given shared library, they can
+be represented nearby but non-conflicting.  The ``SONAME`` is used in
+the second-last name component, inspired by the behavior of ``ld.so`` and
+``ldconfig``, which rely on symbolic links to map references from
+the ``SONAME`` to an actual file.
+
+See Also
+========
+
+ * ELF: http://en.wikipedia.org/wiki/Executable_and_Linkable_Format
+ * DWARF: https://www.dwarfstd.org
+ * Debuginfod: https://sourceware.org/elfutils/Debuginfod.html
+ * Git: https://git-scm.com/
+ * Libarchive: https://www.libarchive.org/
index 1e3728656975e5eae179c2018e11832c17f9c266..3ff7431efb9f83b6150d7a02b4fbf1ac24dec760 100644 (file)
@@ -217,6 +217,7 @@ man_pages = [
     ('abidiff', 'abidiff', u'compare ABIs of ELF files ', [u'Dodji Seketeli'], 1),
     ('abipkgdiff', 'abipkgdiff', u'compare ABIs of ELF files in software packages ', [u'Dodji Seketeli'], 1),
     ('abidw', 'abidw', u'serialize the ABI of an ELF file', [u'Dodji Seketeli'], 1),
+    ('abidb', 'abidb', u'check binary against abixml corpus and/or submit new data', [u'Frank Ch. Eigler'], 1),    
     ('abilint', 'abilint', u'validate an abigail ABI representation', [u'Dodji Seketeli'], 1),
     ('abicompat', 'abicompat', u'check ABI compatibility', [u'Dodji Seketeli'], 1),
     ('fedabipkgdiff', 'fedabipkgdiff', u'compare ABIs of Fedora packages', [u'Chenxiong Qi'], 1),
index 17600b8e7bcb4ddef3d12daf8dc2124deb764b9c..acac9ffab2635869eed9945db3cf36eb885e4855 100644 (file)
@@ -19,6 +19,7 @@ Tools manuals
    abipkgdiff
    kmidiff
    abidw
+   abidb
    abicompat
    abilint
    fedabipkgdiff
index 2c5e728611b3e2a1debbe2745763ba230b2433cf..141c8768e8f49cd600679437567c9f14f84eb627 100644 (file)
@@ -12,6 +12,13 @@ FEDABIPKGDIFF_TEST += runtestfedabipkgdiff.py
 endif
 endif
 
+ABIDB_TESTS =
+if ENABLE_ABIDB
+ABIDB_TESTS += runtestabidb1.sh runtestabidb2.sh
+endif
+
+
+
 RUN_TEST_SLOW_SELF_COMPARE=runtestslowselfcompare.sh
 
 # rather expensive tests (keep in this order), > 1s runtime
@@ -24,7 +31,8 @@ runtestdiffpkg                        \
 runtestannotate                        \
 runtestdifffilter              \
 runtestreaddwarf               \
-runtestcanonicalizetypes.sh
+runtestcanonicalizetypes.sh     \
+$(ABIDB_TESTS)
 
 if CTF_READER
 TESTS += runtestreadctf
@@ -74,6 +82,10 @@ runtestfedabipkgdiffpy3.sh.in        \
 runtestdefaultsupprspy3.sh.in
 endif
 
+if ENABLE_ABIDB
+EXTRA_DIST += runtestabidb1.sh.in runtestabidb2.sh.in
+endif
+
 CLEANFILES = \
 runtestcanonicalizetypes.output.txt \
 runtestcanonicalizetypes.output.final.txt
@@ -212,6 +224,13 @@ runtestfedabipkgdiffpy3.sh$(EXEEXT):
 runtestdefaultsupprspy3_sh_SOURCES =
 runtestdefaultsupprspy3.sh$(EXEEXT):
 
+runtestabidb1_sh_SOURCES =
+runtestabidb1.sh$(EXEEXT):
+
+runtestabidb2_sh_SOURCES =
+runtestabidb2.sh$(EXEEXT):
+
+
 AM_CPPFLAGS=-I${abs_top_srcdir}/include -I${abs_top_srcdir}/src \
 -I${abs_top_builddir}/include -I${abs_top_srcdir}/tools -fPIC
 
index da37f56c4207b04caa6ff457eca9926e2add371f..b5a9335ffe6ee4e5770cb8fb6842ce5c6f41927f 100644 (file)
@@ -2592,4 +2592,8 @@ test-symtab/kernel-5.6/single_variable_gpl.c \
 test-symtab/kernel-5.6/single_variable_gpl.ko \
 test-symtab/kernel-modversions/Makefile \
 test-symtab/kernel-modversions/one_of_each.c \
-test-symtab/kernel-modversions/one_of_each.ko
+test-symtab/kernel-modversions/one_of_each.ko \
+\
+test-abidb/abidb2client.c \
+test-abidb/abidb2so.c \
+test-abidb/abidb2soBAD.c
diff --git a/tests/data/test-abidb/abidb2client.c b/tests/data/test-abidb/abidb2client.c
new file mode 100644 (file)
index 0000000..107e817
--- /dev/null
@@ -0,0 +1,8 @@
+#include <stdio.h>
+
+extern int foobar(float x); /* resolved at link time from libabidb2.so */
+
+int main() {
+  printf("%d\n", foobar(1337.0));
+  return 0;
+}
diff --git a/tests/data/test-abidb/abidb2so.c b/tests/data/test-abidb/abidb2so.c
new file mode 100644 (file)
index 0000000..1f98a0b
--- /dev/null
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int foobar(float zoo); /* same signature as the declaration in abidb2client.c */
+int foobar(float zoo) {
+  return (int) zoo;
+}
diff --git a/tests/data/test-abidb/abidb2soBAD.c b/tests/data/test-abidb/abidb2soBAD.c
new file mode 100644 (file)
index 0000000..5c26c99
--- /dev/null
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int foobar(char zoo); /* char here, float in abidb2so.c: the incompatible variant */
+int foobar(char zoo) {
+  return (int) zoo;
+}
diff --git a/tests/runtestabidb1.sh.in b/tests/runtestabidb1.sh.in
new file mode 100755 (executable)
index 0000000..ad4b7e2
--- /dev/null
@@ -0,0 +1,47 @@
+#! /bin/sh
+
+# Run an abidb test based on the binaries right here
+
+# test prereqs; use { } not ( ) so that "exit 77" (SKIP) leaves this script, not a subshell
+type git 2>/dev/null || { echo "need git" ; exit 77; }
+type ldd 2>/dev/null || { echo "need ldd" ; exit 77; }
+type eu-readelf 2>/dev/null || { echo "need eu-readelf" ; exit 77; }
+type debuginfod-find 2>/dev/null || { echo "need debuginfod-find" ; exit 77; }
+
+toolssrcdir=@abs_top_srcdir@/tools
+toolsdir=@abs_top_builddir@/tools
+PATH=$toolsdir:$PATH
+export PATH
+
+type $toolsdir/abidw && ABIDW=$toolsdir/abidw
+type $toolsdir/abidiff && ABIDIFF=$toolsdir/abidiff
+type $toolssrcdir/abidb && ABIDB=$toolssrcdir/abidb
+datadir=@abs_top_srcdir@/tests/data
+
+set -x
+set -e
+
+gitdir=$(mktemp -d --tmpdir abidb-XXXXXXXX)
+
+clean_env() # POSIX function syntax: the "function" keyword is not available in every /bin/sh
+{
+    rm -rf "$gitdir"
+}
+
+trap "clean_env" EXIT
+
+git init "$gitdir"
+
+# Run a quick test against the local distribution, presumed via debuginfod
+unset DEBUGINFOD_VERBOSE DEBUGINFOD_PROGRESS
+for targetbin in /bin/sh /bin/ls /bin/make
+do
+    debuginfod-find debuginfo "$targetbin" || continue # skip if debuginfo not available or file does not exist
+    for solib in `ldd "$targetbin" | grep -F '=>' | awk '{print $3}'`;
+    do
+        $ABIDB --loglevel=debug --git "$gitdir" --submit "$solib"
+    done
+    $ABIDB --loglevel=debug --git "$gitdir" --check "$targetbin"
+done
+# How many shared libraries did we collect?
+(cd "$gitdir"; git ls-files)
diff --git a/tests/runtestabidb2.sh.in b/tests/runtestabidb2.sh.in
new file mode 100755 (executable)
index 0000000..bff5fc5
--- /dev/null
@@ -0,0 +1,73 @@
+#! /bin/sh
+
+# Run an abidb test based on the binaries right here
+
+# test prereqs; use { } not ( ) so that "exit 77" (SKIP) leaves this script, not a subshell
+type git 2>/dev/null || { echo "need git" ; exit 77; }
+type gcc 2>/dev/null || { echo "need gcc" ; exit 77; }
+type eu-readelf 2>/dev/null || { echo "need eu-readelf" ; exit 77; }
+type debuginfod-find 2>/dev/null || { echo "need debuginfod-find" ; exit 77; }
+
+toolssrcdir=@abs_top_srcdir@/tools
+toolsdir=@abs_top_builddir@/tools
+PATH=$toolsdir:$PATH
+export PATH
+
+type $toolsdir/abidw && ABIDW=$toolsdir/abidw
+type $toolsdir/abidiff && ABIDIFF=$toolsdir/abidiff
+type $toolssrcdir/abidb && ABIDB=$toolssrcdir/abidb
+type $toolsdir/abicompat && ABICOMPAT=$toolsdir/abicompat
+datadir=@abs_top_srcdir@/tests/data/test-abidb
+odatadir=@abs_top_builddir@/tests/data/test-abidb
+
+test -d $odatadir || mkdir -p $odatadir
+
+set -x
+
+gitdir=$(mktemp -d --tmpdir abidb-XXXXXXXX)
+
+clean_env() # POSIX function syntax: the "function" keyword is not available in every /bin/sh
+{
+    echo cleaning "$gitdir"
+    # comment rm -rf out to preserve the evidence
+    rm -rf "$gitdir"
+    # only remove what was built here: for in-tree builds $odatadir is also the source directory
+    rm -f $odatadir/libabidb2.so $odatadir/abidb2client; rmdir $odatadir 2>/dev/null
+}
+
+git init "$gitdir"
+
+# Build a little shared library, a binary against it, then
+# break it with an update
+gcc -g -fPIC -rdynamic -shared $datadir/abidb2so.c -Wl,-soname=libabidb2.so -o $odatadir/libabidb2.so
+$ABIDB --loglevel=debug --abidw $ABIDW --git $gitdir --submit $odatadir/libabidb2.so
+gcc -g $datadir/abidb2client.c -L$odatadir -labidb2 -o $odatadir/abidb2client
+$ABIDB --loglevel=debug --git $gitdir --check $odatadir/abidb2client
+rm  $odatadir/libabidb2.so
+gcc -g -fPIC -rdynamic -shared $datadir/abidb2soBAD.c -Wl,-soname=libabidb2.so -o $odatadir/libabidb2.so
+
+cmd="$ABIDB --loglevel=debug --abidw $ABIDW  --git $gitdir --submit $odatadir/libabidb2.so"
+$cmd
+RESULT_CODE=$?
+if test $RESULT_CODE -eq 0; then
+    cmd="$ABIDB --loglevel=debug --abicompat $ABICOMPAT --git $gitdir --check $odatadir/abidb2client"
+    $cmd
+    RESULT_CODE=$?
+    echo "expected a failure!"
+    if test $RESULT_CODE -ne 0; then
+       echo "executed: $cmd: "
+       echo "with exit code: $RESULT_CODE"
+       RESULT_CODE=0
+    else
+       RESULT_CODE=1
+    fi
+else
+    echo "command failed: $cmd"
+fi
+
+# How many shared libraries did we collect?
+(cd "$gitdir"; git ls-files; for file in `git ls-files`; do git show $file; done )
+
+clean_env
+
+exit $RESULT_CODE
index abd090222fdc812db000f98aea87a67c0408b609..cbb5ce10319534e91ad820688c471e10de543666 100644 (file)
@@ -3,10 +3,19 @@ h=$(abs_srcdir)
 
 bin_PROGRAMS = abidiff abilint abidw abicompat abipkgdiff kmidiff
 
+noinst_SCRIPTS=
+dist_bin_SCRIPTS=
+
 if ENABLE_FEDABIPKGDIFF
-  dist_bin_SCRIPTS = fedabipkgdiff
+  dist_bin_SCRIPTS += fedabipkgdiff
+else
+  noinst_SCRIPTS += fedabipkgdiff
+endif
+
+if ENABLE_ABIDB
+  dist_bin_SCRIPTS += abidb
 else
-  noinst_SCRIPTS = fedabipkgdiff
+  noinst_SCRIPTS += abidb
 endif
 
 noinst_PROGRAMS = abisym abinilint
diff --git a/tools/abidb b/tools/abidb
new file mode 100755 (executable)
index 0000000..e95c9f9
--- /dev/null
@@ -0,0 +1,437 @@
+#! /usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# -*- coding: utf-8 -*-
+# -*- Mode: Python
+#
+# Copyright (C) 2023-2024 Red Hat, Inc.
+#
+# Author: Frank Ch. Eigler
+
+# tolerate DEBUGINFOD_VERBOSE*
+# tolerate --submit of linker-script fake .so's like libc.so
+# option for abidw options such as >> --abidiff  << --outfile new.xml /lib64/foo.so.2
+# option for abicheck options
+# accumulate unresolved symbols
+# parallelize submissions natively - or tolerate xargs -P abidb --submit
+# 32/64-bit-filter check libraries
+# abidw -> -fvisibility filter!
+
+
+from __future__ import print_function
+
+import argparse
+import functools
+import logging
+import subprocess
+import sys
+import time
+import git
+import platform
+import tempfile
+import re
+import ast
+import os
+import glob
+import libarchive
+
+
+# globals
+args = None
+
+
+
+def default_distrobranch():
+    """Compute the default distrobranch string for the current host.
+    This involves parsing /etc/os-release and uname.  (NIST CPE would
+    be another alternative, but there appears to be no standard location
+    on different distros, like Fedora's /etc/system-release-cpe.)
+
+    :returns: String like "fedora/39/x86_64", or None if /etc/os-release
+       cannot be read or lacks the ID or VERSION_ID fields
+    """
+    osrelease = {}
+    r = re.compile(r'(?P<name>\w+)=(?P<value>.+)')
+    try:
+        with open('/etc/os-release', 'r', encoding='utf-8') as f:
+            for line in f:
+                m = r.match(line)
+                if not m:
+                    continue
+                value = m.group("value")
+                try:
+                    unquoted = ast.literal_eval(value) # unquote "strings"
+                except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
+                    unquoted = None # some strings are already unquoted
+                # Keep the raw text unless a quoted string was parsed: letting
+                # literal_eval convert numbers would turn 12.10 into 12.1.
+                if isinstance(unquoted, str):
+                    value = unquoted
+                osrelease[m.group('name')] = value
+    except (OSError, UnicodeDecodeError):
+        return None # no usable os-release on this host
+
+    try:
+        return osrelease['ID']+'/'+osrelease['VERSION_ID']+'/'+platform.uname().machine
+    except KeyError:
+        return None
+
+
+def get_buildid(path: str, desc: str) -> str:
+    """Invoke eu-readelf to fetch the buildid of the given binary.
+    Unfortunately this involves parsing the textual output via regexp.
+    Raise an exception if not found.
+
+    :param str path: filename
+    :param str desc: description of the binary, included in exception messages
+    :return: lowercase hexadecimal buildid
+    :rtype: str
+    :raises:
+       RuntimeError: if eu-readelf fails, or if the build-id line is
+       not found in eu-readelf's output
+    """
+    cmdline = ["eu-readelf",
+               "-n", # fetches all notes; -nSECTION is possible by recent
+               path]
+    logging.debug(f"running {cmdline}")
+    result = subprocess.run(cmdline,
+                            capture_output=True, check=False)
+    # errors='replace': tool output may contain non-UTF-8 bytes (e.g. file
+    # names); that must not turn into a UnicodeDecodeError here.
+    if (result.returncode != 0):
+        logging.error(f"eu-readelf error: {path}\n" +
+                      f"{result.stderr.decode('utf-8', errors='replace')}")
+        raise RuntimeError(f"eu-readelf failure {result.returncode} {path} {desc}")
+
+    r = re.compile(r'^\s+Build ID: ([0-9a-z]+)$')
+    for line in result.stdout.decode('utf-8', errors='replace').split('\n'):
+        m = r.match(line)
+        if m:
+            return m.group(1)
+
+    raise RuntimeError(f"Build ID not found in eu-readelf {path} {desc} output")
+
+
+def get_soname(path: str, desc: str) -> str:
+    """Invoke eu-readelf to fetch the soname of the given shared library.
+    Unfortunately this involves parsing the textual output via regexp.
+    Raise an exception if not found.
+
+    :param str path: filename
+    :param str desc: description of the binary, included in exception messages
+    :return: soname
+    :rtype: str
+    :raises:
+       RuntimeError: if eu-readelf fails, or if the soname line is
+       not found in eu-readelf's output
+    """
+    cmdline = ["eu-readelf",
+               "-d",
+               path]
+    logging.debug(f"running {cmdline}")
+    result = subprocess.run(cmdline,
+                            capture_output=True, check=False)
+    # errors='replace': tool output may contain non-UTF-8 bytes (e.g. file
+    # names); that must not turn into a UnicodeDecodeError here.
+    if (result.returncode != 0):
+        logging.error(f"eu-readelf error: {path}\n" +
+                      f"{result.stderr.decode('utf-8', errors='replace')}")
+        # report the real cause, like get_buildid, rather than "SONAME not found"
+        raise RuntimeError(f"eu-readelf failure {result.returncode} {path} {desc}")
+
+    r = re.compile(r'^\s+SONAME\s+Library soname:\s\[(.*)\]$')
+    for line in result.stdout.decode('utf-8', errors='replace').split('\n'):
+        m = r.match(line)
+        if m:
+            return m.group(1)
+
+    raise RuntimeError(f"SONAME not found in eu-readelf {path} {desc} output")
+
+
+def get_solibs(path: str) -> list[str]:
+    """Invoke eu-readelf to fetch the list of dependent shared
+    libraries of the given binary.  Unfortunately this involves
+    parsing the textual output via regexp.  No exception is raised
+    if eu-readelf fails: the error is logged and whatever NEEDED
+    entries could be parsed (possibly none) are returned.
+
+    :param str path: filename
+    :return: list of SONAME strings, in DT_NEEDED order; may be empty
+    :rtype: List[str]
+    """
+    cmdline =["eu-readelf",
+              "-d",
+              path]
+    logging.debug(f"running {cmdline}")
+    result = subprocess.run(cmdline,
+                            capture_output=True, check=False)
+    # errors='replace': tool output may contain non-UTF-8 bytes (e.g. file
+    # names); that must not turn into a UnicodeDecodeError here.
+    if (result.returncode != 0):
+        logging.error(f"eu-readelf error: {path}\n" +
+                      f"{result.stderr.decode('utf-8', errors='replace')}")
+
+    r = re.compile(r'^\s+NEEDED\s+Shared library:\s\[(.*)\]$')
+    matches = (r.match(line)
+               for line in result.stdout.decode('utf-8', errors='replace').split('\n'))
+    return [m.group(1) for m in matches if m]
+
+
+def main() -> list[str]:
+    parser = argparse.ArgumentParser(description='Check binary against abidb corpus and/or submit new data.',
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--loglevel',type=str,help='logging level',default='info')
+    parser.add_argument('--git',type=str,help='abidb git working tree',default='.')
+    parser.add_argument('--distrobranch',type=str,help='use given abidb distrobranch',default=default_distrobranch())
+    parser.add_argument('--timeout',type=int,help='limit abidw/abicompat runtime (seconds)',default=0)
+    parser.add_argument('--submit',nargs='*',type=str,default=[],
+                        help='submit abidw of given binaries to abidb')
+    parser.add_argument('--archive','-Z',metavar='EXT=CMD',
+                        type=str,help='submit binaries from archives with given extension & decoder',
+                        default=[],action='append') # like debuginfod(8)
+    parser.add_argument('--sysroot',type=str,help='remove given sysroot prefix from submitted file names',default=None)
+    parser.add_argument('--filter',type=str,help='submit only binaries matching given wildcard',default=r'/lib.*\.so') # sub-version suffixes will be flattened into SONAME
+    # --sysroot=PATH subtract this from SUBMIT paths
+    parser.add_argument('--check',type=str,nargs='*',default=[],
+                        help='check given binaries against abidb')
+    parser.add_argument('--ld-library-path',type=str,
+                        help='override LD_LIBRARY_PATH for soname resolution during check',
+                        default=None) # XXX: how to find appropriate default?
+    parser.add_argument('--abicompat',type=str,help='the path to the abicompat program to use',
+                        default='abicompat')
+    parser.add_argument('--abidw',type=str,help='the path to the abidw program to use',
+                        default='abidw')
+
+    global args
+    args = parser.parse_args()
+
+    logging.basicConfig(level=args.loglevel.upper(),
+                        format="%(asctime)s:"+os.path.basename(__file__)+":%(levelname)s:%(message)s")
+    logging.captureWarnings(True)
+
+    if len(args.submit) + len(args.check) == 0:
+        logging.error("need --check or --submit")
+        parser.print_usage()
+        exit(1)
+    
+    
+    # Open the git repo
+    args.git = os.path.realpath(args.git) # canonicalize
+    abidb = git.Repo(args.git) #type: ignore[attr-defined]
+    logging.debug(f'opened git repo {args.git}')
+
+    failures = []
+    
+    # Submit
+    if len(args.submit) > 0:
+        # Check out the distrobranch, creating if necessary
+        if args.distrobranch in abidb.heads:
+            abidb.heads[args.distrobranch].checkout(force=True)
+            abidb.git.reset()
+            abidb.git.clean('-xdf') # clean of misc files
+        else:
+            abidb.git.checkout(args.distrobranch,orphan=True)
+            abidb.git.reset()        
+            abidb.git.clean('-xdf') # clean of misc files, can easily happen in the case of an orphan branch
+            abidb.git.commit(message="initial commit",allow_empty=True) # so index diff HEAD works
+        numfiles=len(abidb.git.ls_files().split())
+        logging.info(f'checked out distrobranch {args.distrobranch} files {numfiles}')
+
+        ra = {} # maps a file name extension, with its leading dot, to a decoder command
+        for entry in args.archive: # parse / accumulate -Z EXT=CMD bits
+            # split at the first '=' only, so that CMD may contain ='s itself
+            (ext, eq, cmd) = entry.partition('=')
+            if not eq:
+                cmd = "cat" # default: pass through to libarchive
+            # accept both "-Z zip" and "-Z .zip": without the lstrip the
+            # latter would be stored as "..zip" and never match a file name
+            ra["."+ext.lstrip(".")] = cmd
+
+
+        def submit_file_generator(args):
+            """Generate a list of (archivename,logicalname,physicalname) tuples.
+
+            For a plain file, archivename is None, physicalname is the
+            path as given and logicalname is that path with the
+            --sysroot prefix removed.  For a file whose extension was
+            registered with -Z, one tuple is generated per regular file
+            in the archive; physicalname is then a temporary file that
+            exists only until the next tuple is requested.
+            """
+            def archive_members(archivename, archivepath):
+                """Yield a tuple for each regular file stored in archivepath."""
+                with libarchive.file_reader(archivepath) as archive:
+                    for entry in archive:
+                        if entry.filetype != libarchive.entry.FileType.REGULAR_FILE:
+                            continue
+                        # canonicalize the logical names to ordinary full paths
+                        canon_entry = entry.name
+                        if canon_entry.startswith("./"):
+                            canon_entry = canon_entry[1:]
+                        if not canon_entry.startswith("/"):
+                            canon_entry = "/" + canon_entry
+                        with tempfile.NamedTemporaryFile() as tmp: # extract to temp file
+                            for block in entry.get_blocks():
+                                tmp.write(block)
+                            tmp.flush()
+                            yield (archivename, canon_entry, tmp.name)
+
+            for submit in args.submit:
+                ext=os.path.splitext(submit)[1] # e.g.,  ".rpm"
+                if (ext not in ra): # not an archive extension?
+                    pn = submit
+                    if args.sysroot and submit.startswith(args.sysroot):
+                        ln = submit[len(args.sysroot):]
+                    else:
+                        ln = submit
+                    yield (None, ln, pn) # must be a plain file # XXX or ldconfig-created symlink, ugh
+                    continue
+
+                cmd = ra[ext] # an archive!
+                if (cmd == "cat"): # short-circuit this
+                    yield from archive_members(submit, submit)
+                    continue
+
+                # must run conversion script on archive first
+                with tempfile.NamedTemporaryFile() as tmp:
+                    with open(submit,"rb") as archive: # binary: only the descriptor is handed to cmd
+                        logging.debug(f"running {cmd}")
+                        result = subprocess.run(cmd, stdin=archive, stdout=tmp, stderr=subprocess.PIPE,
+                                                shell=True, check=False)
+                    if (result.returncode != 0):
+                        logging.error(f"archive conversion error: {submit} | {cmd}\n" +
+                                      f"{result.stderr.decode('utf-8', errors='replace')}")
+                        continue # do not try to parse truncated or garbage decoder output
+                    logging.debug(f"converted archive {submit} | {cmd} to {tmp.name}")
+                    yield from archive_members(submit, tmp.name)
+            
+        rf = re.compile(args.filter)
+        for (an,ln,pn) in submit_file_generator(args): # run in ThreadPoolExecutor?
+            logging.debug(f"considering archive {an} logical {ln} physical {pn}")
+            if (not rf.search(ln)): # unanchored
+                logging.debug(f"filtered {an} {ln}")
+                continue
+            try:
+                ln_soname = os.path.basename(ln) # preliminary guess, for exception
+                buildid = get_buildid(pn, f"{an} {ln}")
+                soname = get_soname(pn, f"{an} {ln}")
+                # map /path/to/libfoo.so.N.M.P.Q to /path/to/SONAME
+                ln_soname = os.path.dirname(ln) + "/" + soname
+                gitpath = args.git + "/" + ln_soname + "/" + buildid + ".xml" # naming convention!
+                gitpath = os.path.realpath(gitpath)  # canonicalize foo//bar's away, absolutize relative paths
+
+                if os.path.exists(gitpath):
+                    # and not --forced
+                    logging.debug(f'binary {ln_soname} abidb-path {gitpath} already-exists')
+                    continue
+
+                cmdline = ["timeout", str(args.timeout), args.abidw,
+                           # option? "--load-all-types"
+                           pn]
+                logging.debug(f"running {cmdline}")                
+                result = subprocess.run(cmdline,
+                                        capture_output=True, check=False)
+                if (result.returncode != 0):
+                    logging.error(f"abidw error: {pn}:\n" +
+                                  f"{result.stdout.decode('utf-8')}\n" +
+                                  f"{result.stderr.decode('utf-8')}")
+                    raise RuntimeError(f"abidw failure {result.returncode} {an} {ln} {pn}")        
+                
+                gitdata = result.stdout
+                if len(result.stderr) > 0:
+                    logging.warning(f"abidw error: {ln_soname} {an} {pn}\n{result.stderr}")
+
+                os.makedirs(os.path.dirname(gitpath), exist_ok=True)
+                with open(gitpath, 'wb') as f: # or we could bother decode/encode utf-8 but nah
+                    f.write(gitdata)
+
+                # or: don't use index-add/diff/commit, just working tree level ops, for better concurrency?
+                abidb.index.add([gitpath])
+
+                diff = abidb.index.diff("HEAD")
+                if len(diff) > 0:
+                    logan = os.path.basename(an) if an else ""
+                    c = abidb.index.commit(f"abidb {ln_soname} {logan}") # customizable
+                    logging.info(f'binary {pn} {an} abidb-path {ln_soname} abixml-length {len(gitdata)} commit {c.hexsha}')
+                else:
+                    logging.info(f'binary {pn} {an} abidb-path {ln_soname} abixml-length {len(gitdata)} unmodified')
+
+            except Exception as e:
+                failures.append(f"submitting soname {ln_soname} archive {an} file {pn}")
+                logging.exception(e)
+                    
+    if len(args.check) > 0:
+        commit = abidb.heads[args.distrobranch].commit # may throw if distrobranch does not exist
+
+        commit_dirs = sorted([t.path for t in commit.tree.traverse() if t.type == 'tree'])
+        logging.info(f'examining distrobranch {args.distrobranch} dirs {len(commit_dirs)}')
+        
+        for a in args.check: # run in ThreadPoolExecutor!
+            try:
+                logging.debug(f"checking {a}")
+                sonames = get_solibs(a)
+                if len(sonames) == 0:
+                    logging.info(f'binary {a} lists no sonames')
+                for soname in sonames:
+                    logging.debug(f"against soname {soname}")
+                    soname_impl = []
+
+                    # Find the libsoname.so.* directories in abidb.
+                    soname_dirs = []
+                    for cd in commit_dirs:
+                        cdbd = os.path.basename(cd)
+                        if (cdbd == soname # exact soname match?
+                            or cdbd.startswith(soname+".")): # or oddball sub-versioned abidb dir
+                            soname_dirs.append(cd)
+                            
+                    # Order & filter them, in --ld-library-path mode
+                    filtered_soname_dirs = []
+                    if args.ld_library_path:
+                        for ldir in args.ld_library_path.split(":"): # search, in order, for a matching commit_dir
+                            ldir_unslashed = ldir[1:] if ldir.startswith("/") else ldir # git uses relative names
+                            for soname_dir in soname_dirs:
+                                if os.path.dirname(soname_dir) == ldir_unslashed: # found one - collect all!
+                                    filtered_soname_dirs.append (soname_dir)
+                            if len(filtered_soname_dirs) > 0: # skip any later ld-library-path entries
+                                break
+                    else:
+                        filtered_soname_dirs = soname_dirs
+                                
+                    logging.debug(f"searching for {soname} in {filtered_soname_dirs}")
+                    for candir in filtered_soname_dirs:
+                        tree = commit.tree[candir] # resolve path/path/libfoo.so.n path
+                        if tree.type != "tree":
+                            logging.warning(f"skipping {candir}, expected a tree instead of {tree.type}")
+                            continue
+
+                        for blob in tree:
+                            if blob.type != "blob":
+                                continue
+                            if not blob.name.endswith(".xml"):
+                                continue
+                            abixml = blob.name
+
+                            # stream it out to a temp file to feed to abicompat later
+                            with tempfile.NamedTemporaryFile(suffix="."+abixml) as tmp:
+                                blob.stream_data(tmp)
+                                tmp.flush()
+
+                                soname_impl.append(abixml)
+                                cmdline = ["timeout", str(args.timeout), args.abicompat,
+                                           "--appd", "/dev/null", # operate even with debuginfod
+                                           "--libd1", "/dev/null", # operate even with debuginfod
+                                           # extra flags?
+                                           a,
+                                           tmp.name]
+                                logging.debug(f"running {cmdline}")
+                                result = subprocess.run(cmdline,
+                                                        capture_output=True, check=False)
+                                if (result.returncode != 0):
+                                    logging.error(f"abicompat: {a} vs. {candir} {abixml}:\n" +
+                                                  f"{result.stdout.decode('utf-8')}\n" +
+                                                  f"{result.stderr.decode('utf-8')}")
+                                    raise RuntimeError(f"abicompat failure {result.returncode} {a} {candir} {abixml}")
+                                else:
+                                    logging.info(f"abicompat success: {a} vs. {candir} {abixml}")
+                                        
+                    if len(soname_impl) == 0:
+                        logging.warning(f"no abi.xml found for {soname}")
+            except Exception as e:
+                failures.append(f"checking {a}")
+                # logging.exception(e)
+                
+    return failures
+
+                
+if __name__ == '__main__':
+    failures = main()
+    if len(failures) == 0:
+        exit(0)
+    else:
+        logging.error(f"{len(failures)} errors")
+        for f in failures:
+            logging.debug(f)
+
+exit(1)
+        
This page took 0.05836 seconds and 5 git commands to generate.