[PATCH] elfclassify tool

Florian Weimer fweimer@redhat.com
Thu Apr 18 11:17:00 GMT 2019


* Florian Weimer:

>> BTW. Florian, the extra options are certainly not required for you to
>> implement to get eu-elfclassify accepted. They are just suggestions,
>> which we might decide not to do/add. Or they can be added by others if
>> they think they are useful.
>
> Understood.  I would rather fix the command line syntax as a priority,
> implement --unstripped, and add a test suite.

The patch below, also available here:

  <https://pagure.io/fweimer/elfutils/commits/elfclassify>

reworks the command line parser, implements filtering of file lists, and
adds the --unstripped option.

I assume the next step is to write tests.

Thanks,
Florian

diff --git a/src/Makefile.am b/src/Makefile.am
index 2b1c0dcb..966d1da7 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -26,7 +26,8 @@ AM_CPPFLAGS += -I$(srcdir)/../libelf -I$(srcdir)/../libebl \
 AM_LDFLAGS = -Wl,-rpath-link,../libelf:../libdw
 
 bin_PROGRAMS = readelf nm size strip elflint findtextrel addr2line \
-	       elfcmp objdump ranlib strings ar unstrip stack elfcompress
+	       elfcmp objdump ranlib strings ar unstrip stack elfcompress \
+	       elfclassify
 
 noinst_LIBRARIES = libar.a
 
@@ -83,6 +84,7 @@ ar_LDADD = libar.a $(libelf) $(libeu) $(argp_LDADD)
 unstrip_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) -ldl
 stack_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD) -ldl $(demanglelib)
 elfcompress_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD)
+elfclassify_LDADD = $(libelf) $(libeu) $(argp_LDADD)
 
 installcheck-binPROGRAMS: $(bin_PROGRAMS)
 	bad=0; pid=$$$$; list="$(bin_PROGRAMS)"; for p in $$list; do \
diff --git a/src/elfclassify.c b/src/elfclassify.c
new file mode 100644
index 00000000..d4b46b64
--- /dev/null
+++ b/src/elfclassify.c
@@ -0,0 +1,654 @@
+/* Classification of ELF files.
+   Copyright (C) 2019 Red Hat, Inc.
+   This file is part of elfutils.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include <argp.h>
+#include <error.h>
+#include <fcntl.h>
+#include <gelf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include ELFUTILS_HEADER(elf)
+#include "printversion.h"
+
+/* Name and version of program, printed through the argp version hook.  */
+ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
+
+/* Bug report address handed to argp.  */
+ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
+
+/* Verbosity level; parse_opt increments it once per --verbose.  */
+static int verbose;
+
+/* File being processed; set by main and process_stdin.  */
+static const char *current_path;
+
+/* Descriptor opened by open_file for current_path; -1 if none.  */
+static int file_fd = -1;
+
+/* Open current_path read-only into file_fd, closing any previously
+   opened file.  Return false if the file does not exist or is not a
+   regular file; exit with status 2 on any other failure.  */
+static bool
+open_file (void)
+{
+  if (file_fd >= 0)
+    {
+      close (file_fd);
+      file_fd = -1;
+    }
+
+  if (verbose > 1)
+    fprintf (stderr, "debug: processing file: %s\n", current_path);
+
+  file_fd = open (current_path, O_RDONLY);
+  if (file_fd < 0)
+    {
+      /* A missing file is a soft failure; anything else is fatal.  */
+      if (errno != ENOENT)
+        error (2, errno, N_("opening %s"), current_path);
+      if (verbose > 0)
+        fprintf (stderr, N_("warning: %s: file does not exist\n"),
+                 current_path);
+      return false;
+    }
+  struct stat st;
+  if (fstat (file_fd, &st) != 0)
+    error (2, errno, N_("reading %s"), current_path);
+
+  /* Reject directories etc. here; processing those as ELF would fail.  */
+  if (!S_ISREG (st.st_mode))
+    {
+      if (verbose > 0)
+        fprintf (stderr, N_("warning: %s: not a regular file\n"),
+                 current_path);
+      return false;
+    }
+  return true;
+}
+
+/* libelf handle for current_path; set by open_elf.  */
+static Elf *elf;
+
+/* Open current_path via open_file and create the libelf handle for
+   it.  Return true only for ELF files and archives.  */
+static bool
+open_elf (void)
+{
+  /* Release the handle left over from the previous file, if any.  */
+  if (elf != NULL)
+    elf_end (elf);
+  elf = NULL;
+
+  if (!open_file ())
+    return false;
+
+  elf = elf_begin (file_fd, ELF_C_READ, NULL);
+  if (elf == NULL)
+    error (2, 0, "%s: %s", current_path, elf_errmsg (-1));
+
+  if (elf_kind (elf) == ELF_K_ELF || elf_kind (elf) == ELF_K_AR)
+    return true;
+
+  if (verbose > 0)
+    fprintf (stderr, N_("warning: %s: not an ELF file\n"),
+             current_path);
+  return false;
+}
+
+static int elf_type;                  /* e_type of the ELF header.  */
+static bool has_program_interpreter;  /* PT_INTERP segment present.  */
+static bool has_dynamic;              /* PT_DYNAMIC segment present.  */
+static bool has_soname;               /* DT_SONAME entry present.  */
+static bool has_pie_flag;             /* DF_1_PIE set in DT_FLAGS_1.  */
+static bool has_dt_debug;             /* DT_DEBUG entry present.  */
+static bool has_symtab;               /* SHT_SYMTAB section present.  */
+static bool has_debug_sections;       /* .debug_* section present.  */
+
+/* Analyze the file behind elf and record the findings in elf_type and
+   the has_* flags.  Files that are not ELF_K_ELF (archives) leave them
+   at their reset values.  Exits with status 2 on libelf errors.  */
+static void
+run_classify (void)
+{
+  /* Reset to unanalyzed default.  */
+  elf_type = 0;
+  has_program_interpreter = false;
+  has_dynamic = false;
+  has_soname = false;
+  has_pie_flag = false;
+  has_dt_debug = false;
+  has_symtab = false;
+  has_debug_sections = false;
+
+  if (elf_kind (elf) != ELF_K_ELF)
+    return;
+
+  GElf_Ehdr ehdr_storage;
+  GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
+  if (ehdr == NULL)
+    error (2, 0, N_("%s: ELF header: %s"), current_path, elf_errmsg (-1));
+  elf_type = ehdr->e_type;
+
+  /* Examine program headers.  */
+  {
+    size_t nphdrs;
+    if (elf_getphdrnum (elf, &nphdrs) != 0)
+      error (2, 0, "%s: program header: %s", current_path, elf_errmsg (-1));
+    if (nphdrs > INT_MAX)
+      error (2, 0, "%s: number of program headers is too large: %zu",
+             current_path, nphdrs);
+    for (size_t phdr_idx = 0; phdr_idx < nphdrs; ++phdr_idx)
+      {
+        GElf_Phdr phdr_storage;
+        GElf_Phdr *phdr = gelf_getphdr (elf, phdr_idx, &phdr_storage);
+        if (phdr == NULL)
+          error (2, 0, "%s: %s", current_path, elf_errmsg (-1));
+        if (phdr->p_type == PT_DYNAMIC)
+          has_dynamic = true;
+        if (phdr->p_type == PT_INTERP)
+          has_program_interpreter = true;
+      }
+  }
+
+  /* Examine section headers.  */
+  Elf_Scn *dyn_section = NULL;
+  {
+    size_t shstrndx;
+    if (unlikely (elf_getshdrstrndx (elf, &shstrndx) < 0))
+      error (2, 0, N_("%s: section header string table index: %s"),
+             current_path, elf_errmsg (-1));
+
+    Elf_Scn *scn = NULL;
+    while ((scn = elf_nextscn (elf, scn)) != NULL)
+      {
+        GElf_Shdr shdr_storage;
+        GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
+        if (shdr == NULL)
+          error (2, 0, N_("%s: could not obtain section header: %s"),
+                 current_path, elf_errmsg (-1));
+        const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
+        if (section_name == NULL)
+          error (2, 0, N_("%s: could not obtain section name: %s"),
+                 current_path, elf_errmsg (-1));
+        if (verbose > 2)
+          fprintf (stderr, "debug: section header %s (type %d) found\n",
+                   section_name, shdr->sh_type);
+        if (shdr->sh_type == SHT_DYNAMIC)
+          {
+            if (verbose > 1)
+              fputs ("debug: dynamic section found\n", stderr);
+            dyn_section = scn;
+          }
+        if (shdr->sh_type == SHT_SYMTAB)
+          {
+            if (verbose > 1)
+              fputs ("debug: symtab section found\n", stderr);
+            has_symtab = true;
+          }
+        const char *debug_prefix = ".debug_";
+        if (strncmp (section_name, debug_prefix, strlen (debug_prefix)) == 0)
+          {
+            if (verbose > 1)
+              fputs ("debug: .debug_* section found\n", stderr);
+            has_debug_sections = true;
+          }
+      }
+  }
+
+  /* Examine the dynamic section.  It is only consulted if there is
+     also a PT_DYNAMIC segment.  */
+  if (has_dynamic && dyn_section != NULL)
+    {
+      Elf_Data *data = elf_getdata (dyn_section, NULL);
+      if (verbose > 2)
+        fprintf (stderr, "debug: Elf_Data for dynamic section: %p\n",
+                 (void *) data);
+
+      if (data != NULL)
+        for (int dyn_idx = 0; ; ++dyn_idx)
+          {
+            GElf_Dyn dyn_storage;
+            GElf_Dyn *dyn = gelf_getdyn (data, dyn_idx, &dyn_storage);
+            /* A failed lookup or a DT_NULL entry ends the scan.  */
+            if (dyn == NULL)
+              break;
+            if (verbose > 2)
+              fprintf (stderr, "debug: dynamic entry %d"
+                       " with tag %llu found\n",
+                       dyn_idx, (unsigned long long int) dyn->d_tag);
+            if (dyn->d_tag == DT_SONAME)
+              has_soname = true;
+            if (dyn->d_tag == DT_FLAGS_1 && (dyn->d_un.d_val & DF_1_PIE))
+              has_pie_flag = true;
+            if (dyn->d_tag == DT_DEBUG)
+              has_dt_debug = true;
+            if (dyn->d_tag == DT_NULL)
+              break;
+          }
+    }
+
+  if (verbose)
+    {
+      fprintf (stderr, "info: %s: ELF type: %d\n", current_path, elf_type);
+      if (has_program_interpreter)
+        fprintf (stderr, "info: %s: program interpreter found\n",
+                 current_path);
+      if (has_dynamic)
+        fprintf (stderr, "info: %s: dynamic segment found\n", current_path);
+      if (has_soname)
+        fprintf (stderr, "info: %s: soname found\n", current_path);
+      if (has_pie_flag)
+        fprintf (stderr, "info: %s: DF_1_PIE flag found\n", current_path);
+      if (has_dt_debug)
+        fprintf (stderr, "info: %s: DT_DEBUG found\n", current_path);
+      if (has_symtab)
+        fprintf (stderr, "info: %s: symbol table found\n", current_path);
+      if (has_debug_sections)
+        fprintf (stderr, "info: %s: .debug_* section found\n", current_path);
+    }
+}
+
+/* A file counts as loadable if it is an ELF file other than a
+   relocatable object.  ET_REL files can be loaded by the kernel and
+   by some userspace components, but this classification ignores that.  */
+static bool
+is_loadable (void)
+{
+  bool is_elf = elf_kind (elf) == ELF_K_ELF;
+  return is_elf && elf_type != ET_REL;
+}
+
+/* Return true if a symbol table or .debug_* sections can be stripped.  */
+static bool
+is_unstripped (void)
+{
+  bool strippable = has_symtab || has_debug_sections;
+  return strippable && elf_kind (elf) != ELF_K_NONE;
+}
+
+/* Return true if the file is classified as a shared object (DSO).
+   The checks are ordered; the first one that applies decides.  */
+static bool
+is_shared (void)
+{
+  if (!is_loadable ())
+    return false;
+
+  /* The ELF type is very clear: this is an executable.  */
+  if (elf_type == ET_EXEC)
+    return false;
+
+  /* An object marked as PIE is an executable, not a shared object.  */
+  if (has_pie_flag)
+    return false;
+
+  /* Treat a DT_SONAME tag as a strong indicator that this is a shared
+     object.  */
+  if (has_soname)
+    return true;
+
+  /* This is probably a PIE program: there is no soname, but a program
+     interpreter.  In theory, this file could be also a DSO with a
+     soname implied by its file name that can be run as a program.
+     This situation is impossible to resolve in the general case. */
+  if (has_program_interpreter)
+    return false;
+
+  /* Roland McGrath mentions in
+     <https://www.sourceware.org/ml/libc-alpha/2015-03/msg00605.html>,
+     that “we defined a PIE as an ET_DYN with a DT_DEBUG”.  This
+     matches current binutils behavior (version 2.32).  DT_DEBUG is
+     added if bfd_link_executable returns true or if bfd_link_pic
+     returns false, depending on the architectures.  However, DT_DEBUG
+     is not documented as being specific to executables, therefore use
+     it only as a low-priority discriminator.  */
+  if (has_dt_debug)
+    return false;
+
+  /* Without a dynamic segment, it cannot be loaded as a shared object.  */
+  if (!has_dynamic)
+    return false;
+  return true;
+}
+
+/* Return true if the file is an executable.  A loadable object which
+   is_shared does not classify as a shared object is treated as an
+   executable; everything that is not loadable is neither.  */
+static bool
+is_executable (void)
+{
+  if (is_loadable ())
+    return !is_shared ();
+  return false;
+}
+
+enum classify_requirement { do_not_care, required, forbidden };
+
+enum classify_check
+{
+  classify_elf,                 /* --elf */
+  classify_unstripped,          /* --unstripped */
+  classify_executable,          /* --executable */
+  classify_shared,              /* --shared */
+  classify_loadable,            /* --loadable */
+
+  classify_check_last = classify_loadable   /* Highest valid check.  */
+};
+
+enum
+{
+  classify_check_offset = 1000,      /* Key base of the check options.  */
+  classify_check_not_offset = 2000,  /* Key base of their --not- forms.  */
+
+  classify_flag_stdin = 3000,   /* Keys of the input/output flags.  */
+  classify_flag_stdin0,
+  classify_flag_no_stdin,
+  classify_flag_print,
+  classify_flag_print0,
+  classify_flag_no_print,
+  classify_flag_matching,
+  classify_flag_not_matching,
+};
+
+static bool
+classify_check_positive (int key)
+{
+  int lowest = classify_check_offset;      /* Key of --elf.  */
+  return lowest <= key && key - lowest <= classify_check_last;
+}
+
+static bool
+classify_check_negative (int key)
+{
+  int lowest = classify_check_not_offset;  /* Key of --not-elf.  */
+  return lowest <= key && key - lowest <= classify_check_last;
+}
+
+/* Option state set by parse_opt; the initial values are the defaults.  */
+static enum classify_requirement requirements[classify_check_last + 1];
+static enum { no_stdin, do_stdin, do_stdin0 } flag_stdin;
+static enum { no_print, do_print, do_print0 } flag_print;
+static bool flag_print_matching = true;
+
+/* argp callback: record the option identified by KEY in the variables
+   above.  Returns ARGP_ERR_UNKNOWN for any key not handled here.  */
+static error_t
+parse_opt (int key, char *arg __attribute__ ((unused)),
+           struct argp_state *state __attribute__ ((unused)))
+{
+  if (classify_check_positive (key))
+    {
+      requirements[key - classify_check_offset] = required;
+      return 0;
+    }
+  if (classify_check_negative (key))
+    {
+      requirements[key - classify_check_not_offset] = forbidden;
+      return 0;
+    }
+
+  switch (key)
+    {
+    case 'v':
+      ++verbose;
+      return 0;
+
+    case classify_flag_stdin:
+      flag_stdin = do_stdin;
+      return 0;
+    case classify_flag_stdin0:
+      flag_stdin = do_stdin0;
+      return 0;
+    case classify_flag_no_stdin:
+      flag_stdin = no_stdin;
+      return 0;
+
+    case classify_flag_print:
+      flag_print = do_print;
+      return 0;
+    case classify_flag_print0:
+      flag_print = do_print0;
+      return 0;
+    case classify_flag_no_print:
+      flag_print = no_print;
+      return 0;
+
+    case classify_flag_matching:
+      flag_print_matching = true;
+      return 0;
+    case classify_flag_not_matching:
+      flag_print_matching = false;
+      return 0;
+    default:
+      return ARGP_ERR_UNKNOWN;
+    }
+}
+
+/* Run the requested checks against the file at current_path and print
+   the path if the --print options select it.  Without printing, set
+   *STATUS to 1 if the checks failed; *STATUS is never reset to 0.  */
+static void
+process_current_path (int *status)
+{
+  bool checks_passed = true;
+
+  if (!open_elf ())
+    {
+      /* Not usable as ELF: every required check counts as failed.  */
+      for (enum classify_check c = 0; c <= classify_check_last; ++c)
+        if (requirements[c] == required)
+          checks_passed = false;
+    }
+  else
+    {
+      run_classify ();
+
+      bool checks[] =
+        {
+         [classify_elf] = true,
+         [classify_unstripped] = is_unstripped (),
+         [classify_executable] = is_executable (),
+         [classify_shared] = is_shared (),
+         [classify_loadable] = is_loadable (),
+        };
+
+      if (verbose > 1)
+        {
+          if (checks[classify_unstripped])
+            fprintf (stderr, "debug: %s: unstripped\n", current_path);
+          if (checks[classify_executable])
+            fprintf (stderr, "debug: %s: executable\n", current_path);
+          if (checks[classify_shared])
+            fprintf (stderr, "debug: %s: shared\n", current_path);
+          if (checks[classify_loadable])
+            fprintf (stderr, "debug: %s: loadable\n", current_path);
+        }
+
+      for (enum classify_check c = 0; c <= classify_check_last; ++c)
+        switch (requirements[c])
+          {
+          case required:
+            if (!checks[c])
+              checks_passed = false;
+            break;
+          case forbidden:
+            if (checks[c])
+              checks_passed = false;
+            break;
+          case do_not_care:
+            break;
+          }
+    }
+
+  switch (flag_print)
+    {
+    case do_print:
+      if (checks_passed == flag_print_matching)
+        puts (current_path);
+      break;
+    case do_print0:
+      if (checks_passed == flag_print_matching)
+        fwrite (current_path, strlen (current_path) + 1, 1, stdout);
+      break;
+    case no_print:
+      if (!checks_passed)
+        *status = 1;
+      break;
+    }
+}
+
+/* Called to process standard input if flag_stdin is not no_stdin.
+   Runs process_current_path on each newline- or NUL-separated path,
+   including a last one without separator; exits with 2 on read errors.  */
+static void
+process_stdin (int *status)
+{
+  char delim = flag_stdin == do_stdin0 ? '\0' : '\n';
+
+  char *buffer = NULL;
+  size_t buffer_size = 0;
+  while (true)
+    {
+      ssize_t ret = getdelim (&buffer, &buffer_size, delim, stdin);
+      if (ferror (stdin) || (ret < 0 && !feof (stdin)))
+        error (2, errno, N_("reading from standard input"));
+      if (ret < 0)
+        break;              /* End of input, nothing was read.  */
+      /* getdelim keeps the separator; a newline is not part of the
+         path.  A NUL separator already terminates the string.  */
+      if (delim == '\n' && ret > 0 && buffer[ret - 1] == '\n')
+        buffer[ret - 1] = '\0';
+      current_path = buffer;
+      process_current_path (status);
+    }
+
+  free (buffer);
+}
+
+/* Classify the files named on the command line and, with --stdin or
+   --stdin0, on standard input.  See the argp doc string for exit codes.  */
+int
+main (int argc, char **argv)
+{
+  const struct argp_option options[] =
+    {
+      { NULL, 0, NULL, OPTION_DOC, N_("Classification options"), 1 },
+      { "elf", classify_check_offset + classify_elf, NULL, 0,
+        N_("File looks like an ELF object or archive/static library"), 1 },
+      { "unstripped", classify_check_offset + classify_unstripped, NULL, 0,
+        N_("File is an ELF file with symbol table or .debug_* sections \
+and can be stripped further"), 1 },
+      { "executable", classify_check_offset + classify_executable, NULL, 0,
+        N_("File is an ELF program executable"), 1 },
+      { "shared", classify_check_offset + classify_shared, NULL, 0,
+        N_("File is an ELF shared object (DSO)"), 1 },
+      { "loadable", classify_check_offset + classify_loadable, NULL, 0,
+        N_("File is a loadable ELF object (program or shared object)"), 1 },
+
+      /* Negated versions of the above.  */
+      { "not-elf", classify_check_not_offset + classify_elf,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-unstripped", classify_check_not_offset + classify_unstripped,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-executable", classify_check_not_offset + classify_executable,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-shared", classify_check_not_offset + classify_shared,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+      { "not-loadable", classify_check_not_offset + classify_loadable,
+        NULL, OPTION_HIDDEN, NULL, 1 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Input flags"), 2 },
+      { "stdin", classify_flag_stdin, NULL, 0,
+        N_("Also read file names to process from standard input, \
+separated by newlines"), 2 },
+      { "stdin0", classify_flag_stdin0, NULL, 0,
+        N_("Also read file names to process from standard input, \
+separated by ASCII NUL bytes"), 2 },
+      { "no-stdin", classify_flag_no_stdin, NULL, 0,
+        N_("Do not read files from standard input (default)"), 2 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Output flags"), 3 },
+      { "print", classify_flag_print, NULL, 0,
+        N_("Output names of files, separated by newline"), 3 },
+      { "print0", classify_flag_print0, NULL, 0,
+        N_("Output names of files, separated by ASCII NUL"), 3 },
+      { "no-print", classify_flag_no_print, NULL, 0,
+        N_("Do not output file names"), 3 },
+      { "matching", classify_flag_matching, NULL, 0,
+        N_("If printing file names, print matching files (default)"), 3 },
+      { "not-matching", classify_flag_not_matching, NULL, 0,
+        N_("If printing file names, print files that do not match"), 3 },
+
+      { NULL, 0, NULL, OPTION_DOC, N_("Additional flags"), 4 },
+      { "verbose", 'v', NULL, 0,
+        N_("Output additional information (can be specified multiple times)"), 4 },
+      { NULL, 0, NULL, 0, NULL, 0 }
+    };
+
+  const struct argp argp =
+    {
+      .options = options,
+      .parser = parse_opt,
+      .args_doc = N_("FILE..."),
+      .doc = N_("\
+Determine the type of an ELF file.\
+\n\n\
+Only one of the --shared and --executable checks can pass for one file.  \
+Since modern ELF does not clearly distinguish between programs and \
+dynamic shared objects, these options attempt to identify the primary \
+purpose of the file.\
+\n\n\
+All of the classification options must apply at the same time to a \
+particular file.  Classification options can be negated using a \
+\"--not-\" prefix.\
+\n\n\
+Without any of the --print options, the program exits with status 0 \
+if the requested checks pass for all input files, with 1 if a check \
+fails for any file, and 2 if there is an environmental issue (such \
+as a file read error or a memory allocation error).\
+\n\n\
+When printing file names, the program exits with status 0 even if \
+no file names are printed, and exits with status 2 if there is an \
+environmental issue.\
+")
+    };
+
+  int remaining;
+  if (argp_parse (&argp, argc, argv, ARGP_NO_EXIT, &remaining, NULL) != 0)
+    return 2;
+
+  elf_version (EV_CURRENT);
+
+  int status = 0;
+  for (int i = remaining; i < argc; ++i)
+    {
+      current_path = argv[i];
+      process_current_path (&status);
+    }
+  if (flag_stdin != no_stdin)
+    process_stdin (&status);
+
+  return status;
+}



More information about the Elfutils-devel mailing list