LCOV - code coverage report
Current view: top level - debuginfod - debuginfod.cxx (source / functions) Hit Total Coverage
Test: elfutils-0.182 Lines: 1183 1465 80.8 %
Date: 2020-10-31 23:45:54 Functions: 89 106 84.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* Debuginfo-over-http server.
       2             :    Copyright (C) 2019-2020 Red Hat, Inc.
       3             :    This file is part of elfutils.
       4             : 
       5             :    This file is free software; you can redistribute it and/or modify
       6             :    it under the terms of the GNU General Public License as published by
       7             :    the Free Software Foundation; either version 3 of the License, or
       8             :    (at your option) any later version.
       9             : 
      10             :    elfutils is distributed in the hope that it will be useful, but
      11             :    WITHOUT ANY WARRANTY; without even the implied warranty of
      12             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13             :    GNU General Public License for more details.
      14             : 
      15             :    You should have received a copy of the GNU General Public License
      16             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
      17             : 
      18             : 
      19             : /* cargo-cult from libdwfl linux-kernel-modules.c */
      20             : /* In case we have a bad fts we include this before config.h because it
      21             :    can't handle _FILE_OFFSET_BITS.
      22             :    Everything we need here is fine if its declarations just come first.
      23             :    Also, include sys/types.h before fts. On some systems fts.h is not self
      24             :    contained. */
      25             : #ifdef BAD_FTS
      26             :   #include <sys/types.h>
      27             :   #include <fts.h>
      28             : #endif
      29             : 
      30             : #ifdef HAVE_CONFIG_H
      31             :   #include "config.h"
      32             : #endif
      33             : 
      34             : extern "C" {
      35             : #include "printversion.h"
      36             : }
      37             : 
      38             : #include "debuginfod.h"
      39             : #include <dwarf.h>
      40             : 
      41             : #include <argp.h>
      42             : #ifdef __GNUC__
      43             : #undef __attribute__ /* glibc bug - rhbz 1763325 */
      44             : #endif
      45             : 
      46             : #include <unistd.h>
      47             : #include <stdlib.h>
      48             : #include <error.h>
      49             : // #include <libintl.h> // not until it supports C++ << better
      50             : #include <locale.h>
      51             : #include <pthread.h>
      52             : #include <signal.h>
      53             : #include <sys/stat.h>
      54             : #include <sys/time.h>
      55             : #include <sys/vfs.h>
      56             : #include <unistd.h>
      57             : #include <fcntl.h>
      58             : #include <netdb.h>
      59             : 
      60             : 
      61             : /* If fts.h is included before config.h, its indirect inclusions may not
      62             :    give us the right LFS aliases of these functions, so map them manually.  */
      63             : #ifdef BAD_FTS
      64             :   #ifdef _FILE_OFFSET_BITS
      65             :     #define open open64
      66             :     #define fopen fopen64
      67             :   #endif
      68             : #else
      69             :   #include <sys/types.h>
      70             :   #include <fts.h>
      71             : #endif
      72             : 
      73             : #include <cstring>
      74             : #include <vector>
      75             : #include <set>
      76             : #include <map>
      77             : #include <string>
      78             : #include <iostream>
      79             : #include <iomanip>
      80             : #include <ostream>
      81             : #include <sstream>
      82             : #include <mutex>
      83             : #include <deque>
      84             : #include <condition_variable>
      85             : #include <thread>
      86             : // #include <regex> // on rhel7 gcc 4.8, not competent
      87             : #include <regex.h>
      88             : // #include <algorithm>
      89             : using namespace std;
      90             : 
      91             : #include <gelf.h>
      92             : #include <libdwelf.h>
      93             : 
      94             : #include <microhttpd.h>
      95             : 
      96             : #if MHD_VERSION >= 0x00097002
      97             : // libmicrohttpd 0.9.71 broke API
      98             : #define MHD_RESULT enum MHD_Result
      99             : #else
     100             : #define MHD_RESULT int
     101             : #endif
     102             : 
     103             : #include <curl/curl.h>
     104             : #include <archive.h>
     105             : #include <archive_entry.h>
     106             : #include <sqlite3.h>
     107             : 
     108             : #ifdef __linux__
     109             : #include <sys/syscall.h>
     110             : #endif
     111             : 
     112             : #ifdef __linux__
     113             : #define tid() syscall(SYS_gettid)
     114             : #else
     115             : #define tid() pthread_self()
     116             : #endif
     117             : 
     118             : 
     // True when NEEDLE is a suffix of HAYSTACK.  An empty NEEDLE is a
     // suffix of every string; a NEEDLE longer than HAYSTACK never matches.
     inline bool
     string_endswith(const string& haystack, const string& needle)
     {
       const string::size_type hay_len = haystack.size();
       const string::size_type tail_len = needle.size();
       if (hay_len < tail_len)
         return false; // needle cannot fit
       // Compare the last tail_len characters of haystack against needle.
       return haystack.compare(hay_len - tail_len, tail_len, needle) == 0;
     }
     126             : 
     127             : 
     128             : // Roll this identifier for every sqlite schema incompatibility.
     129             : #define BUILDIDS "buildids9"
     130             : 
                       // Emit the "without rowid" table clause only when compiled against
                       // sqlite headers >= 3008000; otherwise it expands to an empty string.
     131             : #if SQLITE_VERSION_NUMBER >= 3008000
     132             : #define WITHOUT_ROWID "without rowid"
     133             : #else
     134             : #define WITHOUT_ROWID ""
     135             : #endif
     136             : 
                       // SQL script text, in this order: connection pragmas; "create ... if not
                       // exists" for the tables and views of the current schema (all names
                       // prefixed with BUILDIDS; the _stats view is dropped and recreated by
                       // this script); "drop ... if exists" for the tables and views of every
                       // earlier schema generation.
     137             : static const char DEBUGINFOD_SQLITE_DDL[] =
     138             :   "pragma foreign_keys = on;\n"
     139             :   "pragma synchronous = 0;\n" // disable fsync()s - this cache is disposable across a machine crash
     140             :   "pragma journal_mode = wal;\n" // https://sqlite.org/wal.html
     141             :   "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
     142             :   "pragma journal_size_limit = 0;\n" // limit steady state file (between grooming, which also =truncate's)
     143             :   "pragma auto_vacuum = incremental;\n" // https://sqlite.org/pragma.html
     144             :   "pragma busy_timeout = 1000;\n" // https://sqlite.org/pragma.html
     145             :   // NB: all these are overridable with -D option
     146             : 
     147             :   // Normalization table for interning file names
     148             :   "create table if not exists " BUILDIDS "_files (\n"
     149             :   "        id integer primary key not null,\n"
     150             :   "        name text unique not null\n"
     151             :   "        );\n"
     152             :   // Normalization table for interning buildids
     153             :   "create table if not exists " BUILDIDS "_buildids (\n"
     154             :   "        id integer primary key not null,\n"
     155             :   "        hex text unique not null);\n"
     156             :   // Track the completion of scanning of a given file & sourcetype at given time
     157             :   "create table if not exists " BUILDIDS "_file_mtime_scanned (\n"
     158             :   "        mtime integer not null,\n"
     159             :   "        file integer not null,\n"
     160             :   "        size integer not null,\n" // in bytes
     161             :   "        sourcetype text(1) not null\n"
     162             :   "            check (sourcetype IN ('F', 'R')),\n"
     163             :   "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     164             :   "        primary key (file, mtime, sourcetype)\n"
     165             :   "        ) " WITHOUT_ROWID ";\n"
                         // 'file d/e' in _stats: buildid -> file rows, each flagged as debuginfo
                         // and/or executable; the 'F' half of the _query_d / _query_e views.
     166             :   "create table if not exists " BUILDIDS "_f_de (\n"
     167             :   "        buildid integer not null,\n"
     168             :   "        debuginfo_p integer not null,\n"
     169             :   "        executable_p integer not null,\n"
     170             :   "        file integer not null,\n"
     171             :   "        mtime integer not null,\n"
     172             :   "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     173             :   "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
     174             :   "        primary key (buildid, file, mtime)\n"
     175             :   "        ) " WITHOUT_ROWID ";\n"
                         // 'file s' in _stats: buildid -> (artifactsrc, file) rows; the 'F' half
                         // of the _query_s view.
     176             :   "create table if not exists " BUILDIDS "_f_s (\n"
     177             :   "        buildid integer not null,\n"
     178             :   "        artifactsrc integer not null,\n"
     179             :   "        file integer not null,\n" // NB: not necessarily entered into _mtime_scanned
     180             :   "        mtime integer not null,\n"
     181             :   "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     182             :   "        foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     183             :   "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
     184             :   "        primary key (buildid, artifactsrc, file, mtime)\n"
     185             :   "        ) " WITHOUT_ROWID ";\n"
                         // 'archive d/e' in _stats: same columns as _f_de plus a `content` file
                         // reference, which the views expose as source1.
     186             :   "create table if not exists " BUILDIDS "_r_de (\n"
     187             :   "        buildid integer not null,\n"
     188             :   "        debuginfo_p integer not null,\n"
     189             :   "        executable_p integer not null,\n"
     190             :   "        file integer not null,\n"
     191             :   "        mtime integer not null,\n"
     192             :   "        content integer not null,\n"
     193             :   "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     194             :   "        foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     195             :   "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
     196             :   "        primary key (buildid, debuginfo_p, executable_p, file, content, mtime)\n"
     197             :   "        ) " WITHOUT_ROWID ";\n"
     198             :   "create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm
     199             :   "        buildid integer not null,\n"
     200             :   "        artifactsrc integer not null,\n"
     201             :   "        foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     202             :   "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
     203             :   "        primary key (buildid, artifactsrc)\n"
     204             :   "        ) " WITHOUT_ROWID ";\n"
     205             :   "create table if not exists " BUILDIDS "_r_sdef (\n" // rpm contents that may satisfy sref
     206             :   "        file integer not null,\n"
     207             :   "        mtime integer not null,\n"
     208             :   "        content integer not null,\n"
     209             :   "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     210             :   "        foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     211             :   "        primary key (content, file, mtime)\n"
     212             :   "        ) " WITHOUT_ROWID ";\n"
     213             :   // create views to glue together some of the above tables, for webapi D queries
     214             :   "create view if not exists " BUILDIDS "_query_d as \n"
     215             :   "select\n"
     216             :   "        b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
     217             :   "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n"
     218             :   "        where b.id = n.buildid and f0.id = n.file and n.debuginfo_p = 1\n"
     219             :   "union all select\n"
     220             :   "        b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
     221             :   "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n"
     222             :   "        where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.debuginfo_p = 1\n"
     223             :   ";"
     224             :   // ... and for E queries
     225             :   "create view if not exists " BUILDIDS "_query_e as \n"
     226             :   "select\n"
     227             :   "        b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
     228             :   "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n"
     229             :   "        where b.id = n.buildid and f0.id = n.file and n.executable_p = 1\n"
     230             :   "union all select\n"
     231             :   "        b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
     232             :   "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n"
     233             :   "        where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.executable_p = 1\n"
     234             :   ";"
     235             :   // ... and for S queries
     236             :   "create view if not exists " BUILDIDS "_query_s as \n"
     237             :   "select\n"
     238             :   "        b.hex as buildid, fs.name as artifactsrc, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1, null as source0ref\n"
     239             :   "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files fs, " BUILDIDS "_f_s n\n"
     240             :   "        where b.id = n.buildid and f0.id = n.file and fs.id = n.artifactsrc\n"
     241             :   "union all select\n"
     242             :   "        b.hex as buildid, f1.name as artifactsrc, 'R' as sourcetype, f0.name as source0, sd.mtime as mtime, f1.name as source1, fsref.name as source0ref\n"
     243             :   "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_files fsref, "
     244             :   "        " BUILDIDS "_r_sdef sd, " BUILDIDS "_r_sref sr, " BUILDIDS "_r_de sde\n"
     245             :   "        where b.id = sr.buildid and f0.id = sd.file and fsref.id = sde.file and f1.id = sd.content\n"
     246             :   "        and sr.artifactsrc = sd.content and sde.buildid = sr.buildid\n"
     247             :   ";"
     248             :   // and for startup overview counts
     249             :   "drop view if exists " BUILDIDS "_stats;\n"
     250             :   "create view if not exists " BUILDIDS "_stats as\n"
     251             :   "          select 'file d/e' as label,count(*) as quantity from " BUILDIDS "_f_de\n"
     252             :   "union all select 'file s',count(*) from " BUILDIDS "_f_s\n"
     253             :   "union all select 'archive d/e',count(*) from " BUILDIDS "_r_de\n"
     254             :   "union all select 'archive sref',count(*) from " BUILDIDS "_r_sref\n"
     255             :   "union all select 'archive sdef',count(*) from " BUILDIDS "_r_sdef\n"
     256             :   "union all select 'buildids',count(*) from " BUILDIDS "_buildids\n"
     257             :   "union all select 'filenames',count(*) from " BUILDIDS "_files\n"
     258             :   "union all select 'files scanned (#)',count(*) from " BUILDIDS "_file_mtime_scanned\n"
     259             :   "union all select 'files scanned (mb)',coalesce(sum(size)/1024/1024,0) from " BUILDIDS "_file_mtime_scanned\n"
                         // The 'index db size (mb)' row is only compiled in against sqlite
                         // headers >= 3016000.
     260             : #if SQLITE_VERSION_NUMBER >= 3016000
     261             :   "union all select 'index db size (mb)',page_count*page_size/1024/1024 as size FROM pragma_page_count(), pragma_page_size()\n"
     262             : #endif
     263             :   ";\n"
     264             : 
     265             : // schema change history & garbage collection
     266             : //
     267             : // XXX: we could have migration queries here to bring prior-schema
     268             : // data over instead of just dropping it.
     269             : //
     270             : // buildids9: widen the mtime_scanned table
     271             :   "" // <<< we are here
     272             : // buildids8: slim the sref table
     273             :   "drop table if exists buildids8_f_de;\n"
     274             :   "drop table if exists buildids8_f_s;\n"
     275             :   "drop table if exists buildids8_r_de;\n"
     276             :   "drop table if exists buildids8_r_sref;\n"
     277             :   "drop table if exists buildids8_r_sdef;\n"
     278             :   "drop table if exists buildids8_file_mtime_scanned;\n"
     279             :   "drop table if exists buildids8_files;\n"
     280             :   "drop table if exists buildids8_buildids;\n"
     281             : // buildids7: separate _norm table into dense subtype tables
     282             :   "drop table if exists buildids7_f_de;\n"
     283             :   "drop table if exists buildids7_f_s;\n"
     284             :   "drop table if exists buildids7_r_de;\n"
     285             :   "drop table if exists buildids7_r_sref;\n"
     286             :   "drop table if exists buildids7_r_sdef;\n"
     287             :   "drop table if exists buildids7_file_mtime_scanned;\n"
     288             :   "drop table if exists buildids7_files;\n"
     289             :   "drop table if exists buildids7_buildids;\n"
     290             : // buildids6: drop bolo/rfolo again, represent sources / rpmcontents in main table
     291             :   "drop table if exists buildids6_norm;\n"
     292             :   "drop table if exists buildids6_files;\n"
     293             :   "drop table if exists buildids6_buildids;\n"
     294             :   "drop view if exists buildids6;\n"
     295             : // buildids5: redefine srcfile1 column to be '.'-less (for rpms)
     296             :   "drop table if exists buildids5_norm;\n"
     297             :   "drop table if exists buildids5_files;\n"
     298             :   "drop table if exists buildids5_buildids;\n"
     299             :   "drop table if exists buildids5_bolo;\n"
     300             :   "drop table if exists buildids5_rfolo;\n"
     301             :   "drop view if exists buildids5;\n"
     302             : // buildids4: introduce rpmfile RFOLO
     303             :   "drop table if exists buildids4_norm;\n"
     304             :   "drop table if exists buildids4_files;\n"
     305             :   "drop table if exists buildids4_buildids;\n"
     306             :   "drop table if exists buildids4_bolo;\n"
     307             :   "drop table if exists buildids4_rfolo;\n"
     308             :   "drop view if exists buildids4;\n"
     309             : // buildids3*: split out srcfile BOLO
     310             :   "drop table if exists buildids3_norm;\n"
     311             :   "drop table if exists buildids3_files;\n"
     312             :   "drop table if exists buildids3_buildids;\n"
     313             :   "drop table if exists buildids3_bolo;\n"
     314             :   "drop view if exists buildids3;\n"
     315             : // buildids2: normalized buildid and filenames into interning tables;
     316             :   "drop table if exists buildids2_norm;\n"
     317             :   "drop table if exists buildids2_files;\n"
     318             :   "drop table if exists buildids2_buildids;\n"
     319             :   "drop view if exists buildids2;\n"
     320             :   // buildids1: made buildid and artifacttype NULLable, to represent cached-negative
     321             : //           lookups from sources, e.g. files or rpms that contain no buildid-indexable content
     322             :   "drop table if exists buildids1;\n"
     323             : // buildids: original
     324             :   "drop table if exists buildids;\n"
     325             :   ;
     326             : 
     327             : static const char DEBUGINFOD_SQLITE_CLEANUP_DDL[] =
                         // One statement only: checkpoint the write-ahead log in truncate mode.
                         // NOTE(review): the place where this script is executed is not visible
                         // in this part of the file - presumably at shutdown, given the name;
                         // confirm against the caller.
     328             :   "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
     329             :   ;
     330             : 
     331             : 
     332             : 
     333             : 
     334             : /* Name and version of program.  */
     335             : /* ARGP_PROGRAM_VERSION_HOOK_DEF = print_version; */ // not this simple for C++
     336             : 
     337             : /* Bug report address.  */
     338             : ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
     339             : 
     340             : /* Definitions of arguments for argp functions.  */
                       /* Entries with a NULL name and only a help string ("Scanners:",
                          "Options:") are group headers for the --help output.  The
                          ARGP_KEY_FDCACHE_* keys (0x1001-0x1003) are not printable characters,
                          so per argp those options exist in long form only.  The all-zero
                          entry terminates the table.  Every key listed here is handled in
                          parse_opt().  */
     341             : static const struct argp_option options[] =
     342             :   {
     343             :    { NULL, 0, NULL, 0, "Scanners:", 1 },
     344             :    { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning.", 0 },
     345             :    { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning.", 0 },
     346             :    { "scan-deb-dir", 'U', NULL, 0, "Enable DEB scanning.", 0 },
     347             :    { "scan-archive", 'Z', "EXT=CMD", 0, "Enable arbitrary archive scanning.", 0 },
     348             :    // "source-oci-imageregistry"  ...
     349             : 
     350             :    { NULL, 0, NULL, 0, "Options:", 2 },
     351             :    { "logical", 'L', NULL, 0, "Follow symlinks, default=ignore.", 0 },
     352             :    { "rescan-time", 't', "SECONDS", 0, "Number of seconds to wait between rescans, 0=disable.", 0 },
     353             :    { "groom-time", 'g', "SECONDS", 0, "Number of seconds to wait between database grooming, 0=disable.", 0 },
     354             :    { "maxigroom", 'G', NULL, 0, "Run a complete database groom/shrink pass at startup.", 0 },
     355             :    { "concurrency", 'c', "NUM", 0, "Limit scanning thread concurrency to NUM.", 0 },
     356             :    { "include", 'I', "REGEX", 0, "Include files matching REGEX, default=all.", 0 },
     357             :    { "exclude", 'X', "REGEX", 0, "Exclude files matching REGEX, default=none.", 0 },
     358             :    { "port", 'p', "NUM", 0, "HTTP port to listen on, default 8002.", 0 },
     359             :    { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
     360             :    { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
     361             :    { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
     362             : #define ARGP_KEY_FDCACHE_FDS 0x1001
     363             :    { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 },
     364             : #define ARGP_KEY_FDCACHE_MBS 0x1002
     365             :    { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
     366             : #define ARGP_KEY_FDCACHE_PREFETCH 0x1003
     367             :    { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
     368             :    { NULL, 0, NULL, 0, NULL, 0 }
     369             :   };
     370             : 
     371             : /* Short description of program.  */
     372             : static const char doc[] = "Serve debuginfo-related content across HTTP from files under PATHs.";
     373             : 
     374             : /* Strings for arguments in help texts.  */
     375             : static const char args_doc[] = "[PATH ...]";
     376             : 
     377             : /* Prototype for option handler.  */
     378             : static error_t parse_opt (int key, char *arg, struct argp_state *state);
     379             : 
     380             : /* Data structure to communicate with argp functions.  */
                       /* Initializer order: options, parser, args_doc, doc; the three trailing
                          NULLs leave the children, help_filter and argp_domain fields unset.  */
     381             : static struct argp argp =
     382             :   {
     383             :    options, parse_opt, args_doc, doc, NULL, NULL, NULL
     384             :   };
     385             : 
     386             : 
     387             : static string db_path; // -d FILE
     388             : static sqlite3 *db; // single connection, serialized across all our threads!
     389             : static unsigned verbose; // incremented once per -v
                       // NOTE(review): the sig_atomic_t flags/counters below are presumably
                       // written by signal handlers defined later in the file; nothing in this
                       // part of the file sets them - confirm.
     390             : static volatile sig_atomic_t interrupted = 0;
     391             : static volatile sig_atomic_t forced_rescan_count = 0;
     392             : static volatile sig_atomic_t sigusr1 = 0;
     393             : static volatile sig_atomic_t forced_groom_count = 0;
     394             : static volatile sig_atomic_t sigusr2 = 0;
     395             : static unsigned http_port = 8002; // -p NUM
     396             : static unsigned rescan_s = 300; // -t SECONDS; 0=disable
     397             : static unsigned groom_s = 86400; // -g SECONDS (default is one day); 0=disable
     398             : static bool maxigroom = false; // -G
     399             : static unsigned concurrency = std::thread::hardware_concurrency() ?: 1; // -c NUM; falls back to 1 when hardware_concurrency() returns 0
     400             : static set<string> source_paths; // positional PATH arguments
     401             : static bool scan_files = false; // -F
     402             : static map<string,string> scan_archives; // file-name extension -> command string; filled by -R / -U / -Z
     403             : static vector<string> extra_ddl; // one entry per -D SQL
     404             : static regex_t file_include_regex; // recompiled by -I REGEX
     405             : static regex_t file_exclude_regex; // recompiled by -X REGEX
     406             : static bool traverse_logical; // -L
     407             : static long fdcache_fds; // --fdcache-fds NUM
     408             : static long fdcache_mbs; // --fdcache-mbs MB
     409             : static long fdcache_prefetch; // --fdcache-prefetch NUM
     410             : static string tmpdir;
     411             : 
                       // Forward declarations of the metric helpers; their definitions are not
                       // in this part of the file.  The lname/lvalue overloads take one
                       // label-name / label-value pair in addition to the metric name.
     412             : static void set_metric(const string& key, int64_t value);
     413             : // static void inc_metric(const string& key);
     414             : static void set_metric(const string& metric,
     415             :                        const string& lname, const string& lvalue,
     416             :                        int64_t value);
     417             : static void inc_metric(const string& metric,
     418             :                        const string& lname, const string& lvalue);
     419             : static void add_metric(const string& metric,
     420             :                        const string& lname, const string& lvalue,
     421             :                        int64_t value);
     422             : // static void add_metric(const string& metric, int64_t value);
     423             : 
     424             : /* Handle program arguments.  */
     425             : static error_t
     426          41 : parse_opt (int key, char *arg,
     427             :            struct argp_state *state __attribute__ ((unused)))
     428             : {
     429             :   int rc;
     430          41 :   switch (key)
     431             :     {
     432           1 :     case 'v': verbose ++; break;
     433           3 :     case 'd': db_path = string(arg); break;
     434           3 :     case 'p': http_port = (unsigned) atoi(arg);
     435           3 :       if (http_port == 0 || http_port > 65535)
     436           0 :         argp_failure(state, 1, EINVAL, "port number");
     437           3 :       break;
     438           4 :     case 'F': scan_files = true; break;
     439           1 :     case 'R':
     440           1 :       scan_archives[".rpm"]="cat"; // libarchive groks rpm natively
     441           1 :       break;
     442           2 :     case 'U':
     443           2 :       if (access("/usr/bin/dpkg-deb", X_OK) == 0)
     444             :         {
     445           2 :           scan_archives[".deb"]="dpkg-deb --fsys-tarfile";
     446           2 :           scan_archives[".ddeb"]="dpkg-deb --fsys-tarfile";
     447             :         }
     448             :       else
     449             :         {
     450           0 :           scan_archives[".deb"]="(bsdtar -O -x -f - data.tar.xz)<";
     451           0 :           scan_archives[".ddeb"]="(bsdtar -O -x -f - data.tar.xz)<";
     452             :         }
     453             :       // .udeb too?
     454           2 :       break;
     455           2 :     case 'Z':
     456             :       {
     457           2 :         char* extension = strchr(arg, '=');
     458           2 :         if (arg[0] == '\0')
     459           0 :           argp_failure(state, 1, EINVAL, "missing EXT");
     460           2 :         else if (extension)
     461           1 :           scan_archives[string(arg, (extension-arg))]=string(extension+1);
     462             :         else
     463           1 :           scan_archives[string(arg)]=string("cat");
     464             :       }
     465           2 :       break;
     466           2 :     case 'L':
     467           2 :       traverse_logical = true;
     468           2 :       break;
     469           0 :     case 'D': extra_ddl.push_back(string(arg)); break;
     470           1 :     case 't':
     471           1 :       rescan_s = (unsigned) atoi(arg);
     472           1 :       break;
     473           1 :     case 'g':
     474           1 :       groom_s = (unsigned) atoi(arg);
     475           1 :       break;
     476           0 :     case 'G':
     477           0 :       maxigroom = true;
     478           0 :       break;
     479           0 :     case 'c':
     480           0 :       concurrency = (unsigned) atoi(arg);
     481           0 :       if (concurrency < 1) concurrency = 1;
     482           0 :       break;
     483           0 :     case 'I':
     484             :       // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
     485           0 :       regfree (&file_include_regex);
     486           0 :       rc = regcomp (&file_include_regex, arg, REG_EXTENDED|REG_NOSUB);
     487           0 :       if (rc != 0)
     488           0 :         argp_failure(state, 1, EINVAL, "regular expession");
     489           0 :       break;
     490           0 :     case 'X':
     491           0 :       regfree (&file_exclude_regex);
     492           0 :       rc = regcomp (&file_exclude_regex, arg, REG_EXTENDED|REG_NOSUB);
     493           0 :       if (rc != 0)
     494           0 :         argp_failure(state, 1, EINVAL, "regular expession");
     495           0 :       break;
     496           1 :     case ARGP_KEY_FDCACHE_FDS:
     497           1 :       fdcache_fds = atol (arg);
     498           1 :       break;
     499           1 :     case ARGP_KEY_FDCACHE_MBS:
     500           1 :       fdcache_mbs = atol (arg);
     501           1 :       break;
     502           0 :     case ARGP_KEY_FDCACHE_PREFETCH:
     503           0 :       fdcache_prefetch = atol (arg);
     504           0 :       break;
     505           6 :     case ARGP_KEY_ARG:
     506           6 :       source_paths.insert(string(arg));
     507           6 :       break;
     508             :       // case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
     509          13 :     default: return ARGP_ERR_UNKNOWN;
     510             :     }
     511             : 
     512          28 :   return 0;
     513             : }
     514             : 
     515             : 
     516             : ////////////////////////////////////////////////////////////////////////
     517             : 
     518             : 
     519             : // represent errors that may get reported to an ostream and/or a libmicrohttpd connection
     520             : 
     521             : struct reportable_exception
     522             : {
     523             :   int code;
     524             :   string message;
     525             : 
     526           7 :   reportable_exception(int c, const string& m): code(c), message(m) {}
     527          30 :   reportable_exception(const string& m): code(503), message(m) {}
     528             :   reportable_exception(): code(503), message() {}
     529             : 
     530             :   void report(ostream& o) const; // defined under obatched() class below
     531             : 
     532           9 :   MHD_RESULT mhd_send_response(MHD_Connection* c) const {
     533           9 :     MHD_Response* r = MHD_create_response_from_buffer (message.size(),
     534           9 :                                                        (void*) message.c_str(),
     535             :                                                        MHD_RESPMEM_MUST_COPY);
     536           9 :     MHD_add_response_header (r, "Content-Type", "text/plain");
     537           9 :     MHD_RESULT rc = MHD_queue_response (c, code, r);
     538           9 :     MHD_destroy_response (r);
     539           9 :     return rc;
     540             :   }
     541             : };
     542             : 
     543             : 
     544             : struct sqlite_exception: public reportable_exception
     545             : {
     546           0 :   sqlite_exception(int rc, const string& msg):
     547           0 :     reportable_exception(string("sqlite3 error: ") + msg + ": " + string(sqlite3_errstr(rc) ?: "?")) {}
     548             : };
     549             : 
     550             : struct libc_exception: public reportable_exception
     551             : {
     552          28 :   libc_exception(int rc, const string& msg):
     553          28 :     reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
     554          28 :     inc_metric("error_count","libc",strerror(rc));
     555          28 :   }
     556             : };
     557             : 
     558             : 
     559             : struct archive_exception: public reportable_exception
     560             : {
     561           0 :   archive_exception(const string& msg):
     562           0 :     reportable_exception(string("libarchive error: ") + msg) {
     563           0 :       inc_metric("error_count","libarchive",msg);
     564           0 :   }
     565           0 :   archive_exception(struct archive* a, const string& msg):
     566           0 :     reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
     567           0 :     inc_metric("error_count","libarchive",msg);
     568           0 :   }
     569             : };
     570             : 
     571             : 
     572             : struct elfutils_exception: public reportable_exception
     573             : {
     574           0 :   elfutils_exception(int rc, const string& msg):
     575           0 :     reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
     576           0 :     inc_metric("error_count","elfutils",elf_errmsg(rc));
     577           0 :   }
     578             : };
     579             : 
     580             : 
     581             : ////////////////////////////////////////////////////////////////////////
     582             : 
     // A mutex-protected work queue of unique Payload items.
     //
     // Three kinds of callers are visible in the interface: producers
     // (push_back), consumers / scanners (wait_front), and idlers
     // (wait_idle .. done_idle).  Scanners do not take work while any idler is
     // registered, and an idler does not register until the queue is empty.
     // nuke() marks the queue dead, which releases every waiter.
     template <typename Payload>
     class workq
     {
       set<Payload> q; // eliminate duplicates
       mutex mtx;
       condition_variable cv;
       bool dead;
       unsigned idlers;

     public:
       workq(): dead(false), idlers(0) {}
       ~workq() {}

       // Add one item (a no-op for the set if an equivalent item is already
       // queued), publish the new queue length, and wake all waiters.
       void push_back(const Payload& p)
       {
         lock_guard<mutex> guard(mtx);
         q.insert (p);
         set_metric("thread_work_pending","role","scan", q.size());
         cv.notify_all();
       }

       // kill this workqueue, wake up all idlers / scanners
       void nuke() {
         lock_guard<mutex> guard(mtx);
         // optional: q.clear();
         dead = true;
         cv.notify_all();
       }

       // clear the workqueue, when scanning is interrupted with USR2
       void clear() {
         lock_guard<mutex> guard(mtx);
         q.clear();
         set_metric("thread_work_pending","role","scan", q.size());
         cv.notify_all(); // maybe wake up waiting idlers
       }

       // block this scanner thread until there is work to do and no active
       // idler; returns false (leaving P untouched) once the queue is dead,
       // otherwise moves the first queued item into P and returns true
       bool wait_front (Payload& p)
       {
         unique_lock<mutex> guard(mtx);
         cv.wait(guard, [this] { return dead || (! q.empty() && idlers == 0); });
         if (dead)
           return false;

         auto first = q.begin();
         p = *first;
         q.erase (first);
         set_metric("thread_work_pending","role","scan", q.size());
         if (q.empty())
           cv.notify_all(); // maybe wake up waiting idlers
         return true;
       }

       // block this idler thread until there is no work to do (or the queue
       // is dead), then register it as an active idler
       void wait_idle ()
       {
         unique_lock<mutex> guard(mtx);
         cv.notify_all(); // maybe wake up waiting scanners
         cv.wait(guard, [this] { return dead || q.empty(); });
         idlers ++;
       }

       // unregister an idler previously registered by wait_idle()
       void done_idle ()
       {
         lock_guard<mutex> guard(mtx);
         idlers --;
         cv.notify_all(); // maybe wake up waiting scanners, but probably not (shutting down)
       }
     };
     656             : 
     typedef struct stat stat_t;
     typedef pair<string,stat_t> scan_payload;

     // Order scan payloads by their path name only.  The stat fields take no
     // part in the comparison, so two payloads naming the same path are
     // equivalent as far as an ordered container is concerned.
     inline bool operator< (const scan_payload& a, const scan_payload& b)
     {
       return a.first.compare(b.first) < 0;
     }
     663             : static workq<scan_payload> scanq; // the single file-scope queue of scan_payload work items
     664             : // producer & idler: thread_main_fts_source_paths()  (push_back, wait_idle/done_idle)
     665             : // consumer: thread_main_scanner()  (wait_front)
     666             : // idler: thread_main_groom()  (wait_idle/done_idle)
     667             : 
     668             : 
     669             : 
     670             : ////////////////////////////////////////////////////////////////////////
     671             : 
     672             : 
     673             : // Print a standard timestamp.
     674             : static ostream&
     675         315 : timestamp (ostream &o)
     676             : {
     677             :   char datebuf[80];
     678         315 :   char *now2 = NULL;
     679         315 :   time_t now_t = time(NULL);
     680         315 :   struct tm *now = gmtime (&now_t);
     681         315 :   if (now)
     682             :     {
     683         315 :       (void) strftime (datebuf, sizeof (datebuf), "%c", now);
     684         315 :       now2 = datebuf;
     685             :     }
     686             : 
     687             :   return o << "[" << (now2 ? now2 : "") << "] "
     688         315 :            << "(" << getpid () << "/" << tid() << "): ";
     689             : }
     690             : 
     691             : 
     692             : // A little class that impersonates an ostream to the extent that it can
     693             : // take << streaming operations.  It batches up the bits into an internal
     694             : // stringstream until it is destroyed; then flushes to the original ostream.
     695             : // It adds a timestamp
     696             : class obatched
     697             : {
     698             : private:
     699             :   ostream& o;
     700             :   stringstream stro;
     701             :   static mutex lock;
     702             : public:
     703         315 :   obatched(ostream& oo, bool timestamp_p = true): o(oo)
     704             :   {
     705         315 :     if (timestamp_p)
     706         315 :       timestamp(stro);
     707         315 :   }
     708         315 :   ~obatched()
     709         315 :   {
     710         630 :     unique_lock<mutex> do_not_cross_the_streams(obatched::lock);
     711         315 :     o << stro.str();
     712         315 :     o.flush();
     713         315 :   }
     714             :   operator ostream& () { return stro; }
     715         315 :   template <typename T> ostream& operator << (const T& t) { stro << t; return stro; }
     716             : };
     717             : mutex obatched::lock; // just the one, since cout/cerr iostreams are not thread-safe
     718             : 
     719             : 
     720          37 : void reportable_exception::report(ostream& o) const {
     721          37 :   obatched(o) << message << endl;
     722          37 : }
     723             : 
     724             : 
     725             : ////////////////////////////////////////////////////////////////////////
     726             : 
     727             : 
     728             : // RAII style sqlite prepared-statement holder that matches { } block lifetime
     729             : 
     730             : struct sqlite_ps
     731             : {
     732             : private:
     733             :   sqlite3* db;
     734             :   const string nickname;
     735             :   const string sql;
     736             :   sqlite3_stmt *pp;
     737             : 
     738             :   sqlite_ps(const sqlite_ps&); // make uncopyable
     739             :   sqlite_ps& operator=(const sqlite_ps &); // make unassignable
     740             : 
     741             : public:
     742         263 :   sqlite_ps (sqlite3* d, const string& n, const string& s): db(d), nickname(n), sql(s) {
     743         264 :     if (verbose > 4)
     744           0 :       obatched(clog) << nickname << " prep " << sql << endl;
     745         264 :     int rc = sqlite3_prepare_v2 (db, sql.c_str(), -1 /* to \0 */, & this->pp, NULL);
     746         264 :     if (rc != SQLITE_OK)
     747           0 :       throw sqlite_exception(rc, "prepare " + sql);
     748         264 :   }
     749             : 
     750        2275 :   sqlite_ps& reset()
     751             :   {
     752        2275 :     sqlite3_reset(this->pp);
     753        2275 :     return *this;
     754             :   }
     755             : 
     756        3135 :   sqlite_ps& bind(int parameter, const string& str)
     757             :   {
     758        3135 :     if (verbose > 4)
     759           0 :       obatched(clog) << nickname << " bind " << parameter << "=" << str << endl;
     760        3135 :     int rc = sqlite3_bind_text (this->pp, parameter, str.c_str(), -1, SQLITE_TRANSIENT);
     761        3135 :     if (rc != SQLITE_OK)
     762           0 :       throw sqlite_exception(rc, "sqlite3 bind");
     763        3135 :     return *this;
     764             :   }
     765             : 
     766         962 :   sqlite_ps& bind(int parameter, int64_t value)
     767             :   {
     768         962 :     if (verbose > 4)
     769           0 :       obatched(clog) << nickname << " bind " << parameter << "=" << value << endl;
     770         962 :     int rc = sqlite3_bind_int64 (this->pp, parameter, value);
     771         962 :     if (rc != SQLITE_OK)
     772           0 :       throw sqlite_exception(rc, "sqlite3 bind");
     773         962 :     return *this;
     774             :   }
     775             : 
     776             :   sqlite_ps& bind(int parameter)
     777             :   {
     778             :     if (verbose > 4)
     779             :       obatched(clog) << nickname << " bind " << parameter << "=" << "NULL" << endl;
     780             :     int rc = sqlite3_bind_null (this->pp, parameter);
     781             :     if (rc != SQLITE_OK)
     782             :       throw sqlite_exception(rc, "sqlite3 bind");
     783             :     return *this;
     784             :   }
     785             : 
     786             : 
     787        1811 :   void step_ok_done() {
     788        1811 :     int rc = sqlite3_step (this->pp);
     789        1811 :     if (verbose > 4)
     790           0 :       obatched(clog) << nickname << " step-ok-done(" << sqlite3_errstr(rc) << ") " << sql << endl;
     791        1811 :     if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
     792           0 :       throw sqlite_exception(rc, "sqlite3 step");
     793        1811 :     (void) sqlite3_reset (this->pp);
     794        1811 :   }
     795             : 
     796             : 
     797         359 :   int step() {
     798         359 :     int rc = sqlite3_step (this->pp);
     799         359 :     if (verbose > 4)
     800           0 :       obatched(clog) << nickname << " step(" << sqlite3_errstr(rc) << ") " << sql << endl;
     801         359 :     return rc;
     802             :   }
     803             : 
     804             : 
     805             : 
     806         264 :   ~sqlite_ps () { sqlite3_finalize (this->pp); }
     807        1030 :   operator sqlite3_stmt* () { return this->pp; }
     808             : };
     809             : 
     810             : 
     811             : ////////////////////////////////////////////////////////////////////////
     812             : 
     // RAII style templated autocloser: holds a value together with a cleanup
     // function, and calls the function on the value when the holder goes out
     // of scope.  Ignore is the cleanup function's return type; whatever it
     // returns is discarded.

     template <class Payload, class Ignore>
     struct defer_dtor
     {
     public:
       using dtor_fn = Ignore (*) (Payload);

     private:
       Payload held;
       dtor_fn cleanup;

     public:
       defer_dtor(Payload _p, dtor_fn _fn): held(_p), cleanup(_fn) {}
       ~defer_dtor() { (void) cleanup(held); }

       // neither copyable nor assignable: the cleanup must run exactly once
       defer_dtor(const defer_dtor<Payload,Ignore>&) = delete;
       defer_dtor& operator=(const defer_dtor<Payload,Ignore> &) = delete;
     };
     833             : 
     834             : 
     835             : 
     836             : ////////////////////////////////////////////////////////////////////////
     837             : 
     838             : 
     // Return a copy of STR keeping only alphanumerics and the characters
     // / . , _ : -- every other byte is dropped.  Used on client-supplied
     // header values before they are written to the log.
     static string
     header_censor(const string& str)
     {
       string y;
       for (auto&& x : str)
         {
           // The cast matters: handing isalnum() a negative plain char (any
           // byte >= 0x80 where char is signed) is undefined behavior.
           if (isalnum((unsigned char) x) || x == '/' || x == '.' || x == ',' || x == '_' || x == ':')
             y += x;
         }
       return y;
     }
     850             : 
     851             : 
     852             : static string
     853         113 : conninfo (struct MHD_Connection * conn)
     854             : {
     855             :   char hostname[256]; // RFC1035
     856             :   char servname[256];
     857         113 :   int sts = -1;
     858             : 
     859         113 :   if (conn == 0)
     860           0 :     return "internal";
     861             : 
     862             :   /* Look up client address data. */
     863         113 :   const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
     864             :                                                                MHD_CONNECTION_INFO_CLIENT_ADDRESS);
     865         113 :   struct sockaddr *so = u ? u->client_addr : 0;
     866             : 
     867         113 :   if (so && so->sa_family == AF_INET) {
     868         113 :     sts = getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), servname,
     869             :                        sizeof (servname), NI_NUMERICHOST | NI_NUMERICSERV);
     870           0 :   } else if (so && so->sa_family == AF_INET6) {
     871           0 :     sts = getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname),
     872             :                        servname, sizeof (servname), NI_NUMERICHOST | NI_NUMERICSERV);
     873             :   }
     874         113 :   if (sts != 0) {
     875           0 :     hostname[0] = servname[0] = '\0';
     876             :   }
     877             : 
     878             :   // extract headers relevant to administration
     879         113 :   const char* user_agent = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
     880         113 :   const char* x_forwarded_for = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
     881             :   // NB: these are untrustworthy, beware if machine-processing log files
     882             : 
     883         226 :   return string(hostname) + string(":") + string(servname) +
     884         452 :     string(" UA:") + header_censor(string(user_agent)) +
     885         339 :     string(" XFF:") + header_censor(string(x_forwarded_for));
     886             : }
     887             : 
     888             : 
     889             : 
     890             : ////////////////////////////////////////////////////////////////////////
     891             : 
     892             : 
     893             : static void
     894          51 : add_mhd_last_modified (struct MHD_Response *resp, time_t mtime)
     895             : {
     896          51 :   struct tm *now = gmtime (&mtime);
     897          51 :   if (now != NULL)
     898             :     {
     899             :       char datebuf[80];
     900          51 :       size_t rc = strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %T GMT", now);
     901          51 :       if (rc > 0 && rc < sizeof (datebuf))
     902          51 :         (void) MHD_add_response_header (resp, "Last-Modified", datebuf);
     903             :     }
     904             : 
     905          51 :   (void) MHD_add_response_header (resp, "Cache-Control", "public");
     906          51 : }
     907             : 
     908             : 
     909             : 
     910             : static struct MHD_Response*
     911          15 : handle_buildid_f_match (bool internal_req_t,
     912             :                         int64_t b_mtime,
     913             :                         const string& b_source0,
     914             :                         int *result_fd)
     915             : {
     916             :   (void) internal_req_t; // ignored
     917          15 :   int fd = open(b_source0.c_str(), O_RDONLY);
     918          15 :   if (fd < 0)
     919           0 :     throw libc_exception (errno, string("open ") + b_source0);
     920             : 
     921             :   // NB: use manual close(2) in error case instead of defer_dtor, because
     922             :   // in the normal case, we want to hand the fd over to libmicrohttpd for
     923             :   // file transfer.
     924             : 
     925             :   struct stat s;
     926          15 :   int rc = fstat(fd, &s);
     927          15 :   if (rc < 0)
     928             :     {
     929           0 :       close(fd);
     930           0 :       throw libc_exception (errno, string("fstat ") + b_source0);
     931             :     }
     932             : 
     933          15 :   if ((int64_t) s.st_mtime != b_mtime)
     934             :     {
     935           0 :       if (verbose)
     936           0 :         obatched(clog) << "mtime mismatch for " << b_source0 << endl;
     937           0 :       close(fd);
     938           0 :       return 0;
     939             :     }
     940             : 
     941          15 :   inc_metric ("http_responses_total","result","file");
     942          15 :   struct MHD_Response* r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
     943          15 :   if (r == 0)
     944             :     {
     945           0 :       if (verbose)
     946           0 :         obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
     947           0 :       close(fd);
     948             :     }
     949             :   else
     950             :     {
     951          15 :       MHD_add_response_header (r, "Content-Type", "application/octet-stream");
     952          15 :       add_mhd_last_modified (r, s.st_mtime);
     953          15 :       if (verbose > 1)
     954           0 :         obatched(clog) << "serving file " << b_source0 << endl;
     955             :       /* libmicrohttpd will close it. */
     956          15 :       if (result_fd)
     957          15 :         *result_fd = fd;
     958             :     }
     959             : 
     960          15 :   return r;
     961             : }
     962             : 
     963             : 
     // Quote all questionable characters of STR for safe passage through a
     // sh -c expansion: every byte that is neither alphanumeric nor '/' gets a
     // backslash in front of it.
     static string
     shell_escape(const string& str)
     {
       string y;
       for (auto&& x : str)
         {
           // The cast matters: handing isalnum() a negative plain char (any
           // byte >= 0x80 where char is signed) is undefined behavior.
           if (! isalnum((unsigned char) x) && x != '/')
             y += "\\";
           y += x;
         }
       return y;
     }
     977             : 
     978             : 
     // PR25548: Perform POSIX / RFC3986 style path canonicalization on the input string.
     //
     // Namely:
     //    //         ->   /
     //    /foo/../   ->   /
     //    /./        ->   /
     //
     // The mapping is applied to dwarf-side source path names, which may
     // contain these constructs, so that lookups still work for debuginfod
     // clients that canonicalize the paths themselves.
     //
     // realpath(3) is close but not quite right, because it also resolves
     // symbolic links.  Symlinks at the debuginfod server have nothing to
     // do with the build-time symlinks, thus they must not be considered.
     //
     // see also curl Curl_dedotdotify() aka RFC3986, which we mostly follow here
     // see also libc __realpath()
     // see also llvm llvm::sys::path::remove_dots()
     static string
     canon_pathname (const string& input)
     {
       // true iff S begins with PREFIX
       auto begins_with = [] (const string& s, const string& prefix) {
         return s.compare(0, prefix.size(), prefix) == 0;
       };

       string rest = input; // 5.2.4 (1): the not-yet-consumed input
       string out;          // the canonicalized output built so far

       while (! rest.empty())
         {
           // 5.2.4 (2) A: drop a leading "../" or "./"
           if (begins_with(rest, "../"))
             {
               rest.erase(0, 3);
               continue;
             }
           if (begins_with(rest, "./"))
             {
               rest.erase(0, 2);
               continue;
             }

           // 5.2.4 (2) B: "/./" collapses to "/"
           if (begins_with(rest, "/./"))
             {
               rest.erase(0, 2);
               continue;
             }
           if (rest == "/.")
             {
               rest.clear(); // no need to handle "/." complete-path-segment case; we're dealing with file names
               continue;
             }

           // 5.2.4 (2) C: "/../" also removes the last output segment
           if (begins_with(rest, "/../"))
             {
               rest.erase(0, 3);
               const string::size_type last_slash = out.rfind("/");
               if (last_slash == string::npos)
                 out.clear();
               else
                 out.erase(last_slash);
               continue;
             }
           if (rest == "/..")
             {
               rest.clear(); // no need to handle "/.." complete-path-segment case; we're dealing with file names
               continue;
             }

           // 5.2.4 (2) D
           // no need to handle these cases; we're dealing with file names
           if (rest == "." || rest == "..")
             {
               rest.clear();
               continue;
             }

           // POSIX special: map // to /
           if (begins_with(rest, "//"))
             {
               rest.erase(0, 1);
               continue;
             }

           // 5.2.4 (2) E: move one segment (with its leading slash, if any)
           // from the input to the output
           const string::size_type next_slash
             = rest.find("/", (rest[0] == '/' ? 1 : 0)); // skip first slash
           out.append(rest, 0, next_slash);
           if (next_slash == string::npos)
             rest.clear();
           else
             rest.erase(0, next_slash);
         }

       return out;
     }
    1052             : 
    1053             : 
    1054             : 
    1055             : // A map-like class that owns a cache of file descriptors (indexed by
    1056             : // file / content names).
    1057             : //
    1058             : // If only it could use fd's instead of file names ... but we can't
    1059             : // dup(2) to create independent descriptors for the same unlinked
    1060             : // files, so would have to use some goofy linux /proc/self/fd/%d
    1061             : // hack such as the following
    1062             : 
    1063             : #if 0 // illustration only: everything up to the matching #endif is compiled out
    1064             : int superdup(int fd) // returns a newly opened descriptor, or -1 on failure / non-linux
    1065             : {
    1066             : #ifdef __linux__
    1067             :   char *fdpath = NULL;
    1068             :   int rc = asprintf(& fdpath, "/proc/self/fd/%d", fd); // path naming FD under /proc/self/fd
    1069             :   int newfd;
    1070             :   if (rc >= 0)
    1071             :     newfd = open(fdpath, O_RDONLY); // open by path rather than dup(2)
    1072             :   else
    1073             :     newfd = -1; // could not build the path
    1074             :   free (fdpath);
    1075             :   return newfd;
    1076             : #else
    1077             :   return -1; // no fallback outside linux
    1078             : #endif
    1079             : }
    1080             : #endif
    1081             : 
    1082             : class libarchive_fdcache
    1083             : {
    1084             : private:
    1085             :   mutex fdcache_lock;
    1086             : 
    1087             :   struct fdcache_entry
    1088             :   {
    1089             :     string archive;
    1090             :     string entry;
    1091             :     string fd;
    1092             :     double fd_size_mb; // slightly rounded up megabytes
    1093             :   };
    1094             :   deque<fdcache_entry> lru; // @head: most recently used
    1095             :   long max_fds;
    1096             :   long max_mbs;
    1097             : 
    1098             : public:
    1099          82 :   void set_metrics()
    1100             :   {
    1101          82 :     double total_mb = 0.0;
    1102         186 :     for (auto i = lru.begin(); i < lru.end(); i++)
    1103         104 :       total_mb += i->fd_size_mb;
    1104          82 :     set_metric("fdcache_bytes", (int64_t)(total_mb*1024.0*1024.0));
    1105          82 :     set_metric("fdcache_count", lru.size());
    1106          82 :   }
    1107             : 
    1108          34 :   void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
    1109             :   {
    1110             :     {
    1111          68 :       unique_lock<mutex> lock(fdcache_lock);
    1112          69 :       for (auto i = lru.begin(); i < lru.end(); i++) // nuke preexisting copy
    1113             :         {
    1114          35 :           if (i->archive == a && i->entry == b)
    1115             :             {
    1116           0 :               unlink (i->fd.c_str());
    1117           0 :               lru.erase(i);
    1118           0 :               inc_metric("fdcache_op_count","op","dequeue");
    1119           0 :               break; // must not continue iterating
    1120             :             }
    1121             :         }
    1122          34 :       double mb = (sz+65535)/1048576.0; // round up to 64K block
    1123          68 :       fdcache_entry n = { a, b, fd, mb };
    1124          34 :       if (front_p)
    1125             :         {
    1126          24 :           inc_metric("fdcache_op_count","op","enqueue_front");
    1127          24 :           lru.push_front(n);
    1128             :         }
    1129             :       else
    1130             :         {
    1131          10 :           inc_metric("fdcache_op_count","op","enqueue_back");
    1132          10 :           lru.push_back(n);
    1133             :         }
    1134          34 :       if (verbose > 3)
    1135           0 :         obatched(clog) << "fdcache interned a=" << a << " b=" << b
    1136           0 :                        << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
    1137             :     }
    1138          34 :     set_metrics();
    1139             : 
    1140             :     // NB: we age the cache at lookup time too
    1141          34 :     if (front_p)
    1142          24 :       this->limit(max_fds, max_mbs); // age cache if required
    1143          34 :   }
    1144             : 
    1145          35 :   int lookup(const string& a, const string& b)
    1146             :   {
    1147          35 :     int fd = -1;
    1148             :     {
    1149          70 :       unique_lock<mutex> lock(fdcache_lock);
    1150          67 :       for (auto i = lru.begin(); i < lru.end(); i++)
    1151             :         {
    1152          43 :           if (i->archive == a && i->entry == b)
    1153             :             { // found it; move it to head of lru
    1154          22 :               fdcache_entry n = *i;
    1155          11 :               lru.erase(i); // invalidates i, so no more iteration!
    1156          11 :               lru.push_front(n);
    1157          11 :               inc_metric("fdcache_op_count","op","requeue_front");
    1158          11 :               fd = open(n.fd.c_str(), O_RDONLY); // NB: no problem if dup() fails; looks like cache miss
    1159          11 :               break;
    1160             :             }
    1161             :         }
    1162             :     }
    1163             : 
    1164          35 :     if (fd >= 0)
    1165          11 :       this->limit(max_fds, max_mbs); // age cache if required
    1166             : 
    1167          35 :     return fd;
    1168             :   }
    1169             : 
    1170          34 :   int probe(const string& a, const string& b) // just a cache residency check - don't modify LRU state, don't open
    1171             :   {
    1172          68 :     unique_lock<mutex> lock(fdcache_lock);
    1173          69 :     for (auto i = lru.begin(); i < lru.end(); i++)
    1174             :       {
    1175          35 :         if (i->archive == a && i->entry == b)
    1176             :           {
    1177           0 :             inc_metric("fdcache_op_count","op","probe_hit");
    1178           0 :             return true;
    1179             :           }
    1180             :       }
    1181          34 :     inc_metric("fdcache_op_count","op","probe_miss");
    1182          34 :     return false;
    1183             :   }
    1184             : 
    1185           0 :   void clear(const string& a, const string& b)
    1186             :   {
    1187           0 :     unique_lock<mutex> lock(fdcache_lock);
    1188           0 :     for (auto i = lru.begin(); i < lru.end(); i++)
    1189             :       {
    1190           0 :         if (i->archive == a && i->entry == b)
    1191             :           { // found it; move it to head of lru
    1192           0 :             fdcache_entry n = *i;
    1193           0 :             lru.erase(i); // invalidates i, so no more iteration!
    1194           0 :             inc_metric("fdcache_op_count","op","clear");
    1195           0 :             unlink (n.fd.c_str());
    1196           0 :             set_metrics();
    1197           0 :             return;
    1198             :           }
    1199             :       }
    1200             :   }
    1201             : 
    1202          51 :   void limit(long maxfds, long maxmbs, bool metrics_p = true)
    1203             :   {
    1204          51 :     if (verbose > 3 && (this->max_fds != maxfds || this->max_mbs != maxmbs))
    1205           0 :       obatched(clog) << "fdcache limited to maxfds=" << maxfds << " maxmbs=" << maxmbs << endl;
    1206             : 
    1207         102 :     unique_lock<mutex> lock(fdcache_lock);
    1208          51 :     this->max_fds = maxfds;
    1209          51 :     this->max_mbs = maxmbs;
    1210             : 
    1211          51 :     long total_fd = 0;
    1212          51 :     double total_mb = 0.0;
    1213          86 :     for (auto i = lru.begin(); i < lru.end(); i++)
    1214             :       {
    1215             :         // accumulate totals from most recently used one going backward
    1216          66 :         total_fd ++;
    1217          66 :         total_mb += i->fd_size_mb;
    1218          66 :         if (total_fd > max_fds || total_mb > max_mbs)
    1219             :           {
    1220             :             // found the cut here point!
    1221             : 
    1222          65 :             for (auto j = i; j < lru.end(); j++) // close all the fds from here on in
    1223             :               {
    1224          34 :                 if (verbose > 3)
    1225           0 :                   obatched(clog) << "fdcache evicted a=" << j->archive << " b=" << j->entry
    1226           0 :                                  << " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
    1227          34 :                 if (metrics_p)
    1228          33 :                   inc_metric("fdcache_op_count","op","evict");
    1229          34 :                 unlink (j->fd.c_str());
    1230             :               }
    1231             : 
    1232          31 :             lru.erase(i, lru.end()); // erase the nodes generally
    1233          31 :             break;
    1234             :           }
    1235             :       }
    1236          51 :     if (metrics_p) set_metrics();
    1237          51 :   }
    1238             : 
    1239           3 :   ~libarchive_fdcache()
    1240           3 :   {
    1241             :     // unlink any fdcache entries in $TMPDIR
    1242             :     // don't update metrics; those globals may be already destroyed 
    1243           3 :     limit(0, 0, false);
    1244           3 :   }
    1245             : };
    1246             : static libarchive_fdcache fdcache;
    1247             : 
    1248             : 
    1249             : // For security/portability reasons, many distro-package archives have
    1250             : // a "./" in front of path names; others have nothing, others have
    1251             : // "/".  Canonicalize them all to a single leading "/", with the
    1252             : // assumption that this matches the dwarf-derived file names too.
    1253         104 : string canonicalized_archive_entry_pathname(struct archive_entry *e)
    1254             : {
    1255         208 :   string fn = archive_entry_pathname(e);
    1256         104 :   if (fn.size() == 0)
    1257           0 :     return fn;
    1258         104 :   if (fn[0] == '/')
    1259           0 :     return fn;
    1260         104 :   if (fn[0] == '.')
    1261          86 :     return fn.substr(1);
    1262             :   else
    1263          18 :     return string("/")+fn;
    1264             : }
    1265             : 
    1266             : 
    1267             : 
    1268             : static struct MHD_Response*
    1269          61 : handle_buildid_r_match (bool internal_req_p,
    1270             :                         int64_t b_mtime,
    1271             :                         const string& b_source0,
    1272             :                         const string& b_source1,
    1273             :                         int *result_fd)
    1274             : {
    1275             :   struct stat fs;
    1276          61 :   int rc = stat (b_source0.c_str(), &fs);
    1277          61 :   if (rc != 0)
    1278          26 :     throw libc_exception (errno, string("stat ") + b_source0);
    1279             : 
    1280          35 :   if ((int64_t) fs.st_mtime != b_mtime)
    1281             :     {
    1282           0 :       if (verbose)
    1283           0 :         obatched(clog) << "mtime mismatch for " << b_source0 << endl;
    1284           0 :       return 0;
    1285             :     }
    1286             : 
    1287             :   // check for a match in the fdcache first
    1288          35 :   int fd = fdcache.lookup(b_source0, b_source1);
    1289          35 :   while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
    1290             :     {
    1291          11 :       rc = fstat(fd, &fs);
    1292          11 :       if (rc < 0) // disappeared?
    1293             :         {
    1294           0 :           if (verbose)
    1295           0 :             obatched(clog) << "cannot fstat fdcache " << b_source0 << endl;
    1296           0 :           close(fd);
    1297           0 :           fdcache.clear(b_source0, b_source1);
    1298           0 :           break; // branch out of if "loop", to try new libarchive fetch attempt
    1299             :         }
    1300             : 
    1301          11 :       struct MHD_Response* r = MHD_create_response_from_fd (fs.st_size, fd);
    1302          11 :       if (r == 0)
    1303             :         {
    1304           0 :           if (verbose)
    1305           0 :             obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
    1306           0 :           close(fd);
    1307           0 :           break; // branch out of if "loop", to try new libarchive fetch attempt
    1308             :         }
    1309             : 
    1310          11 :       inc_metric ("http_responses_total","result","archive fdcache");
    1311             : 
    1312          11 :       MHD_add_response_header (r, "Content-Type", "application/octet-stream");
    1313          11 :       add_mhd_last_modified (r, fs.st_mtime);
    1314          11 :       if (verbose > 1)
    1315           0 :         obatched(clog) << "serving fdcache archive " << b_source0 << " file " << b_source1 << endl;
    1316             :       /* libmicrohttpd will close it. */
    1317          11 :       if (result_fd)
    1318          11 :         *result_fd = fd;
    1319          11 :       return r;
    1320             :       // NB: see, we never go around the 'loop' more than once
    1321             :     }
    1322             : 
    1323             :   // no match ... grumble, must process the archive
    1324          48 :   string archive_decoder = "/dev/null";
    1325          48 :   string archive_extension = "";
    1326          96 :   for (auto&& arch : scan_archives)
    1327          72 :     if (string_endswith(b_source0, arch.first))
    1328             :       {
    1329          24 :         archive_extension = arch.first;
    1330          24 :         archive_decoder = arch.second;
    1331             :       }
    1332             :   FILE* fp;
    1333             :   defer_dtor<FILE*,int>::dtor_fn dfn;
    1334          24 :   if (archive_decoder != "cat")
    1335             :     {
    1336           3 :       string popen_cmd = archive_decoder + " " + shell_escape(b_source0);
    1337           1 :       fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
    1338           1 :       dfn = pclose;
    1339           1 :       if (fp == NULL)
    1340           0 :         throw libc_exception (errno, string("popen ") + popen_cmd);
    1341             :     }
    1342             :   else
    1343             :     {
    1344          23 :       fp = fopen (b_source0.c_str(), "r");
    1345          23 :       dfn = fclose;
    1346          23 :       if (fp == NULL)
    1347           0 :         throw libc_exception (errno, string("fopen ") + b_source0);
    1348             :     }
    1349          48 :   defer_dtor<FILE*,int> fp_closer (fp, dfn);
    1350             : 
    1351             :   struct archive *a;
    1352          24 :   a = archive_read_new();
    1353          24 :   if (a == NULL)
    1354           0 :     throw archive_exception("cannot create archive reader");
    1355          24 :   defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
    1356             : 
    1357          24 :   rc = archive_read_support_format_all(a);
    1358          24 :   if (rc != ARCHIVE_OK)
    1359           0 :     throw archive_exception(a, "cannot select all format");
    1360          24 :   rc = archive_read_support_filter_all(a);
    1361          24 :   if (rc != ARCHIVE_OK)
    1362           0 :     throw archive_exception(a, "cannot select all filters");
    1363             : 
    1364          24 :   rc = archive_read_open_FILE (a, fp);
    1365          24 :   if (rc != ARCHIVE_OK)
    1366           0 :     throw archive_exception(a, "cannot open archive from pipe");
    1367             : 
    1368             :   // archive traversal is in three stages, no, four stages:
    1369             :   // 1) skip entries whose names do not match the requested one
    1370             :   // 2) extract the matching entry name (set r = result)
    1371             :   // 3) extract some number of prefetched entries (just into fdcache)
    1372             :   // 4) abort any further processing
    1373          24 :   struct MHD_Response* r = 0;                 // will set in stage 2
    1374          24 :   unsigned prefetch_count =
    1375             :     internal_req_p ? 0 : fdcache_prefetch;    // will decrement in stage 3
    1376             : 
    1377         238 :   while(r == 0 || prefetch_count > 0) // stage 1, 2, or 3
    1378             :     {
    1379         234 :       if (interrupted)
    1380          20 :         break;
    1381             : 
    1382             :       struct archive_entry *e;
    1383         234 :       rc = archive_read_next_header (a, &e);
    1384         234 :       if (rc != ARCHIVE_OK)
    1385          20 :         break;
    1386             : 
    1387         214 :       if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
    1388         214 :         continue;
    1389             : 
    1390          48 :       string fn = canonicalized_archive_entry_pathname (e);
    1391          48 :       if ((r == 0) && (fn != b_source1)) // stage 1
    1392          14 :         continue;
    1393             : 
    1394          34 :       if (fdcache.probe (b_source0, fn)) // skip if already interned
    1395           0 :         continue;
    1396             : 
    1397             :       // extract this file to a temporary file
    1398          34 :       char* tmppath = NULL;
    1399          34 :       rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir.c_str());
    1400          34 :       if (rc < 0)
    1401           0 :         throw libc_exception (ENOMEM, "cannot allocate tmppath");
    1402          34 :       defer_dtor<void*,void> tmmpath_freer (tmppath, free);
    1403          34 :       fd = mkstemp (tmppath);
    1404          34 :       if (fd < 0)
    1405           0 :         throw libc_exception (errno, "cannot create temporary file");
    1406             :       // NB: don't unlink (tmppath), as fdcache will take charge of it.
    1407             : 
    1408             :       // NB: this can take many uninterruptible seconds for a huge file
    1409          34 :       rc = archive_read_data_into_fd (a, fd); 
    1410          34 :       if (rc != ARCHIVE_OK) // e.g. ENOSPC!
    1411             :         {
    1412           0 :           close (fd);
    1413           0 :           unlink (tmppath);
    1414           0 :           throw archive_exception(a, "cannot extract file");
    1415             :         }
    1416             : 
    1417             :       // Set the mtime so the fdcache file mtimes, even prefetched ones,
    1418             :       // propagate to future webapi clients.
    1419             :       struct timeval tvs[2];
    1420          34 :       tvs[0].tv_sec = tvs[1].tv_sec = archive_entry_mtime(e);
    1421          34 :       tvs[0].tv_usec = tvs[1].tv_usec = 0;
    1422          34 :       (void) futimes (fd, tvs);  /* best effort */
    1423             : 
    1424          34 :       if (r != 0) // stage 3
    1425             :         {
    1426             :           // NB: now we know we have a complete reusable file; make fdcache
    1427             :           // responsible for unlinking it later.
    1428          10 :           fdcache.intern(b_source0, fn,
    1429             :                          tmppath, archive_entry_size(e),
    1430             :                          false); // prefetched ones go to back of lru
    1431          10 :           prefetch_count --;
    1432          10 :           close (fd); // we're not saving this fd to make a mhd-response from!
    1433          10 :           continue;
    1434             :         }
    1435             : 
    1436             :       // NB: now we know we have a complete reusable file; make fdcache
    1437             :       // responsible for unlinking it later.
    1438          24 :       fdcache.intern(b_source0, b_source1,
    1439             :                      tmppath, archive_entry_size(e),
    1440             :                      true); // requested ones go to the front of lru
    1441             : 
    1442          24 :       inc_metric ("http_responses_total","result",archive_extension + " archive");
    1443          24 :       r = MHD_create_response_from_fd (archive_entry_size(e), fd);
    1444          24 :       if (r == 0)
    1445             :         {
    1446           0 :           if (verbose)
    1447           0 :             obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
    1448           0 :           close(fd);
    1449           0 :           break; // assume no chance of better luck around another iteration; no other copies of same file
    1450             :         }
    1451             :       else
    1452             :         {
    1453          24 :           MHD_add_response_header (r, "Content-Type", "application/octet-stream");
    1454          24 :           add_mhd_last_modified (r, archive_entry_mtime(e));
    1455          24 :           if (verbose > 1)
    1456           0 :             obatched(clog) << "serving archive " << b_source0 << " file " << b_source1 << endl;
    1457             :           /* libmicrohttpd will close it. */
    1458          24 :           if (result_fd)
    1459          24 :             *result_fd = fd;
    1460          24 :           continue;
    1461             :         }
    1462             :     }
    1463             : 
    1464             :   // XXX: rpm/file not found: delete this R entry?
    1465          24 :   return r;
    1466             : }
    1467             : 
    1468             : 
    1469             : static struct MHD_Response*
    1470          76 : handle_buildid_match (bool internal_req_p,
    1471             :                       int64_t b_mtime,
    1472             :                       const string& b_stype,
    1473             :                       const string& b_source0,
    1474             :                       const string& b_source1,
    1475             :                       int *result_fd)
    1476             : {
    1477             :   try
    1478             :     {
    1479          76 :       if (b_stype == "F")
    1480          15 :         return handle_buildid_f_match(internal_req_p, b_mtime, b_source0, result_fd);
    1481          61 :       else if (b_stype == "R")
    1482          61 :         return handle_buildid_r_match(internal_req_p, b_mtime, b_source0, b_source1, result_fd);
    1483             :     }
    1484          52 :   catch (const reportable_exception &e)
    1485             :     {
    1486          26 :       e.report(clog);
    1487             :       // Report but swallow libc etc. errors here; let the caller
    1488             :       // iterate to other matches of the content.
    1489             :     }
    1490             :   
    1491          26 :   return 0;
    1492             : }
    1493             : 
    1494             : 
    1495             : static int
    1496           6 : debuginfod_find_progress (debuginfod_client *, long a, long b)
    1497             : {
    1498           6 :   if (verbose > 4)
    1499           0 :     obatched(clog) << "federated debuginfod progress=" << a << "/" << b << endl;
    1500             : 
    1501           6 :   return interrupted;
    1502             : }
    1503             : 
    1504             : 
    1505             : static struct MHD_Response*
    1506          58 : handle_buildid (MHD_Connection* conn,
    1507             :                 const string& buildid /* unsafe */,
    1508             :                 const string& artifacttype /* unsafe */,
    1509             :                 const string& suffix /* unsafe */,
    1510             :                 int *result_fd)
    1511             : {
    1512             :   // validate artifacttype
    1513         116 :   string atype_code;
    1514          58 :   if (artifacttype == "debuginfo") atype_code = "D";
    1515          26 :   else if (artifacttype == "executable") atype_code = "E";
    1516          12 :   else if (artifacttype == "source") atype_code = "S";
    1517           0 :   else throw reportable_exception("invalid artifacttype");
    1518             : 
    1519          58 :   if (atype_code == "S" && suffix == "")
    1520           0 :      throw reportable_exception("invalid source suffix");
    1521             : 
    1522             :   // validate buildid
    1523          58 :   if ((buildid.size() < 2) || // not empty
    1524         116 :       (buildid.size() % 2) || // even number
    1525          58 :       (buildid.find_first_not_of("0123456789abcdef") != string::npos)) // pure tasty lowercase hex
    1526           0 :     throw reportable_exception("invalid buildid");
    1527             : 
    1528          58 :   if (verbose > 1)
    1529           0 :     obatched(clog) << "searching for buildid=" << buildid << " artifacttype=" << artifacttype
    1530           0 :          << " suffix=" << suffix << endl;
    1531             : 
    1532          58 :   sqlite_ps *pp = 0;
    1533             : 
    1534          58 :   if (atype_code == "D")
    1535             :     {
    1536          64 :       pp = new sqlite_ps (db, "mhd-query-d",
    1537             :                           "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_d where buildid = ? "
    1538          32 :                           "order by mtime desc");
    1539          32 :       pp->reset();
    1540          32 :       pp->bind(1, buildid);
    1541             :     }
    1542          26 :   else if (atype_code == "E")
    1543             :     {
    1544          28 :       pp = new sqlite_ps (db, "mhd-query-e",
    1545             :                           "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_e where buildid = ? "
    1546          14 :                           "order by mtime desc");
    1547          14 :       pp->reset();
    1548          14 :       pp->bind(1, buildid);
    1549             :     }
    1550          12 :   else if (atype_code == "S")
    1551             :     {
    1552             :       // PR25548
    1553             :       // Incoming source queries may come in with either dwarf-level OR canonicalized paths.
    1554             :       // We let the query pass with either one.
    1555             : 
    1556          24 :       pp = new sqlite_ps (db, "mhd-query-s",
    1557             :                           "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc in (?,?) "
    1558          12 :                           "order by sharedprefix(source0,source0ref) desc, mtime desc");
    1559          12 :       pp->reset();
    1560          12 :       pp->bind(1, buildid);
    1561             :       // NB: we don't store the non-canonicalized path names any more, but old databases
    1562             :       // might have them (and no canon ones), so we keep searching for both.
    1563          12 :       pp->bind(2, suffix);
    1564          12 :       pp->bind(3, canon_pathname(suffix));
    1565             :     }
    1566         116 :   unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
    1567             : 
    1568             :   // consume all the rows
    1569             :   while (1)
    1570             :     {
    1571          84 :       int rc = pp->step();
    1572          84 :       if (rc == SQLITE_DONE) break;
    1573          76 :       if (rc != SQLITE_ROW)
    1574           0 :         throw sqlite_exception(rc, "step");
    1575             : 
    1576          76 :       int64_t b_mtime = sqlite3_column_int64 (*pp, 0);
    1577          76 :       string b_stype = string((const char*) sqlite3_column_text (*pp, 1) ?: ""); /* by DDL may not be NULL */
    1578          76 :       string b_source0 = string((const char*) sqlite3_column_text (*pp, 2) ?: ""); /* may be NULL */
    1579          76 :       string b_source1 = string((const char*) sqlite3_column_text (*pp, 3) ?: ""); /* may be NULL */
    1580             : 
    1581          76 :       if (verbose > 1)
    1582           0 :         obatched(clog) << "found mtime=" << b_mtime << " stype=" << b_stype
    1583           0 :              << " source0=" << b_source0 << " source1=" << b_source1 << endl;
    1584             : 
    1585             :       // Try accessing the located match.
    1586             :       // XXX: in case of multiple matches, attempt them in parallel?
    1587          76 :       auto r = handle_buildid_match (conn ? false : true,
    1588             :                                      b_mtime, b_stype, b_source0, b_source1, result_fd);
    1589          76 :       if (r)
    1590          50 :         return r;
    1591          26 :     }
    1592             : 
    1593             :   // We couldn't find it in the database.  Last ditch effort
    1594             :   // is to defer to other debuginfo servers.
    1595             : 
    1596           8 :   int fd = -1;
    1597           8 :   debuginfod_client *client = debuginfod_begin ();
    1598           8 :   if (client != NULL)
    1599             :     {
    1600           8 :       debuginfod_set_progressfn (client, & debuginfod_find_progress);
    1601             : 
    1602           8 :       if (conn)
    1603             :         {
    1604             :           // Transcribe incoming User-Agent:
    1605          16 :           string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
    1606          24 :           string ua_complete = string("User-Agent: ") + ua;
    1607           8 :           debuginfod_add_http_header (client, ua_complete.c_str());
    1608             : 
    1609             :           // Compute larger XFF:, for avoiding info loss during
    1610             :           // federation, and for future cyclicity detection.
    1611          16 :           string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
    1612           8 :           if (xff != "")
    1613           3 :             xff += string(", "); // comma separated list
    1614             : 
    1615             :           // Compute the client's numeric IP address only - so can't merge with conninfo()
    1616           8 :           const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
    1617             :                                                                        MHD_CONNECTION_INFO_CLIENT_ADDRESS);
    1618           8 :           struct sockaddr *so = u ? u->client_addr : 0;
    1619           8 :           char hostname[256] = ""; // RFC1035
    1620           8 :           if (so && so->sa_family == AF_INET)
    1621           8 :             (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
    1622             :                                 NI_NUMERICHOST);
    1623           0 :           else if (so && so->sa_family == AF_INET6)
    1624           0 :             (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
    1625             :                                 NI_NUMERICHOST);
    1626             : 
    1627          24 :           string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
    1628           8 :           debuginfod_add_http_header (client, xff_complete.c_str());
    1629             :         }
    1630             : 
    1631           8 :       if (artifacttype == "debuginfo")
    1632           7 :         fd = debuginfod_find_debuginfo (client,
    1633           7 :                                         (const unsigned char*) buildid.c_str(),
    1634             :                                         0, NULL);
    1635           1 :       else if (artifacttype == "executable")
    1636           1 :         fd = debuginfod_find_executable (client,
    1637           1 :                                          (const unsigned char*) buildid.c_str(),
    1638             :                                          0, NULL);
    1639           0 :       else if (artifacttype == "source")
    1640           0 :         fd = debuginfod_find_source (client,
    1641           0 :                                      (const unsigned char*) buildid.c_str(),
    1642             :                                      0, suffix.c_str(), NULL);
    1643             :     }
    1644             :   else
    1645           0 :     fd = -errno; /* Set by debuginfod_begin.  */
    1646           8 :   debuginfod_end (client);
    1647             : 
    1648           8 :   if (fd >= 0)
    1649             :     {
    1650           1 :       inc_metric ("http_responses_total","result","upstream");
    1651             :       struct stat s;
    1652           1 :       int rc = fstat (fd, &s);
    1653           1 :       if (rc == 0)
    1654             :         {
    1655           1 :           auto r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
    1656           1 :           if (r)
    1657             :             {
    1658           1 :               MHD_add_response_header (r, "Content-Type", "application/octet-stream");
    1659           1 :               add_mhd_last_modified (r, s.st_mtime);
    1660           1 :               if (verbose > 1)
    1661           0 :                 obatched(clog) << "serving file from upstream debuginfod/cache" << endl;
    1662           1 :               if (result_fd)
    1663           1 :                 *result_fd = fd;
    1664           1 :               return r; // NB: don't close fd; libmicrohttpd will
    1665             :             }
    1666             :         }
    1667           0 :       close (fd);
    1668             :     }
    1669             :   else
    1670           7 :     switch(fd)
    1671             :       {
    1672           5 :       case -ENOSYS:
    1673           5 :         break;
    1674           2 :       case -ENOENT:
    1675           2 :         break;
    1676           0 :       default: // some more tricky error
    1677           0 :         throw libc_exception(-fd, "upstream debuginfod query failed");
    1678             :       }
    1679             : 
    1680           7 :   throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
    1681             : }
    1682             : 
    1683             : 
    1684             : ////////////////////////////////////////////////////////////////////////
    1685             : 
    1686             : static map<string,int64_t> metrics; // arbitrary data for /metrics query: key (name plus optional {label="value"}) -> value
    1687             : // NB: store int64_t since all our metrics are integers; prometheus accepts double
    1688             : static mutex metrics_lock; // taken around each access to `metrics` in the functions below
    1689             : // NB: these objects get released during the process exit via global dtors;
    1690             : // do not use them (e.g. through set_metric/inc_metric) from within other global dtors
    1691             : 
    1692             : // utility function for assembling prometheus-compatible
    1693             : // name="escaped-value" strings
    1694             : // https://prometheus.io/docs/instrumenting/exposition_formats/
                       //
                       // name:  label name; copied verbatim, it is neither escaped nor validated here
                       // value: label value; backslash, double-quote and newline characters are
                       //        backslash-escaped, every other character is copied unchanged
                       // Returns the assembled string: the name, an equals sign, and the escaped
                       // value enclosed in double quotes.
    1695             : static string
    1696        2614 : metric_label(const string& name, const string& value)
    1697             : {
    1698        2614 :   string x = name + "=\"";
    1699       17626 :   for (auto&& c : value)
    1700       15014 :     switch(c)
    1701             :       {
    1702           0 :       case '\\': x += "\\\\"; break;
    1703           0 :       case '\"': x += "\\\""; break;
    1704           0 :       case '\n': x += "\\n"; break;
    1705       15014 :       default: x += c; break;
    1706             :       }
    1707        2612 :   x += "\"";
    1708        2615 :   return x;
    1709             : }
    1710             : 
    1711             : 
    1712             : // add prometheus-format metric name + label tuple (if any) + value
    1713             : 
                       // Unlabelled variant: store `value` under the key `metric`, creating the
                       // entry or overwriting whatever was there.  Holds metrics_lock for the
                       // duration of the map update.
    1714             : static void
    1715         170 : set_metric(const string& metric, int64_t value)
    1716             : {
    1717         170 :   unique_lock<mutex> lock(metrics_lock);
    1718         170 :   metrics[metric] = value;
    1719         170 : }
    1720             : #if 0 /* unused */
                       // Compiled out.  Unlabelled variant: would add one to the counter stored
                       // under the key `metric` while holding metrics_lock.
    1721             : static void
    1722             : inc_metric(const string& metric)
    1723             : {
    1724             :   unique_lock<mutex> lock(metrics_lock);
    1725             :   metrics[metric] ++;
    1726             : }
    1727             : #endif
    1728             : static void
                       // Labelled variant: store `value` under the key  metric{lname="lvalue"} ,
                       // with lvalue escaped by metric_label().  The key string is built before
                       // metrics_lock is taken, so only the map update runs under the lock.
    1729         380 : set_metric(const string& metric,
    1730             :            const string& lname, const string& lvalue,
    1731             :            int64_t value)
    1732             : {
    1733        1140 :   string key = (metric + "{" + metric_label(lname, lvalue) + "}");
    1734         380 :   unique_lock<mutex> lock(metrics_lock);
    1735         380 :   metrics[key] = value;
    1736         380 : }
    1737             : 
                       // Add one to the counter stored under the key  metric{lname="lvalue"} .
                       // A key that is not yet present is created by map operator[], which
                       // value-initializes it to zero before the increment.
    1738             : static void
    1739        1551 : inc_metric(const string& metric,
    1740             :            const string& lname, const string& lvalue)
    1741             : {
    1742        4653 :   string key = (metric + "{" + metric_label(lname, lvalue) + "}");
    1743        1551 :   unique_lock<mutex> lock(metrics_lock);
    1744        1551 :   metrics[key] ++;
    1745        1551 : }
    1746             : static void
                       // Add `value` (which may be any int64_t) to the metric stored under the
                       // key  metric{lname="lvalue"} .  A key that is not yet present starts
                       // from zero.  Only the map update runs under metrics_lock.
    1747         684 : add_metric(const string& metric,
    1748             :            const string& lname, const string& lvalue,
    1749             :            int64_t value)
    1750             : {
    1751        2053 :   string key = (metric + "{" + metric_label(lname, lvalue) + "}");
    1752         685 :   unique_lock<mutex> lock(metrics_lock);
    1753         685 :   metrics[key] += value;
    1754         685 : }
    1755             : #if 0
                       // Compiled out.  Unlabelled variant: would add `value` to the metric
                       // stored under the key `metric` while holding metrics_lock.
    1756             : static void
    1757             : add_metric(const string& metric,
    1758             :            int64_t value)
    1759             : {
    1760             :   unique_lock<mutex> lock(metrics_lock);
    1761             :   metrics[metric] += value;
    1762             : }
    1763             : #endif
    1764             : 
    1765             : 
    1766             : // and more for higher arity labels if needed
    1767             : 
    1768             : 
                       // Build the response for the /metrics query: one text line per entry of
                       // `metrics`, written as the key, a space, and the value.
                       // size: out-parameter; receives the length of the generated body in bytes.
                       // Returns the response object from MHD_create_response_from_buffer(), with
                       // a text/plain Content-Type header added.
                       // NOTE(review): r is not checked for NULL before the header is added; the
                       // caller in this file does test the returned pointer -- confirm that
                       // MHD_add_response_header() tolerates a NULL response.
    1769             : static struct MHD_Response*
    1770          61 : handle_metrics (off_t* size)
    1771             : {
    1772         122 :   stringstream o;
    1773             :   {
                           // inner scope: the lock is held only while the map is being formatted
    1774         122 :     unique_lock<mutex> lock(metrics_lock);
    1775        3555 :     for (auto&& i : metrics)
    1776        3494 :       o << i.first << " " << i.second << endl;
    1777             :   }
                         // the const reference keeps the temporary returned by o.str() alive until return
    1778          61 :   const string& os = o.str();
                         // MHD_RESPMEM_MUST_COPY: the buffer is copied by the library, so `os`
                         // does not have to outlive this function (per the libmicrohttpd manual)
    1779          61 :   MHD_Response* r = MHD_create_response_from_buffer (os.size(),
    1780          61 :                                                      (void*) os.c_str(),
    1781             :                                                      MHD_RESPMEM_MUST_COPY);
    1782          61 :   *size = os.size();
    1783          61 :   MHD_add_response_header (r, "Content-Type", "text/plain");
    1784         122 :   return r;
    1785             : }
    1786             : 
    1787             : 
    1788             : ////////////////////////////////////////////////////////////////////////
    1789             : 
    1790             : 
    1791             : /* libmicrohttpd callback */
                       // Handles one HTTP request from start to finish.  Only GET is accepted.
                       //   /buildid/BUILDID/ARTIFACTTYPE[/SUFFIX]   is passed to handle_buildid()
                       //   /metrics                                 is passed to handle_metrics()
                       // Any other URL, or a /buildid/ URL without an artifact type, throws a
                       // reportable_exception; the catch block reports it to clog and sends the
                       // error response instead of the normal one.
                       // Afterwards one log line is written (connection info, method, url, status
                       // code, size, elapsed milliseconds) and the http_responses_* metrics are
                       // updated, on both the success and the error path.
                       // Returns what MHD_queue_response() returned on success, or what
                       // e.mhd_send_response() returned on the error path.
                       // NOTE(review): only reportable_exception is caught; anything else thrown
                       // in the try block would propagate to the caller -- confirm none can occur.
    1792             : static MHD_RESULT
    1793         113 : handler_cb (void * /*cls*/,
    1794             :             struct MHD_Connection *connection,
    1795             :             const char *url,
    1796             :             const char *method,
    1797             :             const char * /*version*/,
    1798             :             const char * /*upload_data*/,
    1799             :             size_t * /*upload_data_size*/,
    1800             :             void ** /*con_cls*/)
    1801             : {
    1802         113 :   struct MHD_Response *r = NULL;
    1803         226 :   string url_copy = url;
    1804             : 
                         // rc is assigned on both the success path and the catch path below
    1805             : #if MHD_VERSION >= 0x00097002
    1806             :   enum MHD_Result rc;
    1807             : #else
    1808         113 :   int rc = MHD_NO; // mhd
    1809             : #endif
                         // 500 and -1 are placeholders; both are replaced on the success path and
                         // in the catch block
    1810         113 :   int http_code = 500;
    1811         113 :   off_t http_size = -1;
    1812             :   struct timeval tv_start, tv_end;
    1813         113 :   gettimeofday (&tv_start, NULL);
    1814             : 
    1815             :   try
    1816             :     {
    1817         113 :       if (string(method) != "GET")
    1818           0 :         throw reportable_exception(400, "we support GET only");
    1819             : 
    1820             :       /* Start decoding the URL. */
                             // search from index 1 so that the first character of the url is not matched
    1821         113 :       size_t slash1 = url_copy.find('/', 1);
    1822         226 :       string url1 = url_copy.substr(0, slash1); // ok even if slash1 not found
    1823             : 
    1824         113 :       if (slash1 != string::npos && url1 == "/buildid")
    1825             :         {
    1826          50 :           size_t slash2 = url_copy.find('/', slash1+1);
    1827          50 :           if (slash2 == string::npos)
    1828           0 :             throw reportable_exception("/buildid/ webapi error, need buildid");
    1829             : 
    1830         100 :           string buildid = url_copy.substr(slash1+1, slash2-slash1-1);
    1831             : 
                                 // a third slash is optional: when present, everything from that
                                 // slash onwards becomes the suffix
    1832          50 :           size_t slash3 = url_copy.find('/', slash2+1);
    1833         107 :           string artifacttype, suffix;
    1834          50 :           if (slash3 == string::npos)
    1835             :             {
    1836          38 :               artifacttype = url_copy.substr(slash2+1);
    1837          38 :               suffix = "";
    1838             :             }
    1839             :           else
    1840             :             {
    1841          12 :               artifacttype = url_copy.substr(slash2+1, slash3-slash2-1);
    1842          12 :               suffix = url_copy.substr(slash3); // include the slash in the suffix
    1843             :             }
    1844             : 
    1845          50 :           inc_metric("http_requests_total", "type", artifacttype);
    1846             :           // get the resulting fd so we can report its size
                                 // NOTE(review): fd is uninitialized here; the fstat below relies on
                                 // handle_buildid() storing into it whenever it returns a response --
                                 // confirm that holds for every return path of handle_buildid().
    1847             :           int fd;
    1848          50 :           r = handle_buildid(connection, buildid, artifacttype, suffix, &fd);
    1849          43 :           if (r)
    1850             :             {
    1851             :               struct stat fs;
    1852          43 :               if (fstat(fd, &fs) == 0)
    1853          43 :                 http_size = fs.st_size;
    1854             :               // libmicrohttpd will close (fd);
    1855             :             }
    1856             :         }
    1857          63 :       else if (url1 == "/metrics")
    1858             :         {
    1859          61 :           inc_metric("http_requests_total", "type", "metrics");
    1860          61 :           r = handle_metrics(& http_size);
    1861             :         }
    1862             :       else
    1863           2 :         throw reportable_exception("webapi error, unrecognized /operation");
    1864             : 
    1865         104 :       if (r == 0)
    1866           0 :         throw reportable_exception("internal error, missing response");
    1867             : 
    1868         104 :       rc = MHD_queue_response (connection, MHD_HTTP_OK, r);
    1869         104 :       http_code = MHD_HTTP_OK;
                             // give up this function's reference to the response once it is queued
    1870         104 :       MHD_destroy_response (r);
    1871             :     }
    1872           9 :   catch (const reportable_exception& e)
    1873             :     {
    1874           9 :       inc_metric("http_responses_total","result","error");
    1875           9 :       e.report(clog);
    1876           9 :       http_code = e.code;
    1877           9 :       http_size = e.message.size();
    1878           9 :       rc = e.mhd_send_response (connection);
    1879             :     }
    1880             : 
    1881         113 :   gettimeofday (&tv_end, NULL);
                         // elapsed wall-clock time in seconds, as a double
    1882         113 :   double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001;
    1883         226 :   obatched(clog) << conninfo(connection)
    1884             :                  << ' ' << method << ' ' << url
    1885         113 :                  << ' ' << http_code << ' ' << http_size
    1886         113 :                  << ' ' << (int)(deltas*1000) << "ms"
    1887         113 :                  << endl;
    1888             : 
    1889             :   // related prometheus metrics
    1890         113 :   string http_code_str = to_string(http_code);
                         // a negative http_size (size never determined) is left out of the byte sum
    1891         113 :   if (http_size >= 0)
    1892         113 :     add_metric("http_responses_transfer_bytes_sum","code",http_code_str,
    1893             :                http_size);
    1894         113 :   inc_metric("http_responses_transfer_bytes_count","code",http_code_str);
    1895             : 
                         // deltas*1000 is a double; it is converted to add_metric's int64_t parameter
    1896         113 :   add_metric("http_responses_duration_milliseconds_sum","code",http_code_str,
    1897         113 :              deltas*1000); // prometheus prefers _seconds and floating point
    1898         113 :   inc_metric("http_responses_duration_milliseconds_count","code",http_code_str);
    1899             : 
    1900         226 :   return rc;
    1901             : }
    1902             : 
    1903             : 
    1904             : ////////////////////////////////////////////////////////////////////////
    1905             : // borrowed originally from src/nm.c get_local_names()
    1906             : 
                       // Collect source file names from the DWARF data of `elf` into
                       // debug_sourcefiles, using the per-CU file tables from dwarf_getsrcfiles().
                       //  - absolute names are stored unchanged;
                       //  - relative names are prefixed with the CU's first source directory
                       //    (dirs[0], used here as the compilation directory) and a slash;
                       //  - "<built-in>" entries are ignored;
                       //  - a CU with an empty comp_dir and a non-absolute CU name is skipped.
                       // If the file carries a debugaltlink build-id that can be resolved neither
                       // by dwarf_getalt() nor through handle_buildid(), an empty string is
                       // inserted into debug_sourcefiles to signal that the result may be
                       // incomplete.
                       // NOTE(review): the function is noexcept, but only reportable_exception
                       // from handle_buildid() is caught; any exception escaping the string, set
                       // or logging operations would terminate the process -- confirm acceptable.
    1907             : static void
    1908          20 : dwarf_extract_source_paths (Elf *elf, set<string>& debug_sourcefiles)
    1909             :   noexcept // no exceptions - so we can simplify the altdbg resource release at end
    1910             : {
    1911          20 :   Dwarf* dbg = dwarf_begin_elf (elf, DWARF_C_READ, NULL);
    1912          20 :   if (dbg == NULL)
    1913           0 :     return;
    1914             : 
                         // these two stay NULL / -1 unless the alt file is opened by this function;
                         // they are released at the bottom
    1915          20 :   Dwarf* altdbg = NULL;
    1916          20 :   int    altdbg_fd = -1;
    1917             : 
    1918             :   // DWZ handling: if we have an unsatisfied debug-alt-link, add an
    1919             :   // empty string into the outgoing sourcefiles set, so the caller
    1920             :   // should know that our data is incomplete.
    1921             :   const char *alt_name_p;
    1922             :   const void *alt_build_id; // elfutils-owned memory
    1923          20 :   ssize_t sz = dwelf_dwarf_gnu_debugaltlink (dbg, &alt_name_p, &alt_build_id);
    1924          20 :   if (sz > 0) // got one!
    1925             :     {
                             // render the sz build-id bytes as lowercase hexadecimal text
    1926          16 :       string buildid;
    1927           8 :       unsigned char* build_id_bytes = (unsigned char*) alt_build_id;
    1928         168 :       for (ssize_t idx=0; idx<sz; idx++)
    1929             :         {
    1930         160 :           buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
    1931         160 :           buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
    1932             :         }
    1933             : 
    1934           8 :       if (verbose > 3)
    1935           0 :         obatched(clog) << "Need altdebug buildid=" << buildid << endl;
    1936             : 
    1937             :       // but is it unsatisfied the normal elfutils ways?
    1938           8 :       Dwarf* alt = dwarf_getalt (dbg);
    1939           8 :       if (alt == NULL)
    1940             :         {
    1941             :           // Yup, unsatisfied the normal way.  Maybe we can satisfy it
    1942             :           // from our own debuginfod database.
                                 // alt_fd is only read below when handle_buildid() returned a response
    1943             :           int alt_fd;
    1944           8 :           struct MHD_Response *r = 0;
    1945             :           try
    1946             :             {
    1947           8 :               r = handle_buildid (0, buildid, "debuginfo", "", &alt_fd);
    1948             :             }
    1949           0 :           catch (const reportable_exception& e)
    1950             :             {
    1951             :               // swallow exceptions
    1952             :             }
    1953             : 
    1954             :           // NB: this is not actually recursive!  This invokes the web-query
    1955             :           // path, which cannot get back into the scan code paths.
    1956           8 :           if (r)
    1957             :             {
    1958             :               // Found it!
                                     // duplicate first: destroying r below closes alt_fd, while
                                     // altdbg keeps reading through the duplicate
    1959           8 :               altdbg_fd = dup(alt_fd); // ok if this fails, downstream failures ok
    1960           8 :               alt = altdbg = dwarf_begin (altdbg_fd, DWARF_C_READ);
    1961             :               // NB: must close this dwarf and this fd at the bottom of the function!
    1962           8 :               MHD_destroy_response (r); // will close alt_fd
    1963           8 :               if (alt)
    1964           8 :                 dwarf_setalt (dbg, alt);
    1965             :             }
    1966             :         }
    1967             :       else
    1968             :         {
    1969             :           // NB: dwarf_setalt(alt) inappropriate - already done!
    1970             :           // NB: altdbg will stay 0 so nothing tries to redundantly dealloc.
    1971             :         }
    1972             : 
    1973           8 :       if (alt)
    1974             :         {
    1975           8 :           if (verbose > 3)
    1976           0 :             obatched(clog) << "Resolved altdebug buildid=" << buildid << endl;
    1977             :         }
    1978             :       else // (alt == NULL) - signal possible presence of poor debuginfo
    1979             :         {
    1980           0 :           debug_sourcefiles.insert("");
    1981           0 :           if (verbose > 3)
    1982           0 :             obatched(clog) << "Unresolved altdebug buildid=" << buildid << endl;
    1983             :         }
    1984             :     }
    1985             : 
    1986          20 :   Dwarf_Off offset = 0;
    1987             :   Dwarf_Off old_offset;
    1988             :   size_t hsize;
    1989             : 
                         // iterate over the units; old_offset is the start of the current unit
                         // and hsize its header size, so old_offset + hsize addresses its first DIE
    1990         344 :   while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, NULL) == 0)
    1991             :     {
    1992             :       Dwarf_Die cudie_mem;
    1993         324 :       Dwarf_Die *cudie = dwarf_offdie (dbg, old_offset + hsize, &cudie_mem);
    1994             : 
    1995         324 :       if (cudie == NULL)
    1996           4 :         continue;
                             // only DW_TAG_compile_unit DIEs are processed; other unit kinds are skipped
    1997         324 :       if (dwarf_tag (cudie) != DW_TAG_compile_unit)
    1998           4 :         continue;
    1999             : 
    2000         320 :       const char *cuname = dwarf_diename(cudie) ?: "unknown";
    2001             : 
    2002             :       Dwarf_Files *files;
    2003             :       size_t nfiles;
    2004         320 :       if (dwarf_getsrcfiles (cudie, &files, &nfiles) != 0)
    2005           0 :         continue;
    2006             : 
    2007             :       // extract DW_AT_comp_dir to resolve relative file names
    2008         320 :       const char *comp_dir = "";
    2009             :       const char *const *dirs;
    2010             :       size_t ndirs;
    2011         640 :       if (dwarf_getsrcdirs (files, &dirs, &ndirs) == 0 &&
    2012         320 :           dirs[0] != NULL)
    2013         320 :         comp_dir = dirs[0];
                             // defensive only: comp_dir is either "" or a dirs[0] already checked against NULL
    2014         320 :       if (comp_dir == NULL)
    2015           0 :         comp_dir = "";
    2016             : 
    2017         320 :       if (verbose > 3)
    2018           0 :         obatched(clog) << "searching for sources for cu=" << cuname << " comp_dir=" << comp_dir
    2019           0 :                        << " #files=" << nfiles << " #dirs=" << ndirs << endl;
    2020             : 
    2021         320 :       if (comp_dir[0] == '\0' && cuname[0] != '/')
    2022             :         {
    2023             :           // This is a common symptom for dwz-compressed debug files,
    2024             :           // where the altdebug file cannot be resolved.
    2025           0 :           if (verbose > 3)
    2026           0 :             obatched(clog) << "skipping cu=" << cuname << " due to empty comp_dir" << endl;
    2027           0 :           continue;
    2028             :         }
    2029             : 
                             // NOTE(review): the loop starts at index 1, so file entry 0 is never
                             // recorded -- presumably entry 0 is not a real source file; confirm.
    2030        6182 :       for (size_t f = 1; f < nfiles; f++)
    2031             :         {
    2032        5862 :           const char *hat = dwarf_filesrc (files, f, NULL, NULL);
    2033        5862 :           if (hat == NULL)
    2034           0 :             continue;
    2035             : 
    2036        5862 :           if (string(hat) == "<built-in>") // gcc intrinsics, don't bother record
    2037           0 :             continue;
    2038             : 
                                 // waldo: the path that gets recorded for this file entry
    2039        5862 :           string waldo;
    2040        5862 :           if (hat[0] == '/') // absolute
    2041        4053 :             waldo = (string (hat));
    2042        1809 :           else if (comp_dir[0] != '\0') // comp_dir relative
    2043        1809 :             waldo = (string (comp_dir) + string("/") + string (hat));
    2044             :           else
    2045             :            {
    2046           0 :              if (verbose > 3)
    2047           0 :                obatched(clog) << "skipping hat=" << hat << " due to empty comp_dir" << endl;
    2048           0 :              continue;
    2049             :            }
    2050             : 
    2051             :           // NB: this is the 'waldo' that a dbginfo client will have
    2052             :           // to supply for us to give them the file The comp_dir
    2053             :           // prefixing is a definite complication.  Otherwise we'd
    2054             :           // have to return a setof comp_dirs (one per CU!) with
    2055             :           // corresponding filesrc[] names, instead of one absolute
    2056             :           // resoved set.  Maybe we'll have to do that anyway.  XXX
    2057             : 
    2058        5862 :           if (verbose > 4)
    2059           0 :             obatched(clog) << waldo
    2060           0 :                            << (debug_sourcefiles.find(waldo)==debug_sourcefiles.end() ? " new" : " dup") <<  endl;
    2061             : 
                                 // the set removes duplicates that occur across CUs
    2062        5862 :           debug_sourcefiles.insert (waldo);
    2063             :         }
    2064             :     }
    2065             : 
                         // release dbg, then the alt dwarf and its fd if they were opened above
    2066          20 :   dwarf_end(dbg);
    2067          20 :   if (altdbg)
    2068           8 :     dwarf_end(altdbg);
    2069          20 :   if (altdbg_fd >= 0)
    2070           8 :     close(altdbg_fd);
    2071             : }
    2072             : 
    2073             : 
    2074             : 
                       // Inspect the file open on `fd` and classify it.
                       //   executable_p      set to true when the ELF type is ET_EXEC or ET_DYN and
                       //                     some section is SHT_PROGBITS with SHF_ALLOC; set to
                       //                     false for such a type when no section qualifies;
                       //                     not written for other ELF types
                       //   debuginfo_p       set to true when a section name starts with .debug_
                       //                     or .zdebug_ ; never set to false here
                       //   buildid           the GNU build-id is appended as lowercase hex
                       //   debug_sourcefiles filled by dwarf_extract_source_paths() once a
                       //                     .debug_line / .zdebug_line section is seen
                       // Nothing is written when elf_begin() fails, the file is not ELF_K_ELF, the
                       // ELF header cannot be read, or there is no build-id.
                       // A reportable_exception thrown inside the try block is reported to clog
                       // and not propagated.  The Elf handle is released on every path.
    2075             : static void
    2076          93 : elf_classify (int fd, bool &executable_p, bool &debuginfo_p, string &buildid, set<string>& debug_sourcefiles)
    2077             : {
    2078          93 :   Elf *elf = elf_begin (fd, ELF_C_READ_MMAP_PRIVATE, NULL);
    2079          93 :   if (elf == NULL)
    2080           0 :     return;
    2081             : 
    2082             :   try // catch our types of errors and clean up the Elf* object
    2083             :     {
                             // each early return below calls elf_end() itself; the call at the
                             // bottom covers the fall-through and the catch path
    2084          93 :       if (elf_kind (elf) != ELF_K_ELF)
    2085             :         {
    2086          58 :           elf_end (elf);
    2087          58 :           return;
    2088             :         }
    2089             : 
    2090             :       GElf_Ehdr ehdr_storage;
    2091          35 :       GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
    2092          35 :       if (ehdr == NULL)
    2093             :         {
    2094           0 :           elf_end (elf);
    2095           0 :           return;
    2096             :         }
    2097          35 :       auto elf_type = ehdr->e_type;
    2098             : 
    2099             :       const void *build_id; // elfutils-owned memory
    2100          35 :       ssize_t sz = dwelf_elf_gnu_build_id (elf, & build_id);
    2101          35 :       if (sz <= 0)
    2102             :         {
    2103             :           // It's not a diagnostic-worthy error for an elf file to lack build-id.
    2104             :           // It might just be very old.
    2105           0 :           elf_end (elf);
    2106           0 :           return;
    2107             :         }
    2108             : 
    2109             :       // build_id is a raw byte array; convert to hexadecimal *lowercase*
                             // NB: the hex digits are appended to whatever `buildid` already holds
    2110          35 :       unsigned char* build_id_bytes = (unsigned char*) build_id;
    2111         733 :       for (ssize_t idx=0; idx<sz; idx++)
    2112             :         {
    2113         698 :           buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
    2114         699 :           buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
    2115             :         }
    2116             : 
    2117             :       // now decide whether it's an executable - namely, any allocatable section has
    2118             :       // PROGBITS;
    2119          35 :       if (elf_type == ET_EXEC || elf_type == ET_DYN)
    2120             :         {
    2121             :           size_t shnum;
    2122          30 :           int rc = elf_getshdrnum (elf, &shnum);
                                 // NOTE(review): rc is the negative return value, not elf_errno() --
                                 // confirm that is what elfutils_exception expects as its code.
    2123          30 :           if (rc < 0)
    2124           0 :             throw elfutils_exception(rc, "getshdrnum");
    2125             : 
    2126          30 :           executable_p = false;
    2127         560 :           for (size_t sc = 0; sc < shnum; sc++)
    2128             :             {
                                     // sections that cannot be read are skipped, not treated as errors
    2129         546 :               Elf_Scn *scn = elf_getscn (elf, sc);
    2130         546 :               if (scn == NULL)
    2131           0 :                 continue;
    2132             : 
    2133             :               GElf_Shdr shdr_mem;
    2134         546 :               GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
    2135         546 :               if (shdr == NULL)
    2136           0 :                 continue;
    2137             : 
    2138             :               // allocated (loadable / vm-addr-assigned) section with available content?
    2139         546 :               if ((shdr->sh_type == SHT_PROGBITS) && (shdr->sh_flags & SHF_ALLOC))
    2140             :                 {
    2141          16 :                   if (verbose > 4)
    2142           0 :                     obatched(clog) << "executable due to SHF_ALLOC SHT_PROGBITS sc=" << sc << endl;
    2143          16 :                   executable_p = true;
    2144          16 :                   break; // no need to keep looking for others
    2145             :                 }
    2146             :             } // iterate over sections
    2147             :         } // executable_p classification
    2148             : 
    2149             :       // now decide whether it's a debuginfo - namely, if it has any .debug* or .zdebug* sections
    2150             :       // logic mostly stolen from fweimer@redhat.com's elfclassify drafts
    2151             :       size_t shstrndx;
    2152          35 :       int rc = elf_getshdrstrndx (elf, &shstrndx);
    2153          35 :       if (rc < 0)
    2154           0 :         throw elfutils_exception(rc, "getshdrstrndx");
    2155             : 
                             // walk the sections with elf_nextscn(); unlike the loop above, an
                             // unreadable header or name ends this scan silently instead of being skipped
    2156          35 :       Elf_Scn *scn = NULL;
    2157             :       while (true)
    2158             :         {
    2159         879 :           scn = elf_nextscn (elf, scn);
    2160         879 :           if (scn == NULL)
    2161          35 :             break;
    2162             :           GElf_Shdr shdr_storage;
    2163         864 :           GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
    2164         863 :           if (shdr == NULL)
    2165           0 :             break;
    2166         863 :           const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
    2167         863 :           if (section_name == NULL)
    2168           0 :             break;
                                 // prefix comparisons: 11 and 12 are the lengths of the two literals
    2169         863 :           if (strncmp(section_name, ".debug_line", 11) == 0 ||
    2170         842 :               strncmp(section_name, ".zdebug_line", 12) == 0)
    2171             :             {
    2172          19 :               debuginfo_p = true;
    2173          19 :               dwarf_extract_source_paths (elf, debug_sourcefiles);
    2174          20 :               break; // expecting only one .*debug_line, so no need to look for others
    2175             :             }
    2176         844 :           else if (strncmp(section_name, ".debug_", 7) == 0 ||
    2177         789 :                    strncmp(section_name, ".zdebug_", 8) == 0)
    2178             :             {
    2179          59 :               debuginfo_p = true;
    2180             :               // NB: don't break; need to parse .debug_line for sources
    2181             :             }
    2182         844 :         }
    2183             :     }
    2184           0 :   catch (const reportable_exception& e)
    2185             :     {
    2186           0 :       e.report(clog);
    2187             :     }
    2188          35 :   elf_end (elf);
    2189             : }
    2190             : 
    2191             : 
    2192             : static void
    2193         122 : scan_source_file (const string& rps, const stat_t& st,
    2194             :                   sqlite_ps& ps_upsert_buildids,
    2195             :                   sqlite_ps& ps_upsert_files,
    2196             :                   sqlite_ps& ps_upsert_de,
    2197             :                   sqlite_ps& ps_upsert_s,
    2198             :                   sqlite_ps& ps_query,
    2199             :                   sqlite_ps& ps_scan_done,
    2200             :                   unsigned& fts_cached,
    2201             :                   unsigned& fts_executable,
    2202             :                   unsigned& fts_debuginfo,
    2203             :                   unsigned& fts_sourcefiles)
    2204             : {
    2205             :   /* See if we know of it already. */
    2206             :   int rc = ps_query
    2207         122 :     .reset()
    2208         122 :     .bind(1, rps)
    2209         122 :     .bind(2, st.st_mtime)
    2210         122 :     .step();
    2211         122 :   ps_query.reset();
    2212         122 :   if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
    2213             :     // no need to recheck a file/version we already know
    2214             :     // specifically, no need to elf-begin a file we already determined is non-elf
    2215             :     // (so is stored with buildid=NULL)
    2216             :     {
    2217          84 :       fts_cached++;
    2218          84 :       return;
    2219             :     }
    2220             : 
    2221          38 :   bool executable_p = false, debuginfo_p = false; // E and/or D
    2222          76 :   string buildid;
    2223          76 :   set<string> sourcefiles;
    2224             : 
    2225          38 :   int fd = open (rps.c_str(), O_RDONLY);
    2226             :   try
    2227             :     {
    2228          38 :       if (fd >= 0)
    2229          37 :         elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
    2230             :       else
    2231           1 :         throw libc_exception(errno, string("open ") + rps);
    2232          37 :       add_metric ("scanned_bytes_total","source","file",
    2233          37 :                   st.st_size);
    2234          37 :       inc_metric ("scanned_files_total","source","file");
    2235             :     }
    2236             :   // NB: we catch exceptions here too, so that we can
    2237             :   // cache the corrupt-elf case (!executable_p &&
    2238             :   // !debuginfo_p) just below, just as if we had an
    2239             :   // EPERM error from open(2).
    2240           2 :   catch (const reportable_exception& e)
    2241             :     {
    2242           1 :       e.report(clog);
    2243             :     }
    2244             : 
    2245          38 :   if (fd >= 0)
    2246          37 :     close (fd);
    2247             : 
    2248             :   // register this file name in the interning table
    2249             :   ps_upsert_files
    2250          38 :     .reset()
    2251          38 :     .bind(1, rps)
    2252          38 :     .step_ok_done();
    2253             : 
    2254          38 :   if (buildid == "")
    2255             :     {
    2256             :       // no point storing an elf file without buildid
    2257          34 :       executable_p = false;
    2258          34 :       debuginfo_p = false;
    2259             :     }
    2260             :   else
    2261             :     {
    2262             :       // register this build-id in the interning table
    2263             :       ps_upsert_buildids
    2264           4 :         .reset()
    2265           4 :         .bind(1, buildid)
    2266           4 :         .step_ok_done();
    2267             :     }
    2268             : 
    2269          38 :   if (executable_p)
    2270           3 :     fts_executable ++;
    2271          38 :   if (debuginfo_p)
    2272           3 :     fts_debuginfo ++;
    2273          38 :   if (executable_p || debuginfo_p)
    2274             :     {
    2275             :       ps_upsert_de
    2276           4 :         .reset()
    2277           4 :         .bind(1, buildid)
    2278           4 :         .bind(2, debuginfo_p ? 1 : 0)
    2279           4 :         .bind(3, executable_p ? 1 : 0)
    2280           4 :         .bind(4, rps)
    2281           4 :         .bind(5, st.st_mtime)
    2282           4 :         .step_ok_done();
    2283             :     }
    2284          38 :   if (executable_p)
    2285           3 :     inc_metric("found_executable_total","source","files");
    2286          38 :   if (debuginfo_p)
    2287           3 :     inc_metric("found_debuginfo_total","source","files");
    2288             : 
    2289          38 :   if (sourcefiles.size() && buildid != "")
    2290             :     {
    2291           3 :       fts_sourcefiles += sourcefiles.size();
    2292             : 
    2293         485 :       for (auto&& dwarfsrc : sourcefiles)
    2294             :         {
    2295         482 :           char *srp = realpath(dwarfsrc.c_str(), NULL);
    2296         482 :           if (srp == NULL) // also if DWZ unresolved dwarfsrc=""
    2297           6 :             continue; // unresolvable files are not a serious problem
    2298             :           // throw libc_exception(errno, "fts/file realpath " + srcpath);
    2299         476 :           string srps = string(srp);
    2300         476 :           free (srp);
    2301             : 
    2302             :           struct stat sfs;
    2303         476 :           rc = stat(srps.c_str(), &sfs);
    2304         476 :           if (rc != 0)
    2305           0 :             continue;
    2306             : 
    2307         476 :           if (verbose > 2)
    2308           0 :             obatched(clog) << "recorded buildid=" << buildid << " file=" << srps
    2309           0 :                            << " mtime=" << sfs.st_mtime
    2310           0 :                            << " as source " << dwarfsrc << endl;
    2311             : 
    2312             :           ps_upsert_files
    2313         476 :             .reset()
    2314         476 :             .bind(1, srps)
    2315         476 :             .step_ok_done();
    2316             : 
    2317             :           // PR25548: store canonicalized dwarfsrc path
    2318         476 :           string dwarfsrc_canon = canon_pathname (dwarfsrc);
    2319         476 :           if (dwarfsrc_canon != dwarfsrc)
    2320             :             {
    2321          84 :               if (verbose > 3)
    2322           0 :                 obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
    2323             :             }
    2324             : 
    2325             :           ps_upsert_files
    2326         476 :             .reset()
    2327         476 :             .bind(1, dwarfsrc_canon)
    2328         476 :             .step_ok_done();
    2329             : 
    2330             :           ps_upsert_s
    2331         476 :             .reset()
    2332         476 :             .bind(1, buildid)
    2333         476 :             .bind(2, dwarfsrc_canon)
    2334         476 :             .bind(3, srps)
    2335         476 :             .bind(4, sfs.st_mtime)
    2336         476 :             .step_ok_done();
    2337             : 
    2338         476 :           inc_metric("found_sourcerefs_total","source","files");
    2339             :         }
    2340             :     }
    2341             : 
    2342             :   ps_scan_done
    2343          38 :     .reset()
    2344          38 :     .bind(1, rps)
    2345          38 :     .bind(2, st.st_mtime)
    2346          38 :     .bind(3, st.st_size)
    2347          38 :     .step_ok_done();
    2348             : 
    2349          38 :   if (verbose > 2)
    2350           0 :     obatched(clog) << "recorded buildid=" << buildid << " file=" << rps
    2351           0 :                    << " mtime=" << st.st_mtime << " atype="
    2352             :                    << (executable_p ? "E" : "")
    2353           0 :                    << (debuginfo_p ? "D" : "") << endl;
    2354             : }
    2355             : 
    2356             : 
    2357             : 
    2358             : 
    2359             : 
    2360             : // Analyze given archive file of given age; record buildids / exec/debuginfo-ness of its
    2361             : // constituent files with given upsert statements.
    2362             : static void
    2363          29 : archive_classify (const string& rps, string& archive_extension,
    2364             :                   sqlite_ps& ps_upsert_buildids, sqlite_ps& ps_upsert_files,
    2365             :                   sqlite_ps& ps_upsert_de, sqlite_ps& ps_upsert_sref, sqlite_ps& ps_upsert_sdef,
    2366             :                   time_t mtime,
    2367             :                   unsigned& fts_executable, unsigned& fts_debuginfo, unsigned& fts_sref, unsigned& fts_sdef,
    2368             :                   bool& fts_sref_complete_p)
    2369             : {
    2370          58 :   string archive_decoder = "/dev/null";
    2371         116 :   for (auto&& arch : scan_archives)
    2372          87 :     if (string_endswith(rps, arch.first))
    2373             :       {
    2374          29 :         archive_extension = arch.first;
    2375          29 :         archive_decoder = arch.second;
    2376             :       }
    2377             : 
    2378             :   FILE* fp;
    2379             :   defer_dtor<FILE*,int>::dtor_fn dfn;
    2380          29 :   if (archive_decoder != "cat")
    2381             :     {
    2382           3 :       string popen_cmd = archive_decoder + " " + shell_escape(rps);
    2383           1 :       fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
    2384           1 :       dfn = pclose;
    2385           1 :       if (fp == NULL)
    2386           0 :         throw libc_exception (errno, string("popen ") + popen_cmd);
    2387             :     }
    2388             :   else
    2389             :     {
    2390          28 :       fp = fopen (rps.c_str(), "r");
    2391          28 :       dfn = fclose;
    2392          28 :       if (fp == NULL)
    2393           1 :         throw libc_exception (errno, string("fopen ") + rps);
    2394             :     }
    2395          56 :   defer_dtor<FILE*,int> fp_closer (fp, dfn);
    2396             : 
    2397             :   struct archive *a;
    2398          28 :   a = archive_read_new();
    2399          28 :   if (a == NULL)
    2400           0 :     throw archive_exception("cannot create archive reader");
    2401          56 :   defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
    2402             : 
    2403          28 :   int rc = archive_read_support_format_all(a);
    2404          28 :   if (rc != ARCHIVE_OK)
    2405           0 :     throw archive_exception(a, "cannot select all formats");
    2406          28 :   rc = archive_read_support_filter_all(a);
    2407          27 :   if (rc != ARCHIVE_OK)
    2408           0 :     throw archive_exception(a, "cannot select all filters");
    2409             : 
    2410          27 :   rc = archive_read_open_FILE (a, fp);
    2411          28 :   if (rc != ARCHIVE_OK)
    2412           0 :     throw archive_exception(a, "cannot open archive from pipe");
    2413             : 
    2414          28 :   if (verbose > 3)
    2415           0 :     obatched(clog) << "libarchive scanning " << rps << endl;
    2416             : 
    2417             :   while(1) // parse archive entries
    2418             :     {
    2419         193 :     if (interrupted)
    2420           0 :       break;
    2421             : 
    2422             :     try
    2423             :         {
    2424             :           struct archive_entry *e;
    2425         193 :           rc = archive_read_next_header (a, &e);
    2426         193 :           if (rc != ARCHIVE_OK)
    2427          28 :             break;
    2428             : 
    2429         165 :           if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
    2430         109 :             continue;
    2431             : 
    2432         112 :           string fn = canonicalized_archive_entry_pathname (e);
    2433             : 
    2434          56 :           if (verbose > 3)
    2435           0 :             obatched(clog) << "libarchive checking " << fn << endl;
    2436             : 
    2437             :           // extract this file to a temporary file
    2438          56 :           char* tmppath = NULL;
    2439          56 :           rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir.c_str());
    2440          56 :           if (rc < 0)
    2441           0 :             throw libc_exception (ENOMEM, "cannot allocate tmppath");
    2442         112 :           defer_dtor<void*,void> tmmpath_freer (tmppath, free);
    2443          56 :           int fd = mkstemp (tmppath);
    2444          56 :           if (fd < 0)
    2445           0 :             throw libc_exception (errno, "cannot create temporary file");
    2446          56 :           unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd
    2447         112 :           defer_dtor<int,int> minifd_closer (fd, close);
    2448             : 
    2449          56 :           rc = archive_read_data_into_fd (a, fd);
    2450          56 :           if (rc != ARCHIVE_OK)
    2451           0 :             throw archive_exception(a, "cannot extract file");
    2452             : 
    2453             :           // finally ... time to run elf_classify on this bad boy and update the database
    2454          56 :           bool executable_p = false, debuginfo_p = false;
    2455         112 :           string buildid;
    2456         112 :           set<string> sourcefiles;
    2457          56 :           elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
    2458             :           // NB: might throw
    2459             : 
    2460          56 :           if (buildid != "") // intern buildid
    2461             :             {
    2462             :               ps_upsert_buildids
    2463          31 :                 .reset()
    2464          31 :                 .bind(1, buildid)
    2465          31 :                 .step_ok_done();
    2466             :             }
    2467             : 
    2468             :           ps_upsert_files // register this rpm constituent file name in interning table
    2469          56 :             .reset()
    2470          56 :             .bind(1, fn)
    2471          56 :             .step_ok_done();
    2472             : 
    2473          56 :           if (sourcefiles.size() > 0) // sref records needed
    2474             :             {
    2475             :               // NB: we intern each source file once.  Once raw, as it
    2476             :               // appears in the DWARF file list coming back from
    2477             :               // elf_classify() - because it'll end up in the
    2478             :               // _norm.artifactsrc column.  We don't also put another
    2479             :               // version with a '.' at the front, even though that's
    2480             :               // how rpm/cpio packs names, because we hide that from
    2481             :               // the database for storage efficiency.
    2482             : 
    2483          46 :               for (auto&& s : sourcefiles)
    2484             :                 {
    2485          33 :                   if (s == "")
    2486             :                     {
    2487           0 :                       fts_sref_complete_p = false;
    2488           0 :                       continue;
    2489             :                     }
    2490             : 
    2491             :                   // PR25548: store canonicalized source path
    2492          33 :                   const string& dwarfsrc = s;
    2493          33 :                   string dwarfsrc_canon = canon_pathname (dwarfsrc);
    2494          33 :                   if (dwarfsrc_canon != dwarfsrc)
    2495             :                     {
    2496           0 :                       if (verbose > 3)
    2497           0 :                         obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
    2498             :                     }
    2499             : 
    2500             :                   ps_upsert_files
    2501          33 :                     .reset()
    2502          33 :                     .bind(1, dwarfsrc_canon)
    2503          33 :                     .step_ok_done();
    2504             : 
    2505             :                   ps_upsert_sref
    2506          33 :                     .reset()
    2507          33 :                     .bind(1, buildid)
    2508          33 :                     .bind(2, dwarfsrc_canon)
    2509          33 :                     .step_ok_done();
    2510             : 
    2511          33 :                   fts_sref ++;
    2512             :                 }
    2513             :             }
    2514             : 
    2515          56 :           if (executable_p)
    2516          13 :             fts_executable ++;
    2517          56 :           if (debuginfo_p)
    2518          18 :             fts_debuginfo ++;
    2519             : 
    2520          56 :           if (executable_p || debuginfo_p)
    2521             :             {
    2522             :               ps_upsert_de
    2523          31 :                 .reset()
    2524          31 :                 .bind(1, buildid)
    2525          31 :                 .bind(2, debuginfo_p ? 1 : 0)
    2526          31 :                 .bind(3, executable_p ? 1 : 0)
    2527          31 :                 .bind(4, rps)
    2528          31 :                 .bind(5, mtime)
    2529          31 :                 .bind(6, fn)
    2530          31 :                 .step_ok_done();
    2531             :             }
    2532             :           else // potential source - sdef record
    2533             :             {
    2534          25 :               fts_sdef ++;
    2535             :               ps_upsert_sdef
    2536          25 :                 .reset()
    2537          25 :                 .bind(1, rps)
    2538          25 :                 .bind(2, mtime)
    2539          25 :                 .bind(3, fn)
    2540          25 :                 .step_ok_done();
    2541             :             }
    2542             : 
    2543          56 :           if ((verbose > 2) && (executable_p || debuginfo_p))
    2544           0 :             obatched(clog) << "recorded buildid=" << buildid << " rpm=" << rps << " file=" << fn
    2545           0 :                            << " mtime=" << mtime << " atype="
    2546             :                            << (executable_p ? "E" : "")
    2547             :                            << (debuginfo_p ? "D" : "")
    2548           0 :                            << " sourcefiles=" << sourcefiles.size() << endl;
    2549             : 
    2550             :         }
    2551           0 :       catch (const reportable_exception& e)
    2552             :         {
    2553           0 :           e.report(clog);
    2554             :         }
    2555         165 :     }
    2556          28 : }
    2557             : 
    2558             : 
    2559             : 
    2560             : // scan for archive files such as .rpm
    2561             : static void
    2562          76 : scan_archive_file (const string& rps, const stat_t& st,
    2563             :                    sqlite_ps& ps_upsert_buildids,
    2564             :                    sqlite_ps& ps_upsert_files,
    2565             :                    sqlite_ps& ps_upsert_de,
    2566             :                    sqlite_ps& ps_upsert_sref,
    2567             :                    sqlite_ps& ps_upsert_sdef,
    2568             :                    sqlite_ps& ps_query,
    2569             :                    sqlite_ps& ps_scan_done,
    2570             :                    unsigned& fts_cached,
    2571             :                    unsigned& fts_executable,
    2572             :                    unsigned& fts_debuginfo,
    2573             :                    unsigned& fts_sref,
    2574             :                    unsigned& fts_sdef)
    2575             : {
    2576             :   /* See if we know of it already. */
    2577             :   int rc = ps_query
    2578          76 :     .reset()
    2579          76 :     .bind(1, rps)
    2580          76 :     .bind(2, st.st_mtime)
    2581          76 :     .step();
    2582          76 :   ps_query.reset();
    2583          76 :   if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
    2584             :     // no need to recheck a file/version we already know
    2585             :     // specifically, no need to parse this archive again, since we already have
    2586             :     // it as a D or E or S record,
    2587             :     // (so is stored with buildid=NULL)
    2588             :     {
    2589          47 :       fts_cached ++;
    2590          47 :       return;
    2591             :     }
    2592             : 
    2593             :   // intern the archive file name
    2594             :   ps_upsert_files
    2595          29 :     .reset()
    2596          29 :     .bind(1, rps)
    2597          29 :     .step_ok_done();
    2598             : 
    2599             :   // extract the archive contents
    2600          29 :   unsigned my_fts_executable = 0, my_fts_debuginfo = 0, my_fts_sref = 0, my_fts_sdef = 0;
    2601          29 :   bool my_fts_sref_complete_p = true;
    2602             :   try
    2603             :     {
    2604          30 :       string archive_extension;
    2605          29 :       archive_classify (rps, archive_extension,
    2606             :                         ps_upsert_buildids, ps_upsert_files,
    2607             :                         ps_upsert_de, ps_upsert_sref, ps_upsert_sdef, // dalt
    2608          29 :                         st.st_mtime,
    2609             :                         my_fts_executable, my_fts_debuginfo, my_fts_sref, my_fts_sdef,
    2610             :                         my_fts_sref_complete_p);
    2611          28 :       add_metric ("scanned_bytes_total","source",archive_extension + " archive",
    2612          28 :                   st.st_size);
    2613          28 :       inc_metric ("scanned_files_total","source",archive_extension + " archive");
    2614          28 :       add_metric("found_debuginfo_total","source",archive_extension + " archive",
    2615             :                  my_fts_debuginfo);
    2616          28 :       add_metric("found_executable_total","source",archive_extension + " archive",
    2617             :                  my_fts_executable);
    2618          28 :       add_metric("found_sourcerefs_total","source",archive_extension + " archive",
    2619             :                  my_fts_sref);
    2620             :     }
    2621           2 :   catch (const reportable_exception& e)
    2622             :     {
    2623           1 :       e.report(clog);
    2624             :     }
    2625             : 
    2626          29 :   if (verbose > 2)
    2627           0 :     obatched(clog) << "scanned archive=" << rps
    2628           0 :                    << " mtime=" << st.st_mtime
    2629           0 :                    << " executables=" << my_fts_executable
    2630           0 :                    << " debuginfos=" << my_fts_debuginfo
    2631           0 :                    << " srefs=" << my_fts_sref
    2632           0 :                    << " sdefs=" << my_fts_sdef
    2633           0 :                    << endl;
    2634             : 
    2635          29 :   fts_executable += my_fts_executable;
    2636          29 :   fts_debuginfo += my_fts_debuginfo;
    2637          29 :   fts_sref += my_fts_sref;
    2638          29 :   fts_sdef += my_fts_sdef;
    2639             : 
    2640          29 :   if (my_fts_sref_complete_p) // leave incomplete?
    2641             :     ps_scan_done
    2642          29 :       .reset()
    2643          29 :       .bind(1, rps)
    2644          29 :       .bind(2, st.st_mtime)
    2645          29 :       .bind(3, st.st_size)
    2646          29 :       .step_ok_done();
    2647             : }
    2648             : 
    2649             : 
    2650             : 
    2651             : ////////////////////////////////////////////////////////////////////////
    2652             : 
    2653             : 
    2654             : 
    2655             : // The thread that consumes file names off of the scanq.  We hold
    2656             : // the persistent sqlite_ps's at this level and delegate file/archive
    2657             : // scanning to other functions.
    2658             : static void*
    2659          12 : thread_main_scanner (void* arg)
    2660             : {
    2661             :   (void) arg;
    2662             : 
    2663             :   // all the prepared statements fit to use, the _f_ set:
    2664          36 :   sqlite_ps ps_f_upsert_buildids (db, "file-buildids-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
    2665          36 :   sqlite_ps ps_f_upsert_files (db, "file-files-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);");
    2666             :   sqlite_ps ps_f_upsert_de (db, "file-de-upsert",
    2667             :                           "insert or ignore into " BUILDIDS "_f_de "
    2668             :                           "(buildid, debuginfo_p, executable_p, file, mtime) "
    2669             :                           "values ((select id from " BUILDIDS "_buildids where hex = ?),"
    2670             :                           "        ?,?,"
    2671          36 :                           "        (select id from " BUILDIDS "_files where name = ?), ?);");
    2672             :   sqlite_ps ps_f_upsert_s (db, "file-s-upsert",
    2673             :                          "insert or ignore into " BUILDIDS "_f_s "
    2674             :                          "(buildid, artifactsrc, file, mtime) "
    2675             :                          "values ((select id from " BUILDIDS "_buildids where hex = ?),"
    2676             :                          "        (select id from " BUILDIDS "_files where name = ?),"
    2677             :                          "        (select id from " BUILDIDS "_files where name = ?),"
    2678          36 :                          "        ?);");
    2679             :   sqlite_ps ps_f_query (db, "file-negativehit-find",
    2680             :                         "select 1 from " BUILDIDS "_file_mtime_scanned where sourcetype = 'F' "
    2681          36 :                         "and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;");
    2682             :   sqlite_ps ps_f_scan_done (db, "file-scanned",
    2683             :                           "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
    2684          36 :                           "values ('F', (select id from " BUILDIDS "_files where name = ?), ?, ?);");
    2685             : 
    2686             :   // and now for the _r_ set
    2687          36 :   sqlite_ps ps_r_upsert_buildids (db, "rpm-buildid-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
    2688          36 :   sqlite_ps ps_r_upsert_files (db, "rpm-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);");
    2689             :   sqlite_ps ps_r_upsert_de (db, "rpm-de-insert",
    2690             :                           "insert or ignore into " BUILDIDS "_r_de (buildid, debuginfo_p, executable_p, file, mtime, content) values ("
    2691             :                           "(select id from " BUILDIDS "_buildids where hex = ?), ?, ?, "
    2692             :                           "(select id from " BUILDIDS "_files where name = ?), ?, "
    2693          36 :                           "(select id from " BUILDIDS "_files where name = ?));");
    2694             :   sqlite_ps ps_r_upsert_sref (db, "rpm-sref-insert",
    2695             :                             "insert or ignore into " BUILDIDS "_r_sref (buildid, artifactsrc) values ("
    2696             :                             "(select id from " BUILDIDS "_buildids where hex = ?), "
    2697          36 :                             "(select id from " BUILDIDS "_files where name = ?));");
    2698             :   sqlite_ps ps_r_upsert_sdef (db, "rpm-sdef-insert",
    2699             :                             "insert or ignore into " BUILDIDS "_r_sdef (file, mtime, content) values ("
    2700             :                             "(select id from " BUILDIDS "_files where name = ?), ?,"
    2701          36 :                             "(select id from " BUILDIDS "_files where name = ?));");
    2702             :   sqlite_ps ps_r_query (db, "rpm-negativehit-query",
    2703             :                       "select 1 from " BUILDIDS "_file_mtime_scanned where "
    2704          36 :                       "sourcetype = 'R' and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;");
    2705             :   sqlite_ps ps_r_scan_done (db, "rpm-scanned",
    2706             :                           "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
    2707          24 :                           "values ('R', (select id from " BUILDIDS "_files where name = ?), ?, ?);");
    2708             : 
    2709             : 
    2710          12 :   unsigned fts_cached = 0, fts_executable = 0, fts_debuginfo = 0, fts_sourcefiles = 0;
    2711          12 :   unsigned fts_sref = 0, fts_sdef = 0;
    2712             : 
    2713          12 :   add_metric("thread_count", "role", "scan", 1);
    2714          12 :   add_metric("thread_busy", "role", "scan", 1);
    2715         146 :   while (! interrupted)
    2716             :     {
    2717         134 :       scan_payload p;
    2718             : 
    2719         134 :       add_metric("thread_busy", "role", "scan", -1);
    2720         134 :       bool gotone = scanq.wait_front(p);
    2721         133 :       add_metric("thread_busy", "role", "scan", 1);
    2722             : 
    2723         134 :       if (! gotone) continue; // go back to waiting
    2724             : 
    2725             :       try
    2726             :         {
    2727         122 :           bool scan_archive = false;
    2728         487 :           for (auto&& arch : scan_archives)
    2729         365 :             if (string_endswith(p.first, arch.first))
    2730          76 :               scan_archive = true;
    2731             : 
    2732         122 :           if (scan_archive)
    2733          76 :             scan_archive_file (p.first, p.second,
    2734             :                                ps_r_upsert_buildids,
    2735             :                                ps_r_upsert_files,
    2736             :                                ps_r_upsert_de,
    2737             :                                ps_r_upsert_sref,
    2738             :                                ps_r_upsert_sdef,
    2739             :                                ps_r_query,
    2740             :                                ps_r_scan_done,
    2741             :                                fts_cached,
    2742             :                                fts_executable,
    2743             :                                fts_debuginfo,
    2744             :                                fts_sref,
    2745             :                                fts_sdef);
    2746             : 
    2747         122 :           if (scan_files) // NB: maybe "else if" ?
    2748         122 :             scan_source_file (p.first, p.second,
    2749             :                               ps_f_upsert_buildids,
    2750             :                               ps_f_upsert_files,
    2751             :                               ps_f_upsert_de,
    2752             :                               ps_f_upsert_s,
    2753             :                               ps_f_query,
    2754             :                               ps_f_scan_done,
    2755             :                               fts_cached, fts_executable, fts_debuginfo, fts_sourcefiles);
    2756             :         }
    2757           0 :       catch (const reportable_exception& e)
    2758             :         {
    2759           0 :           e.report(cerr);
    2760             :         }
    2761             : 
    2762             :       // finished a scanning step -- not a "loop", because we just
    2763             :       // consume the traversal loop's work, whenever it becomes available
    2764         122 :       inc_metric("thread_work_total","role","scan");
    2765             :     }
    2766             : 
    2767          12 :   add_metric("thread_busy", "role", "scan", -1);
    2768          24 :   return 0;
    2769             : }
    2770             : 
    2771             : 
    2772             : 
    2773             : // The thread that traverses all the source_paths and enqueues all the
    2774             : // matching files into the file/archive scan queue.
    2775             : static void
    2776          10 : scan_source_paths()
    2777             : {
    2778             :   // NB: fedora 31 glibc/fts(3) crashes inside fts_read() on empty
    2779             :   // path list.
    2780          10 :   if (source_paths.empty())
    2781           1 :     return;
    2782             : 
    2783             :   // Turn the source_paths into an fts(3)-compatible char**.  Since
    2784             :   // source_paths[] does not change after argv processing, the
    2785             :   // c_str()'s are safe to keep around awhile.
    2786          18 :   vector<const char *> sps;
    2787          43 :   for (auto&& sp: source_paths)
    2788          34 :     sps.push_back(sp.c_str());
    2789           9 :   sps.push_back(NULL);
    2790             : 
    2791           9 :   FTS *fts = fts_open ((char * const *)sps.data(),
    2792             :                       (traverse_logical ? FTS_LOGICAL : FTS_PHYSICAL|FTS_XDEV)
    2793             :                       | FTS_NOCHDIR /* multithreaded */,
    2794             :                       NULL);
    2795           9 :   if (fts == NULL)
    2796           0 :     throw libc_exception(errno, "cannot fts_open");
    2797           9 :   defer_dtor<FTS*,int> fts_cleanup (fts, fts_close);
    2798             : 
    2799             :   struct timeval tv_start, tv_end;
    2800           9 :   gettimeofday (&tv_start, NULL);
    2801           9 :   unsigned fts_scanned = 0, fts_regex = 0;
    2802             : 
    2803             :   FTSENT *f;
    2804         267 :   while ((f = fts_read (fts)) != NULL)
    2805             :   {
    2806         258 :     if (interrupted) break;
    2807             : 
    2808         258 :     if (sigusr2 != forced_groom_count) // stop early if groom triggered 
    2809             :       {
    2810           0 :         scanq.clear(); // clear previously issued work for scanner threads
    2811           0 :         break;
    2812             :       }
    2813             :     
    2814         258 :     fts_scanned ++;
    2815             : 
    2816         258 :     if (verbose > 2)
    2817           0 :       obatched(clog) << "fts traversing " << f->fts_path << endl;
    2818             : 
    2819         258 :     switch (f->fts_info)
    2820             :       {
    2821         122 :       case FTS_F:
    2822             :         {
    2823             :           /* Found a file.  Convert it to an absolute path, so
    2824             :              the buildid database does not have relative path
    2825             :              names that are unresolvable from a subsequent run
    2826             :              in a different cwd. */
    2827         122 :           char *rp = realpath(f->fts_path, NULL);
    2828         122 :           if (rp == NULL)
    2829           0 :             continue; // ignore dangling symlink or such
    2830         244 :           string rps = string(rp);
    2831         122 :           free (rp);
    2832             :           
    2833         122 :           bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0);
    2834         122 :           bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0);
    2835         122 :           if (!ri || rx)
    2836             :             {
    2837           0 :               if (verbose > 3)
    2838           0 :                 obatched(clog) << "fts skipped by regex "
    2839           0 :                                << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
    2840           0 :               fts_regex ++;
    2841           0 :               if (!ri)
    2842           0 :                 inc_metric("traversed_total","type","file-skipped-I");
    2843           0 :               if (rx)
    2844           0 :                 inc_metric("traversed_total","type","file-skipped-X");
    2845             :             }
    2846             :           else
    2847             :             {
    2848         122 :               scanq.push_back (make_pair(rps, *f->fts_statp));
    2849         122 :               inc_metric("traversed_total","type","file");
    2850         122 :             }
    2851             :         }
    2852         122 :         break;
    2853             : 
    2854           0 :       case FTS_ERR:
    2855             :       case FTS_NS:
    2856             :         // report on some types of errors because they may reflect fixable misconfiguration
    2857             :         {
    2858           0 :           auto x = libc_exception(f->fts_errno, string("fts traversal ") + string(f->fts_path));
    2859           0 :           x.report(cerr);
    2860             :         }
    2861           0 :         inc_metric("traversed_total","type","error");
    2862           0 :         break;
    2863             : 
    2864           8 :       case FTS_SL: // ignore, but count because debuginfod -L would traverse these
    2865           8 :         inc_metric("traversed_total","type","symlink");
    2866           8 :         break;
    2867             : 
    2868          64 :       case FTS_D: // ignore
    2869          64 :         inc_metric("traversed_total","type","directory");
    2870          64 :         break;
    2871             :         
    2872          64 :       default: // ignore
    2873          64 :         inc_metric("traversed_total","type","other");
    2874          64 :         break;
    2875             :       }
    2876             :   }
    2877           9 :   gettimeofday (&tv_end, NULL);
    2878           9 :   double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001;
    2879             : 
    2880          18 :   obatched(clog) << "fts traversed source paths in " << deltas << "s, scanned=" << fts_scanned
    2881           9 :                  << ", regex-skipped=" << fts_regex << endl;
    2882             : }
    2883             : 
    2884             : // Traversal thread body: loops until interrupted, deciding each pass whether to re-walk the source paths via scan_source_paths().
    2885             : static void*
    2886           3 : thread_main_fts_source_paths (void* arg)
    2887             : {
    2888             :   (void) arg; // ignore; we operate on global data
    2889             : 
    2890           3 :   set_metric("thread_tid", "role","traverse", tid());
    2891           3 :   add_metric("thread_count", "role", "traverse", 1);
    2892             : 
    2893           3 :   time_t last_rescan = 0; // 0 doubles as "no traversal has completed yet"
    2894             : 
    2895          18 :   while (! interrupted)
    2896             :     {
    2897          18 :       sleep (1);
    2898          18 :       scanq.wait_idle(); // don't start a new traversal while scanners haven't finished the job
    2899          18 :       scanq.done_idle(); // release the hounds
    2900          18 :       if (interrupted) break;
    2901             :       // A rescan is due on the first pass, when the -t interval has elapsed, or when a new SIGUSR1 was counted.
    2902          15 :       time_t now = time(NULL);
    2903          15 :       bool rescan_now = false;
    2904          15 :       if (last_rescan == 0) // at least one initial rescan is documented even for -t0
    2905           3 :         rescan_now = true;
    2906          15 :       if (rescan_s > 0 && (long)now > (long)(last_rescan + rescan_s))
    2907           2 :         rescan_now = true;
    2908          15 :       if (sigusr1 != forced_rescan_count)
    2909             :         {
    2910           7 :           forced_rescan_count = sigusr1; // remember which signal count has been serviced
    2911           7 :           rescan_now = true;
    2912             :         }
    2913          15 :       if (rescan_now)
    2914             :         try
    2915             :           {
    2916          10 :             set_metric("thread_busy", "role","traverse", 1);
    2917          10 :             scan_source_paths();
    2918          10 :             last_rescan = time(NULL); // NB: now was before scanning
    2919             :             // finished a traversal loop
    2920          10 :             inc_metric("thread_work_total", "role","traverse");
    2921          10 :             set_metric("thread_busy", "role","traverse", 0);
    2922             :           }
    2923           0 :         catch (const reportable_exception& e)
    2924             :           {
    2925           0 :             e.report(cerr); // NOTE(review): thread_busy stays 1 and last_rescan is unchanged on this path
    2926             :           }
    2927             :     }
    2928             : 
    2929           3 :   return 0;
    2930             : }
    2931             : 
    2932             : 
    2933             : 
    2934             : ////////////////////////////////////////////////////////////////////////
    2935             : // Log every (label, quantity) row of the BUILDIDS "_stats" relation and mirror each one into the "groom" metric.
    2936             : static void
    2937          10 : database_stats_report()
    2938             : {
    2939             :   sqlite_ps ps_query (db, "database-overview",
    2940          30 :                       "select label,quantity from " BUILDIDS "_stats");
    2941             : 
    2942          10 :   obatched(clog) << "database record counts:" << endl;
    2943             :   while (1)
    2944             :     {
    2945         110 :       int rc = sqlite3_step (ps_query);
    2946         110 :       if (rc == SQLITE_DONE) break; // no more rows
    2947         100 :       if (rc != SQLITE_ROW)
    2948           0 :         throw sqlite_exception(rc, "step"); // anything other than a row or completion is reported to the caller
    2949             :       // Column 0 is the label, column 1 the quantity; a NULL text column is printed as the string "NULL".
    2950         200 :       obatched(clog)
    2951         200 :         << right << setw(20) << ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL")
    2952             :         << " "
    2953         200 :         << (sqlite3_column_text(ps_query, 1) ?: (const unsigned char*) "NULL")
    2954         100 :         << endl;
    2955             :       // Same row again, this time as a numeric metric keyed by the label.
    2956         100 :       set_metric("groom", "statistic",
    2957         100 :                  ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL"),
    2958         100 :                  (sqlite3_column_double(ps_query, 1)));
    2959         100 :     }
    2960          10 : }
    2961             : 
    2962             : // Steps: report stats, forget files that vanished or changed mtime, drop orphan buildids, run sqlite maintenance pragmas, flush the fdcache.
    2963             : // Do a round of database grooming that might take many minutes to run.
    2964           5 : void groom()
    2965             : {
    2966           5 :   obatched(clog) << "grooming database" << endl;
    2967             : 
    2968             :   struct timeval tv_start, tv_end;
    2969           5 :   gettimeofday (&tv_start, NULL);
    2970             : 
    2971           5 :   database_stats_report();
    2972             :   
    2973             :   // scan for files that have disappeared
    2974             :   sqlite_ps files (db, "check old files", "select s.mtime, s.file, f.name from "
    2975             :                        BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f "
    2976          15 :                        "where f.id = s.file");
    2977          15 :   sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
    2978          15 :   sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
    2979             :   sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
    2980          15 :                             "where file = ? and mtime = ?");
    2981           5 :   files.reset();
    2982             :   while(1)
    2983             :     {
    2984          77 :       int rc = files.step();
    2985          77 :       if (rc != SQLITE_ROW) // end of rows, or any other status, ends the scan
    2986           5 :         break;
    2987             : 
    2988          72 :       int64_t mtime = sqlite3_column_int64 (files, 0);
    2989          72 :       int64_t fileid = sqlite3_column_int64 (files, 1);
    2990          72 :       const char* filename = ((const char*) sqlite3_column_text (files, 2) ?: "");
    2991             :       struct stat s;
    2992          72 :       rc = stat(filename, &s);
    2993          72 :       if (rc < 0 || (mtime != (int64_t) s.st_mtime)) // stat failed, or the recorded mtime no longer matches
    2994             :         {
    2995           4 :           if (verbose > 2)
    2996           0 :             obatched(clog) << "groom: forgetting file=" << filename << " mtime=" << mtime << endl;
    2997           4 :           files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
    2998           4 :           files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
    2999           4 :           files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
    3000           4 :           inc_metric("groomed_total", "decision", "stale");
    3001             :         }
    3002             :       else
    3003          68 :         inc_metric("groomed_total", "decision", "fresh");
    3004             : 
    3005          72 :       if (sigusr1 != forced_rescan_count) // stop early if scan triggered
    3006           0 :         break;
    3007          72 :     }
    3008           5 :   files.reset();
    3009             : 
    3010             :   // delete buildids with no references in _r_de or _f_de tables;
    3011             :   // cascades to _r_sref & _f_s records
    3012             :   sqlite_ps buildids_del (db, "nuke orphan buildids",
    3013             :                           "delete from " BUILDIDS "_buildids "
    3014             :                           "where not exists (select 1 from " BUILDIDS "_f_de d where " BUILDIDS "_buildids.id = d.buildid) "
    3015          15 :                           "and not exists (select 1 from " BUILDIDS "_r_de d where " BUILDIDS "_buildids.id = d.buildid)");
    3016           5 :   buildids_del.reset().step_ok_done();
    3017             : 
    3018             :   // NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G
    3019          15 :   sqlite_ps g1 (db, "incremental vacuum", "pragma incremental_vacuum");
    3020           5 :   g1.reset().step_ok_done();
    3021          15 :   sqlite_ps g2 (db, "optimize", "pragma optimize");
    3022           5 :   g2.reset().step_ok_done();
    3023          10 :   sqlite_ps g3 (db, "wal checkpoint", "pragma wal_checkpoint=truncate");
    3024           5 :   g3.reset().step_ok_done();
    3025             : 
    3026           5 :   database_stats_report(); // second report, so the log shows counts before and after grooming
    3027             : 
    3028           5 :   sqlite3_db_release_memory(db); // shrink the process if possible
    3029             : 
    3030           5 :   fdcache.limit(0,0); // release the fdcache contents
    3031           5 :   fdcache.limit(fdcache_fds,fdcache_mbs); // restore status quo parameters
    3032             : 
    3033           5 :   gettimeofday (&tv_end, NULL);
    3034           5 :   double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001; // elapsed seconds
    3035             : 
    3036           5 :   obatched(clog) << "groomed database in " << deltas << "s" << endl;
    3037           5 : }
    3038             : 
    3039             : // Groom thread body: loops until interrupted, running groom() on the first pass, every groom_s seconds, or on SIGUSR2.
    3040             : static void*
    3041           3 : thread_main_groom (void* /*arg*/)
    3042             : {
    3043           3 :   set_metric("thread_tid", "role", "groom", tid());
    3044           3 :   add_metric("thread_count", "role", "groom", 1);
    3045             : 
    3046           3 :   time_t last_groom = 0; // 0 doubles as "no groom has completed yet"
    3047             : 
    3048             :   while (1)
    3049             :     {
    3050          18 :       sleep (1);
    3051          18 :       scanq.wait_idle(); // PR25394: block scanners during grooming!
    3052          18 :       if (interrupted) break; // NB: leaves the loop without the matching done_idle() below
    3053             : 
    3054          15 :       time_t now = time(NULL);
    3055          15 :       bool groom_now = false;
    3056          15 :       if (last_groom == 0) // at least one initial groom is documented even for -g0
    3057           3 :         groom_now = true;
    3058          15 :       if (groom_s > 0 && (long)now > (long)(last_groom + groom_s))
    3059           2 :         groom_now = true;
    3060          15 :       if (sigusr2 != forced_groom_count)
    3061             :         {
    3062           2 :           forced_groom_count = sigusr2; // remember which signal count has been serviced
    3063           2 :           groom_now = true;
    3064             :         }
    3065          15 :       if (groom_now)
    3066             :         try
    3067             :           {
    3068           5 :             set_metric("thread_busy", "role", "groom", 1);
    3069           5 :             groom ();
    3070           5 :             last_groom = time(NULL); // NB: now was before grooming
    3071             :             // finished a grooming loop
    3072           5 :             inc_metric("thread_work_total", "role", "groom");
    3073           5 :             set_metric("thread_busy", "role", "groom", 0);
    3074             :           }
    3075           0 :         catch (const sqlite_exception& e) // NOTE(review): only sqlite_exception is caught here
    3076             :           {
    3077           0 :             obatched(cerr) << e.message << endl; // NOTE(review): thread_busy stays 1 and last_groom is unchanged on this path
    3078             :           }
    3079             : 
    3080          15 :       scanq.done_idle(); // pairs with wait_idle() at the top of the loop
    3081          15 :     }
    3082             : 
    3083           3 :   return 0;
    3084             : }
    3085             : 
    3086             : 
    3087             : ////////////////////////////////////////////////////////////////////////
    3088             : 
    3089             : // Handler that main() installs for SIGINT, SIGHUP and SIGTERM: counts the request in `interrupted` and interrupts sqlite.
    3090             : static void
    3091           3 : signal_handler (int /* sig */)
    3092             : {
    3093           3 :   interrupted ++; // the worker loops and main()'s pause() loop test this counter
    3094             : 
    3095           3 :   if (db) // main() zeroes db before closing it, so a late signal skips this call
    3096           3 :     sqlite3_interrupt (db);
    3097             : 
    3098             :   // NB: don't do anything else in here
    3099           3 : }
    3100             : 
    3101             : static void
    3102           7 : sigusr1_handler (int /* sig */)
    3103             : {
    3104           7 :    sigusr1 ++; // compared against forced_rescan_count by the traversal thread to force a rescan
    3105             :   // NB: don't do anything else in here
    3106           7 : }
    3107             : 
    3108             : static void
    3109           2 : sigusr2_handler (int /* sig */)
    3110             : {
    3111           2 :    sigusr2 ++; // compared against forced_groom_count by the groom thread to force a groom
    3112             :   // NB: don't do anything else in here
    3113           2 : }
    3114             : 
    3115             : 
    3116             : 
    3117             : 
    3118             : 
    3119             : // A user-defined sqlite function, to score the sharedness of the
    3120             : // prefix of two strings.  This is used to compare candidate debuginfo
    3121             : // / source-rpm names, so that the closest match
    3122             : // (directory-topology-wise closest) is found.  This is important in
    3123             : // case the same sref (source file name) is in many -debuginfo or
    3124             : // -debugsource RPMs, such as when multiple versions/releases of the
    3125             : // same package are in the database.
    3126             : // Result: count of equal leading bytes; SQL NULL unless both arguments are text; an error if argc != 2.
    3127         105 : static void sqlite3_sharedprefix_fn (sqlite3_context* c, int argc, sqlite3_value** argv)
    3128             : {
    3129         105 :   if (argc != 2)
    3130           0 :     sqlite3_result_error(c, "expect 2 string arguments", -1);
    3131         210 :   else if ((sqlite3_value_type(argv[0]) != SQLITE_TEXT) ||
    3132         105 :            (sqlite3_value_type(argv[1]) != SQLITE_TEXT))
    3133           3 :     sqlite3_result_null(c);
    3134             :   else
    3135             :     {
    3136         102 :       const unsigned char* a = sqlite3_value_text (argv[0]);
    3137         102 :       const unsigned char* b = sqlite3_value_text (argv[1]);
    3138         102 :       int i = 0;
    3139        8041 :       while (*a != '\0' && *b != '\0' && *a++ == *b++) // stop at either terminator: identical strings must not be compared past their end
    3140        7939 :         i++;
    3141         102 :       sqlite3_result_int (c, i);
    3142             :     }
    3143         105 : }
    3144             : 
    3145             : // Entry point: parse options, prepare the sqlite database, start the HTTP daemons and worker threads, wait for a signal, then shut down.
    3146             : int
    3147           3 : main (int argc, char *argv[])
    3148             : {
    3149           3 :   (void) setlocale (LC_ALL, "");
    3150           3 :   (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
    3151           3 :   (void) textdomain (PACKAGE_TARNAME);
    3152             : 
    3153             :   /* Tell the library which version we are expecting.  */
    3154           3 :   elf_version (EV_CURRENT);
    3155             : 
    3156           3 :   tmpdir = string(getenv("TMPDIR") ?: "/tmp");
    3157             : 
    3158             :   /* Set computed default values. */
    3159           3 :   db_path = string(getenv("HOME") ?: "/") + string("/.debuginfod.sqlite"); /* XDG? */
    3160           3 :   int rc = regcomp (& file_include_regex, ".*", REG_EXTENDED|REG_NOSUB); // match everything
    3161           3 :   if (rc != 0)
    3162             :     error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
    3163           3 :   rc = regcomp (& file_exclude_regex, "^$", REG_EXTENDED|REG_NOSUB); // match nothing
    3164           3 :   if (rc != 0)
    3165             :     error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
    3166             : 
    3167             :   // default parameters for fdcache are computed from system stats
    3168             :   struct statfs sfs;
    3169           3 :   rc = statfs(tmpdir.c_str(), &sfs);
    3170           3 :   if (rc < 0)
    3171           0 :     fdcache_mbs = 1024; // 1 gigabyte
    3172             :   else
    3173           3 :     fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
    3174           3 :   fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
    3175           3 :   fdcache_fds = (concurrency + fdcache_prefetch) * 2;
    3176             : 
    3177             :   /* Parse and process arguments.  */
    3178             :   int remaining;
    3179           3 :   argp_program_version_hook = print_version; // this works
    3180           3 :   (void) argp_parse (&argp, argc, argv, ARGP_IN_ORDER, &remaining, NULL);
    3181           3 :   if (remaining != argc)
    3182           0 :       error (EXIT_FAILURE, 0,
    3183           0 :              "unexpected argument: %s", argv[remaining]);
    3184             : 
    3185           3 :   if (scan_archives.size()==0 && !scan_files && source_paths.size()>0)
    3186           0 :     obatched(clog) << "warning: without -F -R -U -Z, ignoring PATHs" << endl;
    3187             : 
    3188           3 :   fdcache.limit(fdcache_fds, fdcache_mbs);
    3189             : 
    3190           3 :   (void) signal (SIGPIPE, SIG_IGN); // microhttpd can generate it incidentally, ignore
    3191           3 :   (void) signal (SIGINT, signal_handler); // ^C
    3192           3 :   (void) signal (SIGHUP, signal_handler); // EOF
    3193           3 :   (void) signal (SIGTERM, signal_handler); // systemd
    3194           3 :   (void) signal (SIGUSR1, sigusr1_handler); // end-user
    3195           3 :   (void) signal (SIGUSR2, sigusr2_handler); // end-user
    3196             : 
    3197             :   /* Get database ready. */
    3198           3 :   rc = sqlite3_open_v2 (db_path.c_str(), &db, (SQLITE_OPEN_READWRITE
    3199             :                                                |SQLITE_OPEN_CREATE
    3200             :                                                |SQLITE_OPEN_FULLMUTEX), /* thread-safe */
    3201             :                         NULL);
    3202           3 :   if (rc == SQLITE_CORRUPT)
    3203             :     {
    3204           0 :       (void) unlink (db_path.c_str());
    3205           0 :       error (EXIT_FAILURE, 0,
    3206             :              "cannot open %s, deleted database: %s", db_path.c_str(), sqlite3_errmsg(db));
    3207             :     }
    3208           3 :   else if (rc)
    3209             :     {
    3210           0 :       error (EXIT_FAILURE, 0,
    3211             :              "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(db));
    3212             :     }
    3213             : 
    3214           3 :   obatched(clog) << "opened database " << db_path << endl;
    3215           3 :   obatched(clog) << "sqlite version " << sqlite3_version << endl;
    3216             : 
    3217             :   // add special string-prefix-similarity function used in rpm sref/sdef resolution
    3218           3 :   rc = sqlite3_create_function(db, "sharedprefix", 2, SQLITE_UTF8, NULL,
    3219             :                                & sqlite3_sharedprefix_fn, NULL, NULL);
    3220           3 :   if (rc != SQLITE_OK)
    3221           0 :     error (EXIT_FAILURE, 0,
    3222             :            "cannot create sharedprefix( function: %s", sqlite3_errmsg(db));
    3223             : 
    3224           3 :   if (verbose > 3)
    3225           0 :     obatched(clog) << "ddl: " << DEBUGINFOD_SQLITE_DDL << endl;
    3226           3 :   rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_DDL, NULL, NULL, NULL);
    3227           3 :   if (rc != SQLITE_OK)
    3228             :     {
    3229           0 :       error (EXIT_FAILURE, 0,
    3230             :              "cannot run database schema ddl: %s", sqlite3_errmsg(db));
    3231             :     }
    3232             : 
    3233             :   // Start httpd server threads.  Separate pool for IPv4 and IPv6, in
    3234             :   // case the host only has one protocol stack.
    3235           3 :   MHD_Daemon *d4 = MHD_start_daemon (MHD_USE_THREAD_PER_CONNECTION
    3236             : #if MHD_VERSION >= 0x00095300
    3237             :                                      | MHD_USE_INTERNAL_POLLING_THREAD
    3238             : #else
    3239             :                                      | MHD_USE_SELECT_INTERNALLY
    3240             : #endif
    3241             :                                      | MHD_USE_DEBUG, /* report errors to stderr */
    3242             :                                      http_port,
    3243             :                                      NULL, NULL, /* default accept policy */
    3244             :                                      handler_cb, NULL, /* handler callback */
    3245             :                                      MHD_OPTION_END);
    3246           3 :   MHD_Daemon *d6 = MHD_start_daemon (MHD_USE_THREAD_PER_CONNECTION
    3247             : #if MHD_VERSION >= 0x00095300
    3248             :                                      | MHD_USE_INTERNAL_POLLING_THREAD
    3249             : #else
    3250             :                                      | MHD_USE_SELECT_INTERNALLY
    3251             : #endif
    3252             :                                      | MHD_USE_IPv6
    3253             :                                      | MHD_USE_DEBUG, /* report errors to stderr */
    3254             :                                      http_port,
    3255             :                                      NULL, NULL, /* default accept policy */
    3256             :                                      handler_cb, NULL, /* handler callback */
    3257             :                                      MHD_OPTION_END);
    3258             : 
    3259           3 :   if (d4 == NULL && d6 == NULL) // neither ipv4 nor ipv6? boo
    3260             :     {
    3261           0 :       sqlite3 *database = db;
    3262           0 :       db = 0; // for signal_handler not to freak
    3263           0 :       sqlite3_close (database);
    3264           0 :       error (EXIT_FAILURE, 0, "cannot start http server at port %d", http_port);
    3265             :     }
    3266             : 
    3267           6 :   obatched(clog) << "started http server on "
    3268             :                  << (d4 != NULL ? "IPv4 " : "")
    3269             :                  << (d6 != NULL ? "IPv6 " : "")
    3270           3 :                  << "port=" << http_port << endl;
    3271             : 
    3272             :   // add maxigroom sql if -G given
    3273           3 :   if (maxigroom)
    3274             :     {
    3275           0 :       obatched(clog) << "maxigrooming database, please wait." << endl;
    3276           0 :       extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);");
    3277           0 :       extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);");
    3278           0 :       extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;");
    3279             : 
    3280             :       // NB: we don't maxigroom the _files interning table.  It'd require a temp index on all the
    3281             :       // tables that have file foreign-keys, which is a lot.
    3282             : 
    3283             :       // NB: with =delete, may take up 3x disk space total during vacuum process
    3284             :       //     vs.  =off (only 2x but may corrupt database if program dies mid-vacuum)
    3285             :       //     vs.  =wal (>3x observed, but safe)
    3286           0 :       extra_ddl.push_back("pragma journal_mode=delete;");
    3287           0 :       extra_ddl.push_back("vacuum;");
    3288           0 :       extra_ddl.push_back("pragma journal_mode=wal;");
    3289             :     }
    3290             : 
    3291             :   // run extra -D sql if given
    3292           3 :   for (auto&& i: extra_ddl)
    3293             :     {
    3294           0 :       if (verbose > 1)
    3295           0 :         obatched(clog) << "extra ddl:\n" << i << endl;
    3296           0 :       rc = sqlite3_exec (db, i.c_str(), NULL, NULL, NULL);
    3297           0 :       if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
    3298           0 :         error (0, 0,
    3299             :                "warning: cannot run database extra ddl %s: %s", i.c_str(), sqlite3_errmsg(db));
    3300             :     }
    3301             : 
    3302           3 :   if (maxigroom)
    3303           0 :     obatched(clog) << "maxigroomed database" << endl;
    3304             : 
    3305           3 :   obatched(clog) << "search concurrency " << concurrency << endl;
    3306           3 :   obatched(clog) << "rescan time " << rescan_s << endl;
    3307           3 :   obatched(clog) << "fdcache fds " << fdcache_fds << endl;
    3308           3 :   obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
    3309           3 :   obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
    3310           3 :   obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
    3311           3 :   obatched(clog) << "groom time " << groom_s << endl;
    3312           3 :   if (scan_archives.size()>0)
    3313             :     {
    3314           6 :       obatched ob(clog);
    3315           3 :       auto& o = ob << "scanning archive types ";
    3316          10 :       for (auto&& arch : scan_archives)
    3317           7 :         o << arch.first << "(" << arch.second << ") ";
    3318           3 :       o << endl;
    3319             :     }
    3320           3 :   const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR);
    3321           3 :   if (du && du[0] != '\0') // set to non-empty string?
    3322           2 :     obatched(clog) << "upstream debuginfod servers: " << du << endl;
    3323             : 
    3324           3 :   vector<pthread_t> all_threads; // only threads that really started; joined at shutdown
    3325             : 
    3326             :   pthread_t pt;
    3327           3 :   rc = pthread_create (& pt, NULL, thread_main_groom, NULL);
    3328           3 :   if (rc != 0) // pthread_create reports failure with a positive error number, never a negative one
    3329             :     error (0, 0, "warning: cannot spawn thread (%d) to groom database\n", rc);
    3330             :   else
    3331           3 :     all_threads.push_back(pt);
    3332             : 
    3333           3 :   if (scan_files || scan_archives.size() > 0)
    3334             :     {
    3335           3 :       rc = pthread_create (& pt, NULL, thread_main_fts_source_paths, NULL); // keep the result; the check below used to test a stale rc
    3336           3 :       if (rc != 0)
    3337             :         error (0, 0, "warning: cannot spawn thread (%d) to traverse source paths\n", rc);
    3338           3 :       if (rc == 0) all_threads.push_back(pt); // never hand pthread_join a thread id that was not created
    3339          15 :       for (unsigned i=0; i<concurrency; i++)
    3340             :         {
    3341          12 :           rc = pthread_create (& pt, NULL, thread_main_scanner, NULL);
    3342          12 :           if (rc != 0)
    3343             :             error (0, 0, "warning: cannot spawn thread (%d) to scan source files / archives\n", rc);
    3344          12 :           if (rc == 0) all_threads.push_back(pt);
    3345             :         }
    3346             :     }
    3347             : 
    3348             :   /* Trivial main loop! */
    3349           3 :   set_metric("ready", 1);
    3350          15 :   while (! interrupted)
    3351          12 :     pause (); // returns after any handled signal; only signal_handler sets interrupted
    3352           3 :   scanq.nuke(); // wake up any remaining scanq-related threads, let them die
    3353           3 :   set_metric("ready", 0);
    3354             : 
    3355           3 :   if (verbose)
    3356           1 :     obatched(clog) << "stopping" << endl;
    3357             : 
    3358             :   /* Join all our threads. */
    3359          21 :   for (auto&& it : all_threads)
    3360          18 :     pthread_join (it, NULL);
    3361             : 
    3362             :   /* Stop all the web service threads. */
    3363           3 :   if (d4) MHD_stop_daemon (d4);
    3364           3 :   if (d6) MHD_stop_daemon (d6);
    3365             : 
    3366             :   /* With all threads known dead, we can clean up the global resources. */
    3367           3 :   rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_CLEANUP_DDL, NULL, NULL, NULL);
    3368           3 :   if (rc != SQLITE_OK)
    3369             :     {
    3370           0 :       error (0, 0,
    3371             :              "warning: cannot run database cleanup ddl: %s", sqlite3_errmsg(db));
    3372             :     }
    3373             : 
    3374             :   // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
    3375           3 :   (void) regfree (& file_include_regex);
    3376           3 :   (void) regfree (& file_exclude_regex);
    3377             : 
    3378           3 :   sqlite3 *database = db;
    3379           3 :   db = 0; // for signal_handler not to freak
    3380           3 :   (void) sqlite3_close (database);
    3381             : 
    3382           3 :   return 0;
    3383             : }

Generated by: LCOV version 1.13