LCOV - code coverage report
Current view: top level - debuginfod - debuginfod.cxx (source / functions) Hit Total Coverage
Test: elfutils-0.178 Lines: 903 1162 77.7 %
Date: 2019-11-26 23:55:16 Functions: 70 88 79.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* Debuginfo-over-http server.
       2             :    Copyright (C) 2019 Red Hat, Inc.
       3             :    This file is part of elfutils.
       4             : 
       5             :    This file is free software; you can redistribute it and/or modify
       6             :    it under the terms of the GNU General Public License as published by
       7             :    the Free Software Foundation; either version 3 of the License, or
       8             :    (at your option) any later version.
       9             : 
      10             :    elfutils is distributed in the hope that it will be useful, but
      11             :    WITHOUT ANY WARRANTY; without even the implied warranty of
      12             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13             :    GNU General Public License for more details.
      14             : 
      15             :    You should have received a copy of the GNU General Public License
      16             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
      17             : 
      18             : 
      19             : /* cargo-cult from libdwfl linux-kernel-modules.c */
      20             : /* In case we have a bad fts we include this before config.h because it
      21             :    can't handle _FILE_OFFSET_BITS.
      22             :    Everything we need here is fine if its declarations just come first.
      23             :    Also, include sys/types.h before fts. On some systems fts.h is not self
      24             :    contained. */
      25             : #ifdef BAD_FTS
      26             :   #include <sys/types.h>
      27             :   #include <fts.h>
      28             : #endif
      29             : 
      30             : #ifdef HAVE_CONFIG_H
      31             :   #include "config.h"
      32             : #endif
      33             : 
      34             : extern "C" {
      35             : #include "printversion.h"
      36             : }
      37             : 
      38             : #include "debuginfod.h"
      39             : #include <dwarf.h>
      40             : 
      41             : #include <argp.h>
      42             : #ifdef __GNUC__
      43             : #undef __attribute__ /* glibc bug - rhbz 1763325 */
      44             : #endif
      45             : 
      46             : #include <unistd.h>
      47             : #include <stdlib.h>
      48             : #include <error.h>
      49             : // #include <libintl.h> // not until it supports C++ << better
      50             : #include <locale.h>
      51             : #include <pthread.h>
      52             : #include <signal.h>
      53             : #include <sys/stat.h>
      54             : #include <sys/time.h>
      55             : #include <unistd.h>
      56             : #include <fcntl.h>
      57             : #include <netdb.h>
      58             : 
      59             : 
      60             : /* If fts.h is included before config.h, its indirect inclusions may not
      61             :    give us the right LFS aliases of these functions, so map them manually.  */
      62             : #ifdef BAD_FTS
      63             :   #ifdef _FILE_OFFSET_BITS
      64             :     #define open open64
      65             :     #define fopen fopen64
      66             :   #endif
      67             : #else
      68             :   #include <sys/types.h>
      69             :   #include <fts.h>
      70             : #endif
      71             : 
      72             : #include <cstring>
      73             : #include <vector>
      74             : #include <set>
      75             : #include <map>
      76             : #include <string>
      77             : #include <iostream>
      78             : #include <iomanip>
      79             : #include <ostream>
      80             : #include <sstream>
      81             : #include <mutex>
      82             : #include <condition_variable>
      83             : #include <thread>
      84             : // #include <regex> // on rhel7 gcc 4.8, not competent
      85             : #include <regex.h>
      86             : // #include <algorithm>
      87             : using namespace std;
      88             : 
      89             : #include <gelf.h>
      90             : #include <libdwelf.h>
      91             : 
      92             : #include <microhttpd.h>
      93             : #include <curl/curl.h>
      94             : #include <archive.h>
      95             : #include <archive_entry.h>
      96             : #include <sqlite3.h>
      97             : 
      98             : #ifdef __linux__
      99             : #include <sys/syscall.h>
     100             : #endif
     101             : 
     102             : #ifdef __linux__
     103             : #define tid() syscall(SYS_gettid)
     104             : #else
     105             : #define tid() pthread_self()
     106             : #endif
     107             : 
     108             : 
     109             : // Roll this identifier for every sqlite schema incompatibility.
     110             : #define BUILDIDS "buildids9"
     111             : 
     112             : #if SQLITE_VERSION_NUMBER >= 3008000
     113             : #define WITHOUT_ROWID "without rowid"
     114             : #else
     115             : #define WITHOUT_ROWID ""
     116             : #endif
     117             : 
     118             : static const char DEBUGINFOD_SQLITE_DDL[] = // one sqlite script: connection pragmas, current-schema tables and views, then drops of every older schema generation
     119             :   "pragma foreign_keys = on;\n"
     120             :   "pragma synchronous = 0;\n" // disable fsync()s - this cache is disposable across a machine crash
     121             :   "pragma journal_mode = wal;\n" // https://sqlite.org/wal.html
     122             :   "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
     123             :   "pragma journal_size_limit = 0;\n" // limit steady state file (between grooming, which also =truncate's)
     124             :   "pragma auto_vacuum = incremental;\n" // https://sqlite.org/pragma.html
     125             :   "pragma busy_timeout = 1000;\n" // https://sqlite.org/pragma.html
     126             :   // NB: all these are overridable with -D option
     127             : 
     128             :   // Normalization table for interning file names
     129             :   "create table if not exists " BUILDIDS "_files (\n"
     130             :   "        id integer primary key not null,\n"
     131             :   "        name text unique not null\n"
     132             :   "        );\n"
     133             :   // Normalization table for interning buildids
     134             :   "create table if not exists " BUILDIDS "_buildids (\n"
     135             :   "        id integer primary key not null,\n"
     136             :   "        hex text unique not null);\n"
     137             :   // Track the completion of scanning of a given file & sourcetype at given time
     138             :   "create table if not exists " BUILDIDS "_file_mtime_scanned (\n"
     139             :   "        mtime integer not null,\n"
     140             :   "        file integer not null,\n"
     141             :   "        size integer not null,\n" // in bytes
     142             :   "        sourcetype text(1) not null\n"
     143             :   "            check (sourcetype IN ('F', 'R')),\n"
     144             :   "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     145             :   "        primary key (file, mtime, sourcetype)\n"
     146             :   "        ) " WITHOUT_ROWID ";\n"
     147             :   "create table if not exists " BUILDIDS "_f_de (\n" // per-buildid records surfaced as sourcetype 'F' by the _query_d/_query_e views, flagged debuginfo and/or executable
     148             :   "        buildid integer not null,\n"
     149             :   "        debuginfo_p integer not null,\n"
     150             :   "        executable_p integer not null,\n"
     151             :   "        file integer not null,\n"
     152             :   "        mtime integer not null,\n"
     153             :   "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     154             :   "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
     155             :   "        primary key (buildid, file, mtime)\n"
     156             :   "        ) " WITHOUT_ROWID ";\n"
     157             :   "create table if not exists " BUILDIDS "_f_s (\n" // 'F'-side source records: (buildid, artifactsrc name) -> file, as consumed by the _query_s view
     158             :   "        buildid integer not null,\n"
     159             :   "        artifactsrc integer not null,\n"
     160             :   "        file integer not null,\n" // NB: not necessarily entered into _mtime_scanned
     161             :   "        mtime integer not null,\n"
     162             :   "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     163             :   "        foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     164             :   "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
     165             :   "        primary key (buildid, artifactsrc, file, mtime)\n"
     166             :   "        ) " WITHOUT_ROWID ";\n"
     167             :   "create table if not exists " BUILDIDS "_r_de (\n" // per-buildid records surfaced as sourcetype 'R'; presumably file = the rpm and content = a member path (source0/source1 in the views) - confirm
     168             :   "        buildid integer not null,\n"
     169             :   "        debuginfo_p integer not null,\n"
     170             :   "        executable_p integer not null,\n"
     171             :   "        file integer not null,\n"
     172             :   "        mtime integer not null,\n"
     173             :   "        content integer not null,\n"
     174             :   "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     175             :   "        foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     176             :   "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
     177             :   "        primary key (buildid, debuginfo_p, executable_p, file, content, mtime)\n"
     178             :   "        ) " WITHOUT_ROWID ";\n"
     179             :   "create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm
     180             :   "        buildid integer not null,\n"
     181             :   "        artifactsrc integer not null,\n"
     182             :   "        foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     183             :   "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
     184             :   "        primary key (buildid, artifactsrc)\n"
     185             :   "        ) " WITHOUT_ROWID ";\n"
     186             :   "create table if not exists " BUILDIDS "_r_sdef (\n" // rpm contents that may satisfy sref
     187             :   "        file integer not null,\n"
     188             :   "        mtime integer not null,\n"
     189             :   "        content integer not null,\n"
     190             :   "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     191             :   "        foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
     192             :   "        primary key (content, file, mtime)\n"
     193             :   "        ) " WITHOUT_ROWID ";\n"
     194             :   // create views to glue together some of the above tables, for webapi D queries
     195             :   "create view if not exists " BUILDIDS "_query_d as \n"
     196             :   "select\n"
     197             :   "        b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n" // NOTE(review): n.mtime appears twice in this select list (same in the other _query_d/_query_e branches) - confirm the extra column is intended
     198             :   "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n"
     199             :   "        where b.id = n.buildid and f0.id = n.file and n.debuginfo_p = 1\n"
     200             :   "union all select\n"
     201             :   "        b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
     202             :   "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n"
     203             :   "        where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.debuginfo_p = 1\n"
     204             :   ";"
     205             :   // ... and for E queries
     206             :   "create view if not exists " BUILDIDS "_query_e as \n"
     207             :   "select\n"
     208             :   "        b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
     209             :   "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n"
     210             :   "        where b.id = n.buildid and f0.id = n.file and n.executable_p = 1\n"
     211             :   "union all select\n"
     212             :   "        b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
     213             :   "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n"
     214             :   "        where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.executable_p = 1\n"
     215             :   ";"
     216             :   // ... and for S queries
     217             :   "create view if not exists " BUILDIDS "_query_s as \n"
     218             :   "select\n"
     219             :   "        b.hex as buildid, fs.name as artifactsrc, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1, null as source0ref\n"
     220             :   "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files fs, " BUILDIDS "_f_s n\n"
     221             :   "        where b.id = n.buildid and f0.id = n.file and fs.id = n.artifactsrc\n"
     222             :   "union all select\n"
     223             :   "        b.hex as buildid, f1.name as artifactsrc, 'R' as sourcetype, f0.name as source0, sd.mtime as mtime, f1.name as source1, fsref.name as source0ref\n"
     224             :   "        from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_files fsref, "
     225             :   "        " BUILDIDS "_r_sdef sd, " BUILDIDS "_r_sref sr, " BUILDIDS "_r_de sde\n"
     226             :   "        where b.id = sr.buildid and f0.id = sd.file and fsref.id = sde.file and f1.id = sd.content\n"
     227             :   "        and sr.artifactsrc = sd.content and sde.buildid = sr.buildid\n"
     228             :   ";"
     229             :   // and for startup overview counts
     230             :   "drop view if exists " BUILDIDS "_stats;\n" // dropped unconditionally, so the create below always installs the current definition
     231             :   "create view if not exists " BUILDIDS "_stats as\n"
     232             :   "          select 'file d/e' as label,count(*) as quantity from " BUILDIDS "_f_de\n"
     233             :   "union all select 'file s',count(*) from " BUILDIDS "_f_s\n"
     234             :   "union all select 'rpm d/e',count(*) from " BUILDIDS "_r_de\n"
     235             :   "union all select 'rpm sref',count(*) from " BUILDIDS "_r_sref\n"
     236             :   "union all select 'rpm sdef',count(*) from " BUILDIDS "_r_sdef\n"
     237             :   "union all select 'buildids',count(*) from " BUILDIDS "_buildids\n"
     238             :   "union all select 'filenames',count(*) from " BUILDIDS "_files\n"
     239             :   "union all select 'files scanned (#)',count(*) from " BUILDIDS "_file_mtime_scanned\n"
     240             :   "union all select 'files scanned (mb)',coalesce(sum(size)/1024/1024,0) from " BUILDIDS "_file_mtime_scanned\n"
     241             : #if SQLITE_VERSION_NUMBER >= 3016000
     242             :   "union all select 'index db size (mb)',page_count*page_size/1024/1024 as size FROM pragma_page_count(), pragma_page_size()\n"
     243             : #endif
     244             :   ";\n"
     245             : 
     246             : // schema change history & garbage collection
     247             : //
     248             : // XXX: we could have migration queries here to bring prior-schema
     249             : // data over instead of just dropping it.
     250             : //
     251             : // buildids9: widen the mtime_scanned table
     252             :   "" // <<< we are here
     253             : // buildids8: slim the sref table
     254             :   "drop table if exists buildids8_f_de;\n"
     255             :   "drop table if exists buildids8_f_s;\n"
     256             :   "drop table if exists buildids8_r_de;\n"
     257             :   "drop table if exists buildids8_r_sref;\n"
     258             :   "drop table if exists buildids8_r_sdef;\n"
     259             :   "drop table if exists buildids8_file_mtime_scanned;\n"
     260             :   "drop table if exists buildids8_files;\n"
     261             :   "drop table if exists buildids8_buildids;\n"
     262             : // buildids7: separate _norm table into dense subtype tables
     263             :   "drop table if exists buildids7_f_de;\n"
     264             :   "drop table if exists buildids7_f_s;\n"
     265             :   "drop table if exists buildids7_r_de;\n"
     266             :   "drop table if exists buildids7_r_sref;\n"
     267             :   "drop table if exists buildids7_r_sdef;\n"
     268             :   "drop table if exists buildids7_file_mtime_scanned;\n"
     269             :   "drop table if exists buildids7_files;\n"
     270             :   "drop table if exists buildids7_buildids;\n"
     271             : // buildids6: drop bolo/rfolo again, represent sources / rpmcontents in main table
     272             :   "drop table if exists buildids6_norm;\n"
     273             :   "drop table if exists buildids6_files;\n"
     274             :   "drop table if exists buildids6_buildids;\n"
     275             :   "drop view if exists buildids6;\n"
     276             : // buildids5: redefine srcfile1 column to be '.'-less (for rpms)
     277             :   "drop table if exists buildids5_norm;\n"
     278             :   "drop table if exists buildids5_files;\n"
     279             :   "drop table if exists buildids5_buildids;\n"
     280             :   "drop table if exists buildids5_bolo;\n"
     281             :   "drop table if exists buildids5_rfolo;\n"
     282             :   "drop view if exists buildids5;\n"
     283             : // buildids4: introduce rpmfile RFOLO
     284             :   "drop table if exists buildids4_norm;\n"
     285             :   "drop table if exists buildids4_files;\n"
     286             :   "drop table if exists buildids4_buildids;\n"
     287             :   "drop table if exists buildids4_bolo;\n"
     288             :   "drop table if exists buildids4_rfolo;\n"
     289             :   "drop view if exists buildids4;\n"
     290             : // buildids3*: split out srcfile BOLO
     291             :   "drop table if exists buildids3_norm;\n"
     292             :   "drop table if exists buildids3_files;\n"
     293             :   "drop table if exists buildids3_buildids;\n"
     294             :   "drop table if exists buildids3_bolo;\n"
     295             :   "drop view if exists buildids3;\n"
     296             : // buildids2: normalized buildid and filenames into interning tables;
     297             :   "drop table if exists buildids2_norm;\n"
     298             :   "drop table if exists buildids2_files;\n"
     299             :   "drop table if exists buildids2_buildids;\n"
     300             :   "drop view if exists buildids2;\n"
     301             :   // buildids1: made buildid and artifacttype NULLable, to represent cached-negative
     302             : //           lookups from sources, e.g. files or rpms that contain no buildid-indexable content
     303             :   "drop table if exists buildids1;\n"
     304             : // buildids: original
     305             :   "drop table if exists buildids;\n"
     306             :   ;
     307             : 
     308             : static const char DEBUGINFOD_SQLITE_CLEANUP_DDL[] = // one-statement sqlite script; NOTE(review): where it is executed is not visible in this part of the file
     309             :   "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
     310             :   ;
     311             : 
     312             : 
     313             : 
     314             : 
     315             : /* Name and version of program.  */
     316             : /* ARGP_PROGRAM_VERSION_HOOK_DEF = print_version; */ // not this simple for C++
     317             : 
     318             : /* Bug report address.  */
     319             : ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT; // both macros are defined outside this view (presumably printversion.h / config.h - confirm)
     320             : 
     321             : /* Definitions of arguments for argp functions.  */
     322             : static const struct argp_option options[] = // the short-option keys listed here are the ones parse_opt's switch handles
     323             :   {
     324             :    { NULL, 0, NULL, 0, "Scanners:", 1 }, // nameless entry: group header in the help output
     325             :    { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning threads.", 0 },
     326             :    { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning threads.", 0 },
     327             :    // "source-oci-imageregistry"  ... 
     328             : 
     329             :    { NULL, 0, NULL, 0, "Options:", 2 }, // second help group
     330             :    { "logical", 'L', NULL, 0, "Follow symlinks, default=ignore.", 0 },
     331             :    { "rescan-time", 't', "SECONDS", 0, "Number of seconds to wait between rescans, 0=disable.", 0 },
     332             :    { "groom-time", 'g', "SECONDS", 0, "Number of seconds to wait between database grooming, 0=disable.", 0 },
     333             :    { "maxigroom", 'G', NULL, 0, "Run a complete database groom/shrink pass at startup.", 0 },
     334             :    { "concurrency", 'c', "NUM", 0, "Limit scanning thread concurrency to NUM.", 0 },
     335             :    { "include", 'I', "REGEX", 0, "Include files matching REGEX, default=all.", 0 },
     336             :    { "exclude", 'X', "REGEX", 0, "Exclude files matching REGEX, default=none.", 0 },
     337             :    { "port", 'p', "NUM", 0, "HTTP port to listen on, default 8002.", 0 },
     338             :    { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
     339             :    { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
     340             :    { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
     341             : 
     342             :    { NULL, 0, NULL, 0, NULL, 0 } // all-zero entry terminates the table
     343             :   };
     344             : 
     345             : /* Short description of program.  */
     346             : static const char doc[] = "Serve debuginfo-related content across HTTP from files under PATHs.";
     347             : 
     348             : /* Strings for arguments in help texts.  */
     349             : static const char args_doc[] = "[PATH ...]";
     350             : 
     351             : /* Prototype for option handler.  */
     352             : static error_t parse_opt (int key, char *arg, struct argp_state *state);
     353             : 
     354             : /* Data structure to communicate with argp functions.  */
     355             : static struct argp argp =
     356             :   {
     357             :    options, parse_opt, args_doc, doc, NULL, NULL, NULL // trailing NULLs: no child parsers, no help filter, no translation domain
     358             :   };
     359             : 
     360             : 
     361             : static string db_path; // -d
     362             : static sqlite3 *db; // NOTE(review): opened and closed outside this view
     363             : static unsigned verbose; // incremented once per -v
     364             : static volatile sig_atomic_t interrupted = 0; // NOTE(review): sig_atomic_t flag, presumably written by a signal handler defined later - confirm
     365             : static volatile sig_atomic_t sigusr1 = 0; // NOTE(review): as above
     366             : static volatile sig_atomic_t sigusr2 = 0; // NOTE(review): as above
     367             : static unsigned http_port = 8002; // -p; parse_opt rejects values above 65535
     368             : static unsigned rescan_s = 300; // -t, seconds
     369             : static unsigned groom_s = 86400; // -g, seconds
     370             : static unsigned maxigroom = false; // -G; NOTE(review): declared unsigned but only assigned false/true in this view - bool would match scan_files/scan_rpms
     371             : static unsigned concurrency = std::thread::hardware_concurrency() ?: 1; // -c; GNU ?: falls back to 1 when hardware_concurrency() returns 0
     372             : static set<string> source_paths; // positional PATH arguments
     373             : static bool scan_files = false; // -F
     374             : static bool scan_rpms = false; // -R
     375             : static vector<string> extra_ddl; // one entry per -D, in command-line order
     376             : static regex_t file_include_regex; // -I, compiled with REG_EXTENDED|REG_NOSUB in parse_opt
     377             : static regex_t file_exclude_regex; // -X, compiled with REG_EXTENDED|REG_NOSUB in parse_opt
     378             : static bool traverse_logical; // -L
     379             : 
     380             : static void set_metric(const string& key, int64_t value); // metric helpers: declared here, defined further down the file
     381             : // static void inc_metric(const string& key);
     382             : static void set_metric(const string& metric,
     383             :                        const string& lname, const string& lvalue,
     384             :                        int64_t value);
     385             : static void inc_metric(const string& metric,
     386             :                        const string& lname, const string& lvalue);
     387             : static void add_metric(const string& metric,
     388             :                        const string& lname, const string& lvalue,
     389             :                        int64_t value);
     390             : 
     391             : /* Handle program arguments: argp callback that stores each recognized option into the file-level settings; returns 0 when the key was consumed and ARGP_ERR_UNKNOWN for any other key.  */
     392             : static error_t
     393          22 : parse_opt (int key, char *arg,
     394             :            struct argp_state *state __attribute__ ((unused))) // NB: state is in fact used below, for argp_failure()
     395             : {
     396             :   int rc;
     397          22 :   switch (key)
     398             :     {
     399           0 :     case 'v': verbose ++; break;
     400           2 :     case 'd': db_path = string(arg); break;
     401           2 :     case 'p': http_port = (unsigned) atoi(arg); // NOTE(review): atoi() reports no errors, so non-numeric text silently becomes port 0
     402           2 :       if (http_port > 65535) argp_failure(state, 1, EINVAL, "port number");
     403           2 :       break;
     404           2 :     case 'F': scan_files = true; break;
     405           1 :     case 'R': scan_rpms = true; break;
     406           1 :     case 'L':
     407           1 :       traverse_logical = true;
     408           1 :       break;
     409           0 :     case 'D': extra_ddl.push_back(string(arg)); break;
     410           1 :     case 't':
     411           1 :       rescan_s = (unsigned) atoi(arg); // NOTE(review): unchecked atoi(), as for -p
     412           1 :       break;
     413           1 :     case 'g':
     414           1 :       groom_s = (unsigned) atoi(arg); // NOTE(review): unchecked atoi(), as for -p
     415           1 :       break;
     416           0 :     case 'G':
     417           0 :       maxigroom = true;
     418           0 :       break;
     419           0 :     case 'c':
     420           0 :       concurrency = (unsigned) atoi(arg);
     421           0 :       if (concurrency < 1) concurrency = 1; // clamp 0 up to a single thread
     422           0 :       break;
     423           0 :     case 'I':
     424             :       // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
     425           0 :       regfree (&file_include_regex);
     426           0 :       rc = regcomp (&file_include_regex, arg, REG_EXTENDED|REG_NOSUB);
     427           0 :       if (rc != 0)
     428           0 :         argp_failure(state, 1, EINVAL, "regular expression");
     429           0 :       break;
     430           0 :     case 'X':
     431           0 :       regfree (&file_exclude_regex); // same free-then-recompile sequence as -I
     432           0 :       rc = regcomp (&file_exclude_regex, arg, REG_EXTENDED|REG_NOSUB);
     433           0 :       if (rc != 0)
     434           0 :         argp_failure(state, 1, EINVAL, "regular expression");
     435           0 :       break;
     436           4 :     case ARGP_KEY_ARG: // positional argument: one more PATH to scan
     437           4 :       source_paths.insert(string(arg));
     438           4 :       break;
     439             :       // case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
     440           8 :     default: return ARGP_ERR_UNKNOWN; // every key not handled above
     441             :     }
     442             : 
     443          14 :   return 0;
     444             : }
     445             : 
     446             : 
     447             : ////////////////////////////////////////////////////////////////////////
     448             : 
     449             : 
     450             : // represent errors that may get reported to an ostream and/or a libmicrohttpd connection; `code` is the status passed to MHD_queue_response, `message` the text/plain body
     451             : 
     452             : struct reportable_exception
     453             : {
     454             :   int code;
     455             :   string message;
     456             : 
     457           2 :   reportable_exception(int c, const string& m): code(c), message(m) {}
     458           1 :   reportable_exception(const string& m): code(503), message(m) {} // status defaults to 503 when only a message is given
     459             :   reportable_exception(): code(503), message() {}
     460             : 
     461             :   void report(ostream& o) const; // defined under obatched() class below
     462             : 
     463           3 :   int mhd_send_response(MHD_Connection* c) const { // queue `message` on connection c with status `code`; returns MHD_queue_response's result
     464           3 :     MHD_Response* r = MHD_create_response_from_buffer (message.size(), // NOTE(review): r is not checked for NULL before use
     465           3 :                                                        (void*) message.c_str(),
     466             :                                                        MHD_RESPMEM_MUST_COPY); // MUST_COPY: the response keeps its own copy of the bytes
     467           3 :     MHD_add_response_header (r, "Content-Type", "text/plain");
     468           3 :     int rc = MHD_queue_response (c, code, r);
     469           3 :     MHD_destroy_response (r); // release our handle once the response is queued
     470           3 :     return rc;
     471             :   }
     472             : };
     473             : 
     474             : 
     475             : struct sqlite_exception: public reportable_exception // message is "sqlite3 error: <msg>: <sqlite3_errstr(rc)>"; rc itself is not stored, code stays at the 503 default
     476             : {
     477           0 :   sqlite_exception(int rc, const string& msg):
     478           0 :     reportable_exception(string("sqlite3 error: ") + msg + ": " + string(sqlite3_errstr(rc) ?: "?")) {} // "?" stands in if sqlite3_errstr returns NULL
     479             : };
     480             : 
     481             : struct libc_exception: public reportable_exception // message is "libc error: <msg>: <strerror(rc)>"; rc is passed to strerror() and not stored, code stays at the 503 default
     482             : {
     483           0 :   libc_exception(int rc, const string& msg):
     484           0 :     reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {} // "?" stands in if strerror returns NULL
     485             : };
     486             : 
     487             : 
     488             : struct archive_exception: public reportable_exception // "libarchive error: <msg>", optionally followed by the archive handle's own error string; code stays at the 503 default
     489             : {
     490           0 :   archive_exception(const string& msg):
     491           0 :     reportable_exception(string("libarchive error: ") + msg) {}
     492           0 :   archive_exception(struct archive* a, const string& msg):
     493           0 :     reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {} // "?" stands in if archive_error_string returns NULL
     494             : };
     495             : 
     496             : 
     497             : struct elfutils_exception: public reportable_exception // message is "elfutils error: <msg>: <elf_errmsg(rc)>"; rc is not stored, code stays at the 503 default
     498             : {
     499           0 :   elfutils_exception(int rc, const string& msg):
     500           0 :     reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {} // "?" stands in if elf_errmsg returns NULL
     501             : };
     502             : 
     503             : 
     504             : ////////////////////////////////////////////////////////////////////////
     505             : 
     506             : // a c++ counting-semaphore class ... since we're c++11 not c++20
                       // The counter is guarded by mtx.  wait() blocks on cv while the counter
                       // is zero and then takes one unit; notify() gives one unit back and
                       // wakes a single waiter.
     507             : 
     508             : class semaphore
     509             : {
     510             : public:
                         // Start with c available units (default 1).
     511           2 :   semaphore (unsigned c=1): count(c) {}
                         // Release one unit and wake one thread blocked in wait(), if any.
     512         187 :   inline void notify () {
     513         374 :     unique_lock<mutex> lock(mtx);
     514         187 :     count++;
     515         187 :     cv.notify_one();
     516         187 :   }
                         // Acquire one unit, blocking until one is available.
     517         187 :   inline void wait() {
     518         187 :     unique_lock<mutex> lock(mtx);
                           // Loop rather than a single check: re-test the counter after
                           // every wakeup before consuming a unit.
     519         187 :     while (count == 0)
     520           0 :       cv.wait(lock);
     521         187 :     count--;
     522         187 :   }
     523             : private:
     524             :   mutex mtx;               // protects count
     525             :   condition_variable cv;   // signalled by notify()
     526             :   unsigned count;          // units currently available
     527             : };
     528             : 
     529             : 
                       // Scope guard for a semaphore: the constructor takes one unit with
                       // wait() (possibly blocking) and the destructor hands it back with
                       // notify().  The semaphore pointer is not owned and must not be NULL.
                       // NOTE(review): the class is copyable; a copy would notify() a second
                       // time without a matching wait() -- confirm no caller copies it.
     530             : class semaphore_borrower
     531             : {
     532             : public:
     533         187 :   semaphore_borrower(semaphore* s): sem(s) { sem->wait(); }
     534         187 :   ~semaphore_borrower() { sem->notify(); }
     535             : private:
     536             :   semaphore* sem;
     537             : };
     538             : 
     539             : 
     540             : ////////////////////////////////////////////////////////////////////////
     541             : 
     542             : 
     543             : // Print a standard timestamp.
                       // Writes "[<date>] (<pid>/<tid>): " to o and returns o.  <date> is the
                       // current UTC time formatted with strftime "%c"; it is left empty if
                       // the time cannot be broken down.
     544             : static ostream&
     545         117 : timestamp (ostream &o)
     546             : {
     547             :   char datebuf[80];
     548         117 :   char *now2 = NULL;
     549         117 :   time_t now_t = time(NULL);
                         // Use gmtime_r with a caller-owned struct tm: plain gmtime returns a
                         // pointer to shared static storage, which is not safe when several
                         // threads produce log lines at the same time.
     550         117 :   struct tm now_buf; struct tm *now = gmtime_r (&now_t, &now_buf);
     551         117 :   if (now)
     552             :     {
     553         117 :       (void) strftime (datebuf, sizeof (datebuf), "%c", now);
     554         117 :       now2 = datebuf;
     555             :     }
     556             : 
     557             :   return o << "[" << (now2 ? now2 : "") << "] "
     558         117 :            << "(" << getpid () << "/" << tid() << "): ";
     559             : }
     560             : 
     561             : 
     562             : // A little class that impersonates an ostream to the extent that it can
     563             : // take << streaming operations.  It batches up the bits into an internal
     564             : // stringstream until it is destroyed; then flushes to the original ostream.
     565             : // It adds a timestamp prefix unless constructed with timestamp_p = false.
                       // Typical use is as a temporary: obatched(clog) << ... << endl;
                       // so the whole message reaches the target stream in one locked write.
     566             : class obatched
     567             : {
     568             : private:
     569             :   ostream& o;          // destination stream, written only in the destructor
     570             :   stringstream stro;   // accumulates the message
     571             :   static mutex lock;   // shared by all obatched objects
     572             : public:
     573         117 :   obatched(ostream& oo, bool timestamp_p = true): o(oo)
     574             :   {
     575         117 :     if (timestamp_p)
     576         117 :       timestamp(stro);
     577         115 :   }
                         // Emit the batched text to the destination under the class-wide
                         // lock and flush it.
     578         115 :   ~obatched()
     579         117 :   {
     580         232 :     unique_lock<mutex> do_not_cross_the_streams(obatched::lock);
     581         117 :     o << stro.str();
     582         117 :     o.flush();
     583         117 :   }
                         // Both accessors expose the internal stringstream, so chained <<
                         // operations after the first one write straight into the batch.
     584             :   operator ostream& () { return stro; }
     585         117 :   template <typename T> ostream& operator << (const T& t) { stro << t; return stro; }
     586             : };
     587             : mutex obatched::lock; // just the one, since cout/cerr iostreams are not thread-safe
     588             : 
     589             : 
                       // Log this exception's message, followed by a newline, to o.  The text
                       // goes through a temporary obatched, so it is written to o when that
                       // temporary is destroyed at the end of the statement.
     590           3 : void reportable_exception::report(ostream& o) const {
     591           3 :   obatched(o) << message << endl;
     592           3 : }
     593             : 
     594             : 
     595             : ////////////////////////////////////////////////////////////////////////
     596             : 
     597             : 
     598             : // RAII style sqlite prepared-statement holder that matches { } block lifetime
                       // The constructor prepares the statement and the destructor finalizes
                       // it.  reset() and the bind() overloads return *this so calls can be
                       // chained.  When verbose > 4 every operation is logged to clog together
                       // with the statement's nickname.
     599             : 
     600             : struct sqlite_ps
     601             : {
     602             : private:
     603             :   sqlite3* db;
     604             :   const string nickname;   // label used only in verbose log lines
     605             :   const string sql;        // statement text, kept for logs and error messages
     606             :   sqlite3_stmt *pp;        // the prepared statement handle
     607             : 
     608             :   sqlite_ps(const sqlite_ps&); // make uncopyable
     609             :   sqlite_ps& operator=(const sqlite_ps &); // make unassignable
     610             : 
     611             : public:
                         // Prepare statement s on database d.  Throws sqlite_exception
                         // ("prepare <sql>") if sqlite3_prepare_v2 does not return SQLITE_OK.
     612         260 :   sqlite_ps (sqlite3* d, const string& n, const string& s): db(d), nickname(n), sql(s) {
     613         260 :     if (verbose > 4)
     614           0 :       obatched(clog) << nickname << " prep " << sql << endl;
     615         260 :     int rc = sqlite3_prepare_v2 (db, sql.c_str(), -1 /* to \0 */, & this->pp, NULL);
     616         260 :     if (rc != SQLITE_OK)
     617           0 :       throw sqlite_exception(rc, "prepare " + sql);
     618         260 :   }
     619             : 
                         // Reset the statement so it can be stepped again; the result code
                         // of sqlite3_reset is not checked.
     620        1812 :   sqlite_ps& reset()
     621             :   {
     622        1812 :     sqlite3_reset(this->pp);
     623        1812 :     return *this;
     624             :   }
     625             : 
                         // Bind a text value to the given parameter index.  The length
                         // argument is -1, so the text is taken from str.c_str() up to its
                         // NUL terminator, and SQLITE_TRANSIENT makes sqlite keep its own
                         // copy.  Throws sqlite_exception on failure.
     626        2741 :   sqlite_ps& bind(int parameter, const string& str)
     627             :   {
     628        2741 :     if (verbose > 4)
     629           0 :       obatched(clog) << nickname << " bind " << parameter << "=" << str << endl;
     630        2741 :     int rc = sqlite3_bind_text (this->pp, parameter, str.c_str(), -1, SQLITE_TRANSIENT);
     631        2741 :     if (rc != SQLITE_OK)
     632           0 :       throw sqlite_exception(rc, "sqlite3 bind");
     633        2741 :     return *this;
     634             :   }
     635             : 
                         // Bind a 64-bit integer value.  Throws sqlite_exception on failure.
     636         707 :   sqlite_ps& bind(int parameter, int64_t value)
     637             :   {
     638         707 :     if (verbose > 4)
     639           0 :       obatched(clog) << nickname << " bind " << parameter << "=" << value << endl;
     640         707 :     int rc = sqlite3_bind_int64 (this->pp, parameter, value);
     641         707 :     if (rc != SQLITE_OK)
     642           0 :       throw sqlite_exception(rc, "sqlite3 bind");
     643         707 :     return *this;
     644             :   }
     645             : 
                         // Bind SQL NULL.  Throws sqlite_exception on failure.
     646             :   sqlite_ps& bind(int parameter)
     647             :   {
     648             :     if (verbose > 4)
     649             :       obatched(clog) << nickname << " bind " << parameter << "=" << "NULL" << endl;
     650             :     int rc = sqlite3_bind_null (this->pp, parameter);
     651             :     if (rc != SQLITE_OK)
     652             :       throw sqlite_exception(rc, "sqlite3 bind");
     653             :     return *this;
     654             :   }
     655             : 
     656             : 
                         // Step the statement once for its side effects.  SQLITE_OK,
                         // SQLITE_DONE and SQLITE_ROW are all accepted (any row produced is
                         // ignored); anything else throws sqlite_exception before the
                         // statement is reset.  On success the statement is reset for reuse.
     657        1634 :   void step_ok_done() {
     658        1634 :     int rc = sqlite3_step (this->pp);
     659        1634 :     if (verbose > 4)
     660           0 :       obatched(clog) << nickname << " step-ok-done(" << sqlite3_errstr(rc) << ") " << sql << endl;
     661        1634 :     if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
     662           0 :       throw sqlite_exception(rc, "sqlite3 step");
     663        1634 :     (void) sqlite3_reset (this->pp);
     664        1634 :   }
     665             : 
     666             : 
                         // Step the statement once and return the raw sqlite3_step result;
                         // the caller interprets it.  Never throws and does not reset.
     667         133 :   int step() {
     668         133 :     int rc = sqlite3_step (this->pp);
     669         133 :     if (verbose > 4)
     670           0 :       obatched(clog) << nickname << " step(" << sqlite3_errstr(rc) << ") " << sql << endl;
     671         133 :     return rc;
     672             :   }
     673             : 
     674             : 
     675             : 
                         // Finalize on destruction; the conversion operator lets the object
                         // be passed directly to sqlite3_column_* and similar calls.
     676         260 :   ~sqlite_ps () { sqlite3_finalize (this->pp); }
     677         353 :   operator sqlite3_stmt* () { return this->pp; }
     678             : };
     679             : 
     680             : 
     681             : ////////////////////////////////////////////////////////////////////////
     682             : 
     683             : // RAII style templated autocloser
                       // Stores a payload value and a function pointer; the destructor calls
                       // the function on the payload and discards its result.  Payload is the
                       // handle type (held by value), Ignore is the cleanup function's return
                       // type.
     684             : 
     685             : template <class Payload, class Ignore>
     686             : struct defer_dtor
     687             : {
     688             : public:
     689             :   typedef Ignore (*dtor_fn) (Payload);
     690             : 
     691             : private:
     692             :   Payload p;    // value handed to fn at destruction
     693             :   dtor_fn fn;   // cleanup function; called unconditionally, so must not be NULL
     694             : 
     695             : public:
     696         114 :   defer_dtor(Payload _p, dtor_fn _fn): p(_p), fn(_fn) {}
     697         114 :   ~defer_dtor() { (void) (*fn)(p); }
     698             : 
     699             : private:
                         // Declared but not defined: copying would run the cleanup twice.
     700             :   defer_dtor(const defer_dtor<Payload,Ignore>&); // make uncopyable
     701             :   defer_dtor& operator=(const defer_dtor<Payload,Ignore> &); // make unassignable
     702             : };
     703             : 
     704             : 
     705             : 
     706             : ////////////////////////////////////////////////////////////////////////
     707             : 
     708             : 
     709             : 
     710             : 
     711             : 
                       // Describe the client end of a connection as "<host>:<port>", using
                       // numeric forms only (NI_NUMERICHOST | NI_NUMERICSERV, so no DNS or
                       // service-name lookups).  Returns "internal" when conn is null.  If
                       // the address cannot be obtained, is neither AF_INET nor AF_INET6, or
                       // getnameinfo fails, both parts are empty and the result is ":".
     712             : static string
     713           0 : conninfo (struct MHD_Connection * conn)
     714             : {
     715             :   char hostname[256]; // RFC1035
     716             :   char servname[256];
                         // Stays nonzero unless one of the getnameinfo calls below succeeds.
     717           0 :   int sts = -1;
     718             : 
     719           0 :   if (conn == 0)
     720           0 :     return "internal";
     721             : 
     722             :   /* Look up client address data. */
     723           0 :   const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
     724             :                                                                MHD_CONNECTION_INFO_CLIENT_ADDRESS);
     725           0 :   struct sockaddr *so = u ? u->client_addr : 0;
     726             : 
                         // getnameinfo needs the real address length, which depends on the
                         // address family.
     727           0 :   if (so && so->sa_family == AF_INET) {
     728           0 :     sts = getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), servname,
     729             :                        sizeof (servname), NI_NUMERICHOST | NI_NUMERICSERV);
     730           0 :   } else if (so && so->sa_family == AF_INET6) {
     731           0 :     sts = getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname),
     732             :                        servname, sizeof (servname), NI_NUMERICHOST | NI_NUMERICSERV);
     733             :   }
                         // The buffers are uninitialized on any failure path; make them
                         // empty strings before they are read below.
     734           0 :   if (sts != 0) {
     735           0 :     hostname[0] = servname[0] = '\0';
     736             :   }
     737             : 
     738           0 :   return string(hostname) + string(":") + string(servname);
     739             : }
     740             : 
     741             : 
     742             : 
     743             : ////////////////////////////////////////////////////////////////////////
     744             : 
                       // Add caching headers to resp: a "Last-Modified" header carrying mtime
                       // as a GMT date ("%a, %d %b %Y %T GMT"), when mtime can be converted
                       // and formatted, and always "Cache-Control: public".  Failures to add
                       // a header are ignored.
     745             : static void
     746          30 : add_mhd_last_modified (struct MHD_Response *resp, time_t mtime)
     747             : {
                         // Use gmtime_r with a caller-owned struct tm: plain gmtime returns a
                         // pointer to shared static storage, which another thread could
                         // overwrite between the conversion and the strftime call.
     748          30 :   struct tm now_buf; struct tm *now = gmtime_r (&mtime, &now_buf);
     749          30 :   if (now != NULL)
     750             :     {
     751             :       char datebuf[80];
     752          30 :       size_t rc = strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %T GMT", now);
                             // strftime returns 0 when the result does not fit; skip the
                             // header in that case rather than send a truncated date.
     753          30 :       if (rc > 0 && rc < sizeof (datebuf))
     754          30 :         (void) MHD_add_response_header (resp, "Last-Modified", datebuf);
     755             :     }
     756             : 
     757          30 :   (void) MHD_add_response_header (resp, "Cache-Control", "public");
     758          30 : }
     759             : 
     760             : 
     761             : 
                       // Serve a match that is a plain file at path b_source0.
                       // Returns an MHD response backed by the opened file, or 0 when the
                       // file cannot be opened or fstat'ed, when its current mtime differs
                       // from the recorded b_mtime, or when the response cannot be created.
                       // On success, and if result_fd is non-NULL, *result_fd receives the
                       // descriptor that was handed to libmicrohttpd.
     762             : static struct MHD_Response*
     763           9 : handle_buildid_f_match (int64_t b_mtime,
     764             :                         const string& b_source0,
     765             :                         int *result_fd)
     766             : {
     767           9 :   int fd = open(b_source0.c_str(), O_RDONLY);
     768           9 :   if (fd < 0)
     769             :     {
     770           0 :       if (verbose)
     771           0 :         obatched(clog) << "cannot open " << b_source0 << endl;
     772             :       // if still missing, a periodic groom pass will delete this buildid record
     773           0 :       return 0;
     774             :     }
     775             : 
     776             :   // NB: use manual close(2) in error case instead of defer_dtor, because
     777             :   // in the normal case, we want to hand the fd over to libmicrohttpd for
     778             :   // file transfer.
     779             : 
     780             :   struct stat s;
     781           9 :   int rc = fstat(fd, &s);
     782           9 :   if (rc < 0)
     783             :     {
     784           0 :       if (verbose)
                               // Log through obatched like every other message here, so the
                               // line is timestamped and written under the shared output lock.
     785           0 :         obatched(clog) << "cannot fstat " << b_source0 << endl;
     786           0 :       close(fd);
     787           0 :       return 0;
     788             :     }
     789             : 
                         // The file changed since it was indexed; do not serve it under the
                         // recorded buildid.
     790           9 :   if ((int64_t) s.st_mtime != b_mtime)
     791             :     {
     792           0 :       if (verbose)
     793           0 :         obatched(clog) << "mtime mismatch for " << b_source0 << endl;
     794           0 :       close(fd);
     795           0 :       return 0;
     796             :     }
     797             : 
                         // NOTE(review): the metric is bumped before the response is known
                         // to have been created -- confirm that is intended.
     798           9 :   inc_metric ("http_responses_total","result","file");
     799           9 :   struct MHD_Response* r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
     800           9 :   if (r == 0)
     801             :     {
     802           0 :       if (verbose)
     803           0 :         obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
     804           0 :       close(fd);
     805             :     }
     806             :   else
     807             :     {
     808           9 :       MHD_add_response_header (r, "Content-Type", "application/octet-stream");
     809           9 :       add_mhd_last_modified (r, s.st_mtime);
     810           9 :       if (verbose > 1)
     811           0 :         obatched(clog) << "serving file " << b_source0 << endl;
     812             :       /* libmicrohttpd will close it. */
     813           9 :       if (result_fd)
     814           0 :         *result_fd = fd;
     815             :     }
     816             : 
     817           9 :   return r;
     818             : }
     819             : 
     820             : 
     821             : // quote all questionable characters of str for safe passage through a sh -c expansion.
                       // Every character that is neither alphanumeric nor '/' is prefixed with
                       // a backslash; the escaped copy is returned.
                       // NOTE(review): in sh a backslash followed by a newline is a line
                       // continuation, so a path containing '\n' would not survive intact --
                       // confirm such paths cannot reach this function.
     822             : static string
     823          34 : shell_escape(const string& str)
     824             : {
     825          34 :   string y;
     826        3433 :   for (auto&& x : str)
     827             :     {
                             // <ctype.h> functions require an argument representable as
                             // unsigned char (or EOF); plain char may be negative for bytes
                             // >= 0x80, so convert before the call.
     828        3399 :       if (! isalnum((unsigned char) x) && x != '/')
     829         321 :         y += "\\";
     830        3399 :       y += x;
     831             :     }
     832          34 :   return y;
     833             : }
     834             : 
     835             : 
                       // Serve a match that lives inside an rpm: b_source0 is the rpm path,
                       // b_source1 the path of the wanted member.  The rpm is converted with
                       // "rpm2cpio" through popen and the cpio stream is scanned with
                       // libarchive; the first regular-file entry named "." + b_source1 is
                       // extracted to an already-unlinked temporary file, which backs the
                       // returned MHD response.
                       // Returns 0 if the rpm's mtime differs from b_mtime, if no entry
                       // matches, or if the response cannot be created.
                       // Throws libc_exception when stat, popen or mkstemp fail, and
                       // archive_exception when libarchive setup or extraction fails.
                       // On success, and if result_fd is non-NULL, *result_fd receives the
                       // temporary file's descriptor.
     836             : static struct MHD_Response*
     837          20 : handle_buildid_r_match (int64_t b_mtime,
     838             :                         const string& b_source0,
     839             :                         const string& b_source1,
     840             :                         int *result_fd)
     841             : {
     842             :   struct stat fs;
     843          20 :   int rc = stat (b_source0.c_str(), &fs);
     844          20 :   if (rc != 0)
     845           0 :     throw libc_exception (errno, string("stat ") + b_source0);
     846             : 
                         // The rpm changed since it was indexed; do not serve from it.
     847          20 :   if ((int64_t) fs.st_mtime != b_mtime)
     848             :     {
     849           0 :       if (verbose)
     850           0 :         obatched(clog) << "mtime mismatch for " << b_source0 << endl;
     851           0 :       return 0;
     852             :     }
     853             : 
                         // The path goes through a shell via popen, hence the escaping.
     854          40 :   string popen_cmd = string("rpm2cpio " + shell_escape(b_source0));
     855          20 :   FILE* fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
     856          20 :   if (fp == NULL)
     857           0 :     throw libc_exception (errno, string("popen ") + popen_cmd);
                         // pclose runs when this function exits by return or by exception.
     858          40 :   defer_dtor<FILE*,int> fp_closer (fp, pclose);
     859             : 
     860             :   struct archive *a;
     861          20 :   a = archive_read_new();
     862          20 :   if (a == NULL)
     863           0 :     throw archive_exception("cannot create archive reader");
                         // Declared after fp_closer, so the archive is freed before the
                         // pipe it reads from is closed.
     864          40 :   defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
     865             : 
     866          20 :   rc = archive_read_support_format_cpio(a);
     867          20 :   if (rc != ARCHIVE_OK)
     868           0 :     throw archive_exception(a, "cannot select cpio format");
     869          20 :   rc = archive_read_support_filter_all(a);
     870          20 :   if (rc != ARCHIVE_OK)
     871           0 :     throw archive_exception(a, "cannot select all filters");
     872             : 
     873          20 :   rc = archive_read_open_FILE (a, fp);
     874          20 :   if (rc != ARCHIVE_OK)
     875           0 :     throw archive_exception(a, "cannot open archive from rpm2cpio pipe");
     876             : 
     877             :   while(1) // parse cpio archive entries
     878             :     {
     879             :       struct archive_entry *e;
     880         186 :       rc = archive_read_next_header (a, &e);
                             // Any result other than ARCHIVE_OK ends the scan; this does not
                             // tell end-of-archive apart from an error.
     881         186 :       if (rc != ARCHIVE_OK)
     882           0 :         break;
     883             : 
     884         186 :       if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
     885         151 :         continue;
     886             : 
                             // NOTE(review): constructing a string from a NULL pointer is
                             // undefined -- confirm archive_entry_pathname cannot return NULL
                             // for these entries.
     887          35 :       string fn = archive_entry_pathname (e);
                             // Member names are compared with a leading "." prepended to
                             // b_source1.
     888          35 :       if (fn != string(".")+b_source1)
     889          15 :         continue;
     890             : 
     891             :       // extract this file to a temporary file
     892          20 :       char tmppath[PATH_MAX] = "/tmp/debuginfod.XXXXXX"; // XXX: $TMP_DIR etc.
     893          20 :       int fd = mkstemp (tmppath);
     894          20 :       if (fd < 0)
     895           0 :         throw libc_exception (errno, "cannot create temporary file");
     896          20 :       unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd
     897             : 
     898          20 :       rc = archive_read_data_into_fd (a, fd);
     899          20 :       if (rc != ARCHIVE_OK)
     900             :         {
     901           0 :           close (fd);
     902           0 :           throw archive_exception(a, "cannot extract file");
     903             :         }
     904             : 
     905          20 :       inc_metric ("http_responses_total","result","rpm");
                             // The response length is the size recorded in the archive entry.
     906          20 :       struct MHD_Response* r = MHD_create_response_from_fd (archive_entry_size(e), fd);
     907          20 :       if (r == 0)
     908             :         {
     909           0 :           if (verbose)
     910           0 :             obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
     911           0 :           close(fd);
     912           0 :           break; // assume no chance of better luck around another iteration
     913             :         }
     914             :       else
     915             :         {
     916          20 :           MHD_add_response_header (r, "Content-Type", "application/octet-stream");
     917          20 :           add_mhd_last_modified (r, archive_entry_mtime(e));
     918          20 :           if (verbose > 1)
     919           0 :             obatched(clog) << "serving rpm " << b_source0 << " file " << b_source1 << endl;
     920             :           /* libmicrohttpd will close it. */
     921          20 :           if (result_fd)
     922           2 :             *result_fd = fd;
     923          20 :           return r;
     924             :         }
     925         166 :     }
     926             : 
     927             :   // XXX: rpm/file not found: delete this R entry?
     928           0 :   return 0;
     929             : }
     930             : 
     931             : 
                       // Dispatch one database match by its source type: "F" goes to
                       // handle_buildid_f_match (b_source1 is not used), "R" goes to
                       // handle_buildid_r_match.  Any other type yields 0.  The return value
                       // and result_fd behave as in the selected handler.
     932             : static struct MHD_Response*
     933          29 : handle_buildid_match (int64_t b_mtime,
     934             :                       const string& b_stype,
     935             :                       const string& b_source0,
     936             :                       const string& b_source1,
     937             :                       int *result_fd)
     938             : {
     939          29 :   if (b_stype == "F")
     940           9 :     return handle_buildid_f_match(b_mtime, b_source0, result_fd);
     941          20 :   else if (b_stype == "R")
     942          20 :     return handle_buildid_r_match(b_mtime, b_source0, b_source1, result_fd);
     943             :   else
     944           0 :     return 0;
     945             : }
     946             : 
     947             : 
                       // Progress callback installed on the debuginfod client used for
                       // upstream queries.  Logs a/b when verbose > 4 and returns the current
                       // value of the file-level `interrupted` flag.
                       // NOTE(review): presumably a nonzero return asks the client library to
                       // abandon the transfer -- confirm against the debuginfod client API.
     948             : static int
     949           2 : debuginfod_find_progress (debuginfod_client *, long a, long b)
     950             : {
     951           2 :   if (verbose > 4)
     952           0 :     obatched(clog) << "federated debuginfod progress=" << a << "/" << b << endl;
     953             : 
     954           2 :   return interrupted;
     955             : }
     956             : 
     957             : 
                       // Resolve a buildid request to an MHD response.
                       //  buildid      - hex build-id; must be non-empty, of even length and
                       //                 consist only of lowercase hex digits
                       //  artifacttype - "debuginfo", "executable" or "source"
                       //  suffix       - source file name; must be non-empty for "source"
                       //  result_fd    - if non-NULL, receives the descriptor backing the
                       //                 response
                       // All three strings are marked unsafe, and are validated here before use.
                       // The local database is queried first and each row is tried in query
                       // order; if none yields a response, the request is passed on to the
                       // debuginfod client library.
                       // This function never returns 0: it either returns a response or throws
                       // (reportable_exception for invalid arguments and for "not found",
                       // sqlite_exception for query failures, libc_exception for upstream
                       // failures, plus whatever the per-match handlers throw).
     958          32 : static struct MHD_Response* handle_buildid (const string& buildid /* unsafe */,
     959             :                                             const string& artifacttype /* unsafe */,
     960             :                                             const string& suffix /* unsafe */,
     961             :                                             int *result_fd
     962             :                                             )
     963             : {
     964             :   // validate artifacttype
     965          64 :   string atype_code;
     966          32 :   if (artifacttype == "debuginfo") atype_code = "D";
     967          18 :   else if (artifacttype == "executable") atype_code = "E";
     968           8 :   else if (artifacttype == "source") atype_code = "S";
     969           0 :   else throw reportable_exception("invalid artifacttype");
     970             : 
     971          32 :   if (atype_code == "S" && suffix == "")
     972           0 :      throw reportable_exception("invalid source suffix");
     973             : 
     974             :   // validate buildid
     975          32 :   if ((buildid.size() < 2) || // not empty
     976          64 :       (buildid.size() % 2) || // even number
     977          32 :       (buildid.find_first_not_of("0123456789abcdef") != string::npos)) // pure tasty lowercase hex
     978           0 :     throw reportable_exception("invalid buildid");
     979             : 
     980          32 :   if (verbose > 1)
     981           0 :     obatched(clog) << "searching for buildid=" << buildid << " artifacttype=" << artifacttype
     982           0 :          << " suffix=" << suffix << endl;
     983             : 
                         // Exactly one of the three branches below runs, because atype_code
                         // was validated above; pp is therefore non-null afterwards.  User
                         // input reaches the queries only through bound parameters.
     984          32 :   sqlite_ps *pp = 0;
     985             : 
     986          32 :   if (atype_code == "D")
     987             :     {
     988          28 :       pp = new sqlite_ps (db, "mhd-query-d",
     989             :                           "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_d where buildid = ? "
     990          14 :                           "order by mtime desc");
     991          14 :       pp->reset();
     992          14 :       pp->bind(1, buildid);
     993             :     }
     994          18 :   else if (atype_code == "E")
     995             :     {
     996          20 :       pp = new sqlite_ps (db, "mhd-query-e",
     997             :                           "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_e where buildid = ? "
     998          10 :                           "order by mtime desc");
     999          10 :       pp->reset();
    1000          10 :       pp->bind(1, buildid);
    1001             :     }
    1002           8 :   else if (atype_code == "S")
    1003             :     {
    1004          16 :       pp = new sqlite_ps (db, "mhd-query-s",
    1005             :                           "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc = ? "
    1006           8 :                           "order by sharedprefix(source0,source0ref) desc, mtime desc");
    1007           8 :       pp->reset();
    1008           8 :       pp->bind(1, buildid);
    1009           8 :       pp->bind(2, suffix);
    1010             :     }
    1011          64 :   unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
    1012             : 
    1013             :   // consume all the rows
    1014             :   while (1)
    1015             :     {
    1016          32 :       int rc = pp->step();
    1017          32 :       if (rc == SQLITE_DONE) break;
    1018          29 :       if (rc != SQLITE_ROW)
    1019           0 :         throw sqlite_exception(rc, "step");
    1020             : 
                             // Copy the column values into owned strings; a NULL column
                             // becomes the empty string.
    1021          29 :       int64_t b_mtime = sqlite3_column_int64 (*pp, 0);
    1022          29 :       string b_stype = string((const char*) sqlite3_column_text (*pp, 1) ?: ""); /* by DDL may not be NULL */
    1023          29 :       string b_source0 = string((const char*) sqlite3_column_text (*pp, 2) ?: ""); /* may be NULL */
    1024          29 :       string b_source1 = string((const char*) sqlite3_column_text (*pp, 3) ?: ""); /* may be NULL */
    1025             : 
    1026          29 :       if (verbose > 1)
    1027           0 :         obatched(clog) << "found mtime=" << b_mtime << " stype=" << b_stype
    1028           0 :              << " source0=" << b_source0 << " source1=" << b_source1 << endl;
    1029             : 
    1030             :       // Try accessing the located match.
    1031             :       // XXX: in case of multiple matches, attempt them in parallel?
    1032          29 :       auto r = handle_buildid_match (b_mtime, b_stype, b_source0, b_source1, result_fd);
                             // The first row that produces a response wins; otherwise move on
                             // to the next row.
    1033          29 :       if (r)
    1034          29 :         return r;
    1035           0 :     }
    1036             : 
    1037             :   // We couldn't find it in the database.  Last ditch effort
    1038             :   // is to defer to other debuginfo servers.
    1039             : 
                         // fd ends up as a readable descriptor (>= 0) or a negated errno
                         // value.  If artifacttype matched none of the branches it would stay
                         // -1, but validation above rules that out.
    1040           3 :   int fd = -1;
    1041           3 :   debuginfod_client *client = debuginfod_begin ();
    1042           3 :   if (client != NULL)
    1043             :     {
    1044           3 :       debuginfod_set_progressfn (client, & debuginfod_find_progress);
    1045             : 
    1046           3 :       if (artifacttype == "debuginfo")
    1047           2 :         fd = debuginfod_find_debuginfo (client,
    1048           2 :                                         (const unsigned char*) buildid.c_str(),
    1049             :                                         0, NULL);
    1050           1 :       else if (artifacttype == "executable")
    1051           1 :         fd = debuginfod_find_executable (client,
    1052           1 :                                          (const unsigned char*) buildid.c_str(),
    1053             :                                          0, NULL);
    1054           0 :       else if (artifacttype == "source")
    1055           0 :         fd = debuginfod_find_source (client,
    1056           0 :                                      (const unsigned char*) buildid.c_str(),
    1057             :                                      0, suffix.c_str(), NULL);
    1058             :     }
    1059             :   else
    1060           0 :     fd = -errno; /* Set by debuginfod_begin.  */
                         // NOTE(review): client may be NULL here -- confirm debuginfod_end
                         // accepts a NULL client.
    1061           3 :   debuginfod_end (client);
    1062             : 
    1063           3 :   if (fd >= 0)
    1064             :     {
                             // NOTE(review): counted before fstat and response creation are
                             // known to have succeeded -- confirm that is intended.
    1065           1 :       inc_metric ("http_responses_total","result","upstream");
    1066             :       struct stat s;
    1067           1 :       int rc = fstat (fd, &s);
    1068           1 :       if (rc == 0)
    1069             :         {
    1070           1 :           auto r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
    1071           1 :           if (r)
    1072             :             {
    1073           1 :               MHD_add_response_header (r, "Content-Type", "application/octet-stream");
    1074           1 :               add_mhd_last_modified (r, s.st_mtime);
    1075           1 :               if (verbose > 1)
    1076           0 :                 obatched(clog) << "serving file from upstream debuginfod/cache" << endl;
    1077           1 :               if (result_fd)
    1078           0 :                 *result_fd = fd;
    1079           1 :               return r; // NB: don't close fd; libmicrohttpd will
    1080             :             }
    1081             :         }
                             // fstat or response creation failed: release the descriptor and
                             // fall through to the "not found" exception below.
    1082           0 :       close (fd);
    1083             :     }
    1084           2 :   else if (fd != -ENOSYS) // no DEBUGINFOD_URLS configured
    1085           0 :     throw libc_exception(-fd, "upstream debuginfod query failed");
    1086             : 
    1087           2 :   throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
    1088             : }
    1089             : 
    1090             : 
    1091             : ////////////////////////////////////////////////////////////////////////
    1092             : 
    1093             : static map<string,int64_t> metrics; // arbitrary data for /metrics query
    1094             : // NB: store int64_t since all our metrics are integers; prometheus accepts double
                       // Keys are either a bare metric name or name{label="value"}.
    1095             : static mutex metrics_lock; // taken by the set_metric/inc_metric helpers below around each access to metrics
    1096             : 
    1097             : // utility function for assembling prometheus-compatible
    1098             : // name="escaped-value" strings
    1099             : // https://prometheus.io/docs/instrumenting/exposition_formats/
                       // Returns name="value", with backslash, double quote and newline in
                       // value written as \\, \" and \n; every other character is copied
                       // unchanged.  The name is not escaped or validated.
    1100             : static string
    1101         893 : metric_label(const string& name, const string& value)
    1102             : {
    1103         893 :   string x = name + "=\"";
    1104        4741 :   for (auto&& c : value)
    1105        3849 :     switch(c)
    1106             :       {
    1107           0 :       case '\\': x += "\\\\"; break;
    1108           0 :       case '\"': x += "\\\""; break;
    1109           0 :       case '\n': x += "\\n"; break;
    1110        3849 :       default: x += c; break;
    1111             :       }
    1112         893 :   x += "\"";
    1113         895 :   return x;
    1114             : }
    1115             : 
    1116             : 
    1117             : // add prometheus-format metric name + label tuple (if any) + value
    1118             : 
    1119             : static void
    1120           4 : set_metric(const string& metric, int64_t value)
    1121             : {
    1122           4 :   unique_lock<mutex> lock(metrics_lock);
    1123           4 :   metrics[metric] = value;
    1124           4 : }
    1125             : #if 0 /* unused */
    1126             : static void
    1127             : inc_metric(const string& metric)
    1128             : {
    1129             :   unique_lock<mutex> lock(metrics_lock);
    1130             :   metrics[metric] ++;
    1131             : }
    1132             : #endif
    1133             : static void
    1134         214 : set_metric(const string& metric,
    1135             :            const string& lname, const string& lvalue,
    1136             :            int64_t value)
    1137             : {
    1138         647 :   string key = (metric + "{" + metric_label(lname, lvalue) + "}");
    1139         218 :   unique_lock<mutex> lock(metrics_lock);
    1140         218 :   metrics[key] = value;
    1141         218 : }
    1142             : 
    1143             : static void
    1144         638 : inc_metric(const string& metric,
    1145             :            const string& lname, const string& lvalue)
    1146             : {
    1147        1914 :   string key = (metric + "{" + metric_label(lname, lvalue) + "}");
    1148         638 :   unique_lock<mutex> lock(metrics_lock);
    1149         638 :   metrics[key] ++;
    1150         638 : }
    1151             : static void
    1152          42 : add_metric(const string& metric,
    1153             :            const string& lname, const string& lvalue,
    1154             :            int64_t value)
    1155             : {
    1156         126 :   string key = (metric + "{" + metric_label(lname, lvalue) + "}");
    1157          42 :   unique_lock<mutex> lock(metrics_lock);
    1158          42 :   metrics[key] += value;
    1159          42 : }
    1160             : 
    1161             : 
    1162             : // and more for higher arity labels if needed
    1163             : 
    1164             : 
    1165             : static struct MHD_Response*
    1166          18 : handle_metrics ()
    1167             : {
    1168          36 :   stringstream o;
    1169             :   {
    1170          36 :     unique_lock<mutex> lock(metrics_lock);
    1171        1055 :     for (auto&& i : metrics)
    1172        1037 :       o << i.first << " " << i.second << endl;
    1173             :   }
    1174          18 :   const string& os = o.str();
    1175          18 :   MHD_Response* r = MHD_create_response_from_buffer (os.size(),
    1176          18 :                                                      (void*) os.c_str(),
    1177             :                                                      MHD_RESPMEM_MUST_COPY);
    1178          18 :   MHD_add_response_header (r, "Content-Type", "text/plain");
    1179          36 :   return r;
    1180             : }
    1181             : 
    1182             : 
    1183             : ////////////////////////////////////////////////////////////////////////
    1184             : 
    1185             : 
    1186             : /* libmicrohttpd callback */
    1187             : static int
    1188          49 : handler_cb (void * /*cls*/,
    1189             :             struct MHD_Connection *connection,
    1190             :             const char *url,
    1191             :             const char *method,
    1192             :             const char * /*version*/,
    1193             :             const char * /*upload_data*/,
    1194             :             size_t * /*upload_data_size*/,
    1195             :             void ** /*con_cls*/)
    1196             : {
    1197          49 :   struct MHD_Response *r = NULL;
    1198          98 :   string url_copy = url;
    1199             : 
    1200          49 :   if (verbose)
    1201           0 :     obatched(clog) << conninfo(connection) << " " << method << " " << url << endl;
    1202             : 
    1203             :   try
    1204             :     {
    1205          49 :       if (string(method) != "GET")
    1206           0 :         throw reportable_exception(400, "we support GET only");
    1207             : 
    1208             :       /* Start decoding the URL. */
    1209          49 :       size_t slash1 = url_copy.find('/', 1);
    1210          52 :       string url1 = url_copy.substr(0, slash1); // ok even if slash1 not found
    1211             : 
    1212          49 :       if (slash1 != string::npos && url1 == "/buildid")
    1213             :         {
    1214          30 :           size_t slash2 = url_copy.find('/', slash1+1);
    1215          30 :           if (slash2 == string::npos)
    1216           0 :             throw reportable_exception("/buildid/ webapi error, need buildid");
    1217             : 
    1218          60 :           string buildid = url_copy.substr(slash1+1, slash2-slash1-1);
    1219             : 
    1220          30 :           size_t slash3 = url_copy.find('/', slash2+1);
    1221          62 :           string artifacttype, suffix;
    1222          30 :           if (slash3 == string::npos)
    1223             :             {
    1224          22 :               artifacttype = url_copy.substr(slash2+1);
    1225          22 :               suffix = "";
    1226             :             }
    1227             :           else
    1228             :             {
    1229           8 :               artifacttype = url_copy.substr(slash2+1, slash3-slash2-1);
    1230           8 :               suffix = url_copy.substr(slash3); // include the slash in the suffix
    1231             :             }
    1232             : 
    1233          30 :           inc_metric("http_requests_total", "type", artifacttype);
    1234          30 :           r = handle_buildid(buildid, artifacttype, suffix, 0); // NB: don't care about result-fd
    1235             :         }
    1236          19 :       else if (url1 == "/metrics")
    1237             :         {
    1238          18 :           inc_metric("http_requests_total", "type", "metrics");
    1239          18 :           r = handle_metrics();
    1240             :         }
    1241             :       else
    1242           1 :         throw reportable_exception("webapi error, unrecognized /operation");
    1243             : 
    1244          46 :       if (r == 0)
    1245           0 :         throw reportable_exception("internal error, missing response");
    1246             : 
    1247          46 :       int rc = MHD_queue_response (connection, MHD_HTTP_OK, r);
    1248          46 :       MHD_destroy_response (r);
    1249          46 :       return rc;
    1250             :     }
    1251           6 :   catch (const reportable_exception& e)
    1252             :     {
    1253           3 :       inc_metric("http_responses_total","result","error");
    1254           3 :       e.report(clog);
    1255           3 :       return e.mhd_send_response (connection);
    1256             :     }
    1257             : }
    1258             : 
    1259             : 
    1260             : ////////////////////////////////////////////////////////////////////////
    1261             : // borrowed originally from src/nm.c get_local_names()
    1262             : 
    1263             : static void
    1264          10 : dwarf_extract_source_paths (Elf *elf, set<string>& debug_sourcefiles)
    1265             :   noexcept // no exceptions - so we can simplify the altdbg resource release at end
    1266             : {
    1267          10 :   Dwarf* dbg = dwarf_begin_elf (elf, DWARF_C_READ, NULL);
    1268          10 :   if (dbg == NULL)
    1269           0 :     return;
    1270             : 
    1271          10 :   Dwarf* altdbg = NULL;
    1272          10 :   int    altdbg_fd = -1;
    1273             : 
    1274             :   // DWZ handling: if we have an unsatisfied debug-alt-link, add an
    1275             :   // empty string into the outgoing sourcefiles set, so the caller
    1276             :   // should know that our data is incomplete.
    1277             :   const char *alt_name_p;
    1278             :   const void *alt_build_id; // elfutils-owned memory
    1279          10 :   ssize_t sz = dwelf_dwarf_gnu_debugaltlink (dbg, &alt_name_p, &alt_build_id);
    1280          10 :   if (sz > 0) // got one!
    1281             :     {
    1282           4 :       string buildid;
    1283           2 :       unsigned char* build_id_bytes = (unsigned char*) alt_build_id;
    1284          42 :       for (ssize_t idx=0; idx<sz; idx++)
    1285             :         {
    1286          40 :           buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
    1287          40 :           buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
    1288             :         }
    1289             : 
    1290           2 :       if (verbose > 3)
    1291           0 :         obatched(clog) << "Need altdebug buildid=" << buildid << endl;
    1292             : 
    1293             :       // but is it unsatisfied the normal elfutils ways?
    1294           2 :       Dwarf* alt = dwarf_getalt (dbg);
    1295           2 :       if (alt == NULL)
    1296             :         {
    1297             :           // Yup, unsatisfied the normal way.  Maybe we can satisfy it
    1298             :           // from our own debuginfod database.
    1299             :           int alt_fd;
    1300           2 :           struct MHD_Response *r = 0;
    1301             :           try
    1302             :             {
    1303           2 :               r = handle_buildid (buildid, "debuginfo", "", &alt_fd);
    1304             :             }
    1305           0 :           catch (const reportable_exception& e)
    1306             :             {
    1307             :               // swallow exceptions
    1308             :             }
    1309             : 
    1310             :           // NB: this is not actually recursive!  This invokes the web-query
    1311             :           // path, which cannot get back into the scan code paths.
    1312           2 :           if (r)
    1313             :             {
    1314             :               // Found it!
    1315           2 :               altdbg_fd = dup(alt_fd); // ok if this fails, downstream failures ok
    1316           2 :               alt = altdbg = dwarf_begin (altdbg_fd, DWARF_C_READ);
    1317             :               // NB: must close this dwarf and this fd at the bottom of the function!
    1318           2 :               MHD_destroy_response (r); // will close alt_fd
    1319           2 :               if (alt)
    1320           2 :                 dwarf_setalt (dbg, alt);
    1321             :             }
    1322             :         }
    1323             :       else
    1324             :         {
    1325             :           // NB: dwarf_setalt(alt) inappropriate - already done!
    1326             :           // NB: altdbg will stay 0 so nothing tries to redundantly dealloc.
    1327             :         }
    1328             : 
    1329           2 :       if (alt)
    1330             :         {
    1331           2 :           if (verbose > 3)
    1332           0 :             obatched(clog) << "Resolved altdebug buildid=" << buildid << endl;
    1333             :         }
    1334             :       else // (alt == NULL) - signal possible presence of poor debuginfo
    1335             :         {
    1336           0 :           debug_sourcefiles.insert("");
    1337           0 :           if (verbose > 3)
    1338           0 :             obatched(clog) << "Unresolved altdebug buildid=" << buildid << endl;
    1339             :         }
    1340             :     }
    1341             : 
    1342          10 :   Dwarf_Off offset = 0;
    1343             :   Dwarf_Off old_offset;
    1344             :   size_t hsize;
    1345             : 
    1346         331 :   while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, NULL) == 0)
    1347             :     {
    1348             :       Dwarf_Die cudie_mem;
    1349         321 :       Dwarf_Die *cudie = dwarf_offdie (dbg, old_offset + hsize, &cudie_mem);
    1350             : 
    1351         321 :       if (cudie == NULL)
    1352           1 :         continue;
    1353         321 :       if (dwarf_tag (cudie) != DW_TAG_compile_unit)
    1354           1 :         continue;
    1355             : 
    1356         320 :       const char *cuname = dwarf_diename(cudie) ?: "unknown";
    1357             : 
    1358             :       Dwarf_Files *files;
    1359             :       size_t nfiles;
    1360         320 :       if (dwarf_getsrcfiles (cudie, &files, &nfiles) != 0)
    1361           0 :         continue;
    1362             : 
    1363             :       // extract DW_AT_comp_dir to resolve relative file names
    1364         320 :       const char *comp_dir = "";
    1365             :       const char *const *dirs;
    1366             :       size_t ndirs;
    1367         640 :       if (dwarf_getsrcdirs (files, &dirs, &ndirs) == 0 &&
    1368         320 :           dirs[0] != NULL)
    1369         320 :         comp_dir = dirs[0];
    1370         320 :       if (comp_dir == NULL)
    1371           0 :         comp_dir = "";
    1372             : 
    1373         320 :       if (verbose > 3)
    1374           0 :         obatched(clog) << "searching for sources for cu=" << cuname << " comp_dir=" << comp_dir
    1375           0 :                        << " #files=" << nfiles << " #dirs=" << ndirs << endl;
    1376             : 
    1377         320 :       if (comp_dir[0] == '\0' && cuname[0] != '/')
    1378             :         {
    1379             :           // This is a common symptom for dwz-compressed debug files,
    1380             :           // where the altdebug file cannot be resolved.
    1381           0 :           if (verbose > 3)
    1382           0 :             obatched(clog) << "skipping cu=" << cuname << " due to empty comp_dir" << endl;
    1383           0 :           continue;
    1384             :         }
    1385             : 
    1386        6272 :       for (size_t f = 1; f < nfiles; f++)
    1387             :         {
    1388        5952 :           const char *hat = dwarf_filesrc (files, f, NULL, NULL);
    1389        5952 :           if (hat == NULL)
    1390           0 :             continue;
    1391             : 
    1392        5952 :           if (string(hat) == "<built-in>") // gcc intrinsics, don't bother record
    1393           0 :             continue;
    1394             : 
    1395        5952 :           string waldo;
    1396        5952 :           if (hat[0] == '/') // absolute
    1397        4086 :             waldo = (string (hat));
    1398        1866 :           else if (comp_dir[0] != '\0') // comp_dir relative
    1399        1866 :             waldo = (string (comp_dir) + string("/") + string (hat));
    1400             :           else
    1401             :            {
    1402           0 :              obatched(clog) << "skipping hat=" << hat << " due to empty comp_dir" << endl;
    1403           0 :              continue;
    1404             :            }
    1405             : 
    1406             :           // NB: this is the 'waldo' that a dbginfo client will have
    1407             :           // to supply for us to give them the file The comp_dir
    1408             :           // prefixing is a definite complication.  Otherwise we'd
    1409             :           // have to return a setof comp_dirs (one per CU!) with
    1410             :           // corresponding filesrc[] names, instead of one absolute
    1411             :           // resoved set.  Maybe we'll have to do that anyway.  XXX
    1412             : 
    1413        5952 :           if (verbose > 4)
    1414           0 :             obatched(clog) << waldo
    1415           0 :                            << (debug_sourcefiles.find(waldo)==debug_sourcefiles.end() ? " new" : " dup") <<  endl;
    1416             : 
    1417        5952 :           debug_sourcefiles.insert (waldo);
    1418             :         }
    1419             :     }
    1420             : 
    1421          10 :   dwarf_end(dbg);
    1422          10 :   if (altdbg)
    1423           2 :     dwarf_end(altdbg);
    1424          10 :   if (altdbg_fd >= 0)
    1425           2 :     close(altdbg_fd);
    1426             : }
    1427             : 
    1428             : 
    1429             : 
    1430             : static void
    1431          42 : elf_classify (int fd, bool &executable_p, bool &debuginfo_p, string &buildid, set<string>& debug_sourcefiles)
    1432             : {
    1433          42 :   Elf *elf = elf_begin (fd, ELF_C_READ_MMAP_PRIVATE, NULL);
    1434          42 :   if (elf == NULL)
    1435           0 :     return;
    1436             : 
    1437             :   try // catch our types of errors and clean up the Elf* object
    1438             :     {
    1439          42 :       if (elf_kind (elf) != ELF_K_ELF)
    1440             :         {
    1441          24 :           elf_end (elf);
    1442          24 :           return;
    1443             :         }
    1444             : 
    1445             :       GElf_Ehdr ehdr_storage;
    1446          18 :       GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
    1447          18 :       if (ehdr == NULL)
    1448             :         {
    1449           0 :           elf_end (elf);
    1450           0 :           return;
    1451             :         }
    1452          18 :       auto elf_type = ehdr->e_type;
    1453             : 
    1454             :       const void *build_id; // elfutils-owned memory
    1455          18 :       ssize_t sz = dwelf_elf_gnu_build_id (elf, & build_id);
    1456          18 :       if (sz <= 0)
    1457             :         {
    1458             :           // It's not a diagnostic-worthy error for an elf file to lack build-id.
    1459             :           // It might just be very old.
    1460           0 :           elf_end (elf);
    1461           0 :           return;
    1462             :         }
    1463             : 
    1464             :       // build_id is a raw byte array; convert to hexadecimal *lowercase*
    1465          18 :       unsigned char* build_id_bytes = (unsigned char*) build_id;
    1466         378 :       for (ssize_t idx=0; idx<sz; idx++)
    1467             :         {
    1468         360 :           buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
    1469         360 :           buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
    1470             :         }
    1471             : 
    1472             :       // now decide whether it's an executable - namely, any allocatable section has
    1473             :       // PROGBITS;
    1474          18 :       if (elf_type == ET_EXEC || elf_type == ET_DYN)
    1475             :         {
    1476             :           size_t shnum;
    1477          16 :           int rc = elf_getshdrnum (elf, &shnum);
    1478          16 :           if (rc < 0)
    1479           0 :             throw elfutils_exception(rc, "getshdrnum");
    1480             : 
    1481          16 :           executable_p = false;
    1482         283 :           for (size_t sc = 0; sc < shnum; sc++)
    1483             :             {
    1484         276 :               Elf_Scn *scn = elf_getscn (elf, sc);
    1485         276 :               if (scn == NULL)
    1486           0 :                 continue;
    1487             : 
    1488             :               GElf_Shdr shdr_mem;
    1489         276 :               GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
    1490         276 :               if (shdr == NULL)
    1491           0 :                 continue;
    1492             : 
    1493             :               // allocated (loadable / vm-addr-assigned) section with available content?
    1494         276 :               if ((shdr->sh_type == SHT_PROGBITS) && (shdr->sh_flags & SHF_ALLOC))
    1495             :                 {
    1496           9 :                   if (verbose > 4)
    1497           0 :                     obatched(clog) << "executable due to SHF_ALLOC SHT_PROGBITS sc=" << sc << endl;
    1498           9 :                   executable_p = true;
    1499           9 :                   break; // no need to keep looking for others
    1500             :                 }
    1501             :             } // iterate over sections
    1502             :         } // executable_p classification
    1503             : 
    1504             :       // now decide whether it's a debuginfo - namely, if it has any .debug* or .zdebug* sections
    1505             :       // logic mostly stolen from fweimer@redhat.com's elfclassify drafts
    1506             :       size_t shstrndx;
    1507          18 :       int rc = elf_getshdrstrndx (elf, &shstrndx);
    1508          18 :       if (rc < 0)
    1509           0 :         throw elfutils_exception(rc, "getshdrstrndx");
    1510             : 
    1511          18 :       Elf_Scn *scn = NULL;
    1512             :       while (true)
    1513             :         {
    1514         483 :           scn = elf_nextscn (elf, scn);
    1515         483 :           if (scn == NULL)
    1516          18 :             break;
    1517             :           GElf_Shdr shdr_storage;
    1518         475 :           GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
    1519         475 :           if (shdr == NULL)
    1520           0 :             break;
    1521         475 :           const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
    1522         475 :           if (section_name == NULL)
    1523           0 :             break;
    1524         475 :           if (strncmp(section_name, ".debug_line", 11) == 0 ||
    1525         465 :               strncmp(section_name, ".zdebug_line", 12) == 0)
    1526             :             {
    1527          10 :               debuginfo_p = true;
    1528          10 :               dwarf_extract_source_paths (elf, debug_sourcefiles);
    1529          10 :               break; // expecting only one .*debug_line, so no need to look for others
    1530             :             }
    1531         465 :           else if (strncmp(section_name, ".debug_", 7) == 0 ||
    1532         433 :                    strncmp(section_name, ".zdebug_", 8) == 0)
    1533             :             {
    1534          32 :               debuginfo_p = true;
    1535             :               // NB: don't break; need to parse .debug_line for sources
    1536             :             }
    1537         465 :         }
    1538             :     }
    1539           0 :   catch (const reportable_exception& e)
    1540             :     {
    1541           0 :       e.report(clog);
    1542             :     }
    1543          18 :   elf_end (elf);
    1544             : }
    1545             : 
    1546             : 
    1547             : static semaphore* scan_concurrency_sem = 0; // used to implement -c load limiting
    1548             : 
    1549             : 
    1550             : static void
    1551          16 : scan_source_file_path (const string& dir)
    1552             : {
    1553          16 :   obatched(clog) << "fts/file traversing " << dir << endl;
    1554             : 
    1555             :   struct timeval tv_start, tv_end;
    1556          16 :   gettimeofday (&tv_start, NULL);
    1557             : 
    1558          32 :   sqlite_ps ps_upsert_buildids (db, "file-buildids-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
    1559          32 :   sqlite_ps ps_upsert_files (db, "file-files-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);");
    1560             :   sqlite_ps ps_upsert_de (db, "file-de-upsert",
    1561             :                           "insert or ignore into " BUILDIDS "_f_de "
    1562             :                           "(buildid, debuginfo_p, executable_p, file, mtime) "
    1563             :                           "values ((select id from " BUILDIDS "_buildids where hex = ?),"
    1564             :                           "        ?,?,"
    1565          32 :                           "        (select id from " BUILDIDS "_files where name = ?), ?);");
    1566             :   sqlite_ps ps_upsert_s (db, "file-s-upsert",
    1567             :                          "insert or ignore into " BUILDIDS "_f_s "
    1568             :                          "(buildid, artifactsrc, file, mtime) "
    1569             :                          "values ((select id from " BUILDIDS "_buildids where hex = ?),"
    1570             :                          "        (select id from " BUILDIDS "_files where name = ?),"
    1571             :                          "        (select id from " BUILDIDS "_files where name = ?),"
    1572          32 :                          "        ?);");
    1573             :   sqlite_ps ps_query (db, "file-negativehit-find",
    1574          32 :                       "select 1 from " BUILDIDS "_file_mtime_scanned where sourcetype = 'F' and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;");
    1575             :   sqlite_ps ps_scan_done (db, "file-scanned",
    1576             :                           "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
    1577          32 :                           "values ('F', (select id from " BUILDIDS "_files where name = ?), ?, ?);");
    1578             : 
    1579             : 
    1580          16 :   char * const dirs[] = { (char*) dir.c_str(), NULL };
    1581             : 
    1582          16 :   unsigned fts_scanned=0, fts_regex=0, fts_cached=0, fts_debuginfo=0, fts_executable=0, fts_sourcefiles=0;
    1583             : 
    1584          16 :   FTS *fts = fts_open (dirs,
    1585             :                        (traverse_logical ? FTS_LOGICAL : FTS_PHYSICAL|FTS_XDEV)
    1586             :                        | FTS_NOCHDIR /* multithreaded */,
    1587             :                        NULL);
    1588          16 :   if (fts == NULL)
    1589             :     {
    1590           0 :       obatched(cerr) << "cannot fts_open " << dir << endl;
    1591           0 :       return;
    1592             :     }
    1593             : 
    1594             :   FTSENT *f;
    1595         111 :   while ((f = fts_read (fts)) != NULL)
    1596             :     {
    1597          95 :       semaphore_borrower handle_one_file (scan_concurrency_sem);
    1598             : 
    1599          95 :       fts_scanned ++;
    1600          95 :       if (interrupted)
    1601           0 :         break;
    1602             : 
    1603          95 :       if (verbose > 2)
    1604           0 :         obatched(clog) << "fts/file traversing " << f->fts_path << endl;
    1605             : 
    1606             :       try
    1607             :         {
    1608             :           /* Found a file.  Convert it to an absolute path, so
    1609             :              the buildid database does not have relative path
    1610             :              names that are unresolvable from a subsequent run
    1611             :              in a different cwd. */
    1612          95 :           char *rp = realpath(f->fts_path, NULL);
    1613          95 :           if (rp == NULL)
    1614          23 :             continue; // ignore dangling symlink or such
    1615          95 :           string rps = string(rp);
    1616          95 :           free (rp);
    1617             : 
    1618          95 :           bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0);
    1619          95 :           bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0);
    1620          95 :           if (!ri || rx)
    1621             :             {
    1622           0 :               if (verbose > 3)
    1623           0 :                 obatched(clog) << "fts/file skipped by regex " << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
    1624           0 :               fts_regex ++;
    1625           0 :               continue;
    1626             :             }
    1627             : 
    1628          95 :           switch (f->fts_info)
    1629             :             {
    1630          24 :             case FTS_D:
    1631          24 :               break;
    1632             : 
    1633          24 :             case FTS_DP:
    1634          24 :               break;
    1635             : 
    1636          42 :             case FTS_F:
    1637             :               {
    1638             :                 /* See if we know of it already. */
    1639             :                 int rc = ps_query
    1640          42 :                   .reset()
    1641          42 :                   .bind(1, rps)
    1642          42 :                   .bind(2, f->fts_statp->st_mtime)
    1643          42 :                   .step();
    1644          42 :                 ps_query.reset();
    1645          42 :                 if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
    1646             :                   // no need to recheck a file/version we already know
    1647             :                   // specifically, no need to elf-begin a file we already determined is non-elf
    1648             :                   // (so is stored with buildid=NULL)
    1649             :                   {
    1650          23 :                     fts_cached ++;
    1651          23 :                     continue;
    1652             :                   }
    1653             : 
    1654          19 :                 bool executable_p = false, debuginfo_p = false; // E and/or D
    1655          38 :                 string buildid;
    1656          38 :                 set<string> sourcefiles;
    1657             : 
    1658          19 :                 int fd = open (rps.c_str(), O_RDONLY);
    1659             :                 try
    1660             :                   {
    1661          19 :                     if (fd >= 0)
    1662          19 :                       elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
    1663             :                     else
    1664           0 :                       throw libc_exception(errno, string("open ") + rps);
    1665          19 :                     inc_metric ("scanned_total","source","file");
    1666             :                   }
    1667             : 
    1668             :                 // NB: we catch exceptions here too, so that we can
    1669             :                 // cache the corrupt-elf case (!executable_p &&
    1670             :                 // !debuginfo_p) just below, just as if we had an
    1671             :                 // EPERM error from open(2).
    1672             : 
    1673           0 :                 catch (const reportable_exception& e)
    1674             :                   {
    1675           0 :                     e.report(clog);
    1676             :                   }
    1677             : 
    1678          19 :                 if (fd >= 0)
    1679          19 :                   close (fd);
    1680             : 
    1681             :                 // register this file name in the interning table
    1682             :                 ps_upsert_files
    1683          19 :                   .reset()
    1684          19 :                   .bind(1, rps)
    1685          19 :                   .step_ok_done();
    1686             : 
    1687          19 :                 if (buildid == "")
    1688             :                   {
    1689             :                     // no point storing an elf file without buildid
    1690          15 :                     executable_p = false;
    1691          15 :                     debuginfo_p = false;
    1692             :                   }
    1693             :                 else
    1694             :                   {
    1695             :                     // register this build-id in the interning table
    1696             :                     ps_upsert_buildids
    1697           4 :                       .reset()
    1698           4 :                       .bind(1, buildid)
    1699           4 :                       .step_ok_done();
    1700             :                   }
    1701             : 
    1702          19 :                 if (executable_p)
    1703           3 :                   fts_executable ++;
    1704          19 :                 if (debuginfo_p)
    1705           3 :                   fts_debuginfo ++;
    1706          19 :                 if (executable_p || debuginfo_p)
    1707             :                   {
    1708             :                     ps_upsert_de
    1709           4 :                       .reset()
    1710           4 :                       .bind(1, buildid)
    1711           4 :                       .bind(2, debuginfo_p ? 1 : 0)
    1712           4 :                       .bind(3, executable_p ? 1 : 0)
    1713           4 :                       .bind(4, rps)
    1714           4 :                       .bind(5, f->fts_statp->st_mtime)
    1715           4 :                       .step_ok_done();
    1716             :                   }
    1717          19 :                 if (executable_p)
    1718           3 :                   inc_metric("found_executable_total","source","files");
    1719          19 :                 if (debuginfo_p)
    1720           3 :                   inc_metric("found_debuginfo_total","source","files");
    1721             : 
    1722          19 :                 if (sourcefiles.size() && buildid != "")
    1723             :                   {
    1724           3 :                     fts_sourcefiles += sourcefiles.size();
    1725             : 
    1726         493 :                     for (auto&& dwarfsrc : sourcefiles)
    1727             :                       {
    1728         490 :                         char *srp = realpath(dwarfsrc.c_str(), NULL);
    1729         490 :                         if (srp == NULL) // also if DWZ unresolved dwarfsrc=""
    1730           6 :                           continue; // unresolvable files are not a serious problem
    1731             :                         // throw libc_exception(errno, "fts/file realpath " + srcpath);
    1732         484 :                         string srps = string(srp);
    1733         484 :                         free (srp);
    1734             : 
    1735             :                         struct stat sfs;
    1736         484 :                         rc = stat(srps.c_str(), &sfs);
    1737         484 :                         if (rc != 0)
    1738           0 :                           continue;
    1739             : 
    1740         484 :                         if (verbose > 2)
    1741           0 :                           obatched(clog) << "recorded buildid=" << buildid << " file=" << srps
    1742           0 :                                          << " mtime=" << sfs.st_mtime
    1743           0 :                                          << " as source " << dwarfsrc << endl;
    1744             : 
    1745             :                         ps_upsert_files
    1746         484 :                           .reset()
    1747         484 :                           .bind(1, srps)
    1748         484 :                           .step_ok_done();
    1749             : 
    1750             :                         // register the dwarfsrc name in the interning table too
    1751             :                         ps_upsert_files
    1752         484 :                           .reset()
    1753         484 :                           .bind(1, dwarfsrc)
    1754         484 :                           .step_ok_done();
    1755             : 
    1756             :                         ps_upsert_s
    1757         484 :                           .reset()
    1758         484 :                           .bind(1, buildid)
    1759         484 :                           .bind(2, dwarfsrc)
    1760         484 :                           .bind(3, srps)
    1761         484 :                           .bind(4, sfs.st_mtime)
    1762         484 :                           .step_ok_done();
    1763             : 
    1764         484 :                         inc_metric("found_sourcerefs_total","source","files");
    1765             :                       }
    1766             :                   }
    1767             : 
    1768             :                 ps_scan_done
    1769          19 :                   .reset()
    1770          19 :                   .bind(1, rps)
    1771          19 :                   .bind(2, f->fts_statp->st_mtime)
    1772          19 :                   .bind(3, f->fts_statp->st_size)
    1773          19 :                   .step_ok_done();
    1774             : 
    1775          19 :                 if (verbose > 2)
    1776           0 :                   obatched(clog) << "recorded buildid=" << buildid << " file=" << rps
    1777           0 :                                  << " mtime=" << f->fts_statp->st_mtime << " atype="
    1778             :                                  << (executable_p ? "E" : "")
    1779          42 :                                  << (debuginfo_p ? "D" : "") << endl;
    1780             :               }
    1781          19 :               break;
    1782             : 
    1783           0 :             case FTS_ERR:
    1784             :             case FTS_NS:
    1785           0 :               throw libc_exception(f->fts_errno, string("fts/file traversal ") + string(f->fts_path));
    1786             : 
    1787           5 :             default:
    1788             :             case FTS_SL: /* ignore symlinks; seen in non-L mode only */
    1789           5 :               break;
    1790             :             }
    1791             : 
    1792          72 :           if ((verbose && f->fts_info == FTS_DP) ||
    1793          72 :               (verbose > 1 && f->fts_info == FTS_F))
    1794           0 :             obatched(clog) << "fts/file traversing " << rps << ", scanned=" << fts_scanned
    1795           0 :                  << ", regex-skipped=" << fts_regex
    1796           0 :                  << ", cached=" << fts_cached << ", debuginfo=" << fts_debuginfo
    1797           0 :                  << ", executable=" << fts_executable << ", source=" << fts_sourcefiles << endl;
    1798             :         }
    1799           0 :       catch (const reportable_exception& e)
    1800             :         {
    1801           0 :           e.report(clog);
    1802             :         }
    1803             :     }
    1804          16 :   fts_close (fts);
    1805             : 
    1806          16 :   gettimeofday (&tv_end, NULL);
    1807          16 :   double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001;
    1808             : 
    1809          32 :   obatched(clog) << "fts/file traversed " << dir << " in " << deltas << "s, scanned=" << fts_scanned
    1810          16 :                  << ", regex-skipped=" << fts_regex
    1811          16 :                  << ", cached=" << fts_cached << ", debuginfo=" << fts_debuginfo
    1812          16 :                  << ", executable=" << fts_executable << ", source=" << fts_sourcefiles << endl;
    1813             : }
    1814             : // Thread entry point: rescans one directory with scan_source_file_path() until 'interrupted' is set.
    1815             : // ARG is read as a NUL-terminated directory name; the return value is always 0.
    1816             : static void*
    1817           4 : thread_main_scan_source_file_path (void* arg)
    1818             : {
    1819           4 :   string dir = string((const char*) arg);
    1820             : 
    1821           4 :   unsigned rescan_timer = 0; // loop passes since the last scan; each pass ends with sleep(1)
    1822           4 :   sig_atomic_t forced_rescan_count = 0; // last value of sigusr1 this thread has acted on
    1823           4 :   set_metric("thread_timer_max", "file", dir, rescan_s);
    1824           4 :   set_metric("thread_tid", "file", dir, tid());
    1825          26 :   while (! interrupted)
    1826             :     {
    1827          22 :       set_metric("thread_timer", "file", dir, rescan_timer);
    1828          22 :       set_metric("thread_forced_total", "file", dir, forced_rescan_count);
    1829          22 :       if (rescan_s && rescan_timer > rescan_s) // periodic rescan; with rescan_s == 0 this never resets the timer
    1830           0 :         rescan_timer = 0;
    1831          22 :       if (sigusr1 != forced_rescan_count) // sigusr1 changed since we last looked: rescan right away
    1832             :         {
    1833          12 :           forced_rescan_count = sigusr1;
    1834          12 :           rescan_timer = 0;
    1835             :         }
    1836          22 :       if (rescan_timer == 0) // true on the first pass and after either reset above
    1837             :         try
    1838             :           {
    1839          16 :             set_metric("thread_working", "file", dir, time(NULL)); // scan start time while busy ...
    1840          16 :             inc_metric("thread_work_total", "file", dir);
    1841          16 :             scan_source_file_path (dir);
    1842          16 :             set_metric("thread_working", "file", dir, 0); // ... and 0 when idle; skipped if the scan throws
    1843             :           }
    1844           0 :         catch (const sqlite_exception& e) // only sqlite_exception is handled here: log it and keep looping
    1845             :           {
    1846           0 :             obatched(cerr) << e.message << endl;
    1847             :           }
    1848          22 :       sleep (1);
    1849          22 :       rescan_timer ++;
    1850             :     }
    1851             : 
    1852           8 :   return 0;
    1853             : }
    1854             : 
    1855             : 
    1856             : ////////////////////////////////////////////////////////////////////////
    1857             : 
    1858             : // Unpack the rpm at RPS through "rpm2cpio | libarchive" and run elf_classify() on each
    1859             : // regular file inside it, recording the results through the given prepared statements.
    1860             : // MTIME is bound into the de / sdef rows.  The fts_* counters are only incremented, never reset;
    1861             : // fts_sref_complete_p is set to false when an empty source file name is encountered.
    1862             : // Setup failures throw; inside the loop, anything caught as reportable_exception is logged to clog.
    1863             : static void
    1864          14 : rpm_classify (const string& rps, sqlite_ps& ps_upsert_buildids, sqlite_ps& ps_upsert_files,
    1865             :               sqlite_ps& ps_upsert_de, sqlite_ps& ps_upsert_sref, sqlite_ps& ps_upsert_sdef,
    1866             :               time_t mtime,
    1867             :               unsigned& fts_executable, unsigned& fts_debuginfo, unsigned& fts_sref, unsigned& fts_sdef,
    1868             :               bool& fts_sref_complete_p)
    1869             : {
    1870          28 :   string popen_cmd = string("rpm2cpio " + shell_escape(rps)); // RPS goes through shell_escape() before reaching the shell
    1871          14 :   FILE* fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
    1872          14 :   if (fp == NULL)
    1873           0 :     throw libc_exception (errno, string("popen ") + popen_cmd);
    1874          28 :   defer_dtor<FILE*,int> fp_closer (fp, pclose); // presumably pclose(fp) at scope exit; defer_dtor is defined outside this view
    1875             : 
    1876             :   struct archive *a;
    1877          14 :   a = archive_read_new();
    1878          14 :   if (a == NULL)
    1879           0 :     throw archive_exception("cannot create archive reader");
    1880          14 :   defer_dtor<struct archive*,int> archive_closer (a, archive_read_free); // likewise archive_read_free(a) - TODO confirm
    1881             : 
    1882          14 :   int rc = archive_read_support_format_cpio(a);
    1883          14 :   if (rc != ARCHIVE_OK)
    1884           0 :     throw archive_exception(a, "cannot select cpio format");
    1885          14 :   rc = archive_read_support_filter_all(a);
    1886          14 :   if (rc != ARCHIVE_OK)
    1887           0 :     throw archive_exception(a, "cannot select all filters");
    1888             : 
    1889          14 :   rc = archive_read_open_FILE (a, fp); // libarchive reads the cpio stream directly from the pipe
    1890          14 :   if (rc != ARCHIVE_OK)
    1891           0 :     throw archive_exception(a, "cannot open archive from rpm2cpio pipe");
    1892             : 
    1893          14 :   if (verbose > 3)
    1894           0 :     obatched(clog) << "rpm2cpio|libarchive scanning " << rps << endl;
    1895             : 
    1896             :   while(1) // parse cpio archive entries
    1897             :     {
    1898             :       try
    1899             :         {
    1900             :           struct archive_entry *e;
    1901          88 :           rc = archive_read_next_header (a, &e);
    1902          88 :           if (rc != ARCHIVE_OK) // NOTE(review): any non-OK status ends the loop silently, so end-of-archive and errors look alike
    1903          14 :             break;
    1904             : 
    1905          74 :           if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
    1906          51 :             continue;
    1907             : 
    1908          46 :           string fn = archive_entry_pathname (e);
    1909          23 :           if (fn.size() > 1 && fn[0] == '.')
    1910          17 :             fn = fn.substr(1); // trim off the leading '.'
    1911             : 
    1912          23 :           if (verbose > 3)
    1913           0 :             obatched(clog) << "rpm2cpio|libarchive checking " << fn << endl;
    1914             : 
    1915             :           // extract this file to a temporary file
    1916          23 :           const char *tmpdir_env = getenv ("TMPDIR") ?: "/tmp"; // "a ?: b" (GNU extension): an unset TMPDIR falls back to /tmp
    1917          23 :           char* tmppath = NULL;
    1918          23 :           rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir_env);
    1919          23 :           if (rc < 0)
    1920           0 :             throw libc_exception (ENOMEM, "cannot allocate tmppath");
    1921          46 :           defer_dtor<void*,void> tmmpath_freer (tmppath, free); // (sic: "tmmpath") presumably frees the asprintf buffer at scope exit
    1922          23 :           int fd = mkstemp (tmppath);
    1923          23 :           if (fd < 0)
    1924           0 :             throw libc_exception (errno, "cannot create temporary file");
    1925          23 :           unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd
    1926          46 :           defer_dtor<int,int> minifd_closer (fd, close);
    1927             : 
    1928          23 :           rc = archive_read_data_into_fd (a, fd); // copy this entry's data into the temporary file
    1929          23 :           if (rc != ARCHIVE_OK)
    1930           0 :             throw archive_exception(a, "cannot extract file");
    1931             : 
    1932             :           // finally ... time to run elf_classify on this bad boy and update the database
    1933          23 :           bool executable_p = false, debuginfo_p = false;
    1934          46 :           string buildid;
    1935          46 :           set<string> sourcefiles;
    1936          23 :           elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
    1937             :           // NB: might throw
    1938             : 
    1939          23 :           if (buildid != "") // intern buildid
    1940             :             {
    1941             :               ps_upsert_buildids
    1942          14 :                 .reset()
    1943          14 :                 .bind(1, buildid)
    1944          14 :                 .step_ok_done();
    1945             :             }
    1946             : 
    1947             :           ps_upsert_files // register this rpm constituent file name in interning table
    1948          23 :             .reset()
    1949          23 :             .bind(1, fn)
    1950          23 :             .step_ok_done();
    1951             : 
    1952          23 :           if (sourcefiles.size() > 0) // sref records needed
    1953             :             {
    1954             :               // NB: we intern each source file once.  Once raw, as it
    1955             :               // appears in the DWARF file list coming back from
    1956             :               // elf_classify() - because it'll end up in the
    1957             :               // _norm.artifactsrc column.  We don't also put another
    1958             :               // version with a '.' at the front, even though that's
    1959             :               // how rpm/cpio packs names, because we hide that from
    1960             :               // the database for storage efficiency.
    1961             : 
    1962          18 :               for (auto&& s : sourcefiles)
    1963             :                 {
    1964          12 :                   if (s == "") // empty name: no sref row is written, and the caller is told the set is incomplete
    1965             :                     {
    1966           0 :                       fts_sref_complete_p = false;
    1967           0 :                       continue;
    1968             :                     }
    1969             : 
    1970             :                   ps_upsert_files
    1971          12 :                     .reset()
    1972          12 :                     .bind(1, s)
    1973          12 :                     .step_ok_done();
    1974             : 
    1975             :                   ps_upsert_sref
    1976          12 :                     .reset()
    1977          12 :                     .bind(1, buildid)
    1978          12 :                     .bind(2, s)
    1979          12 :                     .step_ok_done();
    1980             : 
    1981          12 :                   fts_sref ++;
    1982             :                 }
    1983             :             }
    1984             : 
    1985          23 :           if (executable_p)
    1986           6 :             fts_executable ++;
    1987          23 :           if (debuginfo_p)
    1988           8 :             fts_debuginfo ++;
    1989             : 
    1990          23 :           if (executable_p || debuginfo_p)
    1991             :             {
    1992             :               ps_upsert_de
    1993          14 :                 .reset()
    1994          14 :                 .bind(1, buildid)
    1995          14 :                 .bind(2, debuginfo_p ? 1 : 0)
    1996          14 :                 .bind(3, executable_p ? 1 : 0)
    1997          14 :                 .bind(4, rps)
    1998          14 :                 .bind(5, mtime)
    1999          14 :                 .bind(6, fn) // name of this member inside the rpm, leading '.' already trimmed
    2000          14 :                 .step_ok_done();
    2001             :             }
    2002             :           else // potential source - sdef record
    2003             :             {
    2004           9 :               fts_sdef ++;
    2005             :               ps_upsert_sdef
    2006           9 :                 .reset()
    2007           9 :                 .bind(1, rps)
    2008           9 :                 .bind(2, mtime)
    2009           9 :                 .bind(3, fn)
    2010           9 :                 .step_ok_done();
    2011             :             }
    2012             : 
    2013          23 :           if ((verbose > 2) && (executable_p || debuginfo_p))
    2014           0 :             obatched(clog) << "recorded buildid=" << buildid << " rpm=" << rps << " file=" << fn
    2015           0 :                            << " mtime=" << mtime << " atype="
    2016             :                            << (executable_p ? "E" : "")
    2017             :                            << (debuginfo_p ? "D" : "")
    2018           0 :                            << " sourcefiles=" << sourcefiles.size() << endl;
    2019             : 
    2020             :         }
    2021           0 :       catch (const reportable_exception& e) // log the failure for this entry, then try the next header
    2022             :         {
    2023           0 :           e.report(clog);
    2024             :         }
    2025          74 :     }
    2026          14 : }
    2027             : 
    2028             : // Walk DIR with fts; for each regular file whose resolved path ends in ".rpm" and that has
    2029             : // no matching 'R' row in the _file_mtime_scanned table, index its contents via rpm_classify().
    2030             : // Progress and a final summary go to clog; returns early (after logging to cerr) if fts_open fails.
    2031             : static void
    2032          15 : scan_source_rpm_path (const string& dir)
    2033             : {
    2034          15 :   obatched(clog) << "fts/rpm traversing " << dir << endl;
    2035             : 
    2036          30 :   sqlite_ps ps_upsert_buildids (db, "rpm-buildid-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
    2037          30 :   sqlite_ps ps_upsert_files (db, "rpm-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);");
    2038             :   sqlite_ps ps_upsert_de (db, "rpm-de-insert",
    2039             :                           "insert or ignore into " BUILDIDS "_r_de (buildid, debuginfo_p, executable_p, file, mtime, content) values ("
    2040             :                           "(select id from " BUILDIDS "_buildids where hex = ?), ?, ?, "
    2041             :                           "(select id from " BUILDIDS "_files where name = ?), ?, "
    2042          30 :                           "(select id from " BUILDIDS "_files where name = ?));");
    2043             :   sqlite_ps ps_upsert_sref (db, "rpm-sref-insert",
    2044             :                             "insert or ignore into " BUILDIDS "_r_sref (buildid, artifactsrc) values ("
    2045             :                             "(select id from " BUILDIDS "_buildids where hex = ?), "
    2046          30 :                             "(select id from " BUILDIDS "_files where name = ?));");
    2047             :   sqlite_ps ps_upsert_sdef (db, "rpm-sdef-insert",
    2048             :                             "insert or ignore into " BUILDIDS "_r_sdef (file, mtime, content) values ("
    2049             :                             "(select id from " BUILDIDS "_files where name = ?), ?,"
    2050          30 :                             "(select id from " BUILDIDS "_files where name = ?));");
    2051             :   sqlite_ps ps_query (db, "rpm-negativehit-query", // finds a previous scan of the same path + mtime
    2052             :                       "select 1 from " BUILDIDS "_file_mtime_scanned where "
    2053          30 :                       "sourcetype = 'R' and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;");
    2054             :   sqlite_ps ps_scan_done (db, "rpm-scanned", // writes the row that ps_query looks for
    2055             :                           "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
    2056          30 :                           "values ('R', (select id from " BUILDIDS "_files where name = ?), ?, ?);");
    2057             : 
    2058          15 :   char * const dirs[] = { (char*) dir.c_str(), NULL }; // the cast drops const to fit fts_open's char* const* parameter
    2059             : 
    2060             :   struct timeval tv_start, tv_end;
    2061          15 :   gettimeofday (&tv_start, NULL);
    2062          15 :   unsigned fts_scanned=0, fts_regex=0, fts_cached=0, fts_debuginfo=0;
    2063          15 :   unsigned fts_executable=0, fts_rpm = 0, fts_sref=0, fts_sdef=0;
    2064             : 
    2065          15 :   FTS *fts = fts_open (dirs,
    2066             :                        (traverse_logical ? FTS_LOGICAL : FTS_PHYSICAL|FTS_XDEV)
    2067             :                        | FTS_NOCHDIR /* multithreaded */,
    2068             :                        NULL);
    2069          15 :   if (fts == NULL)
    2070             :     {
    2071           0 :       obatched(cerr) << "cannot fts_open " << dir << endl;
    2072           0 :       return;
    2073             :     }
    2074             : 
    2075             :   FTSENT *f;
    2076         107 :   while ((f = fts_read (fts)) != NULL)
    2077             :     {
    2078          92 :       semaphore_borrower handle_one_file (scan_concurrency_sem); // NOTE(review): presumably caps concurrent scans - confirm
    2079             : 
    2080          92 :       fts_scanned ++;
    2081          92 :       if (interrupted)
    2082           0 :         break;
    2083             : 
    2084          92 :       if (verbose > 2)
    2085           0 :         obatched(clog) << "fts/rpm traversing " << f->fts_path << endl;
    2086             : 
    2087             :       try
    2088             :         {
    2089             :           /* Found a file.  Convert it to an absolute path, so
    2090             :              the buildid database does not have relative path
    2091             :              names that are unresolvable from a subsequent run
    2092             :              in a different cwd. */
    2093          92 :           char *rp = realpath(f->fts_path, NULL);
    2094          92 :           if (rp == NULL)
    2095          27 :             continue; // ignore dangling symlink or such
    2096          92 :           string rps = string(rp);
    2097          92 :           free (rp);
    2098             : 
    2099          92 :           bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0); // regexec returns 0 on a match, so ri = "included"
    2100          92 :           bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0); // and rx = "excluded"
    2101          92 :           if (!ri || rx)
    2102             :             {
    2103           0 :               if (verbose > 3)
    2104           0 :                 obatched(clog) << "fts/rpm skipped by regex " << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
    2105           0 :               fts_regex ++;
    2106           0 :               continue;
    2107             :             }
    2108             : 
    2109          92 :           switch (f->fts_info)
    2110             :             {
    2111          23 :             case FTS_D:
    2112          23 :               break;
    2113             : 
    2114          23 :             case FTS_DP:
    2115          23 :               break;
    2116             : 
    2117          41 :             case FTS_F:
    2118             :               {
    2119             :                 // heuristic: reject if file name does not end with ".rpm"
    2120             :                 // (alternative: try opening with librpm etc., caching)
    2121          41 :                 string suffix = ".rpm";
    2122          82 :                 if (rps.size() < suffix.size() ||
    2123          82 :                     rps.substr(rps.size()-suffix.size()) != suffix)
    2124          13 :                   continue;
    2125          28 :                 fts_rpm ++;
    2126             : 
    2127             :                 /* See if we know of it already. */
    2128             :                 int rc = ps_query
    2129          28 :                   .reset()
    2130          28 :                   .bind(1, rps)
    2131          28 :                   .bind(2, f->fts_statp->st_mtime)
    2132          28 :                   .step();
    2133          28 :                 ps_query.reset();
    2134          28 :                 if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
    2135             :                   // no need to recheck a file/version we already know
    2136             :                   // specifically, no need to parse this rpm again, since
    2137             :                   // its path + mtime already has an 'R' row in the
    2138             :                   // _file_mtime_scanned table (written via ps_scan_done below)
    2139             :                   {
    2140          14 :                     fts_cached ++;
    2141          14 :                     continue;
    2142             :                   }
    2143             : 
    2144             :                 // intern the rpm file name
    2145             :                 ps_upsert_files
    2146          14 :                   .reset()
    2147          14 :                   .bind(1, rps)
    2148          14 :                   .step_ok_done();
    2149             : 
    2150             :                 // extract the rpm contents via popen("rpm2cpio") | libarchive | loop-of-elf_classify()
    2151          14 :                 unsigned my_fts_executable = 0, my_fts_debuginfo = 0, my_fts_sref = 0, my_fts_sdef = 0;
    2152          14 :                 bool my_fts_sref_complete_p = true;
    2153             :                 try
    2154             :                   {
    2155          14 :                     rpm_classify (rps,
    2156             :                                   ps_upsert_buildids, ps_upsert_files,
    2157             :                                   ps_upsert_de, ps_upsert_sref, ps_upsert_sdef, // dalt
    2158          14 :                                   f->fts_statp->st_mtime,
    2159             :                                   my_fts_executable, my_fts_debuginfo, my_fts_sref, my_fts_sdef,
    2160             :                                   my_fts_sref_complete_p);
    2161          14 :                     inc_metric ("scanned_total","source","rpm");
    2162          14 :                     add_metric("found_debuginfo_total","source","rpm",
    2163             :                                my_fts_debuginfo);
    2164          14 :                     add_metric("found_executable_total","source","rpm",
    2165             :                                my_fts_executable);
    2166          14 :                     add_metric("found_sourcerefs_total","source","rpm",
    2167             :                                my_fts_sref);
    2168             :                   }
    2169           0 :                 catch (const reportable_exception& e) // metrics above are skipped, but the my_fts_* counts are still summed below
    2170             :                   {
    2171           0 :                     e.report(clog);
    2172             :                   }
    2173             : 
    2174          14 :                 if (verbose > 2)
    2175           0 :                   obatched(clog) << "scanned rpm=" << rps
    2176           0 :                                  << " mtime=" << f->fts_statp->st_mtime
    2177           0 :                                  << " executables=" << my_fts_executable
    2178           0 :                                  << " debuginfos=" << my_fts_debuginfo
    2179           0 :                                  << " srefs=" << my_fts_sref
    2180           0 :                                  << " sdefs=" << my_fts_sdef
    2181           0 :                                  << endl;
    2182             :  
    2183          14 :                 fts_executable += my_fts_executable;
    2184          14 :                 fts_debuginfo += my_fts_debuginfo;
    2185          14 :                 fts_sref += my_fts_sref;
    2186          14 :                 fts_sdef += my_fts_sdef;
    2187             : 
    2188          14 :                 if (my_fts_sref_complete_p) // leave incomplete? (unmarked rpms get rescanned next pass; NOTE(review): flag stays true if rpm_classify threw)
    2189             :                   ps_scan_done
    2190          14 :                     .reset()
    2191          14 :                     .bind(1, rps)
    2192          14 :                     .bind(2, f->fts_statp->st_mtime)
    2193          14 :                     .bind(3, f->fts_statp->st_size)
    2194          55 :                     .step_ok_done();
    2195             :               }
    2196          14 :               break;
    2197             : 
    2198           0 :             case FTS_ERR:
    2199             :             case FTS_NS:
    2200           0 :               throw libc_exception(f->fts_errno, string("fts/rpm traversal ") + string(f->fts_path));
    2201             : 
    2202           5 :             default:
    2203             :             case FTS_SL: /* ignore symlinks; seen in non-L mode only */
    2204           5 :               break;
    2205             :             }
    2206             : 
    2207          65 :           if ((verbose && f->fts_info == FTS_DP) ||
    2208          65 :               (verbose > 1 && f->fts_info == FTS_F))
    2209           0 :             obatched(clog) << "fts/rpm traversing " << rps << ", scanned=" << fts_scanned
    2210           0 :                            << ", regex-skipped=" << fts_regex
    2211           0 :                            << ", rpm=" << fts_rpm << ", cached=" << fts_cached << ", debuginfo=" << fts_debuginfo
    2212           0 :                            << ", executable=" << fts_executable
    2213           0 :                            << ", sourcerefs=" << fts_sref << ", sourcedefs=" << fts_sdef << endl;
    2214             :         }
    2215           0 :       catch (const reportable_exception& e) // log and move on to the next fts entry
    2216             :         {
    2217           0 :           e.report(clog);
    2218             :         }
    2219             :     }
    2220          15 :   fts_close (fts);
    2221             : 
    2222          15 :   gettimeofday (&tv_end, NULL);
    2223          15 :   double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001; // elapsed wall time in seconds
    2224             : 
    2225          30 :   obatched(clog) << "fts/rpm traversed " << dir << " in " << deltas << "s, scanned=" << fts_scanned
    2226          15 :                  << ", regex-skipped=" << fts_regex
    2227          15 :                  << ", rpm=" << fts_rpm << ", cached=" << fts_cached << ", debuginfo=" << fts_debuginfo
    2228          15 :                  << ", executable=" << fts_executable
    2229          15 :                  << ", sourcerefs=" << fts_sref << ", sourcedefs=" << fts_sdef << endl;
    2230             : }
    2231             : 
    2232             : 
    2233             : 
    2234             : static void*
    2235           3 : thread_main_scan_source_rpm_path (void* arg)
    2236             : {
    2237           3 :   string dir = string((const char*) arg);
    2238             : 
    2239           3 :   unsigned rescan_timer = 0;
    2240           3 :   sig_atomic_t forced_rescan_count = 0;
    2241           3 :   set_metric("thread_timer_max", "rpm", dir, rescan_s);
    2242           3 :   set_metric("thread_tid", "rpm", dir, tid());
    2243          24 :   while (! interrupted)
    2244             :     {
    2245          21 :       set_metric("thread_timer", "rpm", dir, rescan_timer);
    2246          21 :       set_metric("thread_forced_total", "rpm", dir, forced_rescan_count);
    2247          21 :       if (rescan_s && rescan_timer > rescan_s)
    2248           0 :         rescan_timer = 0;
    2249          21 :       if (sigusr1 != forced_rescan_count)
    2250             :         {
    2251          12 :           forced_rescan_count = sigusr1;
    2252          12 :           rescan_timer = 0;
    2253             :         }
    2254          21 :       if (rescan_timer == 0)
    2255             :         try
    2256             :           {
    2257          15 :             set_metric("thread_working", "rpm", dir, time(NULL));
    2258          15 :             inc_metric("thread_work_total", "rpm", dir);
    2259          15 :             scan_source_rpm_path (dir);
    2260          15 :             set_metric("thread_working", "rpm", dir, 0);
    2261             :           }
    2262           0 :         catch (const sqlite_exception& e)
    2263             :           {
    2264           0 :             obatched(cerr) << e.message << endl;
    2265             :           }
    2266          21 :       sleep (1);
    2267          21 :       rescan_timer ++;
    2268             :     }
    2269             : 
    2270           6 :   return 0;
    2271             : }
    2272             : 
    2273             : 
    2274             : ////////////////////////////////////////////////////////////////////////
    2275             : 
    2276             : static void
    2277           3 : database_stats_report()
    2278             : {
    2279             :   sqlite_ps ps_query (db, "database-overview",
    2280           9 :                       "select label,quantity from " BUILDIDS "_stats");
    2281             : 
    2282           3 :   obatched(clog) << "database record counts:" << endl;
    2283             :   while (1)
    2284             :     {
    2285          33 :       int rc = sqlite3_step (ps_query);
    2286          33 :       if (rc == SQLITE_DONE) break;
    2287          30 :       if (rc != SQLITE_ROW)
    2288           0 :         throw sqlite_exception(rc, "step");
    2289             : 
    2290          60 :       obatched(clog)
    2291          60 :         << right << setw(20) << ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL")
    2292             :         << " "
    2293          60 :         << (sqlite3_column_text(ps_query, 1) ?: (const unsigned char*) "NULL")
    2294          30 :         << endl;
    2295             : 
    2296          30 :       set_metric("groom", "statistic",
    2297          30 :                  ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL"),
    2298          30 :                  (sqlite3_column_double(ps_query, 1)));
    2299          30 :     }
    2300           3 : }
    2301             : 
    2302             : 
    2303             : // Do a round of database grooming that might take many minutes to run.
    2304           3 : void groom()
    2305             : {
    2306           3 :   obatched(clog) << "grooming database" << endl;
    2307             : 
    2308             :   struct timeval tv_start, tv_end;
    2309           3 :   gettimeofday (&tv_start, NULL);
    2310             : 
    2311             :   // scan for files that have disappeared
    2312             :   sqlite_ps files (db, "check old files", "select s.mtime, s.file, f.name from "
    2313             :                        BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f "
    2314           9 :                        "where f.id = s.file");
    2315           9 :   sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
    2316           9 :   sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
    2317             :   sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
    2318           9 :                             "where file = ? and mtime = ?");
    2319           3 :   files.reset();
    2320             :   while(1)
    2321             :     {
    2322          31 :       int rc = files.step();
    2323          31 :       if (rc != SQLITE_ROW)
    2324           3 :         break;
    2325             : 
    2326          28 :       int64_t mtime = sqlite3_column_int64 (files, 0);
    2327          28 :       int64_t fileid = sqlite3_column_int64 (files, 1);
    2328          28 :       const char* filename = ((const char*) sqlite3_column_text (files, 2) ?: "");
    2329             :       struct stat s;
    2330          28 :       rc = stat(filename, &s);
    2331          28 :       if (rc < 0 || (mtime != (int64_t) s.st_mtime))
    2332             :         {
    2333           4 :           if (verbose > 2)
    2334           0 :             obatched(clog) << "groom: forgetting file=" << filename << " mtime=" << mtime << endl;
    2335           4 :           files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
    2336           4 :           files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
    2337           4 :           files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
    2338             :         }
    2339          28 :     }
    2340           3 :   files.reset();
    2341             : 
    2342             :   // delete buildids with no references in _r_de or _f_de tables;
    2343             :   // cascades to _r_sref & _f_s records
    2344             :   sqlite_ps buildids_del (db, "nuke orphan buildids",
    2345             :                           "delete from " BUILDIDS "_buildids "
    2346             :                           "where not exists (select 1 from " BUILDIDS "_f_de d where " BUILDIDS "_buildids.id = d.buildid) "
    2347           9 :                           "and not exists (select 1 from " BUILDIDS "_r_de d where " BUILDIDS "_buildids.id = d.buildid)");
    2348           3 :   buildids_del.reset().step_ok_done();
    2349             : 
    2350             :   // NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G
    2351           9 :   sqlite_ps g1 (db, "incremental vacuum", "pragma incremental_vacuum");
    2352           3 :   g1.reset().step_ok_done();
    2353           9 :   sqlite_ps g2 (db, "optimize", "pragma optimize");
    2354           3 :   g2.reset().step_ok_done();
    2355           6 :   sqlite_ps g3 (db, "wal checkpoint", "pragma wal_checkpoint=truncate");
    2356           3 :   g3.reset().step_ok_done();
    2357             : 
    2358           3 :   database_stats_report();
    2359             : 
    2360           3 :   sqlite3_db_release_memory(db); // shrink the process if possible
    2361             : 
    2362           3 :   gettimeofday (&tv_end, NULL);
    2363           3 :   double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001;
    2364             : 
    2365           3 :   obatched(clog) << "groomed database in " << deltas << "s" << endl;
    2366           3 : }
    2367             : 
    2368             : 
    2369             : static void*
    2370           2 : thread_main_groom (void* /*arg*/)
    2371             : {
    2372           2 :   unsigned groom_timer = 0;
    2373           2 :   sig_atomic_t forced_groom_count = 0;
    2374           2 :   set_metric("thread_timer_max", "role", "groom", groom_s);
    2375           2 :   set_metric("thread_tid", "role", "groom", tid());
    2376          10 :   while (! interrupted)
    2377             :     {
    2378           8 :       set_metric("thread_timer", "role", "groom", groom_timer);
    2379           8 :       set_metric("thread_forced_total", "role", "groom", forced_groom_count);      
    2380           8 :       if (groom_s && groom_timer > groom_s)
    2381           0 :         groom_timer = 0;
    2382           8 :       if (sigusr2 != forced_groom_count)
    2383             :         {
    2384           1 :           forced_groom_count = sigusr2;
    2385           1 :           groom_timer = 0;
    2386             :         }
    2387           8 :       if (groom_timer == 0)
    2388             :         try
    2389             :           {
    2390           3 :             set_metric("thread_working", "role", "groom", time(NULL));
    2391           3 :             inc_metric("thread_work_total", "role", "groom");
    2392           3 :             groom ();
    2393           3 :             set_metric("thread_working", "role", "groom", 0);
    2394             :           }
    2395           0 :         catch (const sqlite_exception& e)
    2396             :           {
    2397           0 :             obatched(cerr) << e.message << endl;
    2398             :           }
    2399           8 :       sleep (1);
    2400           8 :       groom_timer ++;
    2401             :     }
    2402             : 
    2403           2 :   return 0;
    2404             : }
    2405             : 
    2406             : 
    2407             : ////////////////////////////////////////////////////////////////////////
    2408             : 
    2409             : 
    2410             : static void
    2411           2 : signal_handler (int /* sig */)
    2412             : {
    2413           2 :   interrupted ++;
    2414             : 
    2415           2 :   if (db)
    2416           2 :     sqlite3_interrupt (db);
    2417             : 
    2418             :   // NB: don't do anything else in here
    2419           2 : }
    2420             : 
    2421             : static void
    2422           4 : sigusr1_handler (int /* sig */)
    2423             : {
    2424           4 :    sigusr1 ++;
    2425             :   // NB: don't do anything else in here
    2426           4 : }
    2427             : 
    2428             : static void
    2429           1 : sigusr2_handler (int /* sig */)
    2430             : {
    2431           1 :    sigusr2 ++;
    2432             :   // NB: don't do anything else in here
    2433           1 : }
    2434             : 
    2435             : 
    2436             : 
    2437             : 
    2438             : 
    2439             : // A user-defined sqlite function, to score the sharedness of the
    2440             : // prefix of two strings.  This is used to compare candidate debuginfo
    2441             : // / source-rpm names, so that the closest match
    2442             : // (directory-topology-wise closest) is found.  This is important in
    2443             : // case the same sref (source file name) is in many -debuginfo or
    2444             : // -debugsource RPMs, such as when multiple versions/releases of the
    2445             : // same package are in the database.
    2446             : 
    2447          22 : static void sqlite3_sharedprefix_fn (sqlite3_context* c, int argc, sqlite3_value** argv)
    2448             : {
    2449          22 :   if (argc != 2)
    2450           0 :     sqlite3_result_error(c, "expect 2 string arguments", -1);
    2451          44 :   else if ((sqlite3_value_type(argv[0]) != SQLITE_TEXT) ||
    2452          22 :            (sqlite3_value_type(argv[1]) != SQLITE_TEXT))
    2453           2 :     sqlite3_result_null(c);
    2454             :   else
    2455             :     {
    2456          20 :       const unsigned char* a = sqlite3_value_text (argv[0]);
    2457          20 :       const unsigned char* b = sqlite3_value_text (argv[1]);
    2458          20 :       int i = 0;
    2459        1614 :       while (*a++ == *b++)
    2460        1594 :         i++;
    2461          20 :       sqlite3_result_int (c, i);
    2462             :     }
    2463          22 : }
    2464             : 
    2465             : 
    2466             : int
    2467           2 : main (int argc, char *argv[])
    2468             : {
    2469           2 :   (void) setlocale (LC_ALL, "");
    2470           2 :   (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
    2471           2 :   (void) textdomain (PACKAGE_TARNAME);
    2472             : 
    2473             :   /* Tell the library which version we are expecting.  */
    2474           2 :   elf_version (EV_CURRENT);
    2475             : 
    2476             :   /* Set computed default values. */
    2477           2 :   db_path = string(getenv("HOME") ?: "/") + string("/.debuginfod.sqlite"); /* XDG? */
    2478           2 :   int rc = regcomp (& file_include_regex, ".*", REG_EXTENDED|REG_NOSUB); // match everything
    2479           2 :   if (rc != 0)
    2480             :     error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
    2481           2 :   rc = regcomp (& file_exclude_regex, "^$", REG_EXTENDED|REG_NOSUB); // match nothing
    2482           2 :   if (rc != 0)
    2483             :     error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
    2484             : 
    2485             :   /* Parse and process arguments.  */
    2486             :   int remaining;
    2487           2 :   argp_program_version_hook = print_version; // this works
    2488           2 :   (void) argp_parse (&argp, argc, argv, ARGP_IN_ORDER, &remaining, NULL);
    2489           2 :   if (remaining != argc)
    2490           0 :       error (EXIT_FAILURE, 0,
    2491           0 :              "unexpected argument: %s", argv[remaining]);
    2492             : 
    2493           2 :   if (!scan_rpms && !scan_files && source_paths.size()>0)
    2494           0 :     obatched(clog) << "warning: without -F and/or -R, ignoring PATHs" << endl;
    2495             : 
    2496           2 :   (void) signal (SIGPIPE, SIG_IGN); // microhttpd can generate it incidentally, ignore
    2497           2 :   (void) signal (SIGINT, signal_handler); // ^C
    2498           2 :   (void) signal (SIGHUP, signal_handler); // EOF
    2499           2 :   (void) signal (SIGTERM, signal_handler); // systemd
    2500           2 :   (void) signal (SIGUSR1, sigusr1_handler); // end-user
    2501           2 :   (void) signal (SIGUSR2, sigusr2_handler); // end-user
    2502             : 
    2503             :   // do this before any threads start
    2504           2 :   scan_concurrency_sem = new semaphore(concurrency);
    2505             : 
    2506             :   /* Get database ready. */
    2507           2 :   rc = sqlite3_open_v2 (db_path.c_str(), &db, (SQLITE_OPEN_READWRITE
    2508             :                                                |SQLITE_OPEN_CREATE
    2509             :                                                |SQLITE_OPEN_FULLMUTEX), /* thread-safe */
    2510             :                         NULL);
    2511           2 :   if (rc == SQLITE_CORRUPT)
    2512             :     {
    2513           0 :       (void) unlink (db_path.c_str());
    2514           0 :       error (EXIT_FAILURE, 0,
    2515             :              "cannot open %s, deleted database: %s", db_path.c_str(), sqlite3_errmsg(db));
    2516             :     }
    2517           2 :   else if (rc)
    2518             :     {
    2519           0 :       error (EXIT_FAILURE, 0,
    2520             :              "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(db));
    2521             :     }
    2522             : 
    2523           2 :   obatched(clog) << "opened database " << db_path << endl;
    2524           2 :   obatched(clog) << "sqlite version " << sqlite3_version << endl;
    2525             : 
    2526             :   // add special string-prefix-similarity function used in rpm sref/sdef resolution
    2527           2 :   rc = sqlite3_create_function(db, "sharedprefix", 2, SQLITE_UTF8, NULL,
    2528             :                                & sqlite3_sharedprefix_fn, NULL, NULL);
    2529           2 :   if (rc != SQLITE_OK)
    2530           0 :     error (EXIT_FAILURE, 0,
    2531             :            "cannot create sharedprefix( function: %s", sqlite3_errmsg(db));
    2532             : 
    2533           2 :   if (verbose > 3)
    2534           0 :     obatched(clog) << "ddl: " << DEBUGINFOD_SQLITE_DDL << endl;
    2535           2 :   rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_DDL, NULL, NULL, NULL);
    2536           2 :   if (rc != SQLITE_OK)
    2537             :     {
    2538           0 :       error (EXIT_FAILURE, 0,
    2539             :              "cannot run database schema ddl: %s", sqlite3_errmsg(db));
    2540             :     }
    2541             : 
    2542             :   // Start httpd server threads.  Separate pool for IPv4 and IPv6, in
    2543             :   // case the host only has one protocol stack.
    2544           2 :   MHD_Daemon *d4 = MHD_start_daemon (MHD_USE_THREAD_PER_CONNECTION
    2545             : #if MHD_VERSION >= 0x00095300
    2546             :                                      | MHD_USE_INTERNAL_POLLING_THREAD
    2547             : #else
    2548             :                                      | MHD_USE_SELECT_INTERNALLY
    2549             : #endif
    2550             :                                      | MHD_USE_DEBUG, /* report errors to stderr */
    2551             :                                      http_port,
    2552             :                                      NULL, NULL, /* default accept policy */
    2553             :                                      handler_cb, NULL, /* handler callback */
    2554             :                                      MHD_OPTION_END);
    2555           2 :   MHD_Daemon *d6 = MHD_start_daemon (MHD_USE_THREAD_PER_CONNECTION
    2556             : #if MHD_VERSION >= 0x00095300
    2557             :                                      | MHD_USE_INTERNAL_POLLING_THREAD
    2558             : #else
    2559             :                                      | MHD_USE_SELECT_INTERNALLY
    2560             : #endif
    2561             :                                      | MHD_USE_IPv6
    2562             :                                      | MHD_USE_DEBUG, /* report errors to stderr */
    2563             :                                      http_port,
    2564             :                                      NULL, NULL, /* default accept policy */
    2565             :                                      handler_cb, NULL, /* handler callback */
    2566             :                                      MHD_OPTION_END);
    2567             : 
    2568           2 :   if (d4 == NULL && d6 == NULL) // neither ipv4 nor ipv6? boo
    2569             :     {
    2570           0 :       sqlite3 *database = db;
    2571           0 :       db = 0; // for signal_handler not to freak
    2572           0 :       sqlite3_close (database);
    2573           0 :       error (EXIT_FAILURE, 0, "cannot start http server at port %d", http_port);
    2574             :     }
    2575             : 
    2576           4 :   obatched(clog) << "started http server on "
    2577             :                  << (d4 != NULL ? "IPv4 " : "")
    2578             :                  << (d6 != NULL ? "IPv6 " : "")
    2579           2 :                  << "port=" << http_port << endl;
    2580             : 
    2581             :   // add maxigroom sql if -G given
    2582           2 :   if (maxigroom)
    2583             :     {
    2584           0 :       obatched(clog) << "maxigrooming database, please wait." << endl;
    2585           0 :       extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);");
    2586           0 :       extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);");
    2587           0 :       extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;");
    2588             : 
    2589             :       // NB: we don't maxigroom the _files interning table.  It'd require a temp index on all the
    2590             :       // tables that have file foreign-keys, which is a lot.
    2591             : 
    2592             :       // NB: with =delete, may take up 3x disk space total during vacuum process
    2593             :       //     vs.  =off (only 2x but may corrupt database if program dies mid-vacuum)
    2594             :       //     vs.  =wal (>3x observed, but safe)
    2595           0 :       extra_ddl.push_back("pragma journal_mode=delete;");
    2596           0 :       extra_ddl.push_back("vacuum;");
    2597           0 :       extra_ddl.push_back("pragma journal_mode=wal;");
    2598             :     }
    2599             : 
    2600             :   // run extra -D sql if given
    2601           2 :   for (auto&& i: extra_ddl)
    2602             :     {
    2603           0 :       if (verbose > 1)
    2604           0 :         obatched(clog) << "extra ddl:\n" << i << endl;
    2605           0 :       rc = sqlite3_exec (db, i.c_str(), NULL, NULL, NULL);
    2606           0 :       if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
    2607           0 :         error (0, 0,
    2608             :                "warning: cannot run database extra ddl %s: %s", i.c_str(), sqlite3_errmsg(db));
    2609             :     }
    2610             : 
    2611           2 :   if (maxigroom)
    2612           0 :     obatched(clog) << "maxigroomed database" << endl;
    2613             : 
    2614             : 
    2615           2 :   obatched(clog) << "search concurrency " << concurrency << endl;
    2616           2 :   obatched(clog) << "rescan time " << rescan_s << endl;
    2617           2 :   obatched(clog) << "groom time " << groom_s << endl;
    2618           2 :   const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR);
    2619           2 :   if (du && du[0] != '\0') // set to non-empty string?
    2620           1 :     obatched(clog) << "upstream debuginfod servers: " << du << endl;
    2621             : 
    2622           4 :   vector<pthread_t> source_file_scanner_threads;
    2623           2 :   vector<pthread_t> source_rpm_scanner_threads;
    2624             :   pthread_t groom_thread;
    2625             : 
    2626           2 :   rc = pthread_create (& groom_thread, NULL, thread_main_groom, NULL);
    2627           2 :   if (rc < 0)
    2628             :     error (0, 0, "warning: cannot spawn thread (%d) to groom database\n", rc);
    2629             :  
    2630           6 :   if (scan_files) for (auto&& it : source_paths)
    2631             :     {
    2632             :       pthread_t pt;
    2633           4 :       rc = pthread_create (& pt, NULL, thread_main_scan_source_file_path, (void*) it.c_str());
    2634           4 :       if (rc < 0)
    2635           0 :         error (0, 0, "warning: cannot spawn thread (%d) to scan source files %s\n", rc, it.c_str());
    2636             :       else
    2637           4 :         source_file_scanner_threads.push_back(pt);
    2638             :     }
    2639             : 
    2640           5 :   if (scan_rpms) for (auto&& it : source_paths)
    2641             :     {
    2642             :       pthread_t pt;
    2643           3 :       rc = pthread_create (& pt, NULL, thread_main_scan_source_rpm_path, (void*) it.c_str());
    2644           3 :       if (rc < 0)
    2645           0 :         error (0, 0, "warning: cannot spawn thread (%d) to scan source rpms %s\n", rc, it.c_str());
    2646             :       else
    2647           3 :         source_rpm_scanner_threads.push_back(pt);
    2648             :     }
    2649             : 
    2650             :   /* Trivial main loop! */
    2651           2 :   set_metric("ready", 1);
    2652           9 :   while (! interrupted)
    2653           7 :     pause ();
    2654           2 :   set_metric("ready", 0);
    2655             : 
    2656           2 :   if (verbose)
    2657           0 :     obatched(clog) << "stopping" << endl;
    2658             : 
    2659             :   /* Join any source scanning threads. */
    2660           6 :   for (auto&& it : source_file_scanner_threads)
    2661           4 :     pthread_join (it, NULL);
    2662           5 :   for (auto&& it : source_rpm_scanner_threads)
    2663           3 :     pthread_join (it, NULL);
    2664           2 :   pthread_join (groom_thread, NULL);
    2665             :   
    2666             :   /* Stop all the web service threads. */
    2667           2 :   if (d4) MHD_stop_daemon (d4);
    2668           2 :   if (d6) MHD_stop_daemon (d6);
    2669             : 
    2670             :   /* With all threads known dead, we can clean up the global resources. */
    2671           2 :   delete scan_concurrency_sem;
    2672           2 :   rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_CLEANUP_DDL, NULL, NULL, NULL);
    2673           2 :   if (rc != SQLITE_OK)
    2674             :     {
    2675           0 :       error (0, 0,
    2676             :              "warning: cannot run database cleanup ddl: %s", sqlite3_errmsg(db));
    2677             :     }
    2678             : 
    2679             :   // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
    2680           2 :   (void) regfree (& file_include_regex);
    2681           2 :   (void) regfree (& file_exclude_regex);
    2682             : 
    2683           2 :   sqlite3 *database = db;
    2684           2 :   db = 0; // for signal_handler not to freak
    2685           2 :   (void) sqlite3_close (database);
    2686             : 
    2687           2 :   return 0;
    2688             : }

Generated by: LCOV version 1.13