Line data Source code
1 : /* Debuginfo-over-http server.
2 : Copyright (C) 2019-2020 Red Hat, Inc.
3 : This file is part of elfutils.
4 :
5 : This file is free software; you can redistribute it and/or modify
6 : it under the terms of the GNU General Public License as published by
7 : the Free Software Foundation; either version 3 of the License, or
8 : (at your option) any later version.
9 :
10 : elfutils is distributed in the hope that it will be useful, but
11 : WITHOUT ANY WARRANTY; without even the implied warranty of
12 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 : GNU General Public License for more details.
14 :
15 : You should have received a copy of the GNU General Public License
16 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 :
18 :
19 : /* cargo-cult from libdwfl linux-kernel-modules.c */
20 : /* In case we have a bad fts we include this before config.h because it
21 : can't handle _FILE_OFFSET_BITS.
22 : Everything we need here is fine if its declarations just come first.
23 : Also, include sys/types.h before fts. On some systems fts.h is not self
24 : contained. */
25 : #ifdef BAD_FTS
26 : #include <sys/types.h>
27 : #include <fts.h>
28 : #endif
29 :
30 : #ifdef HAVE_CONFIG_H
31 : #include "config.h"
32 : #endif
33 :
34 : extern "C" {
35 : #include "printversion.h"
36 : }
37 :
38 : #include "debuginfod.h"
39 : #include <dwarf.h>
40 :
41 : #include <argp.h>
42 : #ifdef __GNUC__
43 : #undef __attribute__ /* glibc bug - rhbz 1763325 */
44 : #endif
45 :
46 : #include <unistd.h>
47 : #include <stdlib.h>
48 : #include <error.h>
49 : // #include <libintl.h> // not until it supports C++ << better
50 : #include <locale.h>
51 : #include <pthread.h>
52 : #include <signal.h>
53 : #include <sys/stat.h>
54 : #include <sys/time.h>
55 : #include <sys/vfs.h>
56 : #include <unistd.h>
57 : #include <fcntl.h>
58 : #include <netdb.h>
59 :
60 :
61 : /* If fts.h is included before config.h, its indirect inclusions may not
62 : give us the right LFS aliases of these functions, so map them manually. */
63 : #ifdef BAD_FTS
64 : #ifdef _FILE_OFFSET_BITS
65 : #define open open64
66 : #define fopen fopen64
67 : #endif
68 : #else
69 : #include <sys/types.h>
70 : #include <fts.h>
71 : #endif
72 :
73 : #include <cstring>
74 : #include <vector>
75 : #include <set>
76 : #include <map>
77 : #include <string>
78 : #include <iostream>
79 : #include <iomanip>
80 : #include <ostream>
81 : #include <sstream>
82 : #include <mutex>
83 : #include <deque>
84 : #include <condition_variable>
85 : #include <thread>
86 : // #include <regex> // on rhel7 gcc 4.8, not competent
87 : #include <regex.h>
88 : // #include <algorithm>
89 : using namespace std;
90 :
91 : #include <gelf.h>
92 : #include <libdwelf.h>
93 :
94 : #include <microhttpd.h>
95 :
96 : #if MHD_VERSION >= 0x00097002
97 : // libmicrohttpd 0.9.71 broke API
98 : #define MHD_RESULT enum MHD_Result
99 : #else
100 : #define MHD_RESULT int
101 : #endif
102 :
103 : #include <curl/curl.h>
104 : #include <archive.h>
105 : #include <archive_entry.h>
106 : #include <sqlite3.h>
107 :
108 : #ifdef __linux__
109 : #include <sys/syscall.h>
110 : #endif
111 :
112 : #ifdef __linux__
113 : #define tid() syscall(SYS_gettid)
114 : #else
115 : #define tid() pthread_self()
116 : #endif
117 :
118 :
// Report whether HAYSTACK ends with the exact character sequence NEEDLE.
// An empty NEEDLE is a suffix of every string, including the empty one.
inline bool
string_endswith(const string& haystack, const string& needle)
{
  const string::size_type hay_len = haystack.size();
  const string::size_type tail_len = needle.size();

  // A needle longer than the haystack can never be its suffix.
  if (hay_len < tail_len)
    return false;

  return haystack.compare(hay_len - tail_len, tail_len, needle) == 0;
}
126 :
127 :
128 : // Roll this identifier for every sqlite schema incompatibility.
// It is pasted in as the prefix of every table and view name in the DDL below.
129 : #define BUILDIDS "buildids9"
130 :
// Table-definition suffix: "without rowid" is only emitted when the sqlite
// headers we compile against report version 3.8.0 or later; otherwise empty.
131 : #if SQLITE_VERSION_NUMBER >= 3008000
132 : #define WITHOUT_ROWID "without rowid"
133 : #else
134 : #define WITHOUT_ROWID ""
135 : #endif
136 :
// The sqlite script for the index database, as one string literal.  In order:
//   1. connection pragmas,
//   2. the current-schema (BUILDIDS-prefixed) tables,
//   3. the _query_d / _query_e / _query_s views and the _stats view,
//   4. "drop ... if exists" statements for every older schema generation.
// Every create statement uses "if not exists".
137 : static const char DEBUGINFOD_SQLITE_DDL[] =
138 : "pragma foreign_keys = on;\n"
139 : "pragma synchronous = 0;\n" // disable fsync()s - this cache is disposable across a machine crash
140 : "pragma journal_mode = wal;\n" // https://sqlite.org/wal.html
141 : "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
142 : "pragma journal_size_limit = 0;\n" // limit steady state file (between grooming, which also =truncate's)
143 : "pragma auto_vacuum = incremental;\n" // https://sqlite.org/pragma.html
144 : "pragma busy_timeout = 1000;\n" // https://sqlite.org/pragma.html
145 : // NB: all these are overridable with -D option
146 :
147 : // Normalization table for interning file names
148 : "create table if not exists " BUILDIDS "_files (\n"
149 : " id integer primary key not null,\n"
150 : " name text unique not null\n"
151 : " );\n"
152 : // Normalization table for interning buildids
153 : "create table if not exists " BUILDIDS "_buildids (\n"
154 : " id integer primary key not null,\n"
155 : " hex text unique not null);\n"
156 : // Track the completion of scanning of a given file & sourcetype at given time
157 : "create table if not exists " BUILDIDS "_file_mtime_scanned (\n"
158 : " mtime integer not null,\n"
159 : " file integer not null,\n"
160 : " size integer not null,\n" // in bytes
161 : " sourcetype text(1) not null\n"
162 : " check (sourcetype IN ('F', 'R')),\n"
163 : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
164 : " primary key (file, mtime, sourcetype)\n"
165 : " ) " WITHOUT_ROWID ";\n"
// _f_de: one row per (buildid, file, mtime) with debuginfo_p / executable_p flags
166 : "create table if not exists " BUILDIDS "_f_de (\n"
167 : " buildid integer not null,\n"
168 : " debuginfo_p integer not null,\n"
169 : " executable_p integer not null,\n"
170 : " file integer not null,\n"
171 : " mtime integer not null,\n"
172 : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
173 : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
174 : " primary key (buildid, file, mtime)\n"
175 : " ) " WITHOUT_ROWID ";\n"
// _f_s: one row per (buildid, artifactsrc, file, mtime); artifactsrc and file both intern into _files
176 : "create table if not exists " BUILDIDS "_f_s (\n"
177 : " buildid integer not null,\n"
178 : " artifactsrc integer not null,\n"
179 : " file integer not null,\n" // NB: not necessarily entered into _mtime_scanned
180 : " mtime integer not null,\n"
181 : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
182 : " foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
183 : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
184 : " primary key (buildid, artifactsrc, file, mtime)\n"
185 : " ) " WITHOUT_ROWID ";\n"
// _r_de: like _f_de, plus a "content" name (also interned in _files)
186 : "create table if not exists " BUILDIDS "_r_de (\n"
187 : " buildid integer not null,\n"
188 : " debuginfo_p integer not null,\n"
189 : " executable_p integer not null,\n"
190 : " file integer not null,\n"
191 : " mtime integer not null,\n"
192 : " content integer not null,\n"
193 : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
194 : " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
195 : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
196 : " primary key (buildid, debuginfo_p, executable_p, file, content, mtime)\n"
197 : " ) " WITHOUT_ROWID ";\n"
198 : "create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm
199 : " buildid integer not null,\n"
200 : " artifactsrc integer not null,\n"
201 : " foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
202 : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
203 : " primary key (buildid, artifactsrc)\n"
204 : " ) " WITHOUT_ROWID ";\n"
205 : "create table if not exists " BUILDIDS "_r_sdef (\n" // rpm contents that may satisfy sref
206 : " file integer not null,\n"
207 : " mtime integer not null,\n"
208 : " content integer not null,\n"
209 : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
210 : " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
211 : " primary key (content, file, mtime)\n"
212 : " ) " WITHOUT_ROWID ";\n"
213 : // create views to glue together some of the above tables, for webapi D queries
// NOTE(review): the _query_d and _query_e select lists name n.mtime twice
// (once bare, once aliased "as mtime"); consumers may depend on the column
// positions, so confirm against the readers before changing.
214 : "create view if not exists " BUILDIDS "_query_d as \n"
215 : "select\n"
216 : " b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
217 : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n"
218 : " where b.id = n.buildid and f0.id = n.file and n.debuginfo_p = 1\n"
219 : "union all select\n"
220 : " b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
221 : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n"
222 : " where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.debuginfo_p = 1\n"
223 : ";"
224 : // ... and for E queries
225 : "create view if not exists " BUILDIDS "_query_e as \n"
226 : "select\n"
227 : " b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
228 : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n"
229 : " where b.id = n.buildid and f0.id = n.file and n.executable_p = 1\n"
230 : "union all select\n"
231 : " b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
232 : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n"
233 : " where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.executable_p = 1\n"
234 : ";"
235 : // ... and for S queries
236 : "create view if not exists " BUILDIDS "_query_s as \n"
237 : "select\n"
238 : " b.hex as buildid, fs.name as artifactsrc, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1, null as source0ref\n"
239 : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files fs, " BUILDIDS "_f_s n\n"
240 : " where b.id = n.buildid and f0.id = n.file and fs.id = n.artifactsrc\n"
241 : "union all select\n"
242 : " b.hex as buildid, f1.name as artifactsrc, 'R' as sourcetype, f0.name as source0, sd.mtime as mtime, f1.name as source1, fsref.name as source0ref\n"
243 : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_files fsref, "
244 : " " BUILDIDS "_r_sdef sd, " BUILDIDS "_r_sref sr, " BUILDIDS "_r_de sde\n"
245 : " where b.id = sr.buildid and f0.id = sd.file and fsref.id = sde.file and f1.id = sd.content\n"
246 : " and sr.artifactsrc = sd.content and sde.buildid = sr.buildid\n"
247 : ";"
248 : // and for startup overview counts
// The _stats view is dropped and recreated on every run, unlike the other views.
249 : "drop view if exists " BUILDIDS "_stats;\n"
250 : "create view if not exists " BUILDIDS "_stats as\n"
251 : " select 'file d/e' as label,count(*) as quantity from " BUILDIDS "_f_de\n"
252 : "union all select 'file s',count(*) from " BUILDIDS "_f_s\n"
253 : "union all select 'archive d/e',count(*) from " BUILDIDS "_r_de\n"
254 : "union all select 'archive sref',count(*) from " BUILDIDS "_r_sref\n"
255 : "union all select 'archive sdef',count(*) from " BUILDIDS "_r_sdef\n"
256 : "union all select 'buildids',count(*) from " BUILDIDS "_buildids\n"
257 : "union all select 'filenames',count(*) from " BUILDIDS "_files\n"
258 : "union all select 'files scanned (#)',count(*) from " BUILDIDS "_file_mtime_scanned\n"
259 : "union all select 'files scanned (mb)',coalesce(sum(size)/1024/1024,0) from " BUILDIDS "_file_mtime_scanned\n"
// The database-size row is only compiled in for sqlite headers >= 3.16.0.
260 : #if SQLITE_VERSION_NUMBER >= 3016000
261 : "union all select 'index db size (mb)',page_count*page_size/1024/1024 as size FROM pragma_page_count(), pragma_page_size()\n"
262 : #endif
263 : ";\n"
264 :
265 : // schema change history & garbage collection
266 : //
267 : // XXX: we could have migration queries here to bring prior-schema
268 : // data over instead of just dropping it.
269 : //
270 : // buildids9: widen the mtime_scanned table
271 : "" // <<< we are here
272 : // buildids8: slim the sref table
273 : "drop table if exists buildids8_f_de;\n"
274 : "drop table if exists buildids8_f_s;\n"
275 : "drop table if exists buildids8_r_de;\n"
276 : "drop table if exists buildids8_r_sref;\n"
277 : "drop table if exists buildids8_r_sdef;\n"
278 : "drop table if exists buildids8_file_mtime_scanned;\n"
279 : "drop table if exists buildids8_files;\n"
280 : "drop table if exists buildids8_buildids;\n"
281 : // buildids7: separate _norm table into dense subtype tables
282 : "drop table if exists buildids7_f_de;\n"
283 : "drop table if exists buildids7_f_s;\n"
284 : "drop table if exists buildids7_r_de;\n"
285 : "drop table if exists buildids7_r_sref;\n"
286 : "drop table if exists buildids7_r_sdef;\n"
287 : "drop table if exists buildids7_file_mtime_scanned;\n"
288 : "drop table if exists buildids7_files;\n"
289 : "drop table if exists buildids7_buildids;\n"
290 : // buildids6: drop bolo/rfolo again, represent sources / rpmcontents in main table
291 : "drop table if exists buildids6_norm;\n"
292 : "drop table if exists buildids6_files;\n"
293 : "drop table if exists buildids6_buildids;\n"
294 : "drop view if exists buildids6;\n"
295 : // buildids5: redefine srcfile1 column to be '.'-less (for rpms)
296 : "drop table if exists buildids5_norm;\n"
297 : "drop table if exists buildids5_files;\n"
298 : "drop table if exists buildids5_buildids;\n"
299 : "drop table if exists buildids5_bolo;\n"
300 : "drop table if exists buildids5_rfolo;\n"
301 : "drop view if exists buildids5;\n"
302 : // buildids4: introduce rpmfile RFOLO
303 : "drop table if exists buildids4_norm;\n"
304 : "drop table if exists buildids4_files;\n"
305 : "drop table if exists buildids4_buildids;\n"
306 : "drop table if exists buildids4_bolo;\n"
307 : "drop table if exists buildids4_rfolo;\n"
308 : "drop view if exists buildids4;\n"
309 : // buildids3*: split out srcfile BOLO
310 : "drop table if exists buildids3_norm;\n"
311 : "drop table if exists buildids3_files;\n"
312 : "drop table if exists buildids3_buildids;\n"
313 : "drop table if exists buildids3_bolo;\n"
314 : "drop view if exists buildids3;\n"
315 : // buildids2: normalized buildid and filenames into interning tables;
316 : "drop table if exists buildids2_norm;\n"
317 : "drop table if exists buildids2_files;\n"
318 : "drop table if exists buildids2_buildids;\n"
319 : "drop view if exists buildids2;\n"
320 : // buildids1: made buildid and artifacttype NULLable, to represent cached-negative
321 : // lookups from sources, e.g. files or rpms that contain no buildid-indexable content
322 : "drop table if exists buildids1;\n"
323 : // buildids: original
324 : "drop table if exists buildids;\n"
325 : ;
326 :
// A second, much shorter sqlite script: a single WAL checkpoint/truncate pragma.
// NOTE(review): presumably run at shutdown; the call site is not in this chunk.
327 : static const char DEBUGINFOD_SQLITE_CLEANUP_DDL[] =
328 : "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
329 : ;
330 :
331 :
332 :
333 :
334 : /* Name and version of program. */
335 : /* ARGP_PROGRAM_VERSION_HOOK_DEF = print_version; */ // not this simple for C++
336 :
337 : /* Bug report address. */
338 : ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
339 :
340 : /* Definitions of arguments for argp functions. */
// Each entry is { long name, key, argument name, flags, help text, group }.
// Entries with a NULL name and a help string ("Scanners:", "Options:") are
// group headings; the all-NULL entry terminates the table.  The key of each
// option is what parse_opt() switches on.
341 : static const struct argp_option options[] =
342 : {
343 : { NULL, 0, NULL, 0, "Scanners:", 1 },
344 : { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning.", 0 },
345 : { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning.", 0 },
346 : { "scan-deb-dir", 'U', NULL, 0, "Enable DEB scanning.", 0 },
347 : { "scan-archive", 'Z', "EXT=CMD", 0, "Enable arbitrary archive scanning.", 0 },
348 : // "source-oci-imageregistry" ...
349 :
350 : { NULL, 0, NULL, 0, "Options:", 2 },
351 : { "logical", 'L', NULL, 0, "Follow symlinks, default=ignore.", 0 },
352 : { "rescan-time", 't', "SECONDS", 0, "Number of seconds to wait between rescans, 0=disable.", 0 },
353 : { "groom-time", 'g', "SECONDS", 0, "Number of seconds to wait between database grooming, 0=disable.", 0 },
354 : { "maxigroom", 'G', NULL, 0, "Run a complete database groom/shrink pass at startup.", 0 },
355 : { "concurrency", 'c', "NUM", 0, "Limit scanning thread concurrency to NUM.", 0 },
356 : { "include", 'I', "REGEX", 0, "Include files matching REGEX, default=all.", 0 },
357 : { "exclude", 'X', "REGEX", 0, "Exclude files matching REGEX, default=none.", 0 },
358 : { "port", 'p', "NUM", 0, "HTTP port to listen on, default 8002.", 0 },
359 : { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
360 : { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
361 : { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
// The fdcache options use numeric keys (0x1001..0x1003) instead of a
// character, so they are reachable by long name only.
362 : #define ARGP_KEY_FDCACHE_FDS 0x1001
363 : { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 },
364 : #define ARGP_KEY_FDCACHE_MBS 0x1002
365 : { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
366 : #define ARGP_KEY_FDCACHE_PREFETCH 0x1003
367 : { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
368 : { NULL, 0, NULL, 0, NULL, 0 }
369 : };
370 :
371 : /* Short description of program. */
372 : static const char doc[] = "Serve debuginfo-related content across HTTP from files under PATHs.";
373 :
374 : /* Strings for arguments in help texts. */
375 : static const char args_doc[] = "[PATH ...]";
376 :
377 : /* Prototype for option handler. */
378 : static error_t parse_opt (int key, char *arg, struct argp_state *state);
379 :
380 : /* Data structure to communicate with argp functions. */
// Ties together the option table, the handler, and the two help strings;
// the remaining three fields are left NULL.
381 : static struct argp argp =
382 : {
383 : options, parse_opt, args_doc, doc, NULL, NULL, NULL
384 : };
385 :
386 :
// File-scope settings.  Most are filled in by parse_opt(); the option letter
// that writes each one is noted where the handler shows it.
// -d FILE
387 : static string db_path;
388 : static sqlite3 *db; // single connection, serialized across all our threads!
// incremented once per -v
389 : static unsigned verbose;
// NOTE(review): the sig_atomic_t flags/counters below are presumably updated
// from signal handlers; the writers are not visible in this chunk.
390 : static volatile sig_atomic_t interrupted = 0;
391 : static volatile sig_atomic_t forced_rescan_count = 0;
392 : static volatile sig_atomic_t sigusr1 = 0;
393 : static volatile sig_atomic_t forced_groom_count = 0;
394 : static volatile sig_atomic_t sigusr2 = 0;
// -p NUM
395 : static unsigned http_port = 8002;
// -t SECONDS
396 : static unsigned rescan_s = 300;
// -g SECONDS
397 : static unsigned groom_s = 86400;
// -G
398 : static bool maxigroom = false;
// -c NUM; defaults to the hardware thread count, or 1 if that reports 0
399 : static unsigned concurrency = std::thread::hardware_concurrency() ?: 1;
// positional PATH arguments
400 : static set<string> source_paths;
// -F
401 : static bool scan_files = false;
// -R / -U / -Z: maps a file-name extension (e.g. ".rpm") to a command string
402 : static map<string,string> scan_archives;
// one entry per -D SQL
403 : static vector<string> extra_ddl;
// -I REGEX and -X REGEX, compiled with REG_EXTENDED|REG_NOSUB
404 : static regex_t file_include_regex;
405 : static regex_t file_exclude_regex;
// -L
406 : static bool traverse_logical;
// --fdcache-fds, --fdcache-mbs, --fdcache-prefetch
407 : static long fdcache_fds;
408 : static long fdcache_mbs;
409 : static long fdcache_prefetch;
410 : static string tmpdir;
411 :
// Forward declarations of the metric helpers (defined outside this chunk),
// needed by the exception classes and the work queue below.
412 : static void set_metric(const string& key, int64_t value);
413 : // static void inc_metric(const string& key);
414 : static void set_metric(const string& metric,
415 : const string& lname, const string& lvalue,
416 : int64_t value);
417 : static void inc_metric(const string& metric,
418 : const string& lname, const string& lvalue);
419 : static void add_metric(const string& metric,
420 : const string& lname, const string& lvalue,
421 : int64_t value);
422 : // static void add_metric(const string& metric, int64_t value);
423 :
424 : /* Handle program arguments. */
425 : static error_t
426 41 : parse_opt (int key, char *arg,
427 : struct argp_state *state __attribute__ ((unused)))
428 : {
429 : int rc;
430 41 : switch (key)
431 : {
432 1 : case 'v': verbose ++; break;
433 3 : case 'd': db_path = string(arg); break;
434 3 : case 'p': http_port = (unsigned) atoi(arg);
435 3 : if (http_port == 0 || http_port > 65535)
436 0 : argp_failure(state, 1, EINVAL, "port number");
437 3 : break;
438 4 : case 'F': scan_files = true; break;
439 1 : case 'R':
440 1 : scan_archives[".rpm"]="cat"; // libarchive groks rpm natively
441 1 : break;
442 2 : case 'U':
443 2 : if (access("/usr/bin/dpkg-deb", X_OK) == 0)
444 : {
445 2 : scan_archives[".deb"]="dpkg-deb --fsys-tarfile";
446 2 : scan_archives[".ddeb"]="dpkg-deb --fsys-tarfile";
447 : }
448 : else
449 : {
450 0 : scan_archives[".deb"]="(bsdtar -O -x -f - data.tar.xz)<";
451 0 : scan_archives[".ddeb"]="(bsdtar -O -x -f - data.tar.xz)<";
452 : }
453 : // .udeb too?
454 2 : break;
455 2 : case 'Z':
456 : {
457 2 : char* extension = strchr(arg, '=');
458 2 : if (arg[0] == '\0')
459 0 : argp_failure(state, 1, EINVAL, "missing EXT");
460 2 : else if (extension)
461 1 : scan_archives[string(arg, (extension-arg))]=string(extension+1);
462 : else
463 1 : scan_archives[string(arg)]=string("cat");
464 : }
465 2 : break;
466 2 : case 'L':
467 2 : traverse_logical = true;
468 2 : break;
469 0 : case 'D': extra_ddl.push_back(string(arg)); break;
470 1 : case 't':
471 1 : rescan_s = (unsigned) atoi(arg);
472 1 : break;
473 1 : case 'g':
474 1 : groom_s = (unsigned) atoi(arg);
475 1 : break;
476 0 : case 'G':
477 0 : maxigroom = true;
478 0 : break;
479 0 : case 'c':
480 0 : concurrency = (unsigned) atoi(arg);
481 0 : if (concurrency < 1) concurrency = 1;
482 0 : break;
483 0 : case 'I':
484 : // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
485 0 : regfree (&file_include_regex);
486 0 : rc = regcomp (&file_include_regex, arg, REG_EXTENDED|REG_NOSUB);
487 0 : if (rc != 0)
488 0 : argp_failure(state, 1, EINVAL, "regular expession");
489 0 : break;
490 0 : case 'X':
491 0 : regfree (&file_exclude_regex);
492 0 : rc = regcomp (&file_exclude_regex, arg, REG_EXTENDED|REG_NOSUB);
493 0 : if (rc != 0)
494 0 : argp_failure(state, 1, EINVAL, "regular expession");
495 0 : break;
496 1 : case ARGP_KEY_FDCACHE_FDS:
497 1 : fdcache_fds = atol (arg);
498 1 : break;
499 1 : case ARGP_KEY_FDCACHE_MBS:
500 1 : fdcache_mbs = atol (arg);
501 1 : break;
502 0 : case ARGP_KEY_FDCACHE_PREFETCH:
503 0 : fdcache_prefetch = atol (arg);
504 0 : break;
505 6 : case ARGP_KEY_ARG:
506 6 : source_paths.insert(string(arg));
507 6 : break;
508 : // case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
509 13 : default: return ARGP_ERR_UNKNOWN;
510 : }
511 :
512 28 : return 0;
513 : }
514 :
515 :
516 : ////////////////////////////////////////////////////////////////////////
517 :
518 :
519 : // represent errors that may get reported to an ostream and/or a libmicrohttpd connection
520 :
521 : struct reportable_exception
522 : {
523 : int code;
524 : string message;
525 :
526 7 : reportable_exception(int c, const string& m): code(c), message(m) {}
527 30 : reportable_exception(const string& m): code(503), message(m) {}
528 : reportable_exception(): code(503), message() {}
529 :
530 : void report(ostream& o) const; // defined under obatched() class below
531 :
532 9 : MHD_RESULT mhd_send_response(MHD_Connection* c) const {
533 9 : MHD_Response* r = MHD_create_response_from_buffer (message.size(),
534 9 : (void*) message.c_str(),
535 : MHD_RESPMEM_MUST_COPY);
536 9 : MHD_add_response_header (r, "Content-Type", "text/plain");
537 9 : MHD_RESULT rc = MHD_queue_response (c, code, r);
538 9 : MHD_destroy_response (r);
539 9 : return rc;
540 : }
541 : };
542 :
543 :
544 : struct sqlite_exception: public reportable_exception
545 : {
546 0 : sqlite_exception(int rc, const string& msg):
547 0 : reportable_exception(string("sqlite3 error: ") + msg + ": " + string(sqlite3_errstr(rc) ?: "?")) {}
548 : };
549 :
550 : struct libc_exception: public reportable_exception
551 : {
552 28 : libc_exception(int rc, const string& msg):
553 28 : reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
554 28 : inc_metric("error_count","libc",strerror(rc));
555 28 : }
556 : };
557 :
558 :
559 : struct archive_exception: public reportable_exception
560 : {
561 0 : archive_exception(const string& msg):
562 0 : reportable_exception(string("libarchive error: ") + msg) {
563 0 : inc_metric("error_count","libarchive",msg);
564 0 : }
565 0 : archive_exception(struct archive* a, const string& msg):
566 0 : reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
567 0 : inc_metric("error_count","libarchive",msg);
568 0 : }
569 : };
570 :
571 :
572 : struct elfutils_exception: public reportable_exception
573 : {
574 0 : elfutils_exception(int rc, const string& msg):
575 0 : reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
576 0 : inc_metric("error_count","elfutils",elf_errmsg(rc));
577 0 : }
578 : };
579 :
580 :
581 : ////////////////////////////////////////////////////////////////////////
582 :
// A de-duplicating work queue guarded by one mutex and one condition
// variable.  Scanner threads take items with wait_front(); idler threads
// bracket their work with wait_idle() / done_idle(), and scanners do not
// take items while any idler is registered.
template <typename Payload>
class workq
{
  set<Payload> q; // eliminate duplicates
  mutex mtx;
  condition_variable cv;
  bool dead;        // set by nuke(); releases every waiter
  unsigned idlers;  // number of threads between wait_idle() and done_idle()

public:
  workq(): dead(false), idlers(0) {}
  ~workq() {}

  // add an item (no-op if an equal one is already queued) and wake waiters
  void push_back(const Payload& p)
  {
    lock_guard<mutex> guard(mtx);
    q.insert (p);
    set_metric("thread_work_pending","role","scan", q.size());
    cv.notify_all();
  }

  // kill this workqueue, wake up all idlers / scanners
  void nuke()
  {
    lock_guard<mutex> guard(mtx);
    // optional: q.clear();
    dead = true;
    cv.notify_all();
  }

  // clear the workqueue, when scanning is interrupted with USR2
  void clear()
  {
    lock_guard<mutex> guard(mtx);
    q.clear();
    set_metric("thread_work_pending","role","scan", q.size());
    cv.notify_all(); // maybe wake up waiting idlers
  }

  // block this scanner thread until there is work to do and no active
  // idler; returns false (leaving p untouched) once the queue is dead
  bool wait_front (Payload& p)
  {
    unique_lock<mutex> lock(mtx);
    cv.wait(lock, [this] { return dead || (!q.empty() && idlers == 0); });
    if (dead)
      return false;

    typename set<Payload>::iterator first = q.begin();
    p = *first;
    q.erase (first);
    set_metric("thread_work_pending","role","scan", q.size());
    if (q.empty())
      cv.notify_all(); // maybe wake up waiting idlers
    return true;
  }

  // block this idler thread until there is no work to do
  void wait_idle ()
  {
    unique_lock<mutex> lock(mtx);
    cv.notify_all(); // maybe wake up waiting scanners
    cv.wait(lock, [this] { return dead || q.empty(); });
    idlers ++;
  }

  // unregister this idler thread
  void done_idle ()
  {
    lock_guard<mutex> guard(mtx);
    idlers --;
    cv.notify_all(); // maybe wake up waiting scanners, but probably not (shutting down)
  }
};
656 :
typedef struct stat stat_t;
// A unit of scanning work: a path name plus its stat data.
typedef pair<string,stat_t> scan_payload;

// Order payloads by path name alone, so a set<scan_payload> treats two
// entries for the same path as duplicates whatever their stat fields hold.
inline bool operator< (const scan_payload& a, const scan_payload& b)
{
  return a.first.compare(b.first) < 0; // don't bother compare the stat fields
}
// The process-wide queue of scan_payload items; the thread roles listed
// below name functions defined outside this chunk.
663 : static workq<scan_payload> scanq; // just a single one
664 : // producer & idler: thread_main_fts_source_paths()
665 : // consumer: thread_main_scanner()
666 : // idler: thread_main_groom()
667 :
668 :
669 :
670 : ////////////////////////////////////////////////////////////////////////
671 :
672 :
673 : // Print a standard timestamp.
674 : static ostream&
675 315 : timestamp (ostream &o)
676 : {
677 : char datebuf[80];
678 315 : char *now2 = NULL;
679 315 : time_t now_t = time(NULL);
680 315 : struct tm *now = gmtime (&now_t);
681 315 : if (now)
682 : {
683 315 : (void) strftime (datebuf, sizeof (datebuf), "%c", now);
684 315 : now2 = datebuf;
685 : }
686 :
687 : return o << "[" << (now2 ? now2 : "") << "] "
688 315 : << "(" << getpid () << "/" << tid() << "): ";
689 : }
690 :
691 :
692 : // A little class that impersonates an ostream to the extent that it can
693 : // take << streaming operations. It batches up the bits into an internal
694 : // stringstream until it is destroyed; then flushes to the original ostream.
695 : // It adds a timestamp
696 : class obatched
697 : {
698 : private:
699 : ostream& o;
700 : stringstream stro;
701 : static mutex lock;
702 : public:
703 315 : obatched(ostream& oo, bool timestamp_p = true): o(oo)
704 : {
705 315 : if (timestamp_p)
706 315 : timestamp(stro);
707 315 : }
708 315 : ~obatched()
709 315 : {
710 630 : unique_lock<mutex> do_not_cross_the_streams(obatched::lock);
711 315 : o << stro.str();
712 315 : o.flush();
713 315 : }
714 : operator ostream& () { return stro; }
715 315 : template <typename T> ostream& operator << (const T& t) { stro << t; return stro; }
716 : };
717 : mutex obatched::lock; // just the one, since cout/cerr iostreams are not thread-safe
718 :
719 :
720 37 : void reportable_exception::report(ostream& o) const {
721 37 : obatched(o) << message << endl;
722 37 : }
723 :
724 :
725 : ////////////////////////////////////////////////////////////////////////
726 :
727 :
728 : // RAII style sqlite prepared-statement holder that matches { } block lifetime
729 :
730 : struct sqlite_ps
731 : {
732 : private:
733 : sqlite3* db;
734 : const string nickname;
735 : const string sql;
736 : sqlite3_stmt *pp;
737 :
738 : sqlite_ps(const sqlite_ps&); // make uncopyable
739 : sqlite_ps& operator=(const sqlite_ps &); // make unassignable
740 :
741 : public:
742 263 : sqlite_ps (sqlite3* d, const string& n, const string& s): db(d), nickname(n), sql(s) {
743 264 : if (verbose > 4)
744 0 : obatched(clog) << nickname << " prep " << sql << endl;
745 264 : int rc = sqlite3_prepare_v2 (db, sql.c_str(), -1 /* to \0 */, & this->pp, NULL);
746 264 : if (rc != SQLITE_OK)
747 0 : throw sqlite_exception(rc, "prepare " + sql);
748 264 : }
749 :
750 2275 : sqlite_ps& reset()
751 : {
752 2275 : sqlite3_reset(this->pp);
753 2275 : return *this;
754 : }
755 :
756 3135 : sqlite_ps& bind(int parameter, const string& str)
757 : {
758 3135 : if (verbose > 4)
759 0 : obatched(clog) << nickname << " bind " << parameter << "=" << str << endl;
760 3135 : int rc = sqlite3_bind_text (this->pp, parameter, str.c_str(), -1, SQLITE_TRANSIENT);
761 3135 : if (rc != SQLITE_OK)
762 0 : throw sqlite_exception(rc, "sqlite3 bind");
763 3135 : return *this;
764 : }
765 :
766 962 : sqlite_ps& bind(int parameter, int64_t value)
767 : {
768 962 : if (verbose > 4)
769 0 : obatched(clog) << nickname << " bind " << parameter << "=" << value << endl;
770 962 : int rc = sqlite3_bind_int64 (this->pp, parameter, value);
771 962 : if (rc != SQLITE_OK)
772 0 : throw sqlite_exception(rc, "sqlite3 bind");
773 962 : return *this;
774 : }
775 :
776 : sqlite_ps& bind(int parameter)
777 : {
778 : if (verbose > 4)
779 : obatched(clog) << nickname << " bind " << parameter << "=" << "NULL" << endl;
780 : int rc = sqlite3_bind_null (this->pp, parameter);
781 : if (rc != SQLITE_OK)
782 : throw sqlite_exception(rc, "sqlite3 bind");
783 : return *this;
784 : }
785 :
786 :
787 1811 : void step_ok_done() {
788 1811 : int rc = sqlite3_step (this->pp);
789 1811 : if (verbose > 4)
790 0 : obatched(clog) << nickname << " step-ok-done(" << sqlite3_errstr(rc) << ") " << sql << endl;
791 1811 : if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
792 0 : throw sqlite_exception(rc, "sqlite3 step");
793 1811 : (void) sqlite3_reset (this->pp);
794 1811 : }
795 :
796 :
797 359 : int step() {
798 359 : int rc = sqlite3_step (this->pp);
799 359 : if (verbose > 4)
800 0 : obatched(clog) << nickname << " step(" << sqlite3_errstr(rc) << ") " << sql << endl;
801 359 : return rc;
802 : }
803 :
804 :
805 :
806 264 : ~sqlite_ps () { sqlite3_finalize (this->pp); }
807 1030 : operator sqlite3_stmt* () { return this->pp; }
808 : };
809 :
810 :
811 : ////////////////////////////////////////////////////////////////////////
812 :
// RAII style templated autocloser: holds a value and a cleanup function,
// and calls the function on the value when the holder goes out of scope.
// The function's return value (type Ignore) is discarded.

template <class Payload, class Ignore>
struct defer_dtor
{
public:
  using dtor_fn = Ignore (*) (Payload);

  defer_dtor(Payload _p, dtor_fn _fn): payload(_p), cleanup(_fn) {}
  ~defer_dtor() { (void) cleanup(payload); }

  defer_dtor(const defer_dtor<Payload,Ignore>&) = delete; // make uncopyable
  defer_dtor& operator=(const defer_dtor<Payload,Ignore> &) = delete; // make unassignable

private:
  Payload payload;
  dtor_fn cleanup;
};
833 :
834 :
835 :
836 : ////////////////////////////////////////////////////////////////////////
837 :
838 :
// Return a copy of STR reduced to characters that are harmless in a log
// line: whatever isalnum() accepts, plus  / . , _ :   Every other
// character (blanks, quotes, control characters, ...) is dropped.
static string
header_censor(const string& str)
{
  string y;
  for (auto&& x : str)
    {
      // The <ctype.h> classifiers are only defined for unsigned-char
      // values (and EOF); a plain char holding a byte >= 0x80 may be
      // negative, so convert before classifying.
      if (isalnum(static_cast<unsigned char>(x))
          || x == '/' || x == '.' || x == ',' || x == '_' || x == ':')
        y += x;
    }
  return y;
}
850 :
851 :
852 : static string
853 113 : conninfo (struct MHD_Connection * conn)
854 : {
855 : char hostname[256]; // RFC1035
856 : char servname[256];
857 113 : int sts = -1;
858 :
859 113 : if (conn == 0)
860 0 : return "internal";
861 :
862 : /* Look up client address data. */
863 113 : const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
864 : MHD_CONNECTION_INFO_CLIENT_ADDRESS);
865 113 : struct sockaddr *so = u ? u->client_addr : 0;
866 :
867 113 : if (so && so->sa_family == AF_INET) {
868 113 : sts = getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), servname,
869 : sizeof (servname), NI_NUMERICHOST | NI_NUMERICSERV);
870 0 : } else if (so && so->sa_family == AF_INET6) {
871 0 : sts = getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname),
872 : servname, sizeof (servname), NI_NUMERICHOST | NI_NUMERICSERV);
873 : }
874 113 : if (sts != 0) {
875 0 : hostname[0] = servname[0] = '\0';
876 : }
877 :
878 : // extract headers relevant to administration
879 113 : const char* user_agent = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
880 113 : const char* x_forwarded_for = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
881 : // NB: these are untrustworthy, beware if machine-processing log files
882 :
883 226 : return string(hostname) + string(":") + string(servname) +
884 452 : string(" UA:") + header_censor(string(user_agent)) +
885 339 : string(" XFF:") + header_censor(string(x_forwarded_for));
886 : }
887 :
888 :
889 :
890 : ////////////////////////////////////////////////////////////////////////
891 :
892 :
893 : static void
894 51 : add_mhd_last_modified (struct MHD_Response *resp, time_t mtime)
895 : {
896 51 : struct tm *now = gmtime (&mtime);
897 51 : if (now != NULL)
898 : {
899 : char datebuf[80];
900 51 : size_t rc = strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %T GMT", now);
901 51 : if (rc > 0 && rc < sizeof (datebuf))
902 51 : (void) MHD_add_response_header (resp, "Last-Modified", datebuf);
903 : }
904 :
905 51 : (void) MHD_add_response_header (resp, "Cache-Control", "public");
906 51 : }
907 :
908 :
909 :
910 : static struct MHD_Response*
911 15 : handle_buildid_f_match (bool internal_req_t,
912 : int64_t b_mtime,
913 : const string& b_source0,
914 : int *result_fd)
915 : {
916 : (void) internal_req_t; // ignored
917 15 : int fd = open(b_source0.c_str(), O_RDONLY);
918 15 : if (fd < 0)
919 0 : throw libc_exception (errno, string("open ") + b_source0);
920 :
921 : // NB: use manual close(2) in error case instead of defer_dtor, because
922 : // in the normal case, we want to hand the fd over to libmicrohttpd for
923 : // file transfer.
924 :
925 : struct stat s;
926 15 : int rc = fstat(fd, &s);
927 15 : if (rc < 0)
928 : {
929 0 : close(fd);
930 0 : throw libc_exception (errno, string("fstat ") + b_source0);
931 : }
932 :
933 15 : if ((int64_t) s.st_mtime != b_mtime)
934 : {
935 0 : if (verbose)
936 0 : obatched(clog) << "mtime mismatch for " << b_source0 << endl;
937 0 : close(fd);
938 0 : return 0;
939 : }
940 :
941 15 : inc_metric ("http_responses_total","result","file");
942 15 : struct MHD_Response* r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
943 15 : if (r == 0)
944 : {
945 0 : if (verbose)
946 0 : obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
947 0 : close(fd);
948 : }
949 : else
950 : {
951 15 : MHD_add_response_header (r, "Content-Type", "application/octet-stream");
952 15 : add_mhd_last_modified (r, s.st_mtime);
953 15 : if (verbose > 1)
954 0 : obatched(clog) << "serving file " << b_source0 << endl;
955 : /* libmicrohttpd will close it. */
956 15 : if (result_fd)
957 15 : *result_fd = fd;
958 : }
959 :
960 15 : return r;
961 : }
962 :
963 :
// Quote all questionable characters of str for safe passage through a
// sh -c expansion: every character that is neither alphanumeric nor '/'
// gets a backslash in front of it.  The one exception is newline, which
// is wrapped in single quotes instead, because sh treats
// backslash-newline as a line continuation and would delete both.
static string
shell_escape(const string& str)
{
  string y;
  for (auto&& x : str)
    {
      if (x == '\n')
        {
          y += "'\n'"; // a quoted newline survives; an escaped one would vanish
          continue;
        }
      // <ctype.h> classifiers need an unsigned-char value; plain char may
      // be negative for bytes >= 0x80.
      if (! isalnum(static_cast<unsigned char>(x)) && x != '/')
        y += "\\";
      y += x;
    }
  return y;
}
977 :
978 :
979 : // PR25548: Perform POSIX / RFC3986 style path canonicalization on the input string.
980 : //
981 : // Namely:
982 : // // -> /
983 : // /foo/../ -> /
984 : // /./ -> /
985 : //
986 : // This mapping is done on dwarf-side source path names, which may
987 : // include these constructs, so we can deal with debuginfod clients
988 : // that accidentally canonicalize the paths.
989 : //
990 : // realpath(3) is close but not quite right, because it also resolves
991 : // symbolic links. Symlinks at the debuginfod server have nothing to
992 : // do with the build-time symlinks, thus they must not be considered.
993 : //
994 : // see also curl Curl_dedotdotify() aka RFC3986, which we mostly follow here
995 : // see also libc __realpath()
996 : // see also llvm llvm::sys::path::remove_dots()
static string
canon_pathname (const string& input)
{
  string rest = input; // 5.2.4 (1): the part of the input not yet consumed
  string out;          // canonical path assembled so far

  // Does the unconsumed input begin with PREFIX?
  auto begins_with = [&rest] (const char *prefix) -> bool
    {
      const string p (prefix);
      return rest.compare (0, p.size(), p) == 0;
    };

  while (! rest.empty())
    {
      // 5.2.4 (2) A: drop a leading "../" or "./"
      if (begins_with ("../"))
        rest.erase (0, 3);
      else if (begins_with ("./"))
        rest.erase (0, 2);

      // 5.2.4 (2) B: "/./" collapses to "/"
      else if (begins_with ("/./"))
        rest.erase (0, 2);
      else if (rest == "/.")
        rest.clear (); // no need to handle "/." complete-path-segment case; we're dealing with file names

      // 5.2.4 (2) C: "/../" collapses to "/" and cancels the last output segment
      else if (begins_with ("/../"))
        {
          rest.erase (0, 3);
          const string::size_type last_slash = out.rfind ('/');
          if (last_slash == string::npos)
            out.clear ();
          else
            out.erase (last_slash);
        }

      // Remainder of (2) C and all of (2) D: a trailing "/..", or a lone
      // "." or "..", is simply discarded; we're dealing with file names.
      else if (rest == "/.." || rest == "." || rest == "..")
        rest.clear ();

      // POSIX special: map // to /
      else if (begins_with ("//"))
        rest.erase (0, 1);

      // 5.2.4 (2) E: move one whole segment (with its leading slash, if
      // any) from the input to the output
      else
        {
          const string::size_type search_from = (rest[0] == '/') ? 1 : 0; // skip first slash
          const string::size_type next_slash = rest.find ('/', search_from);
          out.append (rest, 0, next_slash);
          if (next_slash == string::npos)
            rest.clear ();
          else
            rest.erase (0, next_slash);
        }
    }

  return out;
}
1052 :
1053 :
1054 :
1055 : // A map-like class that owns a cache of file descriptors (indexed by
1056 : // file / content names).
1057 : //
1058 : // If only it could use fd's instead of file names ... but we can't
1059 : // dup(2) to create independent descriptors for the same unlinked
1060 : // files, so would have to use some goofy linux /proc/self/fd/%d
1061 : // hack such as the following
1062 :
1063 : #if 0
// Open an independent read-only descriptor for the file behind FD by
// going through its /proc/self/fd entry.  Returns the new descriptor,
// or -1 on failure or on platforms other than Linux.
int superdup(int fd)
{
#ifdef __linux__
  char *fdpath = NULL;
  int newfd = -1;
  if (asprintf(& fdpath, "/proc/self/fd/%d", fd) >= 0)
    newfd = open(fdpath, O_RDONLY);
  free (fdpath);
  return newfd;
#else
  return -1;
#endif
}
1080 : #endif
1081 :
1082 : class libarchive_fdcache
1083 : {
1084 : private:
1085 : mutex fdcache_lock;
1086 :
1087 : struct fdcache_entry
1088 : {
1089 : string archive;
1090 : string entry;
1091 : string fd;
1092 : double fd_size_mb; // slightly rounded up megabytes
1093 : };
1094 : deque<fdcache_entry> lru; // @head: most recently used
1095 : long max_fds;
1096 : long max_mbs;
1097 :
1098 : public:
1099 82 : void set_metrics()
1100 : {
1101 82 : double total_mb = 0.0;
1102 186 : for (auto i = lru.begin(); i < lru.end(); i++)
1103 104 : total_mb += i->fd_size_mb;
1104 82 : set_metric("fdcache_bytes", (int64_t)(total_mb*1024.0*1024.0));
1105 82 : set_metric("fdcache_count", lru.size());
1106 82 : }
1107 :
1108 34 : void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
1109 : {
1110 : {
1111 68 : unique_lock<mutex> lock(fdcache_lock);
1112 69 : for (auto i = lru.begin(); i < lru.end(); i++) // nuke preexisting copy
1113 : {
1114 35 : if (i->archive == a && i->entry == b)
1115 : {
1116 0 : unlink (i->fd.c_str());
1117 0 : lru.erase(i);
1118 0 : inc_metric("fdcache_op_count","op","dequeue");
1119 0 : break; // must not continue iterating
1120 : }
1121 : }
1122 34 : double mb = (sz+65535)/1048576.0; // round up to 64K block
1123 68 : fdcache_entry n = { a, b, fd, mb };
1124 34 : if (front_p)
1125 : {
1126 24 : inc_metric("fdcache_op_count","op","enqueue_front");
1127 24 : lru.push_front(n);
1128 : }
1129 : else
1130 : {
1131 10 : inc_metric("fdcache_op_count","op","enqueue_back");
1132 10 : lru.push_back(n);
1133 : }
1134 34 : if (verbose > 3)
1135 0 : obatched(clog) << "fdcache interned a=" << a << " b=" << b
1136 0 : << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
1137 : }
1138 34 : set_metrics();
1139 :
1140 : // NB: we age the cache at lookup time too
1141 34 : if (front_p)
1142 24 : this->limit(max_fds, max_mbs); // age cache if required
1143 34 : }
1144 :
1145 35 : int lookup(const string& a, const string& b)
1146 : {
1147 35 : int fd = -1;
1148 : {
1149 70 : unique_lock<mutex> lock(fdcache_lock);
1150 67 : for (auto i = lru.begin(); i < lru.end(); i++)
1151 : {
1152 43 : if (i->archive == a && i->entry == b)
1153 : { // found it; move it to head of lru
1154 22 : fdcache_entry n = *i;
1155 11 : lru.erase(i); // invalidates i, so no more iteration!
1156 11 : lru.push_front(n);
1157 11 : inc_metric("fdcache_op_count","op","requeue_front");
1158 11 : fd = open(n.fd.c_str(), O_RDONLY); // NB: no problem if dup() fails; looks like cache miss
1159 11 : break;
1160 : }
1161 : }
1162 : }
1163 :
1164 35 : if (fd >= 0)
1165 11 : this->limit(max_fds, max_mbs); // age cache if required
1166 :
1167 35 : return fd;
1168 : }
1169 :
1170 34 : int probe(const string& a, const string& b) // just a cache residency check - don't modify LRU state, don't open
1171 : {
1172 68 : unique_lock<mutex> lock(fdcache_lock);
1173 69 : for (auto i = lru.begin(); i < lru.end(); i++)
1174 : {
1175 35 : if (i->archive == a && i->entry == b)
1176 : {
1177 0 : inc_metric("fdcache_op_count","op","probe_hit");
1178 0 : return true;
1179 : }
1180 : }
1181 34 : inc_metric("fdcache_op_count","op","probe_miss");
1182 34 : return false;
1183 : }
1184 :
1185 0 : void clear(const string& a, const string& b)
1186 : {
1187 0 : unique_lock<mutex> lock(fdcache_lock);
1188 0 : for (auto i = lru.begin(); i < lru.end(); i++)
1189 : {
1190 0 : if (i->archive == a && i->entry == b)
1191 : { // found it; move it to head of lru
1192 0 : fdcache_entry n = *i;
1193 0 : lru.erase(i); // invalidates i, so no more iteration!
1194 0 : inc_metric("fdcache_op_count","op","clear");
1195 0 : unlink (n.fd.c_str());
1196 0 : set_metrics();
1197 0 : return;
1198 : }
1199 : }
1200 : }
1201 :
1202 51 : void limit(long maxfds, long maxmbs, bool metrics_p = true)
1203 : {
1204 51 : if (verbose > 3 && (this->max_fds != maxfds || this->max_mbs != maxmbs))
1205 0 : obatched(clog) << "fdcache limited to maxfds=" << maxfds << " maxmbs=" << maxmbs << endl;
1206 :
1207 102 : unique_lock<mutex> lock(fdcache_lock);
1208 51 : this->max_fds = maxfds;
1209 51 : this->max_mbs = maxmbs;
1210 :
1211 51 : long total_fd = 0;
1212 51 : double total_mb = 0.0;
1213 86 : for (auto i = lru.begin(); i < lru.end(); i++)
1214 : {
1215 : // accumulate totals from most recently used one going backward
1216 66 : total_fd ++;
1217 66 : total_mb += i->fd_size_mb;
1218 66 : if (total_fd > max_fds || total_mb > max_mbs)
1219 : {
1220 : // found the cut here point!
1221 :
1222 65 : for (auto j = i; j < lru.end(); j++) // close all the fds from here on in
1223 : {
1224 34 : if (verbose > 3)
1225 0 : obatched(clog) << "fdcache evicted a=" << j->archive << " b=" << j->entry
1226 0 : << " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
1227 34 : if (metrics_p)
1228 33 : inc_metric("fdcache_op_count","op","evict");
1229 34 : unlink (j->fd.c_str());
1230 : }
1231 :
1232 31 : lru.erase(i, lru.end()); // erase the nodes generally
1233 31 : break;
1234 : }
1235 : }
1236 51 : if (metrics_p) set_metrics();
1237 51 : }
1238 :
1239 3 : ~libarchive_fdcache()
1240 3 : {
1241 : // unlink any fdcache entries in $TMPDIR
1242 : // don't update metrics; those globals may be already destroyed
1243 3 : limit(0, 0, false);
1244 3 : }
1245 : };
1246 : static libarchive_fdcache fdcache;
1247 :
1248 :
1249 : // For security/portability reasons, many distro-package archives have
1250 : // a "./" in front of path names; others have nothing, others have
1251 : // "/". Canonicalize them all to a single leading "/", with the
1252 : // assumption that this matches the dwarf-derived file names too.
1253 104 : string canonicalized_archive_entry_pathname(struct archive_entry *e)
1254 : {
1255 208 : string fn = archive_entry_pathname(e);
1256 104 : if (fn.size() == 0)
1257 0 : return fn;
1258 104 : if (fn[0] == '/')
1259 0 : return fn;
1260 104 : if (fn[0] == '.')
1261 86 : return fn.substr(1);
1262 : else
1263 18 : return string("/")+fn;
1264 : }
1265 :
1266 :
1267 :
1268 : static struct MHD_Response*
1269 61 : handle_buildid_r_match (bool internal_req_p,
1270 : int64_t b_mtime,
1271 : const string& b_source0,
1272 : const string& b_source1,
1273 : int *result_fd)
1274 : {
1275 : struct stat fs;
1276 61 : int rc = stat (b_source0.c_str(), &fs);
1277 61 : if (rc != 0)
1278 26 : throw libc_exception (errno, string("stat ") + b_source0);
1279 :
1280 35 : if ((int64_t) fs.st_mtime != b_mtime)
1281 : {
1282 0 : if (verbose)
1283 0 : obatched(clog) << "mtime mismatch for " << b_source0 << endl;
1284 0 : return 0;
1285 : }
1286 :
1287 : // check for a match in the fdcache first
1288 35 : int fd = fdcache.lookup(b_source0, b_source1);
1289 35 : while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
1290 : {
1291 11 : rc = fstat(fd, &fs);
1292 11 : if (rc < 0) // disappeared?
1293 : {
1294 0 : if (verbose)
1295 0 : obatched(clog) << "cannot fstat fdcache " << b_source0 << endl;
1296 0 : close(fd);
1297 0 : fdcache.clear(b_source0, b_source1);
1298 0 : break; // branch out of if "loop", to try new libarchive fetch attempt
1299 : }
1300 :
1301 11 : struct MHD_Response* r = MHD_create_response_from_fd (fs.st_size, fd);
1302 11 : if (r == 0)
1303 : {
1304 0 : if (verbose)
1305 0 : obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
1306 0 : close(fd);
1307 0 : break; // branch out of if "loop", to try new libarchive fetch attempt
1308 : }
1309 :
1310 11 : inc_metric ("http_responses_total","result","archive fdcache");
1311 :
1312 11 : MHD_add_response_header (r, "Content-Type", "application/octet-stream");
1313 11 : add_mhd_last_modified (r, fs.st_mtime);
1314 11 : if (verbose > 1)
1315 0 : obatched(clog) << "serving fdcache archive " << b_source0 << " file " << b_source1 << endl;
1316 : /* libmicrohttpd will close it. */
1317 11 : if (result_fd)
1318 11 : *result_fd = fd;
1319 11 : return r;
1320 : // NB: see, we never go around the 'loop' more than once
1321 : }
1322 :
1323 : // no match ... grumble, must process the archive
1324 48 : string archive_decoder = "/dev/null";
1325 48 : string archive_extension = "";
1326 96 : for (auto&& arch : scan_archives)
1327 72 : if (string_endswith(b_source0, arch.first))
1328 : {
1329 24 : archive_extension = arch.first;
1330 24 : archive_decoder = arch.second;
1331 : }
1332 : FILE* fp;
1333 : defer_dtor<FILE*,int>::dtor_fn dfn;
1334 24 : if (archive_decoder != "cat")
1335 : {
1336 3 : string popen_cmd = archive_decoder + " " + shell_escape(b_source0);
1337 1 : fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
1338 1 : dfn = pclose;
1339 1 : if (fp == NULL)
1340 0 : throw libc_exception (errno, string("popen ") + popen_cmd);
1341 : }
1342 : else
1343 : {
1344 23 : fp = fopen (b_source0.c_str(), "r");
1345 23 : dfn = fclose;
1346 23 : if (fp == NULL)
1347 0 : throw libc_exception (errno, string("fopen ") + b_source0);
1348 : }
1349 48 : defer_dtor<FILE*,int> fp_closer (fp, dfn);
1350 :
1351 : struct archive *a;
1352 24 : a = archive_read_new();
1353 24 : if (a == NULL)
1354 0 : throw archive_exception("cannot create archive reader");
1355 24 : defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
1356 :
1357 24 : rc = archive_read_support_format_all(a);
1358 24 : if (rc != ARCHIVE_OK)
1359 0 : throw archive_exception(a, "cannot select all format");
1360 24 : rc = archive_read_support_filter_all(a);
1361 24 : if (rc != ARCHIVE_OK)
1362 0 : throw archive_exception(a, "cannot select all filters");
1363 :
1364 24 : rc = archive_read_open_FILE (a, fp);
1365 24 : if (rc != ARCHIVE_OK)
1366 0 : throw archive_exception(a, "cannot open archive from pipe");
1367 :
1368 : // archive traversal is in three stages, no, four stages:
1369 : // 1) skip entries whose names do not match the requested one
1370 : // 2) extract the matching entry name (set r = result)
1371 : // 3) extract some number of prefetched entries (just into fdcache)
1372 : // 4) abort any further processing
1373 24 : struct MHD_Response* r = 0; // will set in stage 2
1374 24 : unsigned prefetch_count =
1375 : internal_req_p ? 0 : fdcache_prefetch; // will decrement in stage 3
1376 :
1377 238 : while(r == 0 || prefetch_count > 0) // stage 1, 2, or 3
1378 : {
1379 234 : if (interrupted)
1380 20 : break;
1381 :
1382 : struct archive_entry *e;
1383 234 : rc = archive_read_next_header (a, &e);
1384 234 : if (rc != ARCHIVE_OK)
1385 20 : break;
1386 :
1387 214 : if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
1388 214 : continue;
1389 :
1390 48 : string fn = canonicalized_archive_entry_pathname (e);
1391 48 : if ((r == 0) && (fn != b_source1)) // stage 1
1392 14 : continue;
1393 :
1394 34 : if (fdcache.probe (b_source0, fn)) // skip if already interned
1395 0 : continue;
1396 :
1397 : // extract this file to a temporary file
1398 34 : char* tmppath = NULL;
1399 34 : rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir.c_str());
1400 34 : if (rc < 0)
1401 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
1402 34 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
1403 34 : fd = mkstemp (tmppath);
1404 34 : if (fd < 0)
1405 0 : throw libc_exception (errno, "cannot create temporary file");
1406 : // NB: don't unlink (tmppath), as fdcache will take charge of it.
1407 :
1408 : // NB: this can take many uninterruptible seconds for a huge file
1409 34 : rc = archive_read_data_into_fd (a, fd);
1410 34 : if (rc != ARCHIVE_OK) // e.g. ENOSPC!
1411 : {
1412 0 : close (fd);
1413 0 : unlink (tmppath);
1414 0 : throw archive_exception(a, "cannot extract file");
1415 : }
1416 :
1417 : // Set the mtime so the fdcache file mtimes, even prefetched ones,
1418 : // propagate to future webapi clients.
1419 : struct timeval tvs[2];
1420 34 : tvs[0].tv_sec = tvs[1].tv_sec = archive_entry_mtime(e);
1421 34 : tvs[0].tv_usec = tvs[1].tv_usec = 0;
1422 34 : (void) futimes (fd, tvs); /* best effort */
1423 :
1424 34 : if (r != 0) // stage 3
1425 : {
1426 : // NB: now we know we have a complete reusable file; make fdcache
1427 : // responsible for unlinking it later.
1428 10 : fdcache.intern(b_source0, fn,
1429 : tmppath, archive_entry_size(e),
1430 : false); // prefetched ones go to back of lru
1431 10 : prefetch_count --;
1432 10 : close (fd); // we're not saving this fd to make a mhd-response from!
1433 10 : continue;
1434 : }
1435 :
1436 : // NB: now we know we have a complete reusable file; make fdcache
1437 : // responsible for unlinking it later.
1438 24 : fdcache.intern(b_source0, b_source1,
1439 : tmppath, archive_entry_size(e),
1440 : true); // requested ones go to the front of lru
1441 :
1442 24 : inc_metric ("http_responses_total","result",archive_extension + " archive");
1443 24 : r = MHD_create_response_from_fd (archive_entry_size(e), fd);
1444 24 : if (r == 0)
1445 : {
1446 0 : if (verbose)
1447 0 : obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
1448 0 : close(fd);
1449 0 : break; // assume no chance of better luck around another iteration; no other copies of same file
1450 : }
1451 : else
1452 : {
1453 24 : MHD_add_response_header (r, "Content-Type", "application/octet-stream");
1454 24 : add_mhd_last_modified (r, archive_entry_mtime(e));
1455 24 : if (verbose > 1)
1456 0 : obatched(clog) << "serving archive " << b_source0 << " file " << b_source1 << endl;
1457 : /* libmicrohttpd will close it. */
1458 24 : if (result_fd)
1459 24 : *result_fd = fd;
1460 24 : continue;
1461 : }
1462 : }
1463 :
1464 : // XXX: rpm/file not found: delete this R entry?
1465 24 : return r;
1466 : }
1467 :
1468 :
1469 : static struct MHD_Response*
1470 76 : handle_buildid_match (bool internal_req_p,
1471 : int64_t b_mtime,
1472 : const string& b_stype,
1473 : const string& b_source0,
1474 : const string& b_source1,
1475 : int *result_fd)
1476 : {
1477 : try
1478 : {
1479 76 : if (b_stype == "F")
1480 15 : return handle_buildid_f_match(internal_req_p, b_mtime, b_source0, result_fd);
1481 61 : else if (b_stype == "R")
1482 61 : return handle_buildid_r_match(internal_req_p, b_mtime, b_source0, b_source1, result_fd);
1483 : }
1484 52 : catch (const reportable_exception &e)
1485 : {
1486 26 : e.report(clog);
1487 : // Report but swallow libc etc. errors here; let the caller
1488 : // iterate to other matches of the content.
1489 : }
1490 :
1491 26 : return 0;
1492 : }
1493 :
1494 :
1495 : static int
1496 6 : debuginfod_find_progress (debuginfod_client *, long a, long b)
1497 : {
1498 6 : if (verbose > 4)
1499 0 : obatched(clog) << "federated debuginfod progress=" << a << "/" << b << endl;
1500 :
1501 6 : return interrupted;
1502 : }
1503 :
1504 :
1505 : static struct MHD_Response*
1506 58 : handle_buildid (MHD_Connection* conn,
1507 : const string& buildid /* unsafe */,
1508 : const string& artifacttype /* unsafe */,
1509 : const string& suffix /* unsafe */,
1510 : int *result_fd)
1511 : {
1512 : // validate artifacttype
1513 116 : string atype_code;
1514 58 : if (artifacttype == "debuginfo") atype_code = "D";
1515 26 : else if (artifacttype == "executable") atype_code = "E";
1516 12 : else if (artifacttype == "source") atype_code = "S";
1517 0 : else throw reportable_exception("invalid artifacttype");
1518 :
1519 58 : if (atype_code == "S" && suffix == "")
1520 0 : throw reportable_exception("invalid source suffix");
1521 :
1522 : // validate buildid
1523 58 : if ((buildid.size() < 2) || // not empty
1524 116 : (buildid.size() % 2) || // even number
1525 58 : (buildid.find_first_not_of("0123456789abcdef") != string::npos)) // pure tasty lowercase hex
1526 0 : throw reportable_exception("invalid buildid");
1527 :
1528 58 : if (verbose > 1)
1529 0 : obatched(clog) << "searching for buildid=" << buildid << " artifacttype=" << artifacttype
1530 0 : << " suffix=" << suffix << endl;
1531 :
1532 58 : sqlite_ps *pp = 0;
1533 :
1534 58 : if (atype_code == "D")
1535 : {
1536 64 : pp = new sqlite_ps (db, "mhd-query-d",
1537 : "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_d where buildid = ? "
1538 32 : "order by mtime desc");
1539 32 : pp->reset();
1540 32 : pp->bind(1, buildid);
1541 : }
1542 26 : else if (atype_code == "E")
1543 : {
1544 28 : pp = new sqlite_ps (db, "mhd-query-e",
1545 : "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_e where buildid = ? "
1546 14 : "order by mtime desc");
1547 14 : pp->reset();
1548 14 : pp->bind(1, buildid);
1549 : }
1550 12 : else if (atype_code == "S")
1551 : {
1552 : // PR25548
1553 : // Incoming source queries may come in with either dwarf-level OR canonicalized paths.
1554 : // We let the query pass with either one.
1555 :
1556 24 : pp = new sqlite_ps (db, "mhd-query-s",
1557 : "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc in (?,?) "
1558 12 : "order by sharedprefix(source0,source0ref) desc, mtime desc");
1559 12 : pp->reset();
1560 12 : pp->bind(1, buildid);
1561 : // NB: we don't store the non-canonicalized path names any more, but old databases
1562 : // might have them (and no canon ones), so we keep searching for both.
1563 12 : pp->bind(2, suffix);
1564 12 : pp->bind(3, canon_pathname(suffix));
1565 : }
1566 116 : unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
1567 :
1568 : // consume all the rows
1569 : while (1)
1570 : {
1571 84 : int rc = pp->step();
1572 84 : if (rc == SQLITE_DONE) break;
1573 76 : if (rc != SQLITE_ROW)
1574 0 : throw sqlite_exception(rc, "step");
1575 :
1576 76 : int64_t b_mtime = sqlite3_column_int64 (*pp, 0);
1577 76 : string b_stype = string((const char*) sqlite3_column_text (*pp, 1) ?: ""); /* by DDL may not be NULL */
1578 76 : string b_source0 = string((const char*) sqlite3_column_text (*pp, 2) ?: ""); /* may be NULL */
1579 76 : string b_source1 = string((const char*) sqlite3_column_text (*pp, 3) ?: ""); /* may be NULL */
1580 :
1581 76 : if (verbose > 1)
1582 0 : obatched(clog) << "found mtime=" << b_mtime << " stype=" << b_stype
1583 0 : << " source0=" << b_source0 << " source1=" << b_source1 << endl;
1584 :
1585 : // Try accessing the located match.
1586 : // XXX: in case of multiple matches, attempt them in parallel?
1587 76 : auto r = handle_buildid_match (conn ? false : true,
1588 : b_mtime, b_stype, b_source0, b_source1, result_fd);
1589 76 : if (r)
1590 50 : return r;
1591 26 : }
1592 :
1593 : // We couldn't find it in the database. Last ditch effort
1594 : // is to defer to other debuginfo servers.
1595 :
1596 8 : int fd = -1;
1597 8 : debuginfod_client *client = debuginfod_begin ();
1598 8 : if (client != NULL)
1599 : {
1600 8 : debuginfod_set_progressfn (client, & debuginfod_find_progress);
1601 :
1602 8 : if (conn)
1603 : {
1604 : // Transcribe incoming User-Agent:
1605 16 : string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
1606 24 : string ua_complete = string("User-Agent: ") + ua;
1607 8 : debuginfod_add_http_header (client, ua_complete.c_str());
1608 :
1609 : // Compute larger XFF:, for avoiding info loss during
1610 : // federation, and for future cyclicity detection.
1611 16 : string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
1612 8 : if (xff != "")
1613 3 : xff += string(", "); // comma separated list
1614 :
1615 : // Compute the client's numeric IP address only - so can't merge with conninfo()
1616 8 : const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
1617 : MHD_CONNECTION_INFO_CLIENT_ADDRESS);
1618 8 : struct sockaddr *so = u ? u->client_addr : 0;
1619 8 : char hostname[256] = ""; // RFC1035
1620 8 : if (so && so->sa_family == AF_INET)
1621 8 : (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
1622 : NI_NUMERICHOST);
1623 0 : else if (so && so->sa_family == AF_INET6)
1624 0 : (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
1625 : NI_NUMERICHOST);
1626 :
1627 24 : string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
1628 8 : debuginfod_add_http_header (client, xff_complete.c_str());
1629 : }
1630 :
1631 8 : if (artifacttype == "debuginfo")
1632 7 : fd = debuginfod_find_debuginfo (client,
1633 7 : (const unsigned char*) buildid.c_str(),
1634 : 0, NULL);
1635 1 : else if (artifacttype == "executable")
1636 1 : fd = debuginfod_find_executable (client,
1637 1 : (const unsigned char*) buildid.c_str(),
1638 : 0, NULL);
1639 0 : else if (artifacttype == "source")
1640 0 : fd = debuginfod_find_source (client,
1641 0 : (const unsigned char*) buildid.c_str(),
1642 : 0, suffix.c_str(), NULL);
1643 : }
1644 : else
1645 0 : fd = -errno; /* Set by debuginfod_begin. */
1646 8 : debuginfod_end (client);
1647 :
1648 8 : if (fd >= 0)
1649 : {
1650 1 : inc_metric ("http_responses_total","result","upstream");
1651 : struct stat s;
1652 1 : int rc = fstat (fd, &s);
1653 1 : if (rc == 0)
1654 : {
1655 1 : auto r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
1656 1 : if (r)
1657 : {
1658 1 : MHD_add_response_header (r, "Content-Type", "application/octet-stream");
1659 1 : add_mhd_last_modified (r, s.st_mtime);
1660 1 : if (verbose > 1)
1661 0 : obatched(clog) << "serving file from upstream debuginfod/cache" << endl;
1662 1 : if (result_fd)
1663 1 : *result_fd = fd;
1664 1 : return r; // NB: don't close fd; libmicrohttpd will
1665 : }
1666 : }
1667 0 : close (fd);
1668 : }
1669 : else
1670 7 : switch(fd)
1671 : {
1672 5 : case -ENOSYS:
1673 5 : break;
1674 2 : case -ENOENT:
1675 2 : break;
1676 0 : default: // some more tricky error
1677 0 : throw libc_exception(-fd, "upstream debuginfod query failed");
1678 : }
1679 :
1680 7 : throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
1681 : }
1682 :
1683 :
1684 : ////////////////////////////////////////////////////////////////////////
1685 :
// Table of values reported by the /metrics query.  The key is the
// complete series name as it is printed: either a bare metric name, or
// metric{label="value"} as assembled by the helpers below.
static map<string,int64_t> metrics; // arbitrary data for /metrics query
// NB: store int64_t since all our metrics are integers; prometheus accepts double
// Taken by every helper below that reads or modifies `metrics`.
static mutex metrics_lock;
// NB: these objects get released during the process exit via global dtors;
// do not use them from within other global dtors.
1691 :
1692 : // utility function for assembling prometheus-compatible
1693 : // name="escaped-value" strings
1694 : // https://prometheus.io/docs/instrumenting/exposition_formats/
static string
metric_label(const string& name, const string& value)
{
  string label (name);
  label += "=\"";

  // Copy VALUE, escaping the three characters that need it: backslash,
  // double quote and newline.
  for (string::size_type k = 0; k < value.size(); k++)
    {
      const char ch = value[k];
      if (ch == '\\')
        label += "\\\\";
      else if (ch == '\"')
        label += "\\\"";
      else if (ch == '\n')
        label += "\\n";
      else
        label += ch;
    }

  label += "\"";
  return label;
}
1710 :
1711 :
1712 : // add prometheus-format metric name + label tuple (if any) + value
1713 :
1714 : static void
1715 170 : set_metric(const string& metric, int64_t value)
1716 : {
1717 170 : unique_lock<mutex> lock(metrics_lock);
1718 170 : metrics[metric] = value;
1719 170 : }
#if 0 /* unused */
// Add one to the unlabeled series METRIC (created at zero if absent).
static void
inc_metric(const string& metric)
{
  lock_guard<mutex> guard(metrics_lock);
  metrics[metric] += 1;
}
#endif
1728 : static void
1729 380 : set_metric(const string& metric,
1730 : const string& lname, const string& lvalue,
1731 : int64_t value)
1732 : {
1733 1140 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
1734 380 : unique_lock<mutex> lock(metrics_lock);
1735 380 : metrics[key] = value;
1736 380 : }
1737 :
1738 : static void
1739 1551 : inc_metric(const string& metric,
1740 : const string& lname, const string& lvalue)
1741 : {
1742 4653 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
1743 1551 : unique_lock<mutex> lock(metrics_lock);
1744 1551 : metrics[key] ++;
1745 1551 : }
1746 : static void
1747 684 : add_metric(const string& metric,
1748 : const string& lname, const string& lvalue,
1749 : int64_t value)
1750 : {
1751 2053 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
1752 685 : unique_lock<mutex> lock(metrics_lock);
1753 685 : metrics[key] += value;
1754 685 : }
#if 0
// Add VALUE to the label-less series METRIC (a missing entry starts at zero).
static void
add_metric(const string& metric,
           int64_t value)
{
  lock_guard<mutex> guard(metrics_lock);
  int64_t& slot = metrics[metric];
  slot += value;
}
#endif
1764 :
1765 :
1766 : // and more for higher arity labels if needed
1767 :
1768 :
1769 : static struct MHD_Response*
1770 61 : handle_metrics (off_t* size)
1771 : {
1772 122 : stringstream o;
1773 : {
1774 122 : unique_lock<mutex> lock(metrics_lock);
1775 3555 : for (auto&& i : metrics)
1776 3494 : o << i.first << " " << i.second << endl;
1777 : }
1778 61 : const string& os = o.str();
1779 61 : MHD_Response* r = MHD_create_response_from_buffer (os.size(),
1780 61 : (void*) os.c_str(),
1781 : MHD_RESPMEM_MUST_COPY);
1782 61 : *size = os.size();
1783 61 : MHD_add_response_header (r, "Content-Type", "text/plain");
1784 122 : return r;
1785 : }
1786 :
1787 :
1788 : ////////////////////////////////////////////////////////////////////////
1789 :
1790 :
1791 : /* libmicrohttpd callback */
1792 : static MHD_RESULT
1793 113 : handler_cb (void * /*cls*/,
1794 : struct MHD_Connection *connection,
1795 : const char *url,
1796 : const char *method,
1797 : const char * /*version*/,
1798 : const char * /*upload_data*/,
1799 : size_t * /*upload_data_size*/,
1800 : void ** /*con_cls*/)
1801 : {
1802 113 : struct MHD_Response *r = NULL;
1803 226 : string url_copy = url;
1804 :
1805 : #if MHD_VERSION >= 0x00097002
1806 : enum MHD_Result rc;
1807 : #else
1808 113 : int rc = MHD_NO; // mhd
1809 : #endif
1810 113 : int http_code = 500;
1811 113 : off_t http_size = -1;
1812 : struct timeval tv_start, tv_end;
1813 113 : gettimeofday (&tv_start, NULL);
1814 :
1815 : try
1816 : {
1817 113 : if (string(method) != "GET")
1818 0 : throw reportable_exception(400, "we support GET only");
1819 :
1820 : /* Start decoding the URL. */
1821 113 : size_t slash1 = url_copy.find('/', 1);
1822 226 : string url1 = url_copy.substr(0, slash1); // ok even if slash1 not found
1823 :
1824 113 : if (slash1 != string::npos && url1 == "/buildid")
1825 : {
1826 50 : size_t slash2 = url_copy.find('/', slash1+1);
1827 50 : if (slash2 == string::npos)
1828 0 : throw reportable_exception("/buildid/ webapi error, need buildid");
1829 :
1830 100 : string buildid = url_copy.substr(slash1+1, slash2-slash1-1);
1831 :
1832 50 : size_t slash3 = url_copy.find('/', slash2+1);
1833 107 : string artifacttype, suffix;
1834 50 : if (slash3 == string::npos)
1835 : {
1836 38 : artifacttype = url_copy.substr(slash2+1);
1837 38 : suffix = "";
1838 : }
1839 : else
1840 : {
1841 12 : artifacttype = url_copy.substr(slash2+1, slash3-slash2-1);
1842 12 : suffix = url_copy.substr(slash3); // include the slash in the suffix
1843 : }
1844 :
1845 50 : inc_metric("http_requests_total", "type", artifacttype);
1846 : // get the resulting fd so we can report its size
1847 : int fd;
1848 50 : r = handle_buildid(connection, buildid, artifacttype, suffix, &fd);
1849 43 : if (r)
1850 : {
1851 : struct stat fs;
1852 43 : if (fstat(fd, &fs) == 0)
1853 43 : http_size = fs.st_size;
1854 : // libmicrohttpd will close (fd);
1855 : }
1856 : }
1857 63 : else if (url1 == "/metrics")
1858 : {
1859 61 : inc_metric("http_requests_total", "type", "metrics");
1860 61 : r = handle_metrics(& http_size);
1861 : }
1862 : else
1863 2 : throw reportable_exception("webapi error, unrecognized /operation");
1864 :
1865 104 : if (r == 0)
1866 0 : throw reportable_exception("internal error, missing response");
1867 :
1868 104 : rc = MHD_queue_response (connection, MHD_HTTP_OK, r);
1869 104 : http_code = MHD_HTTP_OK;
1870 104 : MHD_destroy_response (r);
1871 : }
1872 9 : catch (const reportable_exception& e)
1873 : {
1874 9 : inc_metric("http_responses_total","result","error");
1875 9 : e.report(clog);
1876 9 : http_code = e.code;
1877 9 : http_size = e.message.size();
1878 9 : rc = e.mhd_send_response (connection);
1879 : }
1880 :
1881 113 : gettimeofday (&tv_end, NULL);
1882 113 : double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001;
1883 226 : obatched(clog) << conninfo(connection)
1884 : << ' ' << method << ' ' << url
1885 113 : << ' ' << http_code << ' ' << http_size
1886 113 : << ' ' << (int)(deltas*1000) << "ms"
1887 113 : << endl;
1888 :
1889 : // related prometheus metrics
1890 113 : string http_code_str = to_string(http_code);
1891 113 : if (http_size >= 0)
1892 113 : add_metric("http_responses_transfer_bytes_sum","code",http_code_str,
1893 : http_size);
1894 113 : inc_metric("http_responses_transfer_bytes_count","code",http_code_str);
1895 :
1896 113 : add_metric("http_responses_duration_milliseconds_sum","code",http_code_str,
1897 113 : deltas*1000); // prometheus prefers _seconds and floating point
1898 113 : inc_metric("http_responses_duration_milliseconds_count","code",http_code_str);
1899 :
1900 226 : return rc;
1901 : }
1902 :
1903 :
1904 : ////////////////////////////////////////////////////////////////////////
1905 : // borrowed originally from src/nm.c get_local_names()
1906 :
1907 : static void
1908 20 : dwarf_extract_source_paths (Elf *elf, set<string>& debug_sourcefiles)
1909 : noexcept // no exceptions - so we can simplify the altdbg resource release at end
1910 : {
1911 20 : Dwarf* dbg = dwarf_begin_elf (elf, DWARF_C_READ, NULL);
1912 20 : if (dbg == NULL)
1913 0 : return;
1914 :
1915 20 : Dwarf* altdbg = NULL;
1916 20 : int altdbg_fd = -1;
1917 :
1918 : // DWZ handling: if we have an unsatisfied debug-alt-link, add an
1919 : // empty string into the outgoing sourcefiles set, so the caller
1920 : // should know that our data is incomplete.
1921 : const char *alt_name_p;
1922 : const void *alt_build_id; // elfutils-owned memory
1923 20 : ssize_t sz = dwelf_dwarf_gnu_debugaltlink (dbg, &alt_name_p, &alt_build_id);
1924 20 : if (sz > 0) // got one!
1925 : {
1926 16 : string buildid;
1927 8 : unsigned char* build_id_bytes = (unsigned char*) alt_build_id;
1928 168 : for (ssize_t idx=0; idx<sz; idx++)
1929 : {
1930 160 : buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
1931 160 : buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
1932 : }
1933 :
1934 8 : if (verbose > 3)
1935 0 : obatched(clog) << "Need altdebug buildid=" << buildid << endl;
1936 :
1937 : // but is it unsatisfied the normal elfutils ways?
1938 8 : Dwarf* alt = dwarf_getalt (dbg);
1939 8 : if (alt == NULL)
1940 : {
1941 : // Yup, unsatisfied the normal way. Maybe we can satisfy it
1942 : // from our own debuginfod database.
1943 : int alt_fd;
1944 8 : struct MHD_Response *r = 0;
1945 : try
1946 : {
1947 8 : r = handle_buildid (0, buildid, "debuginfo", "", &alt_fd);
1948 : }
1949 0 : catch (const reportable_exception& e)
1950 : {
1951 : // swallow exceptions
1952 : }
1953 :
1954 : // NB: this is not actually recursive! This invokes the web-query
1955 : // path, which cannot get back into the scan code paths.
1956 8 : if (r)
1957 : {
1958 : // Found it!
1959 8 : altdbg_fd = dup(alt_fd); // ok if this fails, downstream failures ok
1960 8 : alt = altdbg = dwarf_begin (altdbg_fd, DWARF_C_READ);
1961 : // NB: must close this dwarf and this fd at the bottom of the function!
1962 8 : MHD_destroy_response (r); // will close alt_fd
1963 8 : if (alt)
1964 8 : dwarf_setalt (dbg, alt);
1965 : }
1966 : }
1967 : else
1968 : {
1969 : // NB: dwarf_setalt(alt) inappropriate - already done!
1970 : // NB: altdbg will stay 0 so nothing tries to redundantly dealloc.
1971 : }
1972 :
1973 8 : if (alt)
1974 : {
1975 8 : if (verbose > 3)
1976 0 : obatched(clog) << "Resolved altdebug buildid=" << buildid << endl;
1977 : }
1978 : else // (alt == NULL) - signal possible presence of poor debuginfo
1979 : {
1980 0 : debug_sourcefiles.insert("");
1981 0 : if (verbose > 3)
1982 0 : obatched(clog) << "Unresolved altdebug buildid=" << buildid << endl;
1983 : }
1984 : }
1985 :
1986 20 : Dwarf_Off offset = 0;
1987 : Dwarf_Off old_offset;
1988 : size_t hsize;
1989 :
1990 344 : while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, NULL) == 0)
1991 : {
1992 : Dwarf_Die cudie_mem;
1993 324 : Dwarf_Die *cudie = dwarf_offdie (dbg, old_offset + hsize, &cudie_mem);
1994 :
1995 324 : if (cudie == NULL)
1996 4 : continue;
1997 324 : if (dwarf_tag (cudie) != DW_TAG_compile_unit)
1998 4 : continue;
1999 :
2000 320 : const char *cuname = dwarf_diename(cudie) ?: "unknown";
2001 :
2002 : Dwarf_Files *files;
2003 : size_t nfiles;
2004 320 : if (dwarf_getsrcfiles (cudie, &files, &nfiles) != 0)
2005 0 : continue;
2006 :
2007 : // extract DW_AT_comp_dir to resolve relative file names
2008 320 : const char *comp_dir = "";
2009 : const char *const *dirs;
2010 : size_t ndirs;
2011 640 : if (dwarf_getsrcdirs (files, &dirs, &ndirs) == 0 &&
2012 320 : dirs[0] != NULL)
2013 320 : comp_dir = dirs[0];
2014 320 : if (comp_dir == NULL)
2015 0 : comp_dir = "";
2016 :
2017 320 : if (verbose > 3)
2018 0 : obatched(clog) << "searching for sources for cu=" << cuname << " comp_dir=" << comp_dir
2019 0 : << " #files=" << nfiles << " #dirs=" << ndirs << endl;
2020 :
2021 320 : if (comp_dir[0] == '\0' && cuname[0] != '/')
2022 : {
2023 : // This is a common symptom for dwz-compressed debug files,
2024 : // where the altdebug file cannot be resolved.
2025 0 : if (verbose > 3)
2026 0 : obatched(clog) << "skipping cu=" << cuname << " due to empty comp_dir" << endl;
2027 0 : continue;
2028 : }
2029 :
2030 6182 : for (size_t f = 1; f < nfiles; f++)
2031 : {
2032 5862 : const char *hat = dwarf_filesrc (files, f, NULL, NULL);
2033 5862 : if (hat == NULL)
2034 0 : continue;
2035 :
2036 5862 : if (string(hat) == "<built-in>") // gcc intrinsics, don't bother record
2037 0 : continue;
2038 :
2039 5862 : string waldo;
2040 5862 : if (hat[0] == '/') // absolute
2041 4053 : waldo = (string (hat));
2042 1809 : else if (comp_dir[0] != '\0') // comp_dir relative
2043 1809 : waldo = (string (comp_dir) + string("/") + string (hat));
2044 : else
2045 : {
2046 0 : if (verbose > 3)
2047 0 : obatched(clog) << "skipping hat=" << hat << " due to empty comp_dir" << endl;
2048 0 : continue;
2049 : }
2050 :
2051 : // NB: this is the 'waldo' that a dbginfo client will have
2052 : // to supply for us to give them the file The comp_dir
2053 : // prefixing is a definite complication. Otherwise we'd
2054 : // have to return a setof comp_dirs (one per CU!) with
2055 : // corresponding filesrc[] names, instead of one absolute
2056 : // resoved set. Maybe we'll have to do that anyway. XXX
2057 :
2058 5862 : if (verbose > 4)
2059 0 : obatched(clog) << waldo
2060 0 : << (debug_sourcefiles.find(waldo)==debug_sourcefiles.end() ? " new" : " dup") << endl;
2061 :
2062 5862 : debug_sourcefiles.insert (waldo);
2063 : }
2064 : }
2065 :
2066 20 : dwarf_end(dbg);
2067 20 : if (altdbg)
2068 8 : dwarf_end(altdbg);
2069 20 : if (altdbg_fd >= 0)
2070 8 : close(altdbg_fd);
2071 : }
2072 :
2073 :
2074 :
2075 : static void
2076 93 : elf_classify (int fd, bool &executable_p, bool &debuginfo_p, string &buildid, set<string>& debug_sourcefiles)
2077 : {
2078 93 : Elf *elf = elf_begin (fd, ELF_C_READ_MMAP_PRIVATE, NULL);
2079 93 : if (elf == NULL)
2080 0 : return;
2081 :
2082 : try // catch our types of errors and clean up the Elf* object
2083 : {
2084 93 : if (elf_kind (elf) != ELF_K_ELF)
2085 : {
2086 58 : elf_end (elf);
2087 58 : return;
2088 : }
2089 :
2090 : GElf_Ehdr ehdr_storage;
2091 35 : GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
2092 35 : if (ehdr == NULL)
2093 : {
2094 0 : elf_end (elf);
2095 0 : return;
2096 : }
2097 35 : auto elf_type = ehdr->e_type;
2098 :
2099 : const void *build_id; // elfutils-owned memory
2100 35 : ssize_t sz = dwelf_elf_gnu_build_id (elf, & build_id);
2101 35 : if (sz <= 0)
2102 : {
2103 : // It's not a diagnostic-worthy error for an elf file to lack build-id.
2104 : // It might just be very old.
2105 0 : elf_end (elf);
2106 0 : return;
2107 : }
2108 :
2109 : // build_id is a raw byte array; convert to hexadecimal *lowercase*
2110 35 : unsigned char* build_id_bytes = (unsigned char*) build_id;
2111 733 : for (ssize_t idx=0; idx<sz; idx++)
2112 : {
2113 698 : buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
2114 699 : buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
2115 : }
2116 :
2117 : // now decide whether it's an executable - namely, any allocatable section has
2118 : // PROGBITS;
2119 35 : if (elf_type == ET_EXEC || elf_type == ET_DYN)
2120 : {
2121 : size_t shnum;
2122 30 : int rc = elf_getshdrnum (elf, &shnum);
2123 30 : if (rc < 0)
2124 0 : throw elfutils_exception(rc, "getshdrnum");
2125 :
2126 30 : executable_p = false;
2127 560 : for (size_t sc = 0; sc < shnum; sc++)
2128 : {
2129 546 : Elf_Scn *scn = elf_getscn (elf, sc);
2130 546 : if (scn == NULL)
2131 0 : continue;
2132 :
2133 : GElf_Shdr shdr_mem;
2134 546 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
2135 546 : if (shdr == NULL)
2136 0 : continue;
2137 :
2138 : // allocated (loadable / vm-addr-assigned) section with available content?
2139 546 : if ((shdr->sh_type == SHT_PROGBITS) && (shdr->sh_flags & SHF_ALLOC))
2140 : {
2141 16 : if (verbose > 4)
2142 0 : obatched(clog) << "executable due to SHF_ALLOC SHT_PROGBITS sc=" << sc << endl;
2143 16 : executable_p = true;
2144 16 : break; // no need to keep looking for others
2145 : }
2146 : } // iterate over sections
2147 : } // executable_p classification
2148 :
2149 : // now decide whether it's a debuginfo - namely, if it has any .debug* or .zdebug* sections
2150 : // logic mostly stolen from fweimer@redhat.com's elfclassify drafts
2151 : size_t shstrndx;
2152 35 : int rc = elf_getshdrstrndx (elf, &shstrndx);
2153 35 : if (rc < 0)
2154 0 : throw elfutils_exception(rc, "getshdrstrndx");
2155 :
2156 35 : Elf_Scn *scn = NULL;
2157 : while (true)
2158 : {
2159 879 : scn = elf_nextscn (elf, scn);
2160 879 : if (scn == NULL)
2161 35 : break;
2162 : GElf_Shdr shdr_storage;
2163 864 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
2164 863 : if (shdr == NULL)
2165 0 : break;
2166 863 : const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
2167 863 : if (section_name == NULL)
2168 0 : break;
2169 863 : if (strncmp(section_name, ".debug_line", 11) == 0 ||
2170 842 : strncmp(section_name, ".zdebug_line", 12) == 0)
2171 : {
2172 19 : debuginfo_p = true;
2173 19 : dwarf_extract_source_paths (elf, debug_sourcefiles);
2174 20 : break; // expecting only one .*debug_line, so no need to look for others
2175 : }
2176 844 : else if (strncmp(section_name, ".debug_", 7) == 0 ||
2177 789 : strncmp(section_name, ".zdebug_", 8) == 0)
2178 : {
2179 59 : debuginfo_p = true;
2180 : // NB: don't break; need to parse .debug_line for sources
2181 : }
2182 844 : }
2183 : }
2184 0 : catch (const reportable_exception& e)
2185 : {
2186 0 : e.report(clog);
2187 : }
2188 35 : elf_end (elf);
2189 : }
2190 :
2191 :
2192 : static void
2193 122 : scan_source_file (const string& rps, const stat_t& st,
2194 : sqlite_ps& ps_upsert_buildids,
2195 : sqlite_ps& ps_upsert_files,
2196 : sqlite_ps& ps_upsert_de,
2197 : sqlite_ps& ps_upsert_s,
2198 : sqlite_ps& ps_query,
2199 : sqlite_ps& ps_scan_done,
2200 : unsigned& fts_cached,
2201 : unsigned& fts_executable,
2202 : unsigned& fts_debuginfo,
2203 : unsigned& fts_sourcefiles)
2204 : {
2205 : /* See if we know of it already. */
2206 : int rc = ps_query
2207 122 : .reset()
2208 122 : .bind(1, rps)
2209 122 : .bind(2, st.st_mtime)
2210 122 : .step();
2211 122 : ps_query.reset();
2212 122 : if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
2213 : // no need to recheck a file/version we already know
2214 : // specifically, no need to elf-begin a file we already determined is non-elf
2215 : // (so is stored with buildid=NULL)
2216 : {
2217 84 : fts_cached++;
2218 84 : return;
2219 : }
2220 :
2221 38 : bool executable_p = false, debuginfo_p = false; // E and/or D
2222 76 : string buildid;
2223 76 : set<string> sourcefiles;
2224 :
2225 38 : int fd = open (rps.c_str(), O_RDONLY);
2226 : try
2227 : {
2228 38 : if (fd >= 0)
2229 37 : elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
2230 : else
2231 1 : throw libc_exception(errno, string("open ") + rps);
2232 37 : add_metric ("scanned_bytes_total","source","file",
2233 37 : st.st_size);
2234 37 : inc_metric ("scanned_files_total","source","file");
2235 : }
2236 : // NB: we catch exceptions here too, so that we can
2237 : // cache the corrupt-elf case (!executable_p &&
2238 : // !debuginfo_p) just below, just as if we had an
2239 : // EPERM error from open(2).
2240 2 : catch (const reportable_exception& e)
2241 : {
2242 1 : e.report(clog);
2243 : }
2244 :
2245 38 : if (fd >= 0)
2246 37 : close (fd);
2247 :
2248 : // register this file name in the interning table
2249 : ps_upsert_files
2250 38 : .reset()
2251 38 : .bind(1, rps)
2252 38 : .step_ok_done();
2253 :
2254 38 : if (buildid == "")
2255 : {
2256 : // no point storing an elf file without buildid
2257 34 : executable_p = false;
2258 34 : debuginfo_p = false;
2259 : }
2260 : else
2261 : {
2262 : // register this build-id in the interning table
2263 : ps_upsert_buildids
2264 4 : .reset()
2265 4 : .bind(1, buildid)
2266 4 : .step_ok_done();
2267 : }
2268 :
2269 38 : if (executable_p)
2270 3 : fts_executable ++;
2271 38 : if (debuginfo_p)
2272 3 : fts_debuginfo ++;
2273 38 : if (executable_p || debuginfo_p)
2274 : {
2275 : ps_upsert_de
2276 4 : .reset()
2277 4 : .bind(1, buildid)
2278 4 : .bind(2, debuginfo_p ? 1 : 0)
2279 4 : .bind(3, executable_p ? 1 : 0)
2280 4 : .bind(4, rps)
2281 4 : .bind(5, st.st_mtime)
2282 4 : .step_ok_done();
2283 : }
2284 38 : if (executable_p)
2285 3 : inc_metric("found_executable_total","source","files");
2286 38 : if (debuginfo_p)
2287 3 : inc_metric("found_debuginfo_total","source","files");
2288 :
2289 38 : if (sourcefiles.size() && buildid != "")
2290 : {
2291 3 : fts_sourcefiles += sourcefiles.size();
2292 :
2293 485 : for (auto&& dwarfsrc : sourcefiles)
2294 : {
2295 482 : char *srp = realpath(dwarfsrc.c_str(), NULL);
2296 482 : if (srp == NULL) // also if DWZ unresolved dwarfsrc=""
2297 6 : continue; // unresolvable files are not a serious problem
2298 : // throw libc_exception(errno, "fts/file realpath " + srcpath);
2299 476 : string srps = string(srp);
2300 476 : free (srp);
2301 :
2302 : struct stat sfs;
2303 476 : rc = stat(srps.c_str(), &sfs);
2304 476 : if (rc != 0)
2305 0 : continue;
2306 :
2307 476 : if (verbose > 2)
2308 0 : obatched(clog) << "recorded buildid=" << buildid << " file=" << srps
2309 0 : << " mtime=" << sfs.st_mtime
2310 0 : << " as source " << dwarfsrc << endl;
2311 :
2312 : ps_upsert_files
2313 476 : .reset()
2314 476 : .bind(1, srps)
2315 476 : .step_ok_done();
2316 :
2317 : // PR25548: store canonicalized dwarfsrc path
2318 476 : string dwarfsrc_canon = canon_pathname (dwarfsrc);
2319 476 : if (dwarfsrc_canon != dwarfsrc)
2320 : {
2321 84 : if (verbose > 3)
2322 0 : obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
2323 : }
2324 :
2325 : ps_upsert_files
2326 476 : .reset()
2327 476 : .bind(1, dwarfsrc_canon)
2328 476 : .step_ok_done();
2329 :
2330 : ps_upsert_s
2331 476 : .reset()
2332 476 : .bind(1, buildid)
2333 476 : .bind(2, dwarfsrc_canon)
2334 476 : .bind(3, srps)
2335 476 : .bind(4, sfs.st_mtime)
2336 476 : .step_ok_done();
2337 :
2338 476 : inc_metric("found_sourcerefs_total","source","files");
2339 : }
2340 : }
2341 :
2342 : ps_scan_done
2343 38 : .reset()
2344 38 : .bind(1, rps)
2345 38 : .bind(2, st.st_mtime)
2346 38 : .bind(3, st.st_size)
2347 38 : .step_ok_done();
2348 :
2349 38 : if (verbose > 2)
2350 0 : obatched(clog) << "recorded buildid=" << buildid << " file=" << rps
2351 0 : << " mtime=" << st.st_mtime << " atype="
2352 : << (executable_p ? "E" : "")
2353 0 : << (debuginfo_p ? "D" : "") << endl;
2354 : }
2355 :
2356 :
2357 :
2358 :
2359 :
2360 : // Analyze given archive file of given age; record buildids / exec/debuginfo-ness of its
2361 : // constituent files with given upsert statements.
2362 : static void
2363 29 : archive_classify (const string& rps, string& archive_extension,
2364 : sqlite_ps& ps_upsert_buildids, sqlite_ps& ps_upsert_files,
2365 : sqlite_ps& ps_upsert_de, sqlite_ps& ps_upsert_sref, sqlite_ps& ps_upsert_sdef,
2366 : time_t mtime,
2367 : unsigned& fts_executable, unsigned& fts_debuginfo, unsigned& fts_sref, unsigned& fts_sdef,
2368 : bool& fts_sref_complete_p)
2369 : {
2370 58 : string archive_decoder = "/dev/null";
2371 116 : for (auto&& arch : scan_archives)
2372 87 : if (string_endswith(rps, arch.first))
2373 : {
2374 29 : archive_extension = arch.first;
2375 29 : archive_decoder = arch.second;
2376 : }
2377 :
2378 : FILE* fp;
2379 : defer_dtor<FILE*,int>::dtor_fn dfn;
2380 29 : if (archive_decoder != "cat")
2381 : {
2382 3 : string popen_cmd = archive_decoder + " " + shell_escape(rps);
2383 1 : fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
2384 1 : dfn = pclose;
2385 1 : if (fp == NULL)
2386 0 : throw libc_exception (errno, string("popen ") + popen_cmd);
2387 : }
2388 : else
2389 : {
2390 28 : fp = fopen (rps.c_str(), "r");
2391 28 : dfn = fclose;
2392 28 : if (fp == NULL)
2393 1 : throw libc_exception (errno, string("fopen ") + rps);
2394 : }
2395 56 : defer_dtor<FILE*,int> fp_closer (fp, dfn);
2396 :
2397 : struct archive *a;
2398 28 : a = archive_read_new();
2399 28 : if (a == NULL)
2400 0 : throw archive_exception("cannot create archive reader");
2401 56 : defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
2402 :
2403 28 : int rc = archive_read_support_format_all(a);
2404 28 : if (rc != ARCHIVE_OK)
2405 0 : throw archive_exception(a, "cannot select all formats");
2406 28 : rc = archive_read_support_filter_all(a);
2407 27 : if (rc != ARCHIVE_OK)
2408 0 : throw archive_exception(a, "cannot select all filters");
2409 :
2410 27 : rc = archive_read_open_FILE (a, fp);
2411 28 : if (rc != ARCHIVE_OK)
2412 0 : throw archive_exception(a, "cannot open archive from pipe");
2413 :
2414 28 : if (verbose > 3)
2415 0 : obatched(clog) << "libarchive scanning " << rps << endl;
2416 :
2417 : while(1) // parse archive entries
2418 : {
2419 193 : if (interrupted)
2420 0 : break;
2421 :
2422 : try
2423 : {
2424 : struct archive_entry *e;
2425 193 : rc = archive_read_next_header (a, &e);
2426 193 : if (rc != ARCHIVE_OK)
2427 28 : break;
2428 :
2429 165 : if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
2430 109 : continue;
2431 :
2432 112 : string fn = canonicalized_archive_entry_pathname (e);
2433 :
2434 56 : if (verbose > 3)
2435 0 : obatched(clog) << "libarchive checking " << fn << endl;
2436 :
2437 : // extract this file to a temporary file
2438 56 : char* tmppath = NULL;
2439 56 : rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir.c_str());
2440 56 : if (rc < 0)
2441 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
2442 112 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
2443 56 : int fd = mkstemp (tmppath);
2444 56 : if (fd < 0)
2445 0 : throw libc_exception (errno, "cannot create temporary file");
2446 56 : unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd
2447 112 : defer_dtor<int,int> minifd_closer (fd, close);
2448 :
2449 56 : rc = archive_read_data_into_fd (a, fd);
2450 56 : if (rc != ARCHIVE_OK)
2451 0 : throw archive_exception(a, "cannot extract file");
2452 :
2453 : // finally ... time to run elf_classify on this bad boy and update the database
2454 56 : bool executable_p = false, debuginfo_p = false;
2455 112 : string buildid;
2456 112 : set<string> sourcefiles;
2457 56 : elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
2458 : // NB: might throw
2459 :
2460 56 : if (buildid != "") // intern buildid
2461 : {
2462 : ps_upsert_buildids
2463 31 : .reset()
2464 31 : .bind(1, buildid)
2465 31 : .step_ok_done();
2466 : }
2467 :
2468 : ps_upsert_files // register this rpm constituent file name in interning table
2469 56 : .reset()
2470 56 : .bind(1, fn)
2471 56 : .step_ok_done();
2472 :
2473 56 : if (sourcefiles.size() > 0) // sref records needed
2474 : {
2475 : // NB: we intern each source file once. Once raw, as it
2476 : // appears in the DWARF file list coming back from
2477 : // elf_classify() - because it'll end up in the
2478 : // _norm.artifactsrc column. We don't also put another
2479 : // version with a '.' at the front, even though that's
2480 : // how rpm/cpio packs names, because we hide that from
2481 : // the database for storage efficiency.
2482 :
2483 46 : for (auto&& s : sourcefiles)
2484 : {
2485 33 : if (s == "")
2486 : {
2487 0 : fts_sref_complete_p = false;
2488 0 : continue;
2489 : }
2490 :
2491 : // PR25548: store canonicalized source path
2492 33 : const string& dwarfsrc = s;
2493 33 : string dwarfsrc_canon = canon_pathname (dwarfsrc);
2494 33 : if (dwarfsrc_canon != dwarfsrc)
2495 : {
2496 0 : if (verbose > 3)
2497 0 : obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
2498 : }
2499 :
2500 : ps_upsert_files
2501 33 : .reset()
2502 33 : .bind(1, dwarfsrc_canon)
2503 33 : .step_ok_done();
2504 :
2505 : ps_upsert_sref
2506 33 : .reset()
2507 33 : .bind(1, buildid)
2508 33 : .bind(2, dwarfsrc_canon)
2509 33 : .step_ok_done();
2510 :
2511 33 : fts_sref ++;
2512 : }
2513 : }
2514 :
2515 56 : if (executable_p)
2516 13 : fts_executable ++;
2517 56 : if (debuginfo_p)
2518 18 : fts_debuginfo ++;
2519 :
2520 56 : if (executable_p || debuginfo_p)
2521 : {
2522 : ps_upsert_de
2523 31 : .reset()
2524 31 : .bind(1, buildid)
2525 31 : .bind(2, debuginfo_p ? 1 : 0)
2526 31 : .bind(3, executable_p ? 1 : 0)
2527 31 : .bind(4, rps)
2528 31 : .bind(5, mtime)
2529 31 : .bind(6, fn)
2530 31 : .step_ok_done();
2531 : }
2532 : else // potential source - sdef record
2533 : {
2534 25 : fts_sdef ++;
2535 : ps_upsert_sdef
2536 25 : .reset()
2537 25 : .bind(1, rps)
2538 25 : .bind(2, mtime)
2539 25 : .bind(3, fn)
2540 25 : .step_ok_done();
2541 : }
2542 :
2543 56 : if ((verbose > 2) && (executable_p || debuginfo_p))
2544 0 : obatched(clog) << "recorded buildid=" << buildid << " rpm=" << rps << " file=" << fn
2545 0 : << " mtime=" << mtime << " atype="
2546 : << (executable_p ? "E" : "")
2547 : << (debuginfo_p ? "D" : "")
2548 0 : << " sourcefiles=" << sourcefiles.size() << endl;
2549 :
2550 : }
2551 0 : catch (const reportable_exception& e)
2552 : {
2553 0 : e.report(clog);
2554 : }
2555 165 : }
2556 28 : }
2557 :
2558 :
2559 :
2560 : // scan for archive files such as .rpm
2561 : static void
2562 76 : scan_archive_file (const string& rps, const stat_t& st,
2563 : sqlite_ps& ps_upsert_buildids,
2564 : sqlite_ps& ps_upsert_files,
2565 : sqlite_ps& ps_upsert_de,
2566 : sqlite_ps& ps_upsert_sref,
2567 : sqlite_ps& ps_upsert_sdef,
2568 : sqlite_ps& ps_query,
2569 : sqlite_ps& ps_scan_done,
2570 : unsigned& fts_cached,
2571 : unsigned& fts_executable,
2572 : unsigned& fts_debuginfo,
2573 : unsigned& fts_sref,
2574 : unsigned& fts_sdef)
2575 : {
2576 : /* See if we know of it already. */
2577 : int rc = ps_query
2578 76 : .reset()
2579 76 : .bind(1, rps)
2580 76 : .bind(2, st.st_mtime)
2581 76 : .step();
2582 76 : ps_query.reset();
2583 76 : if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
2584 : // no need to recheck a file/version we already know
2585 : // specifically, no need to parse this archive again, since we already have
2586 : // it as a D or E or S record,
2587 : // (so is stored with buildid=NULL)
2588 : {
2589 47 : fts_cached ++;
2590 47 : return;
2591 : }
2592 :
2593 : // intern the archive file name
2594 : ps_upsert_files
2595 29 : .reset()
2596 29 : .bind(1, rps)
2597 29 : .step_ok_done();
2598 :
2599 : // extract the archive contents
2600 29 : unsigned my_fts_executable = 0, my_fts_debuginfo = 0, my_fts_sref = 0, my_fts_sdef = 0;
2601 29 : bool my_fts_sref_complete_p = true;
2602 : try
2603 : {
2604 30 : string archive_extension;
2605 29 : archive_classify (rps, archive_extension,
2606 : ps_upsert_buildids, ps_upsert_files,
2607 : ps_upsert_de, ps_upsert_sref, ps_upsert_sdef, // dalt
2608 29 : st.st_mtime,
2609 : my_fts_executable, my_fts_debuginfo, my_fts_sref, my_fts_sdef,
2610 : my_fts_sref_complete_p);
2611 28 : add_metric ("scanned_bytes_total","source",archive_extension + " archive",
2612 28 : st.st_size);
2613 28 : inc_metric ("scanned_files_total","source",archive_extension + " archive");
2614 28 : add_metric("found_debuginfo_total","source",archive_extension + " archive",
2615 : my_fts_debuginfo);
2616 28 : add_metric("found_executable_total","source",archive_extension + " archive",
2617 : my_fts_executable);
2618 28 : add_metric("found_sourcerefs_total","source",archive_extension + " archive",
2619 : my_fts_sref);
2620 : }
2621 2 : catch (const reportable_exception& e)
2622 : {
2623 1 : e.report(clog);
2624 : }
2625 :
2626 29 : if (verbose > 2)
2627 0 : obatched(clog) << "scanned archive=" << rps
2628 0 : << " mtime=" << st.st_mtime
2629 0 : << " executables=" << my_fts_executable
2630 0 : << " debuginfos=" << my_fts_debuginfo
2631 0 : << " srefs=" << my_fts_sref
2632 0 : << " sdefs=" << my_fts_sdef
2633 0 : << endl;
2634 :
2635 29 : fts_executable += my_fts_executable;
2636 29 : fts_debuginfo += my_fts_debuginfo;
2637 29 : fts_sref += my_fts_sref;
2638 29 : fts_sdef += my_fts_sdef;
2639 :
2640 29 : if (my_fts_sref_complete_p) // leave incomplete?
2641 : ps_scan_done
2642 29 : .reset()
2643 29 : .bind(1, rps)
2644 29 : .bind(2, st.st_mtime)
2645 29 : .bind(3, st.st_size)
2646 29 : .step_ok_done();
2647 : }
2648 :
2649 :
2650 :
2651 : ////////////////////////////////////////////////////////////////////////
2652 :
2653 :
2654 :
2655 : // The thread that consumes file names off of the scanq. We hold
2656 : // the persistent sqlite_ps's at this level and delegate file/archive
2657 : // scanning to other functions.
2658 : static void*
2659 12 : thread_main_scanner (void* arg)
2660 : {
2661 : (void) arg;
2662 :
2663 : // all the prepared statements fit to use, the _f_ set:
2664 36 : sqlite_ps ps_f_upsert_buildids (db, "file-buildids-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
2665 36 : sqlite_ps ps_f_upsert_files (db, "file-files-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);");
2666 : sqlite_ps ps_f_upsert_de (db, "file-de-upsert",
2667 : "insert or ignore into " BUILDIDS "_f_de "
2668 : "(buildid, debuginfo_p, executable_p, file, mtime) "
2669 : "values ((select id from " BUILDIDS "_buildids where hex = ?),"
2670 : " ?,?,"
2671 36 : " (select id from " BUILDIDS "_files where name = ?), ?);");
2672 : sqlite_ps ps_f_upsert_s (db, "file-s-upsert",
2673 : "insert or ignore into " BUILDIDS "_f_s "
2674 : "(buildid, artifactsrc, file, mtime) "
2675 : "values ((select id from " BUILDIDS "_buildids where hex = ?),"
2676 : " (select id from " BUILDIDS "_files where name = ?),"
2677 : " (select id from " BUILDIDS "_files where name = ?),"
2678 36 : " ?);");
2679 : sqlite_ps ps_f_query (db, "file-negativehit-find",
2680 : "select 1 from " BUILDIDS "_file_mtime_scanned where sourcetype = 'F' "
2681 36 : "and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;");
2682 : sqlite_ps ps_f_scan_done (db, "file-scanned",
2683 : "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
2684 36 : "values ('F', (select id from " BUILDIDS "_files where name = ?), ?, ?);");
2685 :
2686 : // and now for the _r_ set
2687 36 : sqlite_ps ps_r_upsert_buildids (db, "rpm-buildid-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
2688 36 : sqlite_ps ps_r_upsert_files (db, "rpm-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);");
2689 : sqlite_ps ps_r_upsert_de (db, "rpm-de-insert",
2690 : "insert or ignore into " BUILDIDS "_r_de (buildid, debuginfo_p, executable_p, file, mtime, content) values ("
2691 : "(select id from " BUILDIDS "_buildids where hex = ?), ?, ?, "
2692 : "(select id from " BUILDIDS "_files where name = ?), ?, "
2693 36 : "(select id from " BUILDIDS "_files where name = ?));");
2694 : sqlite_ps ps_r_upsert_sref (db, "rpm-sref-insert",
2695 : "insert or ignore into " BUILDIDS "_r_sref (buildid, artifactsrc) values ("
2696 : "(select id from " BUILDIDS "_buildids where hex = ?), "
2697 36 : "(select id from " BUILDIDS "_files where name = ?));");
2698 : sqlite_ps ps_r_upsert_sdef (db, "rpm-sdef-insert",
2699 : "insert or ignore into " BUILDIDS "_r_sdef (file, mtime, content) values ("
2700 : "(select id from " BUILDIDS "_files where name = ?), ?,"
2701 36 : "(select id from " BUILDIDS "_files where name = ?));");
2702 : sqlite_ps ps_r_query (db, "rpm-negativehit-query",
2703 : "select 1 from " BUILDIDS "_file_mtime_scanned where "
2704 36 : "sourcetype = 'R' and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;");
2705 : sqlite_ps ps_r_scan_done (db, "rpm-scanned",
2706 : "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
2707 24 : "values ('R', (select id from " BUILDIDS "_files where name = ?), ?, ?);");
2708 :
2709 :
2710 12 : unsigned fts_cached = 0, fts_executable = 0, fts_debuginfo = 0, fts_sourcefiles = 0;
2711 12 : unsigned fts_sref = 0, fts_sdef = 0;
2712 :
2713 12 : add_metric("thread_count", "role", "scan", 1);
2714 12 : add_metric("thread_busy", "role", "scan", 1);
2715 146 : while (! interrupted)
2716 : {
2717 134 : scan_payload p;
2718 :
2719 134 : add_metric("thread_busy", "role", "scan", -1);
2720 134 : bool gotone = scanq.wait_front(p);
2721 133 : add_metric("thread_busy", "role", "scan", 1);
2722 :
2723 134 : if (! gotone) continue; // go back to waiting
2724 :
2725 : try
2726 : {
2727 122 : bool scan_archive = false;
2728 487 : for (auto&& arch : scan_archives)
2729 365 : if (string_endswith(p.first, arch.first))
2730 76 : scan_archive = true;
2731 :
2732 122 : if (scan_archive)
2733 76 : scan_archive_file (p.first, p.second,
2734 : ps_r_upsert_buildids,
2735 : ps_r_upsert_files,
2736 : ps_r_upsert_de,
2737 : ps_r_upsert_sref,
2738 : ps_r_upsert_sdef,
2739 : ps_r_query,
2740 : ps_r_scan_done,
2741 : fts_cached,
2742 : fts_executable,
2743 : fts_debuginfo,
2744 : fts_sref,
2745 : fts_sdef);
2746 :
2747 122 : if (scan_files) // NB: maybe "else if" ?
2748 122 : scan_source_file (p.first, p.second,
2749 : ps_f_upsert_buildids,
2750 : ps_f_upsert_files,
2751 : ps_f_upsert_de,
2752 : ps_f_upsert_s,
2753 : ps_f_query,
2754 : ps_f_scan_done,
2755 : fts_cached, fts_executable, fts_debuginfo, fts_sourcefiles);
2756 : }
2757 0 : catch (const reportable_exception& e)
2758 : {
2759 0 : e.report(cerr);
2760 : }
2761 :
2762 : // finished a scanning step -- not a "loop", because we just
2763 : // consume the traversal loop's work, whenever
2764 122 : inc_metric("thread_work_total","role","scan");
2765 : }
2766 :
2767 12 : add_metric("thread_busy", "role", "scan", -1);
2768 24 : return 0;
2769 : }
2770 :
2771 :
2772 :
2773 : // The thread that traverses all the source_paths and enqueues all the
2774 : // matching files into the file/archive scan queue.
2775 : static void
2776 10 : scan_source_paths()
2777 : {
2778 : // NB: fedora 31 glibc/fts(3) crashes inside fts_read() on empty
2779 : // path list.
2780 10 : if (source_paths.empty())
2781 1 : return;
2782 :
2783 : // Turn the source_paths into an fts(3)-compatible char**. Since
2784 : // source_paths[] does not change after argv processing, the
2785 : // c_str()'s are safe to keep around awile.
2786 18 : vector<const char *> sps;
2787 43 : for (auto&& sp: source_paths)
2788 34 : sps.push_back(sp.c_str());
2789 9 : sps.push_back(NULL);
2790 :
2791 9 : FTS *fts = fts_open ((char * const *)sps.data(),
2792 : (traverse_logical ? FTS_LOGICAL : FTS_PHYSICAL|FTS_XDEV)
2793 : | FTS_NOCHDIR /* multithreaded */,
2794 : NULL);
2795 9 : if (fts == NULL)
2796 0 : throw libc_exception(errno, "cannot fts_open");
2797 9 : defer_dtor<FTS*,int> fts_cleanup (fts, fts_close);
2798 :
2799 : struct timeval tv_start, tv_end;
2800 9 : gettimeofday (&tv_start, NULL);
2801 9 : unsigned fts_scanned = 0, fts_regex = 0;
2802 :
2803 : FTSENT *f;
2804 267 : while ((f = fts_read (fts)) != NULL)
2805 : {
2806 258 : if (interrupted) break;
2807 :
2808 258 : if (sigusr2 != forced_groom_count) // stop early if groom triggered
2809 : {
2810 0 : scanq.clear(); // clear previously issued work for scanner threads
2811 0 : break;
2812 : }
2813 :
2814 258 : fts_scanned ++;
2815 :
2816 258 : if (verbose > 2)
2817 0 : obatched(clog) << "fts traversing " << f->fts_path << endl;
2818 :
2819 258 : switch (f->fts_info)
2820 : {
2821 122 : case FTS_F:
2822 : {
2823 : /* Found a file. Convert it to an absolute path, so
2824 : the buildid database does not have relative path
2825 : names that are unresolvable from a subsequent run
2826 : in a different cwd. */
2827 122 : char *rp = realpath(f->fts_path, NULL);
2828 122 : if (rp == NULL)
2829 0 : continue; // ignore dangling symlink or such
2830 244 : string rps = string(rp);
2831 122 : free (rp);
2832 :
2833 122 : bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0);
2834 122 : bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0);
2835 122 : if (!ri || rx)
2836 : {
2837 0 : if (verbose > 3)
2838 0 : obatched(clog) << "fts skipped by regex "
2839 0 : << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
2840 0 : fts_regex ++;
2841 0 : if (!ri)
2842 0 : inc_metric("traversed_total","type","file-skipped-I");
2843 0 : if (rx)
2844 0 : inc_metric("traversed_total","type","file-skipped-X");
2845 : }
2846 : else
2847 : {
2848 122 : scanq.push_back (make_pair(rps, *f->fts_statp));
2849 122 : inc_metric("traversed_total","type","file");
2850 122 : }
2851 : }
2852 122 : break;
2853 :
2854 0 : case FTS_ERR:
2855 : case FTS_NS:
2856 : // report on some types of errors because they may reflect fixable misconfiguration
2857 : {
2858 0 : auto x = libc_exception(f->fts_errno, string("fts traversal ") + string(f->fts_path));
2859 0 : x.report(cerr);
2860 : }
2861 0 : inc_metric("traversed_total","type","error");
2862 0 : break;
2863 :
2864 8 : case FTS_SL: // ignore, but count because debuginfod -L would traverse these
2865 8 : inc_metric("traversed_total","type","symlink");
2866 8 : break;
2867 :
2868 64 : case FTS_D: // ignore
2869 64 : inc_metric("traversed_total","type","directory");
2870 64 : break;
2871 :
2872 64 : default: // ignore
2873 64 : inc_metric("traversed_total","type","other");
2874 64 : break;
2875 : }
2876 : }
2877 9 : gettimeofday (&tv_end, NULL);
2878 9 : double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001;
2879 :
2880 18 : obatched(clog) << "fts traversed source paths in " << deltas << "s, scanned=" << fts_scanned
2881 9 : << ", regex-skipped=" << fts_regex << endl;
2882 : }
2883 :
2884 :
2885 : static void*
2886 3 : thread_main_fts_source_paths (void* arg)
2887 : {
2888 : (void) arg; // ignore; we operate on global data
2889 :
2890 3 : set_metric("thread_tid", "role","traverse", tid());
2891 3 : add_metric("thread_count", "role", "traverse", 1);
2892 :
2893 3 : time_t last_rescan = 0;
2894 :
2895 18 : while (! interrupted)
2896 : {
2897 18 : sleep (1);
2898 18 : scanq.wait_idle(); // don't start a new traversal while scanners haven't finished the job
2899 18 : scanq.done_idle(); // release the hounds
2900 18 : if (interrupted) break;
2901 :
2902 15 : time_t now = time(NULL);
2903 15 : bool rescan_now = false;
2904 15 : if (last_rescan == 0) // at least one initial rescan is documented even for -t0
2905 3 : rescan_now = true;
2906 15 : if (rescan_s > 0 && (long)now > (long)(last_rescan + rescan_s))
2907 2 : rescan_now = true;
2908 15 : if (sigusr1 != forced_rescan_count)
2909 : {
2910 7 : forced_rescan_count = sigusr1;
2911 7 : rescan_now = true;
2912 : }
2913 15 : if (rescan_now)
2914 : try
2915 : {
2916 10 : set_metric("thread_busy", "role","traverse", 1);
2917 10 : scan_source_paths();
2918 10 : last_rescan = time(NULL); // NB: now was before scanning
2919 : // finished a traversal loop
2920 10 : inc_metric("thread_work_total", "role","traverse");
2921 10 : set_metric("thread_busy", "role","traverse", 0);
2922 : }
2923 0 : catch (const reportable_exception& e)
2924 : {
2925 0 : e.report(cerr);
2926 : }
2927 : }
2928 :
2929 3 : return 0;
2930 : }
2931 :
2932 :
2933 :
2934 : ////////////////////////////////////////////////////////////////////////
2935 :
2936 : static void
2937 10 : database_stats_report()
2938 : {
2939 : sqlite_ps ps_query (db, "database-overview",
2940 30 : "select label,quantity from " BUILDIDS "_stats");
2941 :
2942 10 : obatched(clog) << "database record counts:" << endl;
2943 : while (1)
2944 : {
2945 110 : int rc = sqlite3_step (ps_query);
2946 110 : if (rc == SQLITE_DONE) break;
2947 100 : if (rc != SQLITE_ROW)
2948 0 : throw sqlite_exception(rc, "step");
2949 :
2950 200 : obatched(clog)
2951 200 : << right << setw(20) << ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL")
2952 : << " "
2953 200 : << (sqlite3_column_text(ps_query, 1) ?: (const unsigned char*) "NULL")
2954 100 : << endl;
2955 :
2956 100 : set_metric("groom", "statistic",
2957 100 : ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL"),
2958 100 : (sqlite3_column_double(ps_query, 1)));
2959 100 : }
2960 10 : }
2961 :
2962 :
2963 : // Do a round of database grooming that might take many minutes to run.
2964 5 : void groom()
2965 : {
2966 5 : obatched(clog) << "grooming database" << endl;
2967 :
2968 : struct timeval tv_start, tv_end;
2969 5 : gettimeofday (&tv_start, NULL);
2970 :
2971 5 : database_stats_report();
2972 :
2973 : // scan for files that have disappeared
2974 : sqlite_ps files (db, "check old files", "select s.mtime, s.file, f.name from "
2975 : BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f "
2976 15 : "where f.id = s.file");
2977 15 : sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
2978 15 : sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
2979 : sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
2980 15 : "where file = ? and mtime = ?");
2981 5 : files.reset();
2982 : while(1)
2983 : {
2984 77 : int rc = files.step();
2985 77 : if (rc != SQLITE_ROW)
2986 5 : break;
2987 :
2988 72 : int64_t mtime = sqlite3_column_int64 (files, 0);
2989 72 : int64_t fileid = sqlite3_column_int64 (files, 1);
2990 72 : const char* filename = ((const char*) sqlite3_column_text (files, 2) ?: "");
2991 : struct stat s;
2992 72 : rc = stat(filename, &s);
2993 72 : if (rc < 0 || (mtime != (int64_t) s.st_mtime))
2994 : {
2995 4 : if (verbose > 2)
2996 0 : obatched(clog) << "groom: forgetting file=" << filename << " mtime=" << mtime << endl;
2997 4 : files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
2998 4 : files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
2999 4 : files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
3000 4 : inc_metric("groomed_total", "decision", "stale");
3001 : }
3002 : else
3003 68 : inc_metric("groomed_total", "decision", "fresh");
3004 :
3005 72 : if (sigusr1 != forced_rescan_count) // stop early if scan triggered
3006 0 : break;
3007 72 : }
3008 5 : files.reset();
3009 :
3010 : // delete buildids with no references in _r_de or _f_de tables;
3011 : // cascades to _r_sref & _f_s records
3012 : sqlite_ps buildids_del (db, "nuke orphan buildids",
3013 : "delete from " BUILDIDS "_buildids "
3014 : "where not exists (select 1 from " BUILDIDS "_f_de d where " BUILDIDS "_buildids.id = d.buildid) "
3015 15 : "and not exists (select 1 from " BUILDIDS "_r_de d where " BUILDIDS "_buildids.id = d.buildid)");
3016 5 : buildids_del.reset().step_ok_done();
3017 :
3018 : // NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G
3019 15 : sqlite_ps g1 (db, "incremental vacuum", "pragma incremental_vacuum");
3020 5 : g1.reset().step_ok_done();
3021 15 : sqlite_ps g2 (db, "optimize", "pragma optimize");
3022 5 : g2.reset().step_ok_done();
3023 10 : sqlite_ps g3 (db, "wal checkpoint", "pragma wal_checkpoint=truncate");
3024 5 : g3.reset().step_ok_done();
3025 :
3026 5 : database_stats_report();
3027 :
3028 5 : sqlite3_db_release_memory(db); // shrink the process if possible
3029 :
3030 5 : fdcache.limit(0,0); // release the fdcache contents
3031 5 : fdcache.limit(fdcache_fds,fdcache_mbs); // restore status quo parameters
3032 :
3033 5 : gettimeofday (&tv_end, NULL);
3034 5 : double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001;
3035 :
3036 5 : obatched(clog) << "groomed database in " << deltas << "s" << endl;
3037 5 : }
3038 :
3039 :
3040 : static void*
3041 3 : thread_main_groom (void* /*arg*/)
3042 : {
3043 3 : set_metric("thread_tid", "role", "groom", tid());
3044 3 : add_metric("thread_count", "role", "groom", 1);
3045 :
3046 3 : time_t last_groom = 0;
3047 :
3048 : while (1)
3049 : {
3050 18 : sleep (1);
3051 18 : scanq.wait_idle(); // PR25394: block scanners during grooming!
3052 18 : if (interrupted) break;
3053 :
3054 15 : time_t now = time(NULL);
3055 15 : bool groom_now = false;
3056 15 : if (last_groom == 0) // at least one initial groom is documented even for -g0
3057 3 : groom_now = true;
3058 15 : if (groom_s > 0 && (long)now > (long)(last_groom + groom_s))
3059 2 : groom_now = true;
3060 15 : if (sigusr2 != forced_groom_count)
3061 : {
3062 2 : forced_groom_count = sigusr2;
3063 2 : groom_now = true;
3064 : }
3065 15 : if (groom_now)
3066 : try
3067 : {
3068 5 : set_metric("thread_busy", "role", "groom", 1);
3069 5 : groom ();
3070 5 : last_groom = time(NULL); // NB: now was before grooming
3071 : // finished a grooming loop
3072 5 : inc_metric("thread_work_total", "role", "groom");
3073 5 : set_metric("thread_busy", "role", "groom", 0);
3074 : }
3075 0 : catch (const sqlite_exception& e)
3076 : {
3077 0 : obatched(cerr) << e.message << endl;
3078 : }
3079 :
3080 15 : scanq.done_idle();
3081 15 : }
3082 :
3083 3 : return 0;
3084 : }
3085 :
3086 :
3087 : ////////////////////////////////////////////////////////////////////////
3088 :
3089 :
3090 : static void
3091 3 : signal_handler (int /* sig */)
3092 : {
3093 3 : interrupted ++;
3094 :
3095 3 : if (db)
3096 3 : sqlite3_interrupt (db);
3097 :
3098 : // NB: don't do anything else in here
3099 3 : }
3100 :
3101 : static void
3102 7 : sigusr1_handler (int /* sig */)
3103 : {
3104 7 : sigusr1 ++;
3105 : // NB: don't do anything else in here
3106 7 : }
3107 :
3108 : static void
3109 2 : sigusr2_handler (int /* sig */)
3110 : {
3111 2 : sigusr2 ++;
3112 : // NB: don't do anything else in here
3113 2 : }
3114 :
3115 :
3116 :
3117 :
3118 :
3119 : // A user-defined sqlite function, to score the sharedness of the
3120 : // prefix of two strings. This is used to compare candidate debuginfo
3121 : // / source-rpm names, so that the closest match
3122 : // (directory-topology-wise closest) is found. This is important in
3123 : // case the same sref (source file name) is in many -debuginfo or
3124 : // -debugsource RPMs, such as when multiple versions/releases of the
3125 : // same package are in the database.
3126 :
3127 105 : static void sqlite3_sharedprefix_fn (sqlite3_context* c, int argc, sqlite3_value** argv)
3128 : {
3129 105 : if (argc != 2)
3130 0 : sqlite3_result_error(c, "expect 2 string arguments", -1);
3131 210 : else if ((sqlite3_value_type(argv[0]) != SQLITE_TEXT) ||
3132 105 : (sqlite3_value_type(argv[1]) != SQLITE_TEXT))
3133 3 : sqlite3_result_null(c);
3134 : else
3135 : {
3136 102 : const unsigned char* a = sqlite3_value_text (argv[0]);
3137 102 : const unsigned char* b = sqlite3_value_text (argv[1]);
3138 102 : int i = 0;
3139 8041 : while (*a++ == *b++)
3140 7939 : i++;
3141 102 : sqlite3_result_int (c, i);
3142 : }
3143 105 : }
3144 :
3145 :
3146 : int
3147 3 : main (int argc, char *argv[])
3148 : {
3149 3 : (void) setlocale (LC_ALL, "");
3150 3 : (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
3151 3 : (void) textdomain (PACKAGE_TARNAME);
3152 :
3153 : /* Tell the library which version we are expecting. */
3154 3 : elf_version (EV_CURRENT);
3155 :
3156 3 : tmpdir = string(getenv("TMPDIR") ?: "/tmp");
3157 :
3158 : /* Set computed default values. */
3159 3 : db_path = string(getenv("HOME") ?: "/") + string("/.debuginfod.sqlite"); /* XDG? */
3160 3 : int rc = regcomp (& file_include_regex, ".*", REG_EXTENDED|REG_NOSUB); // match everything
3161 3 : if (rc != 0)
3162 : error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
3163 3 : rc = regcomp (& file_exclude_regex, "^$", REG_EXTENDED|REG_NOSUB); // match nothing
3164 3 : if (rc != 0)
3165 : error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
3166 :
3167 : // default parameters for fdcache are computed from system stats
3168 : struct statfs sfs;
3169 3 : rc = statfs(tmpdir.c_str(), &sfs);
3170 3 : if (rc < 0)
3171 0 : fdcache_mbs = 1024; // 1 gigabyte
3172 : else
3173 3 : fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
3174 3 : fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
3175 3 : fdcache_fds = (concurrency + fdcache_prefetch) * 2;
3176 :
3177 : /* Parse and process arguments. */
3178 : int remaining;
3179 3 : argp_program_version_hook = print_version; // this works
3180 3 : (void) argp_parse (&argp, argc, argv, ARGP_IN_ORDER, &remaining, NULL);
3181 3 : if (remaining != argc)
3182 0 : error (EXIT_FAILURE, 0,
3183 0 : "unexpected argument: %s", argv[remaining]);
3184 :
3185 3 : if (scan_archives.size()==0 && !scan_files && source_paths.size()>0)
3186 0 : obatched(clog) << "warning: without -F -R -U -Z, ignoring PATHs" << endl;
3187 :
3188 3 : fdcache.limit(fdcache_fds, fdcache_mbs);
3189 :
3190 3 : (void) signal (SIGPIPE, SIG_IGN); // microhttpd can generate it incidentally, ignore
3191 3 : (void) signal (SIGINT, signal_handler); // ^C
3192 3 : (void) signal (SIGHUP, signal_handler); // EOF
3193 3 : (void) signal (SIGTERM, signal_handler); // systemd
3194 3 : (void) signal (SIGUSR1, sigusr1_handler); // end-user
3195 3 : (void) signal (SIGUSR2, sigusr2_handler); // end-user
3196 :
3197 : /* Get database ready. */
3198 3 : rc = sqlite3_open_v2 (db_path.c_str(), &db, (SQLITE_OPEN_READWRITE
3199 : |SQLITE_OPEN_CREATE
3200 : |SQLITE_OPEN_FULLMUTEX), /* thread-safe */
3201 : NULL);
3202 3 : if (rc == SQLITE_CORRUPT)
3203 : {
3204 0 : (void) unlink (db_path.c_str());
3205 0 : error (EXIT_FAILURE, 0,
3206 : "cannot open %s, deleted database: %s", db_path.c_str(), sqlite3_errmsg(db));
3207 : }
3208 3 : else if (rc)
3209 : {
3210 0 : error (EXIT_FAILURE, 0,
3211 : "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(db));
3212 : }
3213 :
3214 3 : obatched(clog) << "opened database " << db_path << endl;
3215 3 : obatched(clog) << "sqlite version " << sqlite3_version << endl;
3216 :
3217 : // add special string-prefix-similarity function used in rpm sref/sdef resolution
3218 3 : rc = sqlite3_create_function(db, "sharedprefix", 2, SQLITE_UTF8, NULL,
3219 : & sqlite3_sharedprefix_fn, NULL, NULL);
3220 3 : if (rc != SQLITE_OK)
3221 0 : error (EXIT_FAILURE, 0,
3222 : "cannot create sharedprefix( function: %s", sqlite3_errmsg(db));
3223 :
3224 3 : if (verbose > 3)
3225 0 : obatched(clog) << "ddl: " << DEBUGINFOD_SQLITE_DDL << endl;
3226 3 : rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_DDL, NULL, NULL, NULL);
3227 3 : if (rc != SQLITE_OK)
3228 : {
3229 0 : error (EXIT_FAILURE, 0,
3230 : "cannot run database schema ddl: %s", sqlite3_errmsg(db));
3231 : }
3232 :
3233 : // Start httpd server threads. Separate pool for IPv4 and IPv6, in
3234 : // case the host only has one protocol stack.
3235 3 : MHD_Daemon *d4 = MHD_start_daemon (MHD_USE_THREAD_PER_CONNECTION
3236 : #if MHD_VERSION >= 0x00095300
3237 : | MHD_USE_INTERNAL_POLLING_THREAD
3238 : #else
3239 : | MHD_USE_SELECT_INTERNALLY
3240 : #endif
3241 : | MHD_USE_DEBUG, /* report errors to stderr */
3242 : http_port,
3243 : NULL, NULL, /* default accept policy */
3244 : handler_cb, NULL, /* handler callback */
3245 : MHD_OPTION_END);
3246 3 : MHD_Daemon *d6 = MHD_start_daemon (MHD_USE_THREAD_PER_CONNECTION
3247 : #if MHD_VERSION >= 0x00095300
3248 : | MHD_USE_INTERNAL_POLLING_THREAD
3249 : #else
3250 : | MHD_USE_SELECT_INTERNALLY
3251 : #endif
3252 : | MHD_USE_IPv6
3253 : | MHD_USE_DEBUG, /* report errors to stderr */
3254 : http_port,
3255 : NULL, NULL, /* default accept policy */
3256 : handler_cb, NULL, /* handler callback */
3257 : MHD_OPTION_END);
3258 :
3259 3 : if (d4 == NULL && d6 == NULL) // neither ipv4 nor ipv6? boo
3260 : {
3261 0 : sqlite3 *database = db;
3262 0 : db = 0; // for signal_handler not to freak
3263 0 : sqlite3_close (database);
3264 0 : error (EXIT_FAILURE, 0, "cannot start http server at port %d", http_port);
3265 : }
3266 :
3267 6 : obatched(clog) << "started http server on "
3268 : << (d4 != NULL ? "IPv4 " : "")
3269 : << (d6 != NULL ? "IPv6 " : "")
3270 3 : << "port=" << http_port << endl;
3271 :
3272 : // add maxigroom sql if -G given
3273 3 : if (maxigroom)
3274 : {
3275 0 : obatched(clog) << "maxigrooming database, please wait." << endl;
3276 0 : extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);");
3277 0 : extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);");
3278 0 : extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;");
3279 :
3280 : // NB: we don't maxigroom the _files interning table. It'd require a temp index on all the
3281 : // tables that have file foreign-keys, which is a lot.
3282 :
3283 : // NB: with =delete, may take up 3x disk space total during vacuum process
3284 : // vs. =off (only 2x but may corrupt database if program dies mid-vacuum)
3285 : // vs. =wal (>3x observed, but safe)
3286 0 : extra_ddl.push_back("pragma journal_mode=delete;");
3287 0 : extra_ddl.push_back("vacuum;");
3288 0 : extra_ddl.push_back("pragma journal_mode=wal;");
3289 : }
3290 :
3291 : // run extra -D sql if given
3292 3 : for (auto&& i: extra_ddl)
3293 : {
3294 0 : if (verbose > 1)
3295 0 : obatched(clog) << "extra ddl:\n" << i << endl;
3296 0 : rc = sqlite3_exec (db, i.c_str(), NULL, NULL, NULL);
3297 0 : if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
3298 0 : error (0, 0,
3299 : "warning: cannot run database extra ddl %s: %s", i.c_str(), sqlite3_errmsg(db));
3300 : }
3301 :
3302 3 : if (maxigroom)
3303 0 : obatched(clog) << "maxigroomed database" << endl;
3304 :
3305 3 : obatched(clog) << "search concurrency " << concurrency << endl;
3306 3 : obatched(clog) << "rescan time " << rescan_s << endl;
3307 3 : obatched(clog) << "fdcache fds " << fdcache_fds << endl;
3308 3 : obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
3309 3 : obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
3310 3 : obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
3311 3 : obatched(clog) << "groom time " << groom_s << endl;
3312 3 : if (scan_archives.size()>0)
3313 : {
3314 6 : obatched ob(clog);
3315 3 : auto& o = ob << "scanning archive types ";
3316 10 : for (auto&& arch : scan_archives)
3317 7 : o << arch.first << "(" << arch.second << ") ";
3318 3 : o << endl;
3319 : }
3320 3 : const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR);
3321 3 : if (du && du[0] != '\0') // set to non-empty string?
3322 2 : obatched(clog) << "upstream debuginfod servers: " << du << endl;
3323 :
3324 3 : vector<pthread_t> all_threads;
3325 :
3326 : pthread_t pt;
3327 3 : rc = pthread_create (& pt, NULL, thread_main_groom, NULL);
3328 3 : if (rc < 0)
3329 : error (0, 0, "warning: cannot spawn thread (%d) to groom database\n", rc);
3330 : else
3331 3 : all_threads.push_back(pt);
3332 :
3333 3 : if (scan_files || scan_archives.size() > 0)
3334 : {
3335 3 : pthread_create (& pt, NULL, thread_main_fts_source_paths, NULL);
3336 3 : if (rc < 0)
3337 : error (0, 0, "warning: cannot spawn thread (%d) to traverse source paths\n", rc);
3338 3 : all_threads.push_back(pt);
3339 15 : for (unsigned i=0; i<concurrency; i++)
3340 : {
3341 12 : pthread_create (& pt, NULL, thread_main_scanner, NULL);
3342 12 : if (rc < 0)
3343 : error (0, 0, "warning: cannot spawn thread (%d) to scan source files / archives\n", rc);
3344 12 : all_threads.push_back(pt);
3345 : }
3346 : }
3347 :
3348 : /* Trivial main loop! */
3349 3 : set_metric("ready", 1);
3350 15 : while (! interrupted)
3351 12 : pause ();
3352 3 : scanq.nuke(); // wake up any remaining scanq-related threads, let them die
3353 3 : set_metric("ready", 0);
3354 :
3355 3 : if (verbose)
3356 1 : obatched(clog) << "stopping" << endl;
3357 :
3358 : /* Join all our threads. */
3359 21 : for (auto&& it : all_threads)
3360 18 : pthread_join (it, NULL);
3361 :
3362 : /* Stop all the web service threads. */
3363 3 : if (d4) MHD_stop_daemon (d4);
3364 3 : if (d6) MHD_stop_daemon (d6);
3365 :
3366 : /* With all threads known dead, we can clean up the global resources. */
3367 3 : rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_CLEANUP_DDL, NULL, NULL, NULL);
3368 3 : if (rc != SQLITE_OK)
3369 : {
3370 0 : error (0, 0,
3371 : "warning: cannot run database cleanup ddl: %s", sqlite3_errmsg(db));
3372 : }
3373 :
3374 : // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
3375 3 : (void) regfree (& file_include_regex);
3376 3 : (void) regfree (& file_exclude_regex);
3377 :
3378 3 : sqlite3 *database = db;
3379 3 : db = 0; // for signal_handler not to freak
3380 3 : (void) sqlite3_close (database);
3381 :
3382 3 : return 0;
3383 : }
|