Line data Source code
1 : /* Debuginfo-over-http server.
2 : Copyright (C) 2019 Red Hat, Inc.
3 : This file is part of elfutils.
4 :
5 : This file is free software; you can redistribute it and/or modify
6 : it under the terms of the GNU General Public License as published by
7 : the Free Software Foundation; either version 3 of the License, or
8 : (at your option) any later version.
9 :
10 : elfutils is distributed in the hope that it will be useful, but
11 : WITHOUT ANY WARRANTY; without even the implied warranty of
12 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 : GNU General Public License for more details.
14 :
15 : You should have received a copy of the GNU General Public License
16 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 :
18 :
19 : /* cargo-cult from libdwfl linux-kernel-modules.c */
20 : /* In case we have a bad fts we include this before config.h because it
21 : can't handle _FILE_OFFSET_BITS.
22 : Everything we need here is fine if its declarations just come first.
23 : Also, include sys/types.h before fts. On some systems fts.h is not self
24 : contained. */
25 : #ifdef BAD_FTS
26 : #include <sys/types.h>
27 : #include <fts.h>
28 : #endif
29 :
30 : #ifdef HAVE_CONFIG_H
31 : #include "config.h"
32 : #endif
33 :
34 : extern "C" {
35 : #include "printversion.h"
36 : }
37 :
38 : #include "debuginfod.h"
39 : #include <dwarf.h>
40 :
41 : #include <argp.h>
42 : #ifdef __GNUC__
43 : #undef __attribute__ /* glibc bug - rhbz 1763325 */
44 : #endif
45 :
46 : #include <unistd.h>
47 : #include <stdlib.h>
48 : #include <error.h>
49 : // #include <libintl.h> // not until it supports C++ << better
50 : #include <locale.h>
51 : #include <pthread.h>
52 : #include <signal.h>
53 : #include <sys/stat.h>
54 : #include <sys/time.h>
55 : #include <unistd.h>
56 : #include <fcntl.h>
57 : #include <netdb.h>
58 :
59 :
60 : /* If fts.h is included before config.h, its indirect inclusions may not
61 : give us the right LFS aliases of these functions, so map them manually. */
62 : #ifdef BAD_FTS
63 : #ifdef _FILE_OFFSET_BITS
64 : #define open open64
65 : #define fopen fopen64
66 : #endif
67 : #else
68 : #include <sys/types.h>
69 : #include <fts.h>
70 : #endif
71 :
72 : #include <cstring>
73 : #include <vector>
74 : #include <set>
75 : #include <map>
76 : #include <string>
77 : #include <iostream>
78 : #include <iomanip>
79 : #include <ostream>
80 : #include <sstream>
81 : #include <mutex>
82 : #include <condition_variable>
83 : #include <thread>
84 : // #include <regex> // on rhel7 gcc 4.8, not competent
85 : #include <regex.h>
86 : // #include <algorithm>
87 : using namespace std;
88 :
89 : #include <gelf.h>
90 : #include <libdwelf.h>
91 :
92 : #include <microhttpd.h>
93 : #include <curl/curl.h>
94 : #include <archive.h>
95 : #include <archive_entry.h>
96 : #include <sqlite3.h>
97 :
98 : #ifdef __linux__
99 : #include <sys/syscall.h>
100 : #endif
101 :
102 : #ifdef __linux__
103 : #define tid() syscall(SYS_gettid)
104 : #else
105 : #define tid() pthread_self()
106 : #endif
107 :
108 :
109 : // Roll this identifier for every sqlite schema incompatibility.
110 : #define BUILDIDS "buildids9"
111 :
112 : #if SQLITE_VERSION_NUMBER >= 3008000
113 : #define WITHOUT_ROWID "without rowid"
114 : #else
115 : #define WITHOUT_ROWID ""
116 : #endif
117 :
// SQL text for the BUILDIDS-prefixed schema, assembled from adjacent string
// literals.  In order it contains:
//   - connection pragmas;
//   - the _files and _buildids interning tables;
//   - the _file_mtime_scanned, _f_de, _f_s, _r_de, _r_sref and _r_sdef tables;
//   - the _query_d, _query_e and _query_s views and the _stats view;
//   - "drop ... if exists" statements for every earlier schema generation.
// Every create statement uses "if not exists"; only the _stats view is dropped
// and recreated.  Where this text is executed is outside this excerpt.
118 : static const char DEBUGINFOD_SQLITE_DDL[] =
119 : "pragma foreign_keys = on;\n"
120 : "pragma synchronous = 0;\n" // disable fsync()s - this cache is disposable across a machine crash
121 : "pragma journal_mode = wal;\n" // https://sqlite.org/wal.html
122 : "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
123 : "pragma journal_size_limit = 0;\n" // limit steady state file (between grooming, which also =truncate's)
124 : "pragma auto_vacuum = incremental;\n" // https://sqlite.org/pragma.html
125 : "pragma busy_timeout = 1000;\n" // https://sqlite.org/pragma.html
126 : // NB: all these are overridable with -D option
127 :
128 : // Normalization table for interning file names
129 : "create table if not exists " BUILDIDS "_files (\n"
130 : " id integer primary key not null,\n"
131 : " name text unique not null\n"
132 : " );\n"
133 : // Normalization table for interning buildids
134 : "create table if not exists " BUILDIDS "_buildids (\n"
135 : " id integer primary key not null,\n"
136 : " hex text unique not null);\n"
137 : // Track the completion of scanning of a given file & sourcetype at given time
138 : "create table if not exists " BUILDIDS "_file_mtime_scanned (\n"
139 : " mtime integer not null,\n"
140 : " file integer not null,\n"
141 : " size integer not null,\n" // in bytes
142 : " sourcetype text(1) not null\n"
143 : " check (sourcetype IN ('F', 'R')),\n"
144 : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
145 : " primary key (file, mtime, sourcetype)\n"
146 : " ) " WITHOUT_ROWID ";\n"
// Plain-file facts: one row per (buildid, file, mtime) with debuginfo/executable flags.
147 : "create table if not exists " BUILDIDS "_f_de (\n"
148 : " buildid integer not null,\n"
149 : " debuginfo_p integer not null,\n"
150 : " executable_p integer not null,\n"
151 : " file integer not null,\n"
152 : " mtime integer not null,\n"
153 : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
154 : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
155 : " primary key (buildid, file, mtime)\n"
156 : " ) " WITHOUT_ROWID ";\n"
// Plain-file source facts: (buildid, artifactsrc, file, mtime).
157 : "create table if not exists " BUILDIDS "_f_s (\n"
158 : " buildid integer not null,\n"
159 : " artifactsrc integer not null,\n"
160 : " file integer not null,\n" // NB: not necessarily entered into _mtime_scanned
161 : " mtime integer not null,\n"
162 : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
163 : " foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
164 : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
165 : " primary key (buildid, artifactsrc, file, mtime)\n"
166 : " ) " WITHOUT_ROWID ";\n"
// RPM facts: like _f_de, plus a "content" column naming the member inside the rpm "file".
167 : "create table if not exists " BUILDIDS "_r_de (\n"
168 : " buildid integer not null,\n"
169 : " debuginfo_p integer not null,\n"
170 : " executable_p integer not null,\n"
171 : " file integer not null,\n"
172 : " mtime integer not null,\n"
173 : " content integer not null,\n"
174 : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
175 : " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
176 : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
177 : " primary key (buildid, debuginfo_p, executable_p, file, content, mtime)\n"
178 : " ) " WITHOUT_ROWID ";\n"
179 : "create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm
180 : " buildid integer not null,\n"
181 : " artifactsrc integer not null,\n"
182 : " foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
183 : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
184 : " primary key (buildid, artifactsrc)\n"
185 : " ) " WITHOUT_ROWID ";\n"
186 : "create table if not exists " BUILDIDS "_r_sdef (\n" // rpm contents that may satisfy sref
187 : " file integer not null,\n"
188 : " mtime integer not null,\n"
189 : " content integer not null,\n"
190 : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
191 : " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
192 : " primary key (content, file, mtime)\n"
193 : " ) " WITHOUT_ROWID ";\n"
194 : // create views to glue together some of the above tables, for webapi D queries
// NOTE(review): each select below lists n.mtime twice (once bare, once "as mtime").
195 : "create view if not exists " BUILDIDS "_query_d as \n"
196 : "select\n"
197 : " b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
198 : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n"
199 : " where b.id = n.buildid and f0.id = n.file and n.debuginfo_p = 1\n"
200 : "union all select\n"
201 : " b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
202 : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n"
203 : " where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.debuginfo_p = 1\n"
204 : ";"
205 : // ... and for E queries
206 : "create view if not exists " BUILDIDS "_query_e as \n"
207 : "select\n"
208 : " b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
209 : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n"
210 : " where b.id = n.buildid and f0.id = n.file and n.executable_p = 1\n"
211 : "union all select\n"
212 : " b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
213 : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n"
214 : " where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.executable_p = 1\n"
215 : ";"
216 : // ... and for S queries
217 : "create view if not exists " BUILDIDS "_query_s as \n"
218 : "select\n"
219 : " b.hex as buildid, fs.name as artifactsrc, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1, null as source0ref\n"
220 : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files fs, " BUILDIDS "_f_s n\n"
221 : " where b.id = n.buildid and f0.id = n.file and fs.id = n.artifactsrc\n"
222 : "union all select\n"
223 : " b.hex as buildid, f1.name as artifactsrc, 'R' as sourcetype, f0.name as source0, sd.mtime as mtime, f1.name as source1, fsref.name as source0ref\n"
224 : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_files fsref, "
225 : " " BUILDIDS "_r_sdef sd, " BUILDIDS "_r_sref sr, " BUILDIDS "_r_de sde\n"
226 : " where b.id = sr.buildid and f0.id = sd.file and fsref.id = sde.file and f1.id = sd.content\n"
227 : " and sr.artifactsrc = sd.content and sde.buildid = sr.buildid\n"
228 : ";"
229 : // and for startup overview counts
// The stats view is the only object dropped and recreated on every run.
230 : "drop view if exists " BUILDIDS "_stats;\n"
231 : "create view if not exists " BUILDIDS "_stats as\n"
232 : " select 'file d/e' as label,count(*) as quantity from " BUILDIDS "_f_de\n"
233 : "union all select 'file s',count(*) from " BUILDIDS "_f_s\n"
234 : "union all select 'rpm d/e',count(*) from " BUILDIDS "_r_de\n"
235 : "union all select 'rpm sref',count(*) from " BUILDIDS "_r_sref\n"
236 : "union all select 'rpm sdef',count(*) from " BUILDIDS "_r_sdef\n"
237 : "union all select 'buildids',count(*) from " BUILDIDS "_buildids\n"
238 : "union all select 'filenames',count(*) from " BUILDIDS "_files\n"
239 : "union all select 'files scanned (#)',count(*) from " BUILDIDS "_file_mtime_scanned\n"
240 : "union all select 'files scanned (mb)',coalesce(sum(size)/1024/1024,0) from " BUILDIDS "_file_mtime_scanned\n"
// The database-size row is only compiled in for SQLITE_VERSION_NUMBER >= 3016000.
241 : #if SQLITE_VERSION_NUMBER >= 3016000
242 : "union all select 'index db size (mb)',page_count*page_size/1024/1024 as size FROM pragma_page_count(), pragma_page_size()\n"
243 : #endif
244 : ";\n"
245 :
246 : // schema change history & garbage collection
247 : //
248 : // XXX: we could have migration queries here to bring prior-schema
249 : // data over instead of just dropping it.
250 : //
251 : // buildids9: widen the mtime_scanned table
252 : "" // <<< we are here
253 : // buildids8: slim the sref table
254 : "drop table if exists buildids8_f_de;\n"
255 : "drop table if exists buildids8_f_s;\n"
256 : "drop table if exists buildids8_r_de;\n"
257 : "drop table if exists buildids8_r_sref;\n"
258 : "drop table if exists buildids8_r_sdef;\n"
259 : "drop table if exists buildids8_file_mtime_scanned;\n"
260 : "drop table if exists buildids8_files;\n"
261 : "drop table if exists buildids8_buildids;\n"
262 : // buildids7: separate _norm table into dense subtype tables
263 : "drop table if exists buildids7_f_de;\n"
264 : "drop table if exists buildids7_f_s;\n"
265 : "drop table if exists buildids7_r_de;\n"
266 : "drop table if exists buildids7_r_sref;\n"
267 : "drop table if exists buildids7_r_sdef;\n"
268 : "drop table if exists buildids7_file_mtime_scanned;\n"
269 : "drop table if exists buildids7_files;\n"
270 : "drop table if exists buildids7_buildids;\n"
271 : // buildids6: drop bolo/rfolo again, represent sources / rpmcontents in main table
272 : "drop table if exists buildids6_norm;\n"
273 : "drop table if exists buildids6_files;\n"
274 : "drop table if exists buildids6_buildids;\n"
275 : "drop view if exists buildids6;\n"
276 : // buildids5: redefine srcfile1 column to be '.'-less (for rpms)
277 : "drop table if exists buildids5_norm;\n"
278 : "drop table if exists buildids5_files;\n"
279 : "drop table if exists buildids5_buildids;\n"
280 : "drop table if exists buildids5_bolo;\n"
281 : "drop table if exists buildids5_rfolo;\n"
282 : "drop view if exists buildids5;\n"
283 : // buildids4: introduce rpmfile RFOLO
284 : "drop table if exists buildids4_norm;\n"
285 : "drop table if exists buildids4_files;\n"
286 : "drop table if exists buildids4_buildids;\n"
287 : "drop table if exists buildids4_bolo;\n"
288 : "drop table if exists buildids4_rfolo;\n"
289 : "drop view if exists buildids4;\n"
290 : // buildids3*: split out srcfile BOLO
291 : "drop table if exists buildids3_norm;\n"
292 : "drop table if exists buildids3_files;\n"
293 : "drop table if exists buildids3_buildids;\n"
294 : "drop table if exists buildids3_bolo;\n"
295 : "drop view if exists buildids3;\n"
296 : // buildids2: normalized buildid and filenames into interning tables;
297 : "drop table if exists buildids2_norm;\n"
298 : "drop table if exists buildids2_files;\n"
299 : "drop table if exists buildids2_buildids;\n"
300 : "drop view if exists buildids2;\n"
301 : // buildids1: made buildid and artifacttype NULLable, to represent cached-negative
302 : // lookups from sources, e.g. files or rpms that contain no buildid-indexable content
303 : "drop table if exists buildids1;\n"
304 : // buildids: original
305 : "drop table if exists buildids;\n"
306 : ;
307 :
// SQL text holding a single statement: a WAL checkpoint in truncate mode.
// NOTE(review): presumably executed when the database is closed -- the call
// site is outside this excerpt, confirm there.
308 : static const char DEBUGINFOD_SQLITE_CLEANUP_DDL[] =
309 : "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
310 : ;
311 :
312 :
313 :
314 :
315 : /* Name and version of program. */
316 : /* ARGP_PROGRAM_VERSION_HOOK_DEF = print_version; */ // not this simple for C++
317 :
318 : /* Bug report address. */
319 : ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
320 :
321 : /* Definitions of arguments for argp functions. */
// Each entry with a short-option key ('F', 'R', 'L', 't', ...) is handled by
// the matching case in parse_opt.  Entries whose third field is NULL take no
// argument; the others name their argument (SECONDS, NUM, REGEX, FILE, SQL).
// The two entries with a NULL name and only a doc string ("Scanners:",
// "Options:") are, per the argp documentation, group headers for --help.
// The all-NULL/zero entry terminates the table.
322 : static const struct argp_option options[] =
323 : {
324 : { NULL, 0, NULL, 0, "Scanners:", 1 },
325 : { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning threads.", 0 },
326 : { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning threads.", 0 },
327 : // "source-oci-imageregistry" ...
328 :
329 : { NULL, 0, NULL, 0, "Options:", 2 },
330 : { "logical", 'L', NULL, 0, "Follow symlinks, default=ignore.", 0 },
331 : { "rescan-time", 't', "SECONDS", 0, "Number of seconds to wait between rescans, 0=disable.", 0 },
332 : { "groom-time", 'g', "SECONDS", 0, "Number of seconds to wait between database grooming, 0=disable.", 0 },
333 : { "maxigroom", 'G', NULL, 0, "Run a complete database groom/shrink pass at startup.", 0 },
334 : { "concurrency", 'c', "NUM", 0, "Limit scanning thread concurrency to NUM.", 0 },
335 : { "include", 'I', "REGEX", 0, "Include files matching REGEX, default=all.", 0 },
336 : { "exclude", 'X', "REGEX", 0, "Exclude files matching REGEX, default=none.", 0 },
337 : { "port", 'p', "NUM", 0, "HTTP port to listen on, default 8002.", 0 },
338 : { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
339 : { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
340 : { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
341 :
342 : { NULL, 0, NULL, 0, NULL, 0 }
343 : };
344 :
345 : /* Short description of program. */
346 : static const char doc[] = "Serve debuginfo-related content across HTTP from files under PATHs.";
347 :
348 : /* Strings for arguments in help texts. */
// Positional PATH arguments reach parse_opt as ARGP_KEY_ARG and are collected
// into source_paths there.
349 : static const char args_doc[] = "[PATH ...]";
350 :
351 : /* Prototype for option handler. */
// Declared ahead of its definition so the argp initializer below can refer to it.
352 : static error_t parse_opt (int key, char *arg, struct argp_state *state);
353 :
354 : /* Data structure to communicate with argp functions. */
// Bundles the option table, the handler and the two help strings defined
// above; the remaining three members are left NULL.
355 : static struct argp argp =
356 : {
357 : options, parse_opt, args_doc, doc, NULL, NULL, NULL
358 : };
359 :
360 :
// Process-wide settings and state.  Where a trailing comment names an option
// letter, the value is assigned by that case of parse_opt; the initializer
// shown here is the default when the option is absent.
361 : static string db_path; // -d
362 : static sqlite3 *db; // not assigned anywhere in this excerpt
363 : static unsigned verbose; // -v, incremented once per occurrence
// NOTE(review): the three flags below are presumably written by signal
// handlers (hence volatile sig_atomic_t); the handlers are not in this excerpt.
364 : static volatile sig_atomic_t interrupted = 0;
365 : static volatile sig_atomic_t sigusr1 = 0;
366 : static volatile sig_atomic_t sigusr2 = 0;
367 : static unsigned http_port = 8002; // -p, rejected by parse_opt when > 65535
368 : static unsigned rescan_s = 300; // -t, seconds
369 : static unsigned groom_s = 86400; // -g, seconds
370 : static unsigned maxigroom = false; // -G; declared unsigned but used as a flag
// Defaults to the hardware thread count, or 1 when that is reported as 0.
371 : static unsigned concurrency = std::thread::hardware_concurrency() ?: 1; // -c, clamped to >= 1
372 : static set<string> source_paths; // positional PATH arguments
373 : static bool scan_files = false; // -F
374 : static bool scan_rpms = false; // -R
375 : static vector<string> extra_ddl; // -D, one element per occurrence
// parse_opt regfree()s these before recompiling, which relies on them already
// holding a compiled pattern -- the initial regcomp is outside this excerpt.
376 : static regex_t file_include_regex; // -I
377 : static regex_t file_exclude_regex; // -X
378 : static bool traverse_logical; // -L
379 :
// Forward declarations of the metrics helpers; their definitions are outside
// this excerpt.
380 : static void set_metric(const string& key, int64_t value);
381 : // static void inc_metric(const string& key);
382 : static void set_metric(const string& metric,
383 : const string& lname, const string& lvalue,
384 : int64_t value);
385 : static void inc_metric(const string& metric,
386 : const string& lname, const string& lvalue);
387 : static void add_metric(const string& metric,
388 : const string& lname, const string& lvalue,
389 : int64_t value);
390 :
391 : /* Handle program arguments. */
392 : static error_t
393 22 : parse_opt (int key, char *arg,
394 : struct argp_state *state __attribute__ ((unused)))
395 : {
396 : int rc;
397 22 : switch (key)
398 : {
399 0 : case 'v': verbose ++; break;
400 2 : case 'd': db_path = string(arg); break;
401 2 : case 'p': http_port = (unsigned) atoi(arg);
402 2 : if (http_port > 65535) argp_failure(state, 1, EINVAL, "port number");
403 2 : break;
404 2 : case 'F': scan_files = true; break;
405 1 : case 'R': scan_rpms = true; break;
406 1 : case 'L':
407 1 : traverse_logical = true;
408 1 : break;
409 0 : case 'D': extra_ddl.push_back(string(arg)); break;
410 1 : case 't':
411 1 : rescan_s = (unsigned) atoi(arg);
412 1 : break;
413 1 : case 'g':
414 1 : groom_s = (unsigned) atoi(arg);
415 1 : break;
416 0 : case 'G':
417 0 : maxigroom = true;
418 0 : break;
419 0 : case 'c':
420 0 : concurrency = (unsigned) atoi(arg);
421 0 : if (concurrency < 1) concurrency = 1;
422 0 : break;
423 0 : case 'I':
424 : // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
425 0 : regfree (&file_include_regex);
426 0 : rc = regcomp (&file_include_regex, arg, REG_EXTENDED|REG_NOSUB);
427 0 : if (rc != 0)
428 0 : argp_failure(state, 1, EINVAL, "regular expession");
429 0 : break;
430 0 : case 'X':
431 0 : regfree (&file_exclude_regex);
432 0 : rc = regcomp (&file_exclude_regex, arg, REG_EXTENDED|REG_NOSUB);
433 0 : if (rc != 0)
434 0 : argp_failure(state, 1, EINVAL, "regular expession");
435 0 : break;
436 4 : case ARGP_KEY_ARG:
437 4 : source_paths.insert(string(arg));
438 4 : break;
439 : // case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
440 8 : default: return ARGP_ERR_UNKNOWN;
441 : }
442 :
443 14 : return 0;
444 : }
445 :
446 :
447 : ////////////////////////////////////////////////////////////////////////
448 :
449 :
450 : // represent errors that may get reported to an ostream and/or a libmicrohttpd connection
451 :
452 : struct reportable_exception
453 : {
454 : int code;
455 : string message;
456 :
457 2 : reportable_exception(int c, const string& m): code(c), message(m) {}
458 1 : reportable_exception(const string& m): code(503), message(m) {}
459 : reportable_exception(): code(503), message() {}
460 :
461 : void report(ostream& o) const; // defined under obatched() class below
462 :
463 3 : int mhd_send_response(MHD_Connection* c) const {
464 3 : MHD_Response* r = MHD_create_response_from_buffer (message.size(),
465 3 : (void*) message.c_str(),
466 : MHD_RESPMEM_MUST_COPY);
467 3 : MHD_add_response_header (r, "Content-Type", "text/plain");
468 3 : int rc = MHD_queue_response (c, code, r);
469 3 : MHD_destroy_response (r);
470 3 : return rc;
471 : }
472 : };
473 :
474 :
475 : struct sqlite_exception: public reportable_exception
476 : {
477 0 : sqlite_exception(int rc, const string& msg):
478 0 : reportable_exception(string("sqlite3 error: ") + msg + ": " + string(sqlite3_errstr(rc) ?: "?")) {}
479 : };
480 :
481 : struct libc_exception: public reportable_exception
482 : {
483 0 : libc_exception(int rc, const string& msg):
484 0 : reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {}
485 : };
486 :
487 :
488 : struct archive_exception: public reportable_exception
489 : {
490 0 : archive_exception(const string& msg):
491 0 : reportable_exception(string("libarchive error: ") + msg) {}
492 0 : archive_exception(struct archive* a, const string& msg):
493 0 : reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {}
494 : };
495 :
496 :
497 : struct elfutils_exception: public reportable_exception
498 : {
499 0 : elfutils_exception(int rc, const string& msg):
500 0 : reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {}
501 : };
502 :
503 :
504 : ////////////////////////////////////////////////////////////////////////
505 :
// Counting semaphore built from a mutex and a condition variable, since this
// code targets c++11 rather than c++20.

class semaphore
{
public:
  // C is the initial count (default 1).
  semaphore (unsigned c=1): count(c) {}

  // Add one to the count and wake one waiter, if any.
  void notify ()
  {
    std::lock_guard<std::mutex> guard(mtx);
    ++count;
    cv.notify_one();
  }

  // Block until the count is non-zero, then take one.
  void wait ()
  {
    std::unique_lock<std::mutex> guard(mtx);
    cv.wait(guard, [this] { return count != 0; });
    --count;
  }

private:
  std::mutex mtx;               // protects count
  std::condition_variable cv;   // signalled by notify()
  unsigned count;
};
528 :
529 :
530 : class semaphore_borrower
531 : {
532 : public:
533 187 : semaphore_borrower(semaphore* s): sem(s) { sem->wait(); }
534 187 : ~semaphore_borrower() { sem->notify(); }
535 : private:
536 : semaphore* sem;
537 : };
538 :
539 :
540 : ////////////////////////////////////////////////////////////////////////
541 :
542 :
543 : // Print a standard timestamp.
544 : static ostream&
545 117 : timestamp (ostream &o)
546 : {
547 : char datebuf[80];
548 117 : char *now2 = NULL;
549 117 : time_t now_t = time(NULL);
550 117 : struct tm *now = gmtime (&now_t);
551 117 : if (now)
552 : {
553 117 : (void) strftime (datebuf, sizeof (datebuf), "%c", now);
554 117 : now2 = datebuf;
555 : }
556 :
557 : return o << "[" << (now2 ? now2 : "") << "] "
558 117 : << "(" << getpid () << "/" << tid() << "): ";
559 : }
560 :
561 :
562 : // A little class that impersonates an ostream to the extent that it can
563 : // take << streaming operations. It batches up the bits into an internal
564 : // stringstream until it is destroyed; then flushes to the original ostream.
565 : // It adds a timestamp
566 : class obatched
567 : {
568 : private:
569 : ostream& o;
570 : stringstream stro;
571 : static mutex lock;
572 : public:
573 117 : obatched(ostream& oo, bool timestamp_p = true): o(oo)
574 : {
575 117 : if (timestamp_p)
576 117 : timestamp(stro);
577 115 : }
578 115 : ~obatched()
579 117 : {
580 232 : unique_lock<mutex> do_not_cross_the_streams(obatched::lock);
581 117 : o << stro.str();
582 117 : o.flush();
583 117 : }
584 : operator ostream& () { return stro; }
585 117 : template <typename T> ostream& operator << (const T& t) { stro << t; return stro; }
586 : };
587 : mutex obatched::lock; // just the one, since cout/cerr iostreams are not thread-safe
588 :
589 :
590 3 : void reportable_exception::report(ostream& o) const {
591 3 : obatched(o) << message << endl;
592 3 : }
593 :
594 :
595 : ////////////////////////////////////////////////////////////////////////
596 :
597 :
598 : // RAII style sqlite prepared-statement holder that matches { } block lifetime
599 :
600 : struct sqlite_ps
601 : {
602 : private:
603 : sqlite3* db;
604 : const string nickname;
605 : const string sql;
606 : sqlite3_stmt *pp;
607 :
608 : sqlite_ps(const sqlite_ps&); // make uncopyable
609 : sqlite_ps& operator=(const sqlite_ps &); // make unassignable
610 :
611 : public:
612 260 : sqlite_ps (sqlite3* d, const string& n, const string& s): db(d), nickname(n), sql(s) {
613 260 : if (verbose > 4)
614 0 : obatched(clog) << nickname << " prep " << sql << endl;
615 260 : int rc = sqlite3_prepare_v2 (db, sql.c_str(), -1 /* to \0 */, & this->pp, NULL);
616 260 : if (rc != SQLITE_OK)
617 0 : throw sqlite_exception(rc, "prepare " + sql);
618 260 : }
619 :
620 1812 : sqlite_ps& reset()
621 : {
622 1812 : sqlite3_reset(this->pp);
623 1812 : return *this;
624 : }
625 :
626 2741 : sqlite_ps& bind(int parameter, const string& str)
627 : {
628 2741 : if (verbose > 4)
629 0 : obatched(clog) << nickname << " bind " << parameter << "=" << str << endl;
630 2741 : int rc = sqlite3_bind_text (this->pp, parameter, str.c_str(), -1, SQLITE_TRANSIENT);
631 2741 : if (rc != SQLITE_OK)
632 0 : throw sqlite_exception(rc, "sqlite3 bind");
633 2741 : return *this;
634 : }
635 :
636 707 : sqlite_ps& bind(int parameter, int64_t value)
637 : {
638 707 : if (verbose > 4)
639 0 : obatched(clog) << nickname << " bind " << parameter << "=" << value << endl;
640 707 : int rc = sqlite3_bind_int64 (this->pp, parameter, value);
641 707 : if (rc != SQLITE_OK)
642 0 : throw sqlite_exception(rc, "sqlite3 bind");
643 707 : return *this;
644 : }
645 :
646 : sqlite_ps& bind(int parameter)
647 : {
648 : if (verbose > 4)
649 : obatched(clog) << nickname << " bind " << parameter << "=" << "NULL" << endl;
650 : int rc = sqlite3_bind_null (this->pp, parameter);
651 : if (rc != SQLITE_OK)
652 : throw sqlite_exception(rc, "sqlite3 bind");
653 : return *this;
654 : }
655 :
656 :
657 1634 : void step_ok_done() {
658 1634 : int rc = sqlite3_step (this->pp);
659 1634 : if (verbose > 4)
660 0 : obatched(clog) << nickname << " step-ok-done(" << sqlite3_errstr(rc) << ") " << sql << endl;
661 1634 : if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
662 0 : throw sqlite_exception(rc, "sqlite3 step");
663 1634 : (void) sqlite3_reset (this->pp);
664 1634 : }
665 :
666 :
667 133 : int step() {
668 133 : int rc = sqlite3_step (this->pp);
669 133 : if (verbose > 4)
670 0 : obatched(clog) << nickname << " step(" << sqlite3_errstr(rc) << ") " << sql << endl;
671 133 : return rc;
672 : }
673 :
674 :
675 :
676 260 : ~sqlite_ps () { sqlite3_finalize (this->pp); }
677 353 : operator sqlite3_stmt* () { return this->pp; }
678 : };
679 :
680 :
681 : ////////////////////////////////////////////////////////////////////////
682 :
// RAII style templated autocloser
//
// Holds a value and a one-argument function; the destructor calls the
// function on the value and discards the result.  Payload is the value type,
// Ignore the function's return type.

template <class Payload, class Ignore>
struct defer_dtor
{
public:
  typedef Ignore (*dtor_fn) (Payload);

  defer_dtor(Payload _p, dtor_fn _fn): held(_p), release(_fn) {}
  ~defer_dtor() { (void) release(held); }

private:
  Payload held;     // value handed to the release function
  dtor_fn release;  // called exactly once, from the destructor

  defer_dtor(const defer_dtor<Payload,Ignore>&); // make uncopyable
  defer_dtor& operator=(const defer_dtor<Payload,Ignore> &); // make unassignable
};
703 :
704 :
705 :
706 : ////////////////////////////////////////////////////////////////////////
707 :
708 :
709 :
710 :
711 :
712 : static string
713 0 : conninfo (struct MHD_Connection * conn)
714 : {
715 : char hostname[256]; // RFC1035
716 : char servname[256];
717 0 : int sts = -1;
718 :
719 0 : if (conn == 0)
720 0 : return "internal";
721 :
722 : /* Look up client address data. */
723 0 : const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
724 : MHD_CONNECTION_INFO_CLIENT_ADDRESS);
725 0 : struct sockaddr *so = u ? u->client_addr : 0;
726 :
727 0 : if (so && so->sa_family == AF_INET) {
728 0 : sts = getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), servname,
729 : sizeof (servname), NI_NUMERICHOST | NI_NUMERICSERV);
730 0 : } else if (so && so->sa_family == AF_INET6) {
731 0 : sts = getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname),
732 : servname, sizeof (servname), NI_NUMERICHOST | NI_NUMERICSERV);
733 : }
734 0 : if (sts != 0) {
735 0 : hostname[0] = servname[0] = '\0';
736 : }
737 :
738 0 : return string(hostname) + string(":") + string(servname);
739 : }
740 :
741 :
742 :
743 : ////////////////////////////////////////////////////////////////////////
744 :
745 : static void
746 30 : add_mhd_last_modified (struct MHD_Response *resp, time_t mtime)
747 : {
748 30 : struct tm *now = gmtime (&mtime);
749 30 : if (now != NULL)
750 : {
751 : char datebuf[80];
752 30 : size_t rc = strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %T GMT", now);
753 30 : if (rc > 0 && rc < sizeof (datebuf))
754 30 : (void) MHD_add_response_header (resp, "Last-Modified", datebuf);
755 : }
756 :
757 30 : (void) MHD_add_response_header (resp, "Cache-Control", "public");
758 30 : }
759 :
760 :
761 :
762 : static struct MHD_Response*
763 9 : handle_buildid_f_match (int64_t b_mtime,
764 : const string& b_source0,
765 : int *result_fd)
766 : {
767 9 : int fd = open(b_source0.c_str(), O_RDONLY);
768 9 : if (fd < 0)
769 : {
770 0 : if (verbose)
771 0 : obatched(clog) << "cannot open " << b_source0 << endl;
772 : // if still missing, a periodic groom pass will delete this buildid record
773 0 : return 0;
774 : }
775 :
776 : // NB: use manual close(2) in error case instead of defer_dtor, because
777 : // in the normal case, we want to hand the fd over to libmicrohttpd for
778 : // file transfer.
779 :
780 : struct stat s;
781 9 : int rc = fstat(fd, &s);
782 9 : if (rc < 0)
783 : {
784 0 : if (verbose)
785 0 : clog << "cannot fstat " << b_source0 << endl;
786 0 : close(fd);
787 0 : return 0;
788 : }
789 :
790 9 : if ((int64_t) s.st_mtime != b_mtime)
791 : {
792 0 : if (verbose)
793 0 : obatched(clog) << "mtime mismatch for " << b_source0 << endl;
794 0 : close(fd);
795 0 : return 0;
796 : }
797 :
798 9 : inc_metric ("http_responses_total","result","file");
799 9 : struct MHD_Response* r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
800 9 : if (r == 0)
801 : {
802 0 : if (verbose)
803 0 : obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
804 0 : close(fd);
805 : }
806 : else
807 : {
808 9 : MHD_add_response_header (r, "Content-Type", "application/octet-stream");
809 9 : add_mhd_last_modified (r, s.st_mtime);
810 9 : if (verbose > 1)
811 0 : obatched(clog) << "serving file " << b_source0 << endl;
812 : /* libmicrohttpd will close it. */
813 9 : if (result_fd)
814 0 : *result_fd = fd;
815 : }
816 :
817 9 : return r;
818 : }
819 :
820 :
// quote all questionable characters of str for safe passage through a sh -c expansion.
//
// Every byte other than an alphanumeric or '/' is preceded by a backslash.
// Two cases get special care:
//  - bytes are classified as unsigned char, because passing a negative char
//    (any byte >= 0x80 where char is signed) to isalnum() is undefined;
//  - a newline is emitted inside single quotes, because backslash-newline is
//    a line continuation in sh and would drop the newline from the word.
static std::string
shell_escape(const std::string& str)
{
  std::string y;
  y.reserve(str.size());
  for (unsigned char x : str)
    {
      if (x == '\n')
        {
          y += "'\n'";
          continue;
        }
      if (! isalnum(x) && x != '/')
        y += '\\';
      y += (char) x;
    }
  return y;
}
834 :
835 :
836 : static struct MHD_Response*
837 20 : handle_buildid_r_match (int64_t b_mtime,
838 : const string& b_source0,
839 : const string& b_source1,
840 : int *result_fd)
841 : {
842 : struct stat fs;
843 20 : int rc = stat (b_source0.c_str(), &fs);
844 20 : if (rc != 0)
845 0 : throw libc_exception (errno, string("stat ") + b_source0);
846 :
847 20 : if ((int64_t) fs.st_mtime != b_mtime)
848 : {
849 0 : if (verbose)
850 0 : obatched(clog) << "mtime mismatch for " << b_source0 << endl;
851 0 : return 0;
852 : }
853 :
854 40 : string popen_cmd = string("rpm2cpio " + shell_escape(b_source0));
855 20 : FILE* fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
856 20 : if (fp == NULL)
857 0 : throw libc_exception (errno, string("popen ") + popen_cmd);
858 40 : defer_dtor<FILE*,int> fp_closer (fp, pclose);
859 :
860 : struct archive *a;
861 20 : a = archive_read_new();
862 20 : if (a == NULL)
863 0 : throw archive_exception("cannot create archive reader");
864 40 : defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
865 :
866 20 : rc = archive_read_support_format_cpio(a);
867 20 : if (rc != ARCHIVE_OK)
868 0 : throw archive_exception(a, "cannot select cpio format");
869 20 : rc = archive_read_support_filter_all(a);
870 20 : if (rc != ARCHIVE_OK)
871 0 : throw archive_exception(a, "cannot select all filters");
872 :
873 20 : rc = archive_read_open_FILE (a, fp);
874 20 : if (rc != ARCHIVE_OK)
875 0 : throw archive_exception(a, "cannot open archive from rpm2cpio pipe");
876 :
877 : while(1) // parse cpio archive entries
878 : {
879 : struct archive_entry *e;
880 186 : rc = archive_read_next_header (a, &e);
881 186 : if (rc != ARCHIVE_OK)
882 0 : break;
883 :
884 186 : if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
885 151 : continue;
886 :
887 35 : string fn = archive_entry_pathname (e);
888 35 : if (fn != string(".")+b_source1)
889 15 : continue;
890 :
891 : // extract this file to a temporary file
892 20 : char tmppath[PATH_MAX] = "/tmp/debuginfod.XXXXXX"; // XXX: $TMP_DIR etc.
893 20 : int fd = mkstemp (tmppath);
894 20 : if (fd < 0)
895 0 : throw libc_exception (errno, "cannot create temporary file");
896 20 : unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd
897 :
898 20 : rc = archive_read_data_into_fd (a, fd);
899 20 : if (rc != ARCHIVE_OK)
900 : {
901 0 : close (fd);
902 0 : throw archive_exception(a, "cannot extract file");
903 : }
904 :
905 20 : inc_metric ("http_responses_total","result","rpm");
906 20 : struct MHD_Response* r = MHD_create_response_from_fd (archive_entry_size(e), fd);
907 20 : if (r == 0)
908 : {
909 0 : if (verbose)
910 0 : obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
911 0 : close(fd);
912 0 : break; // assume no chance of better luck around another iteration
913 : }
914 : else
915 : {
916 20 : MHD_add_response_header (r, "Content-Type", "application/octet-stream");
917 20 : add_mhd_last_modified (r, archive_entry_mtime(e));
918 20 : if (verbose > 1)
919 0 : obatched(clog) << "serving rpm " << b_source0 << " file " << b_source1 << endl;
920 : /* libmicrohttpd will close it. */
921 20 : if (result_fd)
922 2 : *result_fd = fd;
923 20 : return r;
924 : }
925 166 : }
926 :
927 : // XXX: rpm/file not found: delete this R entry?
928 0 : return 0;
929 : }
930 :
931 :
932 : static struct MHD_Response*
933 29 : handle_buildid_match (int64_t b_mtime,
934 : const string& b_stype,
935 : const string& b_source0,
936 : const string& b_source1,
937 : int *result_fd)
938 : {
939 29 : if (b_stype == "F")
940 9 : return handle_buildid_f_match(b_mtime, b_source0, result_fd);
941 20 : else if (b_stype == "R")
942 20 : return handle_buildid_r_match(b_mtime, b_source0, b_source1, result_fd);
943 : else
944 0 : return 0;
945 : }
946 :
947 :
948 : static int
949 2 : debuginfod_find_progress (debuginfod_client *, long a, long b)
950 : {
951 2 : if (verbose > 4)
952 0 : obatched(clog) << "federated debuginfod progress=" << a << "/" << b << endl;
953 :
954 2 : return interrupted;
955 : }
956 :
957 :
958 32 : static struct MHD_Response* handle_buildid (const string& buildid /* unsafe */,
959 : const string& artifacttype /* unsafe */,
960 : const string& suffix /* unsafe */,
961 : int *result_fd
962 : )
963 : {
964 : // validate artifacttype
965 64 : string atype_code;
966 32 : if (artifacttype == "debuginfo") atype_code = "D";
967 18 : else if (artifacttype == "executable") atype_code = "E";
968 8 : else if (artifacttype == "source") atype_code = "S";
969 0 : else throw reportable_exception("invalid artifacttype");
970 :
971 32 : if (atype_code == "S" && suffix == "")
972 0 : throw reportable_exception("invalid source suffix");
973 :
974 : // validate buildid
975 32 : if ((buildid.size() < 2) || // not empty
976 64 : (buildid.size() % 2) || // even number
977 32 : (buildid.find_first_not_of("0123456789abcdef") != string::npos)) // pure tasty lowercase hex
978 0 : throw reportable_exception("invalid buildid");
979 :
980 32 : if (verbose > 1)
981 0 : obatched(clog) << "searching for buildid=" << buildid << " artifacttype=" << artifacttype
982 0 : << " suffix=" << suffix << endl;
983 :
984 32 : sqlite_ps *pp = 0;
985 :
986 32 : if (atype_code == "D")
987 : {
988 28 : pp = new sqlite_ps (db, "mhd-query-d",
989 : "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_d where buildid = ? "
990 14 : "order by mtime desc");
991 14 : pp->reset();
992 14 : pp->bind(1, buildid);
993 : }
994 18 : else if (atype_code == "E")
995 : {
996 20 : pp = new sqlite_ps (db, "mhd-query-e",
997 : "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_e where buildid = ? "
998 10 : "order by mtime desc");
999 10 : pp->reset();
1000 10 : pp->bind(1, buildid);
1001 : }
1002 8 : else if (atype_code == "S")
1003 : {
1004 16 : pp = new sqlite_ps (db, "mhd-query-s",
1005 : "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc = ? "
1006 8 : "order by sharedprefix(source0,source0ref) desc, mtime desc");
1007 8 : pp->reset();
1008 8 : pp->bind(1, buildid);
1009 8 : pp->bind(2, suffix);
1010 : }
1011 64 : unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
1012 :
1013 : // consume all the rows
1014 : while (1)
1015 : {
1016 32 : int rc = pp->step();
1017 32 : if (rc == SQLITE_DONE) break;
1018 29 : if (rc != SQLITE_ROW)
1019 0 : throw sqlite_exception(rc, "step");
1020 :
1021 29 : int64_t b_mtime = sqlite3_column_int64 (*pp, 0);
1022 29 : string b_stype = string((const char*) sqlite3_column_text (*pp, 1) ?: ""); /* by DDL may not be NULL */
1023 29 : string b_source0 = string((const char*) sqlite3_column_text (*pp, 2) ?: ""); /* may be NULL */
1024 29 : string b_source1 = string((const char*) sqlite3_column_text (*pp, 3) ?: ""); /* may be NULL */
1025 :
1026 29 : if (verbose > 1)
1027 0 : obatched(clog) << "found mtime=" << b_mtime << " stype=" << b_stype
1028 0 : << " source0=" << b_source0 << " source1=" << b_source1 << endl;
1029 :
1030 : // Try accessing the located match.
1031 : // XXX: in case of multiple matches, attempt them in parallel?
1032 29 : auto r = handle_buildid_match (b_mtime, b_stype, b_source0, b_source1, result_fd);
1033 29 : if (r)
1034 29 : return r;
1035 0 : }
1036 :
1037 : // We couldn't find it in the database. Last ditch effort
1038 : // is to defer to other debuginfo servers.
1039 :
1040 3 : int fd = -1;
1041 3 : debuginfod_client *client = debuginfod_begin ();
1042 3 : if (client != NULL)
1043 : {
1044 3 : debuginfod_set_progressfn (client, & debuginfod_find_progress);
1045 :
1046 3 : if (artifacttype == "debuginfo")
1047 2 : fd = debuginfod_find_debuginfo (client,
1048 2 : (const unsigned char*) buildid.c_str(),
1049 : 0, NULL);
1050 1 : else if (artifacttype == "executable")
1051 1 : fd = debuginfod_find_executable (client,
1052 1 : (const unsigned char*) buildid.c_str(),
1053 : 0, NULL);
1054 0 : else if (artifacttype == "source")
1055 0 : fd = debuginfod_find_source (client,
1056 0 : (const unsigned char*) buildid.c_str(),
1057 : 0, suffix.c_str(), NULL);
1058 : }
1059 : else
1060 0 : fd = -errno; /* Set by debuginfod_begin. */
1061 3 : debuginfod_end (client);
1062 :
1063 3 : if (fd >= 0)
1064 : {
1065 1 : inc_metric ("http_responses_total","result","upstream");
1066 : struct stat s;
1067 1 : int rc = fstat (fd, &s);
1068 1 : if (rc == 0)
1069 : {
1070 1 : auto r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
1071 1 : if (r)
1072 : {
1073 1 : MHD_add_response_header (r, "Content-Type", "application/octet-stream");
1074 1 : add_mhd_last_modified (r, s.st_mtime);
1075 1 : if (verbose > 1)
1076 0 : obatched(clog) << "serving file from upstream debuginfod/cache" << endl;
1077 1 : if (result_fd)
1078 0 : *result_fd = fd;
1079 1 : return r; // NB: don't close fd; libmicrohttpd will
1080 : }
1081 : }
1082 0 : close (fd);
1083 : }
1084 2 : else if (fd != -ENOSYS) // no DEBUGINFOD_URLS configured
1085 0 : throw libc_exception(-fd, "upstream debuginfod query failed");
1086 :
1087 2 : throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
1088 : }
1089 :
1090 :
1091 : ////////////////////////////////////////////////////////////////////////
1092 :
// Table of metric readings, keyed by the full prometheus series name
// (metric name plus optional {label="value"} part).
static map<string,int64_t> metrics; // arbitrary data for /metrics query
// NB: store int64_t since all our metrics are integers; prometheus accepts double
// Taken by the set_/inc_/add_metric helpers and by handle_metrics
// around every access to the table above.
static mutex metrics_lock;
1096 :
// Build one prometheus-compatible  name="escaped-value"  label string.
// Within VALUE, backslash, double-quote and newline are rewritten as
// \\ , \" and \n respectively; every other byte is copied verbatim.
// https://prometheus.io/docs/instrumenting/exposition_formats/
static string
metric_label(const string& name, const string& value)
{
  string quoted = name + "=\"";
  for (auto&& ch : value)
    {
      if (ch == '\\')
        quoted += "\\\\";
      else if (ch == '\"')
        quoted += "\\\"";
      else if (ch == '\n')
        quoted += "\\n";
      else
        quoted += ch;
    }
  quoted += "\"";
  return quoted;
}
1115 :
1116 :
1117 : // add prometheus-format metric name + label tuple (if any) + value
1118 :
1119 : static void
1120 4 : set_metric(const string& metric, int64_t value)
1121 : {
1122 4 : unique_lock<mutex> lock(metrics_lock);
1123 4 : metrics[metric] = value;
1124 4 : }
1125 : #if 0 /* unused */
1126 : static void
1127 : inc_metric(const string& metric)
1128 : {
1129 : unique_lock<mutex> lock(metrics_lock);
1130 : metrics[metric] ++;
1131 : }
1132 : #endif
1133 : static void
1134 214 : set_metric(const string& metric,
1135 : const string& lname, const string& lvalue,
1136 : int64_t value)
1137 : {
1138 647 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
1139 218 : unique_lock<mutex> lock(metrics_lock);
1140 218 : metrics[key] = value;
1141 218 : }
1142 :
1143 : static void
1144 638 : inc_metric(const string& metric,
1145 : const string& lname, const string& lvalue)
1146 : {
1147 1914 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
1148 638 : unique_lock<mutex> lock(metrics_lock);
1149 638 : metrics[key] ++;
1150 638 : }
1151 : static void
1152 42 : add_metric(const string& metric,
1153 : const string& lname, const string& lvalue,
1154 : int64_t value)
1155 : {
1156 126 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
1157 42 : unique_lock<mutex> lock(metrics_lock);
1158 42 : metrics[key] += value;
1159 42 : }
1160 :
1161 :
1162 : // and more for higher arity labels if needed
1163 :
1164 :
1165 : static struct MHD_Response*
1166 18 : handle_metrics ()
1167 : {
1168 36 : stringstream o;
1169 : {
1170 36 : unique_lock<mutex> lock(metrics_lock);
1171 1055 : for (auto&& i : metrics)
1172 1037 : o << i.first << " " << i.second << endl;
1173 : }
1174 18 : const string& os = o.str();
1175 18 : MHD_Response* r = MHD_create_response_from_buffer (os.size(),
1176 18 : (void*) os.c_str(),
1177 : MHD_RESPMEM_MUST_COPY);
1178 18 : MHD_add_response_header (r, "Content-Type", "text/plain");
1179 36 : return r;
1180 : }
1181 :
1182 :
1183 : ////////////////////////////////////////////////////////////////////////
1184 :
1185 :
1186 : /* libmicrohttpd callback */
1187 : static int
1188 49 : handler_cb (void * /*cls*/,
1189 : struct MHD_Connection *connection,
1190 : const char *url,
1191 : const char *method,
1192 : const char * /*version*/,
1193 : const char * /*upload_data*/,
1194 : size_t * /*upload_data_size*/,
1195 : void ** /*con_cls*/)
1196 : {
1197 49 : struct MHD_Response *r = NULL;
1198 98 : string url_copy = url;
1199 :
1200 49 : if (verbose)
1201 0 : obatched(clog) << conninfo(connection) << " " << method << " " << url << endl;
1202 :
1203 : try
1204 : {
1205 49 : if (string(method) != "GET")
1206 0 : throw reportable_exception(400, "we support GET only");
1207 :
1208 : /* Start decoding the URL. */
1209 49 : size_t slash1 = url_copy.find('/', 1);
1210 52 : string url1 = url_copy.substr(0, slash1); // ok even if slash1 not found
1211 :
1212 49 : if (slash1 != string::npos && url1 == "/buildid")
1213 : {
1214 30 : size_t slash2 = url_copy.find('/', slash1+1);
1215 30 : if (slash2 == string::npos)
1216 0 : throw reportable_exception("/buildid/ webapi error, need buildid");
1217 :
1218 60 : string buildid = url_copy.substr(slash1+1, slash2-slash1-1);
1219 :
1220 30 : size_t slash3 = url_copy.find('/', slash2+1);
1221 62 : string artifacttype, suffix;
1222 30 : if (slash3 == string::npos)
1223 : {
1224 22 : artifacttype = url_copy.substr(slash2+1);
1225 22 : suffix = "";
1226 : }
1227 : else
1228 : {
1229 8 : artifacttype = url_copy.substr(slash2+1, slash3-slash2-1);
1230 8 : suffix = url_copy.substr(slash3); // include the slash in the suffix
1231 : }
1232 :
1233 30 : inc_metric("http_requests_total", "type", artifacttype);
1234 30 : r = handle_buildid(buildid, artifacttype, suffix, 0); // NB: don't care about result-fd
1235 : }
1236 19 : else if (url1 == "/metrics")
1237 : {
1238 18 : inc_metric("http_requests_total", "type", "metrics");
1239 18 : r = handle_metrics();
1240 : }
1241 : else
1242 1 : throw reportable_exception("webapi error, unrecognized /operation");
1243 :
1244 46 : if (r == 0)
1245 0 : throw reportable_exception("internal error, missing response");
1246 :
1247 46 : int rc = MHD_queue_response (connection, MHD_HTTP_OK, r);
1248 46 : MHD_destroy_response (r);
1249 46 : return rc;
1250 : }
1251 6 : catch (const reportable_exception& e)
1252 : {
1253 3 : inc_metric("http_responses_total","result","error");
1254 3 : e.report(clog);
1255 3 : return e.mhd_send_response (connection);
1256 : }
1257 : }
1258 :
1259 :
1260 : ////////////////////////////////////////////////////////////////////////
1261 : // borrowed originally from src/nm.c get_local_names()
1262 :
1263 : static void
1264 10 : dwarf_extract_source_paths (Elf *elf, set<string>& debug_sourcefiles)
1265 : noexcept // no exceptions - so we can simplify the altdbg resource release at end
1266 : {
1267 10 : Dwarf* dbg = dwarf_begin_elf (elf, DWARF_C_READ, NULL);
1268 10 : if (dbg == NULL)
1269 0 : return;
1270 :
1271 10 : Dwarf* altdbg = NULL;
1272 10 : int altdbg_fd = -1;
1273 :
1274 : // DWZ handling: if we have an unsatisfied debug-alt-link, add an
1275 : // empty string into the outgoing sourcefiles set, so the caller
1276 : // should know that our data is incomplete.
1277 : const char *alt_name_p;
1278 : const void *alt_build_id; // elfutils-owned memory
1279 10 : ssize_t sz = dwelf_dwarf_gnu_debugaltlink (dbg, &alt_name_p, &alt_build_id);
1280 10 : if (sz > 0) // got one!
1281 : {
1282 4 : string buildid;
1283 2 : unsigned char* build_id_bytes = (unsigned char*) alt_build_id;
1284 42 : for (ssize_t idx=0; idx<sz; idx++)
1285 : {
1286 40 : buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
1287 40 : buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
1288 : }
1289 :
1290 2 : if (verbose > 3)
1291 0 : obatched(clog) << "Need altdebug buildid=" << buildid << endl;
1292 :
1293 : // but is it unsatisfied the normal elfutils ways?
1294 2 : Dwarf* alt = dwarf_getalt (dbg);
1295 2 : if (alt == NULL)
1296 : {
1297 : // Yup, unsatisfied the normal way. Maybe we can satisfy it
1298 : // from our own debuginfod database.
1299 : int alt_fd;
1300 2 : struct MHD_Response *r = 0;
1301 : try
1302 : {
1303 2 : r = handle_buildid (buildid, "debuginfo", "", &alt_fd);
1304 : }
1305 0 : catch (const reportable_exception& e)
1306 : {
1307 : // swallow exceptions
1308 : }
1309 :
1310 : // NB: this is not actually recursive! This invokes the web-query
1311 : // path, which cannot get back into the scan code paths.
1312 2 : if (r)
1313 : {
1314 : // Found it!
1315 2 : altdbg_fd = dup(alt_fd); // ok if this fails, downstream failures ok
1316 2 : alt = altdbg = dwarf_begin (altdbg_fd, DWARF_C_READ);
1317 : // NB: must close this dwarf and this fd at the bottom of the function!
1318 2 : MHD_destroy_response (r); // will close alt_fd
1319 2 : if (alt)
1320 2 : dwarf_setalt (dbg, alt);
1321 : }
1322 : }
1323 : else
1324 : {
1325 : // NB: dwarf_setalt(alt) inappropriate - already done!
1326 : // NB: altdbg will stay 0 so nothing tries to redundantly dealloc.
1327 : }
1328 :
1329 2 : if (alt)
1330 : {
1331 2 : if (verbose > 3)
1332 0 : obatched(clog) << "Resolved altdebug buildid=" << buildid << endl;
1333 : }
1334 : else // (alt == NULL) - signal possible presence of poor debuginfo
1335 : {
1336 0 : debug_sourcefiles.insert("");
1337 0 : if (verbose > 3)
1338 0 : obatched(clog) << "Unresolved altdebug buildid=" << buildid << endl;
1339 : }
1340 : }
1341 :
1342 10 : Dwarf_Off offset = 0;
1343 : Dwarf_Off old_offset;
1344 : size_t hsize;
1345 :
1346 331 : while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, NULL) == 0)
1347 : {
1348 : Dwarf_Die cudie_mem;
1349 321 : Dwarf_Die *cudie = dwarf_offdie (dbg, old_offset + hsize, &cudie_mem);
1350 :
1351 321 : if (cudie == NULL)
1352 1 : continue;
1353 321 : if (dwarf_tag (cudie) != DW_TAG_compile_unit)
1354 1 : continue;
1355 :
1356 320 : const char *cuname = dwarf_diename(cudie) ?: "unknown";
1357 :
1358 : Dwarf_Files *files;
1359 : size_t nfiles;
1360 320 : if (dwarf_getsrcfiles (cudie, &files, &nfiles) != 0)
1361 0 : continue;
1362 :
1363 : // extract DW_AT_comp_dir to resolve relative file names
1364 320 : const char *comp_dir = "";
1365 : const char *const *dirs;
1366 : size_t ndirs;
1367 640 : if (dwarf_getsrcdirs (files, &dirs, &ndirs) == 0 &&
1368 320 : dirs[0] != NULL)
1369 320 : comp_dir = dirs[0];
1370 320 : if (comp_dir == NULL)
1371 0 : comp_dir = "";
1372 :
1373 320 : if (verbose > 3)
1374 0 : obatched(clog) << "searching for sources for cu=" << cuname << " comp_dir=" << comp_dir
1375 0 : << " #files=" << nfiles << " #dirs=" << ndirs << endl;
1376 :
1377 320 : if (comp_dir[0] == '\0' && cuname[0] != '/')
1378 : {
1379 : // This is a common symptom for dwz-compressed debug files,
1380 : // where the altdebug file cannot be resolved.
1381 0 : if (verbose > 3)
1382 0 : obatched(clog) << "skipping cu=" << cuname << " due to empty comp_dir" << endl;
1383 0 : continue;
1384 : }
1385 :
1386 6272 : for (size_t f = 1; f < nfiles; f++)
1387 : {
1388 5952 : const char *hat = dwarf_filesrc (files, f, NULL, NULL);
1389 5952 : if (hat == NULL)
1390 0 : continue;
1391 :
1392 5952 : if (string(hat) == "<built-in>") // gcc intrinsics, don't bother record
1393 0 : continue;
1394 :
1395 5952 : string waldo;
1396 5952 : if (hat[0] == '/') // absolute
1397 4086 : waldo = (string (hat));
1398 1866 : else if (comp_dir[0] != '\0') // comp_dir relative
1399 1866 : waldo = (string (comp_dir) + string("/") + string (hat));
1400 : else
1401 : {
1402 0 : obatched(clog) << "skipping hat=" << hat << " due to empty comp_dir" << endl;
1403 0 : continue;
1404 : }
1405 :
1406 : // NB: this is the 'waldo' that a dbginfo client will have
1407 : // to supply for us to give them the file The comp_dir
1408 : // prefixing is a definite complication. Otherwise we'd
1409 : // have to return a setof comp_dirs (one per CU!) with
1410 : // corresponding filesrc[] names, instead of one absolute
1411 : // resoved set. Maybe we'll have to do that anyway. XXX
1412 :
1413 5952 : if (verbose > 4)
1414 0 : obatched(clog) << waldo
1415 0 : << (debug_sourcefiles.find(waldo)==debug_sourcefiles.end() ? " new" : " dup") << endl;
1416 :
1417 5952 : debug_sourcefiles.insert (waldo);
1418 : }
1419 : }
1420 :
1421 10 : dwarf_end(dbg);
1422 10 : if (altdbg)
1423 2 : dwarf_end(altdbg);
1424 10 : if (altdbg_fd >= 0)
1425 2 : close(altdbg_fd);
1426 : }
1427 :
1428 :
1429 :
1430 : static void
1431 42 : elf_classify (int fd, bool &executable_p, bool &debuginfo_p, string &buildid, set<string>& debug_sourcefiles)
1432 : {
1433 42 : Elf *elf = elf_begin (fd, ELF_C_READ_MMAP_PRIVATE, NULL);
1434 42 : if (elf == NULL)
1435 0 : return;
1436 :
1437 : try // catch our types of errors and clean up the Elf* object
1438 : {
1439 42 : if (elf_kind (elf) != ELF_K_ELF)
1440 : {
1441 24 : elf_end (elf);
1442 24 : return;
1443 : }
1444 :
1445 : GElf_Ehdr ehdr_storage;
1446 18 : GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
1447 18 : if (ehdr == NULL)
1448 : {
1449 0 : elf_end (elf);
1450 0 : return;
1451 : }
1452 18 : auto elf_type = ehdr->e_type;
1453 :
1454 : const void *build_id; // elfutils-owned memory
1455 18 : ssize_t sz = dwelf_elf_gnu_build_id (elf, & build_id);
1456 18 : if (sz <= 0)
1457 : {
1458 : // It's not a diagnostic-worthy error for an elf file to lack build-id.
1459 : // It might just be very old.
1460 0 : elf_end (elf);
1461 0 : return;
1462 : }
1463 :
1464 : // build_id is a raw byte array; convert to hexadecimal *lowercase*
1465 18 : unsigned char* build_id_bytes = (unsigned char*) build_id;
1466 378 : for (ssize_t idx=0; idx<sz; idx++)
1467 : {
1468 360 : buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
1469 360 : buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
1470 : }
1471 :
1472 : // now decide whether it's an executable - namely, any allocatable section has
1473 : // PROGBITS;
1474 18 : if (elf_type == ET_EXEC || elf_type == ET_DYN)
1475 : {
1476 : size_t shnum;
1477 16 : int rc = elf_getshdrnum (elf, &shnum);
1478 16 : if (rc < 0)
1479 0 : throw elfutils_exception(rc, "getshdrnum");
1480 :
1481 16 : executable_p = false;
1482 283 : for (size_t sc = 0; sc < shnum; sc++)
1483 : {
1484 276 : Elf_Scn *scn = elf_getscn (elf, sc);
1485 276 : if (scn == NULL)
1486 0 : continue;
1487 :
1488 : GElf_Shdr shdr_mem;
1489 276 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
1490 276 : if (shdr == NULL)
1491 0 : continue;
1492 :
1493 : // allocated (loadable / vm-addr-assigned) section with available content?
1494 276 : if ((shdr->sh_type == SHT_PROGBITS) && (shdr->sh_flags & SHF_ALLOC))
1495 : {
1496 9 : if (verbose > 4)
1497 0 : obatched(clog) << "executable due to SHF_ALLOC SHT_PROGBITS sc=" << sc << endl;
1498 9 : executable_p = true;
1499 9 : break; // no need to keep looking for others
1500 : }
1501 : } // iterate over sections
1502 : } // executable_p classification
1503 :
1504 : // now decide whether it's a debuginfo - namely, if it has any .debug* or .zdebug* sections
1505 : // logic mostly stolen from fweimer@redhat.com's elfclassify drafts
1506 : size_t shstrndx;
1507 18 : int rc = elf_getshdrstrndx (elf, &shstrndx);
1508 18 : if (rc < 0)
1509 0 : throw elfutils_exception(rc, "getshdrstrndx");
1510 :
1511 18 : Elf_Scn *scn = NULL;
1512 : while (true)
1513 : {
1514 483 : scn = elf_nextscn (elf, scn);
1515 483 : if (scn == NULL)
1516 18 : break;
1517 : GElf_Shdr shdr_storage;
1518 475 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
1519 475 : if (shdr == NULL)
1520 0 : break;
1521 475 : const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
1522 475 : if (section_name == NULL)
1523 0 : break;
1524 475 : if (strncmp(section_name, ".debug_line", 11) == 0 ||
1525 465 : strncmp(section_name, ".zdebug_line", 12) == 0)
1526 : {
1527 10 : debuginfo_p = true;
1528 10 : dwarf_extract_source_paths (elf, debug_sourcefiles);
1529 10 : break; // expecting only one .*debug_line, so no need to look for others
1530 : }
1531 465 : else if (strncmp(section_name, ".debug_", 7) == 0 ||
1532 433 : strncmp(section_name, ".zdebug_", 8) == 0)
1533 : {
1534 32 : debuginfo_p = true;
1535 : // NB: don't break; need to parse .debug_line for sources
1536 : }
1537 465 : }
1538 : }
1539 0 : catch (const reportable_exception& e)
1540 : {
1541 0 : e.report(clog);
1542 : }
1543 18 : elf_end (elf);
1544 : }
1545 :
1546 :
// Handed to a semaphore_borrower once per traversed entry in
// scan_source_file_path.
static semaphore* scan_concurrency_sem = 0; // used to implement -c load limiting
1548 :
1549 :
// Walk the file tree rooted at DIR with fts(3) and index what is found.
//
// For every regular file that passes the include/exclude regexes and
// has not already been recorded with the same mtime, the file is run
// through elf_classify() and the results are stored via the prepared
// statements below: the file name and build-id are interned, the
// executable/debuginfo flags go to the _f_de table, and each source
// file reference that resolves on disk goes to the _f_s table.
// Finally the (file, mtime, size) triple is recorded as scanned, so a
// later pass can skip it.
//
// reportable_exceptions raised while handling one entry are logged and
// the traversal moves on to the next entry.  The loop stops early once
// `interrupted` is set.  A summary line is logged at the end.
static void
scan_source_file_path (const string& dir)
{
  obatched(clog) << "fts/file traversing " << dir << endl;

  struct timeval tv_start, tv_end;
  gettimeofday (&tv_start, NULL);

  // Interning tables: one row per distinct build-id / file name.
  sqlite_ps ps_upsert_buildids (db, "file-buildids-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
  sqlite_ps ps_upsert_files (db, "file-files-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);");
  // Executable/debuginfo record for (buildid, file, mtime).
  sqlite_ps ps_upsert_de (db, "file-de-upsert",
                          "insert or ignore into " BUILDIDS "_f_de "
                          "(buildid, debuginfo_p, executable_p, file, mtime) "
                          "values ((select id from " BUILDIDS "_buildids where hex = ?),"
                          " ?,?,"
                          " (select id from " BUILDIDS "_files where name = ?), ?);");
  // Source-reference record: buildid + name as found in DWARF + resolved file.
  sqlite_ps ps_upsert_s (db, "file-s-upsert",
                         "insert or ignore into " BUILDIDS "_f_s "
                         "(buildid, artifactsrc, file, mtime) "
                         "values ((select id from " BUILDIDS "_buildids where hex = ?),"
                         " (select id from " BUILDIDS "_files where name = ?),"
                         " (select id from " BUILDIDS "_files where name = ?),"
                         " ?);");
  // Has this (file, mtime) already been scanned?
  sqlite_ps ps_query (db, "file-negativehit-find",
                      "select 1 from " BUILDIDS "_file_mtime_scanned where sourcetype = 'F' and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;");
  // Mark (file, mtime, size) as scanned.
  sqlite_ps ps_scan_done (db, "file-scanned",
                          "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
                          "values ('F', (select id from " BUILDIDS "_files where name = ?), ?, ?);");


  // fts_open wants a NULL-terminated array of non-const path pointers.
  char * const dirs[] = { (char*) dir.c_str(), NULL };

  // Counters for the progress / summary log lines.
  unsigned fts_scanned=0, fts_regex=0, fts_cached=0, fts_debuginfo=0, fts_executable=0, fts_sourcefiles=0;

  FTS *fts = fts_open (dirs,
                       (traverse_logical ? FTS_LOGICAL : FTS_PHYSICAL|FTS_XDEV)
                       | FTS_NOCHDIR /* multithreaded */,
                       NULL);
  if (fts == NULL)
    {
      obatched(cerr) << "cannot fts_open " << dir << endl;
      return;
    }

  FTSENT *f;
  while ((f = fts_read (fts)) != NULL)
    {
      // Scoped to this iteration, i.e. taken once per traversed entry.
      semaphore_borrower handle_one_file (scan_concurrency_sem);

      fts_scanned ++;
      if (interrupted)
        break;

      if (verbose > 2)
        obatched(clog) << "fts/file traversing " << f->fts_path << endl;

      try
        {
          /* Found a file.  Convert it to an absolute path, so
             the buildid database does not have relative path
             names that are unresolvable from a subsequent run
             in a different cwd. */
          char *rp = realpath(f->fts_path, NULL);
          if (rp == NULL)
            continue; // ignore dangling symlink or such
          string rps = string(rp);
          free (rp);

          // regexec returns 0 on a match, hence the negations.
          bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0);
          bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0);
          if (!ri || rx)
            {
              if (verbose > 3)
                obatched(clog) << "fts/file skipped by regex " << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
              fts_regex ++;
              continue;
            }

          switch (f->fts_info)
            {
            case FTS_D:
              break;

            case FTS_DP:
              break;

            case FTS_F:
              {
                /* See if we know of it already. */
                int rc = ps_query
                  .reset()
                  .bind(1, rps)
                  .bind(2, f->fts_statp->st_mtime)
                  .step();
                ps_query.reset();
                if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
                  // no need to recheck a file/version we already know
                  // specifically, no need to elf-begin a file we already determined is non-elf
                  // (so is stored with buildid=NULL)
                  {
                    fts_cached ++;
                    continue;
                  }

                bool executable_p = false, debuginfo_p = false; // E and/or D
                string buildid;
                set<string> sourcefiles;

                int fd = open (rps.c_str(), O_RDONLY);
                try
                  {
                    if (fd >= 0)
                      elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
                    else
                      throw libc_exception(errno, string("open ") + rps);
                    inc_metric ("scanned_total","source","file");
                  }

                // NB: we catch exceptions here too, so that we can
                // cache the corrupt-elf case (!executable_p &&
                // !debuginfo_p) just below, just as if we had an
                // EPERM error from open(2).

                catch (const reportable_exception& e)
                  {
                    e.report(clog);
                  }

                if (fd >= 0)
                  close (fd);

                // register this file name in the interning table
                ps_upsert_files
                  .reset()
                  .bind(1, rps)
                  .step_ok_done();

                if (buildid == "")
                  {
                    // no point storing an elf file without buildid
                    executable_p = false;
                    debuginfo_p = false;
                  }
                else
                  {
                    // register this build-id in the interning table
                    ps_upsert_buildids
                      .reset()
                      .bind(1, buildid)
                      .step_ok_done();
                  }

                if (executable_p)
                  fts_executable ++;
                if (debuginfo_p)
                  fts_debuginfo ++;
                if (executable_p || debuginfo_p)
                  {
                    ps_upsert_de
                      .reset()
                      .bind(1, buildid)
                      .bind(2, debuginfo_p ? 1 : 0)
                      .bind(3, executable_p ? 1 : 0)
                      .bind(4, rps)
                      .bind(5, f->fts_statp->st_mtime)
                      .step_ok_done();
                  }
                if (executable_p)
                  inc_metric("found_executable_total","source","files");
                if (debuginfo_p)
                  inc_metric("found_debuginfo_total","source","files");

                if (sourcefiles.size() && buildid != "")
                  {
                    fts_sourcefiles += sourcefiles.size();

                    for (auto&& dwarfsrc : sourcefiles)
                      {
                        char *srp = realpath(dwarfsrc.c_str(), NULL);
                        if (srp == NULL) // also if DWZ unresolved dwarfsrc=""
                          continue; // unresolvable files are not a serious problem
                        // throw libc_exception(errno, "fts/file realpath " + srcpath);
                        string srps = string(srp);
                        free (srp);

                        struct stat sfs;
                        rc = stat(srps.c_str(), &sfs);
                        if (rc != 0)
                          continue;

                        if (verbose > 2)
                          obatched(clog) << "recorded buildid=" << buildid << " file=" << srps
                                         << " mtime=" << sfs.st_mtime
                                         << " as source " << dwarfsrc << endl;

                        // register the resolved source path in the interning table
                        ps_upsert_files
                          .reset()
                          .bind(1, srps)
                          .step_ok_done();

                        // register the dwarfsrc name in the interning table too
                        ps_upsert_files
                          .reset()
                          .bind(1, dwarfsrc)
                          .step_ok_done();

                        // link buildid + DWARF name to the resolved file and its mtime
                        ps_upsert_s
                          .reset()
                          .bind(1, buildid)
                          .bind(2, dwarfsrc)
                          .bind(3, srps)
                          .bind(4, sfs.st_mtime)
                          .step_ok_done();

                        inc_metric("found_sourcerefs_total","source","files");
                      }
                  }

                // Remember this file/mtime/size as scanned, whatever the outcome.
                ps_scan_done
                  .reset()
                  .bind(1, rps)
                  .bind(2, f->fts_statp->st_mtime)
                  .bind(3, f->fts_statp->st_size)
                  .step_ok_done();

                if (verbose > 2)
                  obatched(clog) << "recorded buildid=" << buildid << " file=" << rps
                                 << " mtime=" << f->fts_statp->st_mtime << " atype="
                                 << (executable_p ? "E" : "")
                                 << (debuginfo_p ? "D" : "") << endl;
              }
              break;

            case FTS_ERR:
            case FTS_NS:
              throw libc_exception(f->fts_errno, string("fts/file traversal ") + string(f->fts_path));

            default:
            case FTS_SL: /* ignore symlinks; seen in non-L mode only */
              break;
            }

          // Progress line: after each directory at -v, after each file at -vv.
          if ((verbose && f->fts_info == FTS_DP) ||
              (verbose > 1 && f->fts_info == FTS_F))
            obatched(clog) << "fts/file traversing " << rps << ", scanned=" << fts_scanned
                           << ", regex-skipped=" << fts_regex
                           << ", cached=" << fts_cached << ", debuginfo=" << fts_debuginfo
                           << ", executable=" << fts_executable << ", source=" << fts_sourcefiles << endl;
        }
      catch (const reportable_exception& e)
        {
          // Log and carry on with the next entry.
          e.report(clog);
        }
    }
  fts_close (fts);

  gettimeofday (&tv_end, NULL);
  // Elapsed wall-clock time in seconds, with microsecond resolution.
  double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001;

  obatched(clog) << "fts/file traversed " << dir << " in " << deltas << "s, scanned=" << fts_scanned
                 << ", regex-skipped=" << fts_regex
                 << ", cached=" << fts_cached << ", debuginfo=" << fts_debuginfo
                 << ", executable=" << fts_executable << ", source=" << fts_sourcefiles << endl;
}
1814 :
1815 :
1816 : static void*
1817 4 : thread_main_scan_source_file_path (void* arg)
1818 : {
1819 4 : string dir = string((const char*) arg);
1820 :
1821 4 : unsigned rescan_timer = 0;
1822 4 : sig_atomic_t forced_rescan_count = 0;
1823 4 : set_metric("thread_timer_max", "file", dir, rescan_s);
1824 4 : set_metric("thread_tid", "file", dir, tid());
1825 26 : while (! interrupted)
1826 : {
1827 22 : set_metric("thread_timer", "file", dir, rescan_timer);
1828 22 : set_metric("thread_forced_total", "file", dir, forced_rescan_count);
1829 22 : if (rescan_s && rescan_timer > rescan_s)
1830 0 : rescan_timer = 0;
1831 22 : if (sigusr1 != forced_rescan_count)
1832 : {
1833 12 : forced_rescan_count = sigusr1;
1834 12 : rescan_timer = 0;
1835 : }
1836 22 : if (rescan_timer == 0)
1837 : try
1838 : {
1839 16 : set_metric("thread_working", "file", dir, time(NULL));
1840 16 : inc_metric("thread_work_total", "file", dir);
1841 16 : scan_source_file_path (dir);
1842 16 : set_metric("thread_working", "file", dir, 0);
1843 : }
1844 0 : catch (const sqlite_exception& e)
1845 : {
1846 0 : obatched(cerr) << e.message << endl;
1847 : }
1848 22 : sleep (1);
1849 22 : rescan_timer ++;
1850 : }
1851 :
1852 8 : return 0;
1853 : }
1854 :
1855 :
1856 : ////////////////////////////////////////////////////////////////////////
1857 :
1858 :
1859 :
1860 :
1861 : // Analyze given *.rpm file of given age; record buildids / exec/debuginfo-ness of its
1862 : // constituent files with given upsert statements.
1863 : static void
1864 14 : rpm_classify (const string& rps, sqlite_ps& ps_upsert_buildids, sqlite_ps& ps_upsert_files,
1865 : sqlite_ps& ps_upsert_de, sqlite_ps& ps_upsert_sref, sqlite_ps& ps_upsert_sdef,
1866 : time_t mtime,
1867 : unsigned& fts_executable, unsigned& fts_debuginfo, unsigned& fts_sref, unsigned& fts_sdef,
1868 : bool& fts_sref_complete_p)
1869 : {
1870 28 : string popen_cmd = string("rpm2cpio " + shell_escape(rps));
1871 14 : FILE* fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
1872 14 : if (fp == NULL)
1873 0 : throw libc_exception (errno, string("popen ") + popen_cmd);
1874 28 : defer_dtor<FILE*,int> fp_closer (fp, pclose);
1875 :
1876 : struct archive *a;
1877 14 : a = archive_read_new();
1878 14 : if (a == NULL)
1879 0 : throw archive_exception("cannot create archive reader");
1880 14 : defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
1881 :
1882 14 : int rc = archive_read_support_format_cpio(a);
1883 14 : if (rc != ARCHIVE_OK)
1884 0 : throw archive_exception(a, "cannot select cpio format");
1885 14 : rc = archive_read_support_filter_all(a);
1886 14 : if (rc != ARCHIVE_OK)
1887 0 : throw archive_exception(a, "cannot select all filters");
1888 :
1889 14 : rc = archive_read_open_FILE (a, fp);
1890 14 : if (rc != ARCHIVE_OK)
1891 0 : throw archive_exception(a, "cannot open archive from rpm2cpio pipe");
1892 :
1893 14 : if (verbose > 3)
1894 0 : obatched(clog) << "rpm2cpio|libarchive scanning " << rps << endl;
1895 :
1896 : while(1) // parse cpio archive entries
1897 : {
1898 : try
1899 : {
1900 : struct archive_entry *e;
1901 88 : rc = archive_read_next_header (a, &e);
1902 88 : if (rc != ARCHIVE_OK)
1903 14 : break;
1904 :
1905 74 : if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
1906 51 : continue;
1907 :
1908 46 : string fn = archive_entry_pathname (e);
1909 23 : if (fn.size() > 1 && fn[0] == '.')
1910 17 : fn = fn.substr(1); // trim off the leading '.'
1911 :
1912 23 : if (verbose > 3)
1913 0 : obatched(clog) << "rpm2cpio|libarchive checking " << fn << endl;
1914 :
1915 : // extract this file to a temporary file
1916 23 : const char *tmpdir_env = getenv ("TMPDIR") ?: "/tmp";
1917 23 : char* tmppath = NULL;
1918 23 : rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir_env);
1919 23 : if (rc < 0)
1920 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
1921 46 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
1922 23 : int fd = mkstemp (tmppath);
1923 23 : if (fd < 0)
1924 0 : throw libc_exception (errno, "cannot create temporary file");
1925 23 : unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd
1926 46 : defer_dtor<int,int> minifd_closer (fd, close);
1927 :
1928 23 : rc = archive_read_data_into_fd (a, fd);
1929 23 : if (rc != ARCHIVE_OK)
1930 0 : throw archive_exception(a, "cannot extract file");
1931 :
1932 : // finally ... time to run elf_classify on this bad boy and update the database
1933 23 : bool executable_p = false, debuginfo_p = false;
1934 46 : string buildid;
1935 46 : set<string> sourcefiles;
1936 23 : elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
1937 : // NB: might throw
1938 :
1939 23 : if (buildid != "") // intern buildid
1940 : {
1941 : ps_upsert_buildids
1942 14 : .reset()
1943 14 : .bind(1, buildid)
1944 14 : .step_ok_done();
1945 : }
1946 :
1947 : ps_upsert_files // register this rpm constituent file name in interning table
1948 23 : .reset()
1949 23 : .bind(1, fn)
1950 23 : .step_ok_done();
1951 :
1952 23 : if (sourcefiles.size() > 0) // sref records needed
1953 : {
1954 : // NB: we intern each source file once. Once raw, as it
1955 : // appears in the DWARF file list coming back from
1956 : // elf_classify() - because it'll end up in the
1957 : // _norm.artifactsrc column. We don't also put another
1958 : // version with a '.' at the front, even though that's
1959 : // how rpm/cpio packs names, because we hide that from
1960 : // the database for storage efficiency.
1961 :
1962 18 : for (auto&& s : sourcefiles)
1963 : {
1964 12 : if (s == "")
1965 : {
1966 0 : fts_sref_complete_p = false;
1967 0 : continue;
1968 : }
1969 :
1970 : ps_upsert_files
1971 12 : .reset()
1972 12 : .bind(1, s)
1973 12 : .step_ok_done();
1974 :
1975 : ps_upsert_sref
1976 12 : .reset()
1977 12 : .bind(1, buildid)
1978 12 : .bind(2, s)
1979 12 : .step_ok_done();
1980 :
1981 12 : fts_sref ++;
1982 : }
1983 : }
1984 :
1985 23 : if (executable_p)
1986 6 : fts_executable ++;
1987 23 : if (debuginfo_p)
1988 8 : fts_debuginfo ++;
1989 :
1990 23 : if (executable_p || debuginfo_p)
1991 : {
1992 : ps_upsert_de
1993 14 : .reset()
1994 14 : .bind(1, buildid)
1995 14 : .bind(2, debuginfo_p ? 1 : 0)
1996 14 : .bind(3, executable_p ? 1 : 0)
1997 14 : .bind(4, rps)
1998 14 : .bind(5, mtime)
1999 14 : .bind(6, fn)
2000 14 : .step_ok_done();
2001 : }
2002 : else // potential source - sdef record
2003 : {
2004 9 : fts_sdef ++;
2005 : ps_upsert_sdef
2006 9 : .reset()
2007 9 : .bind(1, rps)
2008 9 : .bind(2, mtime)
2009 9 : .bind(3, fn)
2010 9 : .step_ok_done();
2011 : }
2012 :
2013 23 : if ((verbose > 2) && (executable_p || debuginfo_p))
2014 0 : obatched(clog) << "recorded buildid=" << buildid << " rpm=" << rps << " file=" << fn
2015 0 : << " mtime=" << mtime << " atype="
2016 : << (executable_p ? "E" : "")
2017 : << (debuginfo_p ? "D" : "")
2018 0 : << " sourcefiles=" << sourcefiles.size() << endl;
2019 :
2020 : }
2021 0 : catch (const reportable_exception& e)
2022 : {
2023 0 : e.report(clog);
2024 : }
2025 74 : }
2026 14 : }
2027 :
2028 :
2029 :
2030 : // scan for *.rpm files
2031 : static void
2032 15 : scan_source_rpm_path (const string& dir)
2033 : {
2034 15 : obatched(clog) << "fts/rpm traversing " << dir << endl;
2035 :
2036 30 : sqlite_ps ps_upsert_buildids (db, "rpm-buildid-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
2037 30 : sqlite_ps ps_upsert_files (db, "rpm-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);");
2038 : sqlite_ps ps_upsert_de (db, "rpm-de-insert",
2039 : "insert or ignore into " BUILDIDS "_r_de (buildid, debuginfo_p, executable_p, file, mtime, content) values ("
2040 : "(select id from " BUILDIDS "_buildids where hex = ?), ?, ?, "
2041 : "(select id from " BUILDIDS "_files where name = ?), ?, "
2042 30 : "(select id from " BUILDIDS "_files where name = ?));");
2043 : sqlite_ps ps_upsert_sref (db, "rpm-sref-insert",
2044 : "insert or ignore into " BUILDIDS "_r_sref (buildid, artifactsrc) values ("
2045 : "(select id from " BUILDIDS "_buildids where hex = ?), "
2046 30 : "(select id from " BUILDIDS "_files where name = ?));");
2047 : sqlite_ps ps_upsert_sdef (db, "rpm-sdef-insert",
2048 : "insert or ignore into " BUILDIDS "_r_sdef (file, mtime, content) values ("
2049 : "(select id from " BUILDIDS "_files where name = ?), ?,"
2050 30 : "(select id from " BUILDIDS "_files where name = ?));");
2051 : sqlite_ps ps_query (db, "rpm-negativehit-query",
2052 : "select 1 from " BUILDIDS "_file_mtime_scanned where "
2053 30 : "sourcetype = 'R' and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;");
2054 : sqlite_ps ps_scan_done (db, "rpm-scanned",
2055 : "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
2056 30 : "values ('R', (select id from " BUILDIDS "_files where name = ?), ?, ?);");
2057 :
2058 15 : char * const dirs[] = { (char*) dir.c_str(), NULL };
2059 :
2060 : struct timeval tv_start, tv_end;
2061 15 : gettimeofday (&tv_start, NULL);
2062 15 : unsigned fts_scanned=0, fts_regex=0, fts_cached=0, fts_debuginfo=0;
2063 15 : unsigned fts_executable=0, fts_rpm = 0, fts_sref=0, fts_sdef=0;
2064 :
2065 15 : FTS *fts = fts_open (dirs,
2066 : (traverse_logical ? FTS_LOGICAL : FTS_PHYSICAL|FTS_XDEV)
2067 : | FTS_NOCHDIR /* multithreaded */,
2068 : NULL);
2069 15 : if (fts == NULL)
2070 : {
2071 0 : obatched(cerr) << "cannot fts_open " << dir << endl;
2072 0 : return;
2073 : }
2074 :
2075 : FTSENT *f;
2076 107 : while ((f = fts_read (fts)) != NULL)
2077 : {
2078 92 : semaphore_borrower handle_one_file (scan_concurrency_sem);
2079 :
2080 92 : fts_scanned ++;
2081 92 : if (interrupted)
2082 0 : break;
2083 :
2084 92 : if (verbose > 2)
2085 0 : obatched(clog) << "fts/rpm traversing " << f->fts_path << endl;
2086 :
2087 : try
2088 : {
2089 : /* Found a file. Convert it to an absolute path, so
2090 : the buildid database does not have relative path
2091 : names that are unresolvable from a subsequent run
2092 : in a different cwd. */
2093 92 : char *rp = realpath(f->fts_path, NULL);
2094 92 : if (rp == NULL)
2095 27 : continue; // ignore dangling symlink or such
2096 92 : string rps = string(rp);
2097 92 : free (rp);
2098 :
2099 92 : bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0);
2100 92 : bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0);
2101 92 : if (!ri || rx)
2102 : {
2103 0 : if (verbose > 3)
2104 0 : obatched(clog) << "fts/rpm skipped by regex " << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
2105 0 : fts_regex ++;
2106 0 : continue;
2107 : }
2108 :
2109 92 : switch (f->fts_info)
2110 : {
2111 23 : case FTS_D:
2112 23 : break;
2113 :
2114 23 : case FTS_DP:
2115 23 : break;
2116 :
2117 41 : case FTS_F:
2118 : {
2119 : // heuristic: reject if file name does not end with ".rpm"
2120 : // (alternative: try opening with librpm etc., caching)
2121 41 : string suffix = ".rpm";
2122 82 : if (rps.size() < suffix.size() ||
2123 82 : rps.substr(rps.size()-suffix.size()) != suffix)
2124 13 : continue;
2125 28 : fts_rpm ++;
2126 :
2127 : /* See if we know of it already. */
2128 : int rc = ps_query
2129 28 : .reset()
2130 28 : .bind(1, rps)
2131 28 : .bind(2, f->fts_statp->st_mtime)
2132 28 : .step();
2133 28 : ps_query.reset();
2134 28 : if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
2135 : // no need to recheck a file/version we already know
2136 : // specifically, no need to parse this rpm again, since we already have
2137 : // it as a D or E or S record,
2138 : // (so is stored with buildid=NULL)
2139 : {
2140 14 : fts_cached ++;
2141 14 : continue;
2142 : }
2143 :
2144 : // intern the rpm file name
2145 : ps_upsert_files
2146 14 : .reset()
2147 14 : .bind(1, rps)
2148 14 : .step_ok_done();
2149 :
2150 : // extract the rpm contents via popen("rpm2cpio") | libarchive | loop-of-elf_classify()
2151 14 : unsigned my_fts_executable = 0, my_fts_debuginfo = 0, my_fts_sref = 0, my_fts_sdef = 0;
2152 14 : bool my_fts_sref_complete_p = true;
2153 : try
2154 : {
2155 14 : rpm_classify (rps,
2156 : ps_upsert_buildids, ps_upsert_files,
2157 : ps_upsert_de, ps_upsert_sref, ps_upsert_sdef, // dalt
2158 14 : f->fts_statp->st_mtime,
2159 : my_fts_executable, my_fts_debuginfo, my_fts_sref, my_fts_sdef,
2160 : my_fts_sref_complete_p);
2161 14 : inc_metric ("scanned_total","source","rpm");
2162 14 : add_metric("found_debuginfo_total","source","rpm",
2163 : my_fts_debuginfo);
2164 14 : add_metric("found_executable_total","source","rpm",
2165 : my_fts_executable);
2166 14 : add_metric("found_sourcerefs_total","source","rpm",
2167 : my_fts_sref);
2168 : }
2169 0 : catch (const reportable_exception& e)
2170 : {
2171 0 : e.report(clog);
2172 : }
2173 :
2174 14 : if (verbose > 2)
2175 0 : obatched(clog) << "scanned rpm=" << rps
2176 0 : << " mtime=" << f->fts_statp->st_mtime
2177 0 : << " executables=" << my_fts_executable
2178 0 : << " debuginfos=" << my_fts_debuginfo
2179 0 : << " srefs=" << my_fts_sref
2180 0 : << " sdefs=" << my_fts_sdef
2181 0 : << endl;
2182 :
2183 14 : fts_executable += my_fts_executable;
2184 14 : fts_debuginfo += my_fts_debuginfo;
2185 14 : fts_sref += my_fts_sref;
2186 14 : fts_sdef += my_fts_sdef;
2187 :
2188 14 : if (my_fts_sref_complete_p) // leave incomplete?
2189 : ps_scan_done
2190 14 : .reset()
2191 14 : .bind(1, rps)
2192 14 : .bind(2, f->fts_statp->st_mtime)
2193 14 : .bind(3, f->fts_statp->st_size)
2194 55 : .step_ok_done();
2195 : }
2196 14 : break;
2197 :
2198 0 : case FTS_ERR:
2199 : case FTS_NS:
2200 0 : throw libc_exception(f->fts_errno, string("fts/rpm traversal ") + string(f->fts_path));
2201 :
2202 5 : default:
2203 : case FTS_SL: /* ignore symlinks; seen in non-L mode only */
2204 5 : break;
2205 : }
2206 :
2207 65 : if ((verbose && f->fts_info == FTS_DP) ||
2208 65 : (verbose > 1 && f->fts_info == FTS_F))
2209 0 : obatched(clog) << "fts/rpm traversing " << rps << ", scanned=" << fts_scanned
2210 0 : << ", regex-skipped=" << fts_regex
2211 0 : << ", rpm=" << fts_rpm << ", cached=" << fts_cached << ", debuginfo=" << fts_debuginfo
2212 0 : << ", executable=" << fts_executable
2213 0 : << ", sourcerefs=" << fts_sref << ", sourcedefs=" << fts_sdef << endl;
2214 : }
2215 0 : catch (const reportable_exception& e)
2216 : {
2217 0 : e.report(clog);
2218 : }
2219 : }
2220 15 : fts_close (fts);
2221 :
2222 15 : gettimeofday (&tv_end, NULL);
2223 15 : double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001;
2224 :
2225 30 : obatched(clog) << "fts/rpm traversed " << dir << " in " << deltas << "s, scanned=" << fts_scanned
2226 15 : << ", regex-skipped=" << fts_regex
2227 15 : << ", rpm=" << fts_rpm << ", cached=" << fts_cached << ", debuginfo=" << fts_debuginfo
2228 15 : << ", executable=" << fts_executable
2229 15 : << ", sourcerefs=" << fts_sref << ", sourcedefs=" << fts_sdef << endl;
2230 : }
2231 :
2232 :
2233 :
2234 : static void*
2235 3 : thread_main_scan_source_rpm_path (void* arg)
2236 : {
2237 3 : string dir = string((const char*) arg);
2238 :
2239 3 : unsigned rescan_timer = 0;
2240 3 : sig_atomic_t forced_rescan_count = 0;
2241 3 : set_metric("thread_timer_max", "rpm", dir, rescan_s);
2242 3 : set_metric("thread_tid", "rpm", dir, tid());
2243 24 : while (! interrupted)
2244 : {
2245 21 : set_metric("thread_timer", "rpm", dir, rescan_timer);
2246 21 : set_metric("thread_forced_total", "rpm", dir, forced_rescan_count);
2247 21 : if (rescan_s && rescan_timer > rescan_s)
2248 0 : rescan_timer = 0;
2249 21 : if (sigusr1 != forced_rescan_count)
2250 : {
2251 12 : forced_rescan_count = sigusr1;
2252 12 : rescan_timer = 0;
2253 : }
2254 21 : if (rescan_timer == 0)
2255 : try
2256 : {
2257 15 : set_metric("thread_working", "rpm", dir, time(NULL));
2258 15 : inc_metric("thread_work_total", "rpm", dir);
2259 15 : scan_source_rpm_path (dir);
2260 15 : set_metric("thread_working", "rpm", dir, 0);
2261 : }
2262 0 : catch (const sqlite_exception& e)
2263 : {
2264 0 : obatched(cerr) << e.message << endl;
2265 : }
2266 21 : sleep (1);
2267 21 : rescan_timer ++;
2268 : }
2269 :
2270 6 : return 0;
2271 : }
2272 :
2273 :
2274 : ////////////////////////////////////////////////////////////////////////
2275 :
2276 : static void
2277 3 : database_stats_report()
2278 : {
2279 : sqlite_ps ps_query (db, "database-overview",
2280 9 : "select label,quantity from " BUILDIDS "_stats");
2281 :
2282 3 : obatched(clog) << "database record counts:" << endl;
2283 : while (1)
2284 : {
2285 33 : int rc = sqlite3_step (ps_query);
2286 33 : if (rc == SQLITE_DONE) break;
2287 30 : if (rc != SQLITE_ROW)
2288 0 : throw sqlite_exception(rc, "step");
2289 :
2290 60 : obatched(clog)
2291 60 : << right << setw(20) << ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL")
2292 : << " "
2293 60 : << (sqlite3_column_text(ps_query, 1) ?: (const unsigned char*) "NULL")
2294 30 : << endl;
2295 :
2296 30 : set_metric("groom", "statistic",
2297 30 : ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL"),
2298 30 : (sqlite3_column_double(ps_query, 1)));
2299 30 : }
2300 3 : }
2301 :
2302 :
2303 : // Do a round of database grooming that might take many minutes to run.
2304 3 : void groom()
2305 : {
2306 3 : obatched(clog) << "grooming database" << endl;
2307 :
2308 : struct timeval tv_start, tv_end;
2309 3 : gettimeofday (&tv_start, NULL);
2310 :
2311 : // scan for files that have disappeared
2312 : sqlite_ps files (db, "check old files", "select s.mtime, s.file, f.name from "
2313 : BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f "
2314 9 : "where f.id = s.file");
2315 9 : sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
2316 9 : sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
2317 : sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
2318 9 : "where file = ? and mtime = ?");
2319 3 : files.reset();
2320 : while(1)
2321 : {
2322 31 : int rc = files.step();
2323 31 : if (rc != SQLITE_ROW)
2324 3 : break;
2325 :
2326 28 : int64_t mtime = sqlite3_column_int64 (files, 0);
2327 28 : int64_t fileid = sqlite3_column_int64 (files, 1);
2328 28 : const char* filename = ((const char*) sqlite3_column_text (files, 2) ?: "");
2329 : struct stat s;
2330 28 : rc = stat(filename, &s);
2331 28 : if (rc < 0 || (mtime != (int64_t) s.st_mtime))
2332 : {
2333 4 : if (verbose > 2)
2334 0 : obatched(clog) << "groom: forgetting file=" << filename << " mtime=" << mtime << endl;
2335 4 : files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
2336 4 : files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
2337 4 : files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
2338 : }
2339 28 : }
2340 3 : files.reset();
2341 :
2342 : // delete buildids with no references in _r_de or _f_de tables;
2343 : // cascades to _r_sref & _f_s records
2344 : sqlite_ps buildids_del (db, "nuke orphan buildids",
2345 : "delete from " BUILDIDS "_buildids "
2346 : "where not exists (select 1 from " BUILDIDS "_f_de d where " BUILDIDS "_buildids.id = d.buildid) "
2347 9 : "and not exists (select 1 from " BUILDIDS "_r_de d where " BUILDIDS "_buildids.id = d.buildid)");
2348 3 : buildids_del.reset().step_ok_done();
2349 :
2350 : // NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G
2351 9 : sqlite_ps g1 (db, "incremental vacuum", "pragma incremental_vacuum");
2352 3 : g1.reset().step_ok_done();
2353 9 : sqlite_ps g2 (db, "optimize", "pragma optimize");
2354 3 : g2.reset().step_ok_done();
2355 6 : sqlite_ps g3 (db, "wal checkpoint", "pragma wal_checkpoint=truncate");
2356 3 : g3.reset().step_ok_done();
2357 :
2358 3 : database_stats_report();
2359 :
2360 3 : sqlite3_db_release_memory(db); // shrink the process if possible
2361 :
2362 3 : gettimeofday (&tv_end, NULL);
2363 3 : double deltas = (tv_end.tv_sec - tv_start.tv_sec) + (tv_end.tv_usec - tv_start.tv_usec)*0.000001;
2364 :
2365 3 : obatched(clog) << "groomed database in " << deltas << "s" << endl;
2366 3 : }
2367 :
2368 :
2369 : static void*
2370 2 : thread_main_groom (void* /*arg*/)
2371 : {
2372 2 : unsigned groom_timer = 0;
2373 2 : sig_atomic_t forced_groom_count = 0;
2374 2 : set_metric("thread_timer_max", "role", "groom", groom_s);
2375 2 : set_metric("thread_tid", "role", "groom", tid());
2376 10 : while (! interrupted)
2377 : {
2378 8 : set_metric("thread_timer", "role", "groom", groom_timer);
2379 8 : set_metric("thread_forced_total", "role", "groom", forced_groom_count);
2380 8 : if (groom_s && groom_timer > groom_s)
2381 0 : groom_timer = 0;
2382 8 : if (sigusr2 != forced_groom_count)
2383 : {
2384 1 : forced_groom_count = sigusr2;
2385 1 : groom_timer = 0;
2386 : }
2387 8 : if (groom_timer == 0)
2388 : try
2389 : {
2390 3 : set_metric("thread_working", "role", "groom", time(NULL));
2391 3 : inc_metric("thread_work_total", "role", "groom");
2392 3 : groom ();
2393 3 : set_metric("thread_working", "role", "groom", 0);
2394 : }
2395 0 : catch (const sqlite_exception& e)
2396 : {
2397 0 : obatched(cerr) << e.message << endl;
2398 : }
2399 8 : sleep (1);
2400 8 : groom_timer ++;
2401 : }
2402 :
2403 2 : return 0;
2404 : }
2405 :
2406 :
2407 : ////////////////////////////////////////////////////////////////////////
2408 :
2409 :
2410 : static void
2411 2 : signal_handler (int /* sig */)
2412 : {
2413 2 : interrupted ++;
2414 :
2415 2 : if (db)
2416 2 : sqlite3_interrupt (db);
2417 :
2418 : // NB: don't do anything else in here
2419 2 : }
2420 :
2421 : static void
2422 4 : sigusr1_handler (int /* sig */)
2423 : {
2424 4 : sigusr1 ++;
2425 : // NB: don't do anything else in here
2426 4 : }
2427 :
2428 : static void
2429 1 : sigusr2_handler (int /* sig */)
2430 : {
2431 1 : sigusr2 ++;
2432 : // NB: don't do anything else in here
2433 1 : }
2434 :
2435 :
2436 :
2437 :
2438 :
2439 : // A user-defined sqlite function, to score the sharedness of the
2440 : // prefix of two strings. This is used to compare candidate debuginfo
2441 : // / source-rpm names, so that the closest match
2442 : // (directory-topology-wise closest) is found. This is important in
2443 : // case the same sref (source file name) is in many -debuginfo or
2444 : // -debugsource RPMs, such as when multiple versions/releases of the
2445 : // same package are in the database.
2446 :
2447 22 : static void sqlite3_sharedprefix_fn (sqlite3_context* c, int argc, sqlite3_value** argv)
2448 : {
2449 22 : if (argc != 2)
2450 0 : sqlite3_result_error(c, "expect 2 string arguments", -1);
2451 44 : else if ((sqlite3_value_type(argv[0]) != SQLITE_TEXT) ||
2452 22 : (sqlite3_value_type(argv[1]) != SQLITE_TEXT))
2453 2 : sqlite3_result_null(c);
2454 : else
2455 : {
2456 20 : const unsigned char* a = sqlite3_value_text (argv[0]);
2457 20 : const unsigned char* b = sqlite3_value_text (argv[1]);
2458 20 : int i = 0;
2459 1614 : while (*a++ == *b++)
2460 1594 : i++;
2461 20 : sqlite3_result_int (c, i);
2462 : }
2463 22 : }
2464 :
2465 :
2466 : int
2467 2 : main (int argc, char *argv[])
2468 : {
2469 2 : (void) setlocale (LC_ALL, "");
2470 2 : (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
2471 2 : (void) textdomain (PACKAGE_TARNAME);
2472 :
2473 : /* Tell the library which version we are expecting. */
2474 2 : elf_version (EV_CURRENT);
2475 :
2476 : /* Set computed default values. */
2477 2 : db_path = string(getenv("HOME") ?: "/") + string("/.debuginfod.sqlite"); /* XDG? */
2478 2 : int rc = regcomp (& file_include_regex, ".*", REG_EXTENDED|REG_NOSUB); // match everything
2479 2 : if (rc != 0)
2480 : error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
2481 2 : rc = regcomp (& file_exclude_regex, "^$", REG_EXTENDED|REG_NOSUB); // match nothing
2482 2 : if (rc != 0)
2483 : error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
2484 :
2485 : /* Parse and process arguments. */
2486 : int remaining;
2487 2 : argp_program_version_hook = print_version; // this works
2488 2 : (void) argp_parse (&argp, argc, argv, ARGP_IN_ORDER, &remaining, NULL);
2489 2 : if (remaining != argc)
2490 0 : error (EXIT_FAILURE, 0,
2491 0 : "unexpected argument: %s", argv[remaining]);
2492 :
2493 2 : if (!scan_rpms && !scan_files && source_paths.size()>0)
2494 0 : obatched(clog) << "warning: without -F and/or -R, ignoring PATHs" << endl;
2495 :
2496 2 : (void) signal (SIGPIPE, SIG_IGN); // microhttpd can generate it incidentally, ignore
2497 2 : (void) signal (SIGINT, signal_handler); // ^C
2498 2 : (void) signal (SIGHUP, signal_handler); // EOF
2499 2 : (void) signal (SIGTERM, signal_handler); // systemd
2500 2 : (void) signal (SIGUSR1, sigusr1_handler); // end-user
2501 2 : (void) signal (SIGUSR2, sigusr2_handler); // end-user
2502 :
2503 : // do this before any threads start
2504 2 : scan_concurrency_sem = new semaphore(concurrency);
2505 :
2506 : /* Get database ready. */
2507 2 : rc = sqlite3_open_v2 (db_path.c_str(), &db, (SQLITE_OPEN_READWRITE
2508 : |SQLITE_OPEN_CREATE
2509 : |SQLITE_OPEN_FULLMUTEX), /* thread-safe */
2510 : NULL);
2511 2 : if (rc == SQLITE_CORRUPT)
2512 : {
2513 0 : (void) unlink (db_path.c_str());
2514 0 : error (EXIT_FAILURE, 0,
2515 : "cannot open %s, deleted database: %s", db_path.c_str(), sqlite3_errmsg(db));
2516 : }
2517 2 : else if (rc)
2518 : {
2519 0 : error (EXIT_FAILURE, 0,
2520 : "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(db));
2521 : }
2522 :
2523 2 : obatched(clog) << "opened database " << db_path << endl;
2524 2 : obatched(clog) << "sqlite version " << sqlite3_version << endl;
2525 :
2526 : // add special string-prefix-similarity function used in rpm sref/sdef resolution
2527 2 : rc = sqlite3_create_function(db, "sharedprefix", 2, SQLITE_UTF8, NULL,
2528 : & sqlite3_sharedprefix_fn, NULL, NULL);
2529 2 : if (rc != SQLITE_OK)
2530 0 : error (EXIT_FAILURE, 0,
2531 : "cannot create sharedprefix( function: %s", sqlite3_errmsg(db));
2532 :
2533 2 : if (verbose > 3)
2534 0 : obatched(clog) << "ddl: " << DEBUGINFOD_SQLITE_DDL << endl;
2535 2 : rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_DDL, NULL, NULL, NULL);
2536 2 : if (rc != SQLITE_OK)
2537 : {
2538 0 : error (EXIT_FAILURE, 0,
2539 : "cannot run database schema ddl: %s", sqlite3_errmsg(db));
2540 : }
2541 :
2542 : // Start httpd server threads. Separate pool for IPv4 and IPv6, in
2543 : // case the host only has one protocol stack.
2544 2 : MHD_Daemon *d4 = MHD_start_daemon (MHD_USE_THREAD_PER_CONNECTION
2545 : #if MHD_VERSION >= 0x00095300
2546 : | MHD_USE_INTERNAL_POLLING_THREAD
2547 : #else
2548 : | MHD_USE_SELECT_INTERNALLY
2549 : #endif
2550 : | MHD_USE_DEBUG, /* report errors to stderr */
2551 : http_port,
2552 : NULL, NULL, /* default accept policy */
2553 : handler_cb, NULL, /* handler callback */
2554 : MHD_OPTION_END);
2555 2 : MHD_Daemon *d6 = MHD_start_daemon (MHD_USE_THREAD_PER_CONNECTION
2556 : #if MHD_VERSION >= 0x00095300
2557 : | MHD_USE_INTERNAL_POLLING_THREAD
2558 : #else
2559 : | MHD_USE_SELECT_INTERNALLY
2560 : #endif
2561 : | MHD_USE_IPv6
2562 : | MHD_USE_DEBUG, /* report errors to stderr */
2563 : http_port,
2564 : NULL, NULL, /* default accept policy */
2565 : handler_cb, NULL, /* handler callback */
2566 : MHD_OPTION_END);
2567 :
2568 2 : if (d4 == NULL && d6 == NULL) // neither ipv4 nor ipv6? boo
2569 : {
2570 0 : sqlite3 *database = db;
2571 0 : db = 0; // for signal_handler not to freak
2572 0 : sqlite3_close (database);
2573 0 : error (EXIT_FAILURE, 0, "cannot start http server at port %d", http_port);
2574 : }
2575 :
2576 4 : obatched(clog) << "started http server on "
2577 : << (d4 != NULL ? "IPv4 " : "")
2578 : << (d6 != NULL ? "IPv6 " : "")
2579 2 : << "port=" << http_port << endl;
2580 :
2581 : // add maxigroom sql if -G given
2582 2 : if (maxigroom)
2583 : {
2584 0 : obatched(clog) << "maxigrooming database, please wait." << endl;
2585 0 : extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);");
2586 0 : extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);");
2587 0 : extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;");
2588 :
2589 : // NB: we don't maxigroom the _files interning table. It'd require a temp index on all the
2590 : // tables that have file foreign-keys, which is a lot.
2591 :
2592 : // NB: with =delete, may take up 3x disk space total during vacuum process
2593 : // vs. =off (only 2x but may corrupt database if program dies mid-vacuum)
2594 : // vs. =wal (>3x observed, but safe)
2595 0 : extra_ddl.push_back("pragma journal_mode=delete;");
2596 0 : extra_ddl.push_back("vacuum;");
2597 0 : extra_ddl.push_back("pragma journal_mode=wal;");
2598 : }
2599 :
2600 : // run extra -D sql if given
2601 2 : for (auto&& i: extra_ddl)
2602 : {
2603 0 : if (verbose > 1)
2604 0 : obatched(clog) << "extra ddl:\n" << i << endl;
2605 0 : rc = sqlite3_exec (db, i.c_str(), NULL, NULL, NULL);
2606 0 : if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
2607 0 : error (0, 0,
2608 : "warning: cannot run database extra ddl %s: %s", i.c_str(), sqlite3_errmsg(db));
2609 : }
2610 :
2611 2 : if (maxigroom)
2612 0 : obatched(clog) << "maxigroomed database" << endl;
2613 :
2614 :
2615 2 : obatched(clog) << "search concurrency " << concurrency << endl;
2616 2 : obatched(clog) << "rescan time " << rescan_s << endl;
2617 2 : obatched(clog) << "groom time " << groom_s << endl;
2618 2 : const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR);
2619 2 : if (du && du[0] != '\0') // set to non-empty string?
2620 1 : obatched(clog) << "upstream debuginfod servers: " << du << endl;
2621 :
2622 4 : vector<pthread_t> source_file_scanner_threads;
2623 2 : vector<pthread_t> source_rpm_scanner_threads;
2624 : pthread_t groom_thread;
2625 :
2626 2 : rc = pthread_create (& groom_thread, NULL, thread_main_groom, NULL);
2627 2 : if (rc < 0)
2628 : error (0, 0, "warning: cannot spawn thread (%d) to groom database\n", rc);
2629 :
2630 6 : if (scan_files) for (auto&& it : source_paths)
2631 : {
2632 : pthread_t pt;
2633 4 : rc = pthread_create (& pt, NULL, thread_main_scan_source_file_path, (void*) it.c_str());
2634 4 : if (rc < 0)
2635 0 : error (0, 0, "warning: cannot spawn thread (%d) to scan source files %s\n", rc, it.c_str());
2636 : else
2637 4 : source_file_scanner_threads.push_back(pt);
2638 : }
2639 :
2640 5 : if (scan_rpms) for (auto&& it : source_paths)
2641 : {
2642 : pthread_t pt;
2643 3 : rc = pthread_create (& pt, NULL, thread_main_scan_source_rpm_path, (void*) it.c_str());
2644 3 : if (rc < 0)
2645 0 : error (0, 0, "warning: cannot spawn thread (%d) to scan source rpms %s\n", rc, it.c_str());
2646 : else
2647 3 : source_rpm_scanner_threads.push_back(pt);
2648 : }
2649 :
2650 : /* Trivial main loop! */
2651 2 : set_metric("ready", 1);
2652 9 : while (! interrupted)
2653 7 : pause ();
2654 2 : set_metric("ready", 0);
2655 :
2656 2 : if (verbose)
2657 0 : obatched(clog) << "stopping" << endl;
2658 :
2659 : /* Join any source scanning threads. */
2660 6 : for (auto&& it : source_file_scanner_threads)
2661 4 : pthread_join (it, NULL);
2662 5 : for (auto&& it : source_rpm_scanner_threads)
2663 3 : pthread_join (it, NULL);
2664 2 : pthread_join (groom_thread, NULL);
2665 :
2666 : /* Stop all the web service threads. */
2667 2 : if (d4) MHD_stop_daemon (d4);
2668 2 : if (d6) MHD_stop_daemon (d6);
2669 :
2670 : /* With all threads known dead, we can clean up the global resources. */
2671 2 : delete scan_concurrency_sem;
2672 2 : rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_CLEANUP_DDL, NULL, NULL, NULL);
2673 2 : if (rc != SQLITE_OK)
2674 : {
2675 0 : error (0, 0,
2676 : "warning: cannot run database cleanup ddl: %s", sqlite3_errmsg(db));
2677 : }
2678 :
2679 : // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
2680 2 : (void) regfree (& file_include_regex);
2681 2 : (void) regfree (& file_exclude_regex);
2682 :
2683 2 : sqlite3 *database = db;
2684 2 : db = 0; // for signal_handler not to freak
2685 2 : (void) sqlite3_close (database);
2686 :
2687 2 : return 0;
2688 : }
|