]> sourceware.org Git - systemtap.git/blame - util.cxx
Fix typo in languange reference
[systemtap.git] / util.cxx
CommitLineData
1b78aef5 1// Copyright (C) Andrew Tridgell 2002 (original file)
73fcca6f 2// Copyright (C) 2006-2018 Red Hat Inc. (systemtap changes)
1b78aef5
DS
3//
4// This program is free software; you can redistribute it and/or
5// modify it under the terms of the GNU General Public License as
6// published by the Free Software Foundation; either version 2 of the
7// License, or (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful, but
10// WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12// General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
e8daaf60 15// along with this program. If not, see <http://www.gnu.org/licenses/>.
1b78aef5
DS
16
17#include "util.h"
0f5d597d 18#include "stap-probe.h"
1b78aef5
DS
19#include <stdexcept>
20#include <cerrno>
c0d1b5a0 21#include <map>
01cc94dc 22#include <set>
c0d1b5a0 23#include <string>
85007c04 24#include <fstream>
3b6f3bbb 25#include <cassert>
77455f97 26#include <ext/stdio_filebuf.h>
38bf68a8 27#include <algorithm>
e6eea51b 28#include <mutex>
1f4b9e55
DS
29#include <functional>
30#include <cctype>
31#include <locale>
757d4f65 32#include <memory>
1b78aef5
DS
33
34extern "C" {
081b45d1 35#include <elf.h>
4cc40e82 36#include <fcntl.h>
baba4e15 37#include <grp.h>
1b78aef5 38#include <pwd.h>
4cc40e82 39#include <spawn.h>
8bf4a144 40#include <stdio.h>
1b78aef5 41#include <stdlib.h>
4cc40e82
JS
42#include <sys/stat.h>
43#include <sys/types.h>
44#include <sys/wait.h>
45#include <unistd.h>
c0d1b5a0 46#include <regex.h>
7a1513b1 47#include <stdarg.h>
42be330a 48#include <libgen.h>
be7e131b 49#include <pwd.h>
7a23ba3a
AM
50
51#ifdef HAVE_LIBDEBUGINFOD
52#include <elfutils/debuginfod.h>
53#endif
1b78aef5
DS
54}
55
7a23ba3a
AM
56
57
1b78aef5 58using namespace std;
77455f97 59using namespace __gnu_cxx;
1b78aef5
DS
60
61
62// Return current users home directory or die.
63const char *
64get_home_directory(void)
65{
66 const char *p = getenv("HOME");
67 if (p)
68 return p;
69
70 struct passwd *pwd = getpwuid(getuid());
71 if (pwd)
72 return pwd->pw_dir;
73
35f29567
CM
74 cerr << _("Unable to determine home directory") << endl;
75 return "/";
1b78aef5
DS
76}
77
78
b12c8986
DB
// Return the size in bytes of the file named PATH, or 0 when stat()
// fails on it.
size_t
get_file_size(const string &path)
{
  struct stat sb;
  if (stat(path.c_str(), &sb) != 0)
    return 0;
  return sb.st_size;
}
90
// Return the size in bytes of the file open on descriptor FD, or 0
// when fstat() fails on it.
size_t
get_file_size(int fd)
{
  struct stat sb;
  if (fstat(fd, &sb) != 0)
    return 0;
  return sb.st_size;
}
102
// Return true when stat() succeeds on PATH, i.e. something (of any
// file type) exists under that name.
bool
file_exists (const string &path)
{
  struct stat sb;
  return stat(path.c_str(), &sb) == 0;
}
114
f3fdcc93
HT
// Return true when PATH can be stat()ed and names a directory.
bool
dir_exists(const string &path)
{
  struct stat sb;
  if (stat(path.c_str(), &sb) != 0)
    return false;
  return S_ISDIR(sb.st_mode);
}
127
1b78aef5
DS
128// Copy a file. The copy is done via a temporary file and atomic
129// rename.
e16dc041
JS
130bool
131copy_file(const string& src, const string& dest, bool verbose)
1b78aef5
DS
132{
133 int fd1, fd2;
134 char buf[10240];
135 int n;
136 string tmp;
137 char *tmp_name;
138 mode_t mask;
139
e16dc041 140 if (verbose)
b530b5b3 141 clog << _F("Copying %s to %s", src.c_str(), dest.c_str()) << endl;
e16dc041 142
1b78aef5 143 // Open the src file.
e16dc041 144 fd1 = open(src.c_str(), O_RDONLY);
1b78aef5 145 if (fd1 == -1)
e16dc041 146 goto error;
1b78aef5
DS
147
148 // Open the temporary output file.
149 tmp = dest + string(".XXXXXX");
150 tmp_name = (char *)tmp.c_str();
151 fd2 = mkstemp(tmp_name);
152 if (fd2 == -1)
153 {
154 close(fd1);
e16dc041 155 goto error;
1b78aef5
DS
156 }
157
158 // Copy the src file to the temporary output file.
159 while ((n = read(fd1, buf, sizeof(buf))) > 0)
160 {
161 if (write(fd2, buf, n) != n)
162 {
163 close(fd2);
164 close(fd1);
165 unlink(tmp_name);
e16dc041 166 goto error;
1b78aef5
DS
167 }
168 }
169 close(fd1);
170
171 // Set the permissions on the temporary output file.
172 mask = umask(0);
173 fchmod(fd2, 0666 & ~mask);
174 umask(mask);
175
176 // Close the temporary output file. The close can fail on NFS if
177 // out of space.
178 if (close(fd2) == -1)
179 {
180 unlink(tmp_name);
e16dc041 181 goto error;
1b78aef5
DS
182 }
183
184 // Rename the temporary output file to the destination file.
e16dc041 185 if (rename(tmp_name, dest.c_str()) == -1)
1b78aef5
DS
186 {
187 unlink(tmp_name);
e16dc041 188 goto error;
1b78aef5
DS
189 }
190
e16dc041
JS
191 return true;
192
193error:
b530b5b3 194 cerr << _F("Copy failed (\"%s\" to \"%s\"): %s", src.c_str(),
be7e131b
MC
195 dest.c_str(), strerror(errno))
196 << ((getuid() != 0) ?
197 _F(". Your uid=%d.", getuid()) : "") << endl;
e16dc041 198 return false;
1b78aef5
DS
199}
200
201
202// Make sure a directory exists.
203int
3b6f3bbb 204create_dir(const char *dir, int mode)
1b78aef5
DS
205{
206 struct stat st;
207 if (stat(dir, &st) == 0)
208 {
209 if (S_ISDIR(st.st_mode))
210 return 0;
211 errno = ENOTDIR;
212 return 1;
213 }
214
3b6f3bbb
DB
215 // Create the directory. We must create each component
216 // of the path ourselves.
217 vector<string> components;
218 tokenize (dir, components, "/");
219 string path;
220 if (*dir == '/')
221 {
222 // Absolute path
223 path = "/";
224 }
225 unsigned limit = components.size ();
226 assert (limit != 0);
227 for (unsigned ix = 0; ix < limit; ++ix)
228 {
229 path += components[ix] + '/';
be7e131b 230 umask(0); mode=0777;
3b6f3bbb
DB
231 if (mkdir(path.c_str (), mode) != 0 && errno != EEXIST)
232 return 1;
233 }
1b78aef5
DS
234
235 return 0;
236}
237
98f552c2
DB
// Remove the file or directory NAME via remove().
// Returns 0 when NAME was removed or did not exist in the first place
// (stat() failed with ENOENT); returns 1 on any other failure.
int
remove_file_or_dir (const char *name)
{
  struct stat sb;
  if (stat(name, &sb) != 0)
    return (errno == ENOENT) ? 0 : 1;

  return (remove (name) == 0) ? 0 : 1;
}
1b78aef5 257
438b5a40 258
42be330a
SC
// Split PATH into its directory part and final entry, as computed by
// dirname() and basename().  Both functions may modify their argument,
// so each one gets its own scratch copy of PATH.
void
split_path (string &path, string &directory, string &entry)
{
  string dir_scratch (path);
  string base_scratch (path);

  directory = dirname (&dir_scratch[0]);
  entry = basename (&base_scratch[0]);
}
271
272
438b5a40
SC
// Append the directory part of SOURCE (everything before its last '/',
// or all of SOURCE when it contains none) to the ':'-separated
// environment variable ENV_NAME, creating the variable if it is unset.
// Returns the result of setenv().
int
appendenv (const char *env_name, const string source)
{
  const string dir (source, 0, source.rfind("/"));

  string value;
  if (const char *current = getenv(env_name))
    {
      value = current;
      value += ":";
    }
  value += dir;

  return setenv(env_name, value.c_str(), 1);
}
287
288
3892d516
DB
/* Look up GROUP_NAME with getgrnam() and return its gid.  When the
   group cannot be found, (gid_t)-1 is returned as an invalid id. */
gid_t get_gid (const char *group_name)
{
  const struct group *entry = getgrnam(group_name);
  return entry ? entry->gr_gid : (gid_t)-1;
}
299
0da3e7a0
DB
300// Determine whether the current user is in the given group
301// by gid.
baba4e15 302bool
0da3e7a0 303in_group_id (gid_t target_gid)
baba4e15 304{
baba4e15
DB
305 // According to the getgroups() man page, getgroups() may not
306 // return the effective gid, so try to match it first. */
db0a43c3 307 if (target_gid == getegid())
baba4e15
DB
308 return true;
309
310 // Get the list of the user's groups.
f429df66 311 int ngids = getgroups(0, 0); // Returns the number to allocate.
db0a43c3
RM
312 if (ngids > 0) {
313 gid_t gidlist[ngids];
314 ngids = getgroups(ngids, gidlist);
315 for (int i = 0; i < ngids; i++) {
316 // If the user is a member of the target group, then we're done.
317 if (gidlist[i] == target_gid)
318 return true;
319 }
320 }
baba4e15 321 if (ngids < 0) {
b530b5b3 322 cerr << _("Unable to retrieve group list") << endl;
baba4e15
DB
323 return false;
324 }
325
baba4e15
DB
326 // The user is not a member of the target group
327 return false;
328}
329
85007c04
DB
330/*
331 * Returns a string describing memory resource usage.
332 * Since it seems getrusage() doesn't maintain the mem related fields,
333 * this routine parses /proc/self/statm to get the statistics.
334 */
335string
336getmemusage ()
337{
338 static long sz = sysconf(_SC_PAGESIZE);
339
d54a65f6 340 long pages;
85007c04
DB
341 ostringstream oss;
342 ifstream statm("/proc/self/statm");
343 statm >> pages;
c338e2c5 344 long kb1 = pages * sz / 1024; // total program size; vmsize
85007c04 345 statm >> pages;
c338e2c5 346 long kb2 = pages * sz / 1024; // resident set size; vmrss
85007c04 347 statm >> pages;
c338e2c5
FCE
348 long kb3 = pages * sz / 1024; // shared pages
349 statm >> pages;
350 long kb4 = pages * sz / 1024; // text
351 statm >> pages;
352 (void) kb4;
353 long kb5 = pages * sz / 1024; // library
354 statm >> pages;
355 (void) kb5;
356 long kb6 = pages * sz / 1024; // data+stack
357 statm >> pages;
358 long kb7 = pages * sz / 1024; // dirty
359 (void) kb7;
360
361 oss << _F("using %ldvirt/%ldres/%ldshr/%lddata kb, ", kb1, kb2, kb3, kb6);
85007c04
DB
362 return oss.str();
363}
364
1b78aef5
DS
// Split STR at any character in DELIMITERS and append the pieces to
// TOKENS.  Runs of delimiters are treated as a single separator, so no
// empty tokens are produced; leading and trailing delimiters are
// ignored.
void
tokenize(const string& str, vector<string>& tokens,
         const string& delimiters)
{
  // START is the first character of the next token, STOP the
  // delimiter that ends it (or npos for the final token).
  string::size_type start = str.find_first_not_of(delimiters);
  while (start != string::npos)
    {
      const string::size_type stop = str.find_first_of(delimiters, start);
      tokens.push_back(str.substr(start, stop - start));
      start = str.find_first_not_of(delimiters, stop);
    }
}
384
49dbe419
DB
// Akin to tokenize(...,...), but every delimiter separates two tokens,
// so empty tokens between adjacent delimiters are kept.  A single
// leading delimiter is skipped (no leading empty token), and nothing
// is emitted after a trailing delimiter.  A string of length <= 1, or
// one containing no delimiter at all, produces no tokens.
void
tokenize_full(const string& str, vector<string>& tokens,
              const string& delimiters)
{
  const string::size_type len = str.size();
  if (len <= 1)
    return;

  const string::size_type first = str.find_first_of(delimiters);
  if (first == string::npos)
    return; // no delimiters

  // No leading empty component allowed.
  string::size_type cursor = (first == 0) ? 1 : 0;
  assert (cursor < len);

  string::size_type hit = first;
  while (cursor < len)
    {
      hit = str.find_first_of(delimiters, cursor);
      if (hit == string::npos)
        break; // only the final, undelimited component is left
      tokens.push_back(str.substr (cursor, hit - cursor));
      cursor = hit + 1; // step over the delimiter
    }

  // A final non-delimited token, if it is not empty.
  if (cursor < len)
    {
      assert (hit == string::npos);
      tokens.push_back(str.substr (cursor));
    }
}
1b78aef5 426
91699a70
JS
// Akin to tokenize(...,"::"), but it also has to deal with C++ template
// madness.  We do this naively by balancing '<' and '>' characters: a
// "::" is only treated as a separator while the bracket depth is zero.
// Empty pieces are kept, so a leading ::scope still works.
void
tokenize_cxx(const string& str, vector<string>& tokens)
{
  int depth = 0;                     // current '<' ... '>' nesting
  string::size_type start = 0;       // start of the token being built
  string::size_type sep = str.find("::");
  string::size_type bracket = str.find_first_of("<>");

  for (;;)
    {
      if (sep == string::npos)
        break;
      if (depth != 0 && bracket == string::npos)
        break;

      string::size_type resume;
      if (depth > 0 || bracket < sep)
        {
          // Consume one bracket and rescan just past it.
          depth += (str.at(bracket) == '<') ? 1 : -1;
          resume = bracket + 1;
        }
      else
        {
          // A separator outside any brackets ends the current token.
          tokens.push_back(str.substr(start, sep - start));
          start = sep + 2;
          resume = start;
        }
      sep = str.find("::", resume);
      bracket = str.find_first_of("<>", resume);
    }

  // Whatever remains is the last token.
  tokens.push_back(str.substr(start));
}
456
004f2b9c
JL
// Searches the N bytes at BUF for lines terminated by either \n or \0.
// Returns one (start of line, length of line) pair per line; the
// length includes the terminating delimiter.  Data left over after the
// last delimiter is returned as a final, unterminated line.
vector<pair<const char*,int> >
split_lines(const char *buf, size_t n)
{
  vector<pair<const char*,int> > result;

  size_t begin = 0; // offset of the line currently being scanned
  for (size_t i = 0; i < n; ++i)
    {
      const char c = buf[i];
      if (c != '\n' && c != '\0')
        continue;
      result.push_back(pair<const char*,int>(buf + begin,
                                             static_cast<int>(i - begin + 1)));
      begin = i + 1;
    }

  // add any last line
  if (begin < n)
    result.push_back(pair<const char*,int>(buf + begin,
                                           static_cast<int>(n - begin)));

  return result;
}
91699a70 484
2041085d
VK
// Read the target of the symbolic link PATH into TARGET.  The buffer
// is grown until readlink() reports a length strictly smaller than it,
// so the result is never truncated.  Returns false when PATH cannot be
// read as a symbolic link.
static bool
read_link_target(const string& path, string& target)
{
  vector<char> buf(256);
  for (;;)
    {
      ssize_t len = readlink(path.c_str(), &buf[0], buf.size());
      if (len < 0)
        return false;
      if (static_cast<size_t>(len) < buf.size())
        {
          target.assign(&buf[0], len);
          return true;
        }
      buf.resize(buf.size() * 2);
    }
}

// Resolve the symbolic link NAME against SYSROOT.
//
// If we have non-empty sysroot and we got link that points to absolute
// path name, we need to look at this path relative to sysroot itself.
// access and stat will follow symbolic links correctly only in case
// with empty sysroot.
//
// Starting from the target of NAME, each absolute target is prefixed
// with SYSROOT; if that names a readable regular file it is returned,
// and if it is itself a symbolic link the walk continues with its
// target.  Returns an empty string when NAME cannot be read as a link,
// a target is relative, the chain ends at something else, or the chain
// is longer than the hop limit (e.g. a cycle).
static string
follow_link(const string& name, const string& sysroot)
{
  // Upper bound on the number of links followed, so that a cycle of
  // links cannot loop forever.
  const int max_hops = 40;

  string target;
  if (!read_link_target(name, target))
    return "";

  for (int hop = 0;
       hop < max_hops && !target.empty() && target[0] == '/';
       ++hop)
    {
      const string candidate = sysroot + target;
      const char *f = candidate.c_str();
      struct stat st;

      if (access(f, R_OK) == 0
          && stat(f, &st) == 0
          && S_ISREG(st.st_mode))
        return candidate;

      if (lstat(f, &st) != 0 || !S_ISLNK(st.st_mode))
        break;
      if (!read_link_target(candidate, target))
        break;
    }

  return "";
}
544
37200b68
AM
// Return true if str is a build-id.  Build-ids are considered to be
// strings longer than 2 characters made up exclusively of the
// characters 0-9 and a-f (lowercase hex digits).
bool is_build_id(const string& str)
{
  if (str.size() <= 2)
    return false;

  return str.find_first_not_of("0123456789abcdef") == string::npos;
}
560
d0a7f5a9
FCE
561// Resolve an executable name to a canonical full path name, with the
562// same policy as execvp(). A program name not containing a slash
563// will be searched along the $PATH.
564
0a567f6d
JS
565string find_executable(const string& name)
566{
567 const map<string, string> sysenv;
568 return find_executable(name, "", sysenv);
569}
570
05fb3e0c 571string find_executable(const string& name, const string& sysroot,
0a567f6d 572 const map<string, string>& sysenv,
05fb3e0c 573 const string& env_path)
1b78aef5 574{
d9736de1 575 string retpath;
1b78aef5 576
d9736de1
FCE
577 if (name.size() == 0)
578 return name;
1b78aef5 579
d0a7f5a9
FCE
580 struct stat st;
581
37200b68 582 if (is_build_id(name))
7a23ba3a
AM
583 {
584 // Search for executable with the given build-id.
37200b68
AM
585 string fname = sysroot + string("/usr/lib/.build-id/");
586 fname += name.substr(0, 2) + string("/") + name.substr(2);
7a23ba3a 587
37200b68 588 const char *f = fname.c_str();
7a23ba3a 589
a790dc64 590 if (access(f, R_OK) == 0
37200b68
AM
591 && stat(f, &st) == 0
592 && S_ISREG(st.st_mode))
593 {
594 retpath = fname;
595 }
596 else if (sysroot != ""
597 && lstat(f, &st) == 0
598 && S_ISLNK(st.st_mode))
599 {
600 retpath = follow_link(f, sysroot);
601 }
7a23ba3a 602#ifdef HAVE_LIBDEBUGINFOD
37200b68
AM
603 if (retpath == "")
604 {
605 // Query debuginfod for the executable.
15e40b63
FCE
606 static unique_ptr <debuginfod_client, void (*)(debuginfod_client*)>
607 client (debuginfod_begin(), &debuginfod_end);
608
609 if (client.get() != NULL)
37200b68
AM
610 {
611 char *p;
15e40b63 612 int fd = debuginfod_find_executable(client.get(),
37200b68
AM
613 (const unsigned char*)(name.c_str()),
614 0, &p);
615 if (fd >= 0)
7a23ba3a 616 {
37200b68
AM
617 retpath = p;
618 free(p);
619 close(fd);
7a23ba3a
AM
620 }
621 }
7a23ba3a 622 }
37200b68 623#endif /* HAVE_LIBDEBUGINFOD */
7a23ba3a
AM
624 }
625 else if (name.find('/') != string::npos) // slash in the path already?
d0a7f5a9 626 {
05fb3e0c 627 retpath = sysroot + name;
2041085d
VK
628
629 const char *f = retpath.c_str();
630 if (sysroot != ""
631 && lstat(f, &st) == 0
632 && S_ISLNK(st.st_mode))
633 {
634 retpath = follow_link(f, sysroot);
635 }
d0a7f5a9
FCE
636 }
637 else // Nope, search $PATH.
1b78aef5 638 {
0a567f6d 639 const char *path;
05fb3e0c 640 if (sysenv.count(env_path) != 0)
0a567f6d 641 path = sysenv.find(env_path)->second.c_str();
05fb3e0c
WF
642 else
643 path = getenv(env_path.c_str());
d9736de1 644 if (path)
1b78aef5 645 {
d9736de1
FCE
646 // Split PATH up.
647 vector<string> dirs;
648 tokenize(string(path), dirs, string(":"));
30f926f0 649
d9736de1
FCE
650 // Search the path looking for the first executable of the right name.
651 for (vector<string>::iterator i = dirs.begin(); i != dirs.end(); i++)
652 {
05fb3e0c 653 string fname = sysroot + *i + "/" + name;
d9736de1 654 const char *f = fname.c_str();
30f926f0 655
d9736de1 656 // Look for a normal executable file.
a790dc64 657 if (access(f, R_OK) == 0
d0a7f5a9
FCE
658 && stat(f, &st) == 0
659 && S_ISREG(st.st_mode))
d9736de1
FCE
660 {
661 retpath = fname;
662 break;
663 }
2041085d
VK
664 else if (sysroot != ""
665 && lstat(f, &st) == 0
666 && S_ISLNK(st.st_mode))
667 {
668 retpath = follow_link(f, sysroot);
669 if (retpath != "")
670 {
671 break;
672 }
673 }
d9736de1
FCE
674 }
675 }
1b78aef5
DS
676 }
677
dff50e09 678
d0a7f5a9
FCE
679 // Could not find the program on the $PATH. We'll just fall back to
680 // the unqualified name, which our caller will probably fail with.
681 if (retpath == "")
05fb3e0c 682 retpath = sysroot + name;
d0a7f5a9 683
d9736de1 684 // Canonicalize the path name.
5bca76a8
JS
685 string scf = resolve_path(retpath);
686 if (!startswith(scf, sysroot))
687 throw runtime_error(_F("find_executable(): file %s not in sysroot %s",
688 scf.c_str(), sysroot.c_str()));
689 return scf;
1b78aef5 690}
8c711d30 691
d9736de1 692
dbe9d133
JL
693bool is_fully_resolved(const string& path, const string& sysroot,
694 const map<string, string>& sysenv,
695 const string& env_path)
696{
697 return !path.empty()
698 && !contains_glob_chars(path)
699 && path.find('/') != string::npos
700 && path == find_executable(path, sysroot, sysenv, env_path);
701}
d9736de1 702
8c711d30
MH
// Quote CMD for use as a single shell word.  The whole string is
// wrapped in single quotes; each single quote inside it is written as
// '"'"' (close the quoted run, a double-quoted ', reopen).
//   substr1          -->  'substr1'
//   substr1'substr2  -->  'substr1'"'"'substr2'
const string cmdstr_quoted(const string& cmd)
{
  string quoted = "'";
  for (string::const_iterator c = cmd.begin(); c != cmd.end(); ++c)
    {
      if (*c == '\'')
        quoted += "'\"'\"'";
      else
        quoted += *c;
    }
  quoted += "'";
  return quoted;
}
732
38bf68a8
MC
// Return a copy of STR in which every character that is not
// alphanumeric (per isalnum()) is replaced by '_', with one extra '_'
// appended at the end.
const string
detox_path(const string& str)
{
  string result;
  result.reserve(str.length() + 1);
  for (string::size_type i = 0; i < str.length(); ++i)
    {
      // isalnum() is only defined for values representable as unsigned
      // char (or EOF); plain char may be negative for non-ASCII bytes.
      const unsigned char c = static_cast<unsigned char>(str[i]);
      result += isalnum(c) ? str[i] : '_';
    }
  result += '_';
  return result;
}
a5e8d632 745
5eea6ed1
JS
746const string
747cmdstr_join(const vector<string>& cmds)
748{
749 if (cmds.empty())
b530b5b3 750 throw runtime_error(_("cmdstr_join called with an empty command!"));
5eea6ed1
JS
751
752 stringstream cmd;
753 cmd << cmdstr_quoted(cmds[0]);
754 for (size_t i = 1; i < cmds.size(); ++i)
755 cmd << " " << cmdstr_quoted(cmds[i]);
756
757 return cmd.str();
758}
759
760
8fdd1f2e
DS
761const string
762join(const vector<string>& vec, const string &delim)
763{
764 if (vec.empty())
765 throw runtime_error(_("join called with an empty vector!"));
766
767 stringstream join_str;
768 join_str << vec[0];
769 for (size_t i = 1; i < vec.size(); ++i)
770 join_str << delim << vec[i];
771
772 return join_str.str();
773}
774
775
b30895fa
JS
776// signal-safe set of pids
777class spawned_pids_t {
778 private:
779 set<pid_t> pids;
e6eea51b 780
83ae46dd 781#ifndef SINGLE_THREADED
e6eea51b 782 mutex mux_pids;
83ae46dd 783#endif
4cc40e82 784
e6eea51b
JS
785 unique_lock<mutex> lock()
786 {
787#ifndef SINGLE_THREADED
788 return unique_lock<mutex>(mux_pids);
789#else
790 return {};
791#endif
792 }
793
b30895fa 794 public:
e6eea51b 795
b30895fa
JS
796 bool contains (pid_t p)
797 {
798 stap_sigmasker masked;
e6eea51b 799 auto guard = lock();
26a39006 800
26a39006 801 bool ret = (pids.count(p)==0) ? true : false;
26a39006
CM
802
803 return ret;
b30895fa 804 }
e6eea51b 805
b30895fa
JS
806 bool insert (pid_t p)
807 {
808 stap_sigmasker masked;
e6eea51b 809 auto guard = lock();
26a39006 810
26a39006 811 bool ret = (p > 0) ? pids.insert(p).second : false;
26a39006
CM
812
813 return ret;
b30895fa 814 }
e6eea51b 815
b30895fa
JS
816 void erase (pid_t p)
817 {
818 stap_sigmasker masked;
e6eea51b 819 auto guard = lock();
26a39006 820
b30895fa
JS
821 pids.erase(p);
822 }
e6eea51b 823
b30895fa
JS
824 int killall (int sig)
825 {
826 int ret = 0;
827 stap_sigmasker masked;
e6eea51b 828 auto guard = lock();
26a39006 829
a5fa9c25
JS
830 for (set<pid_t>::const_iterator it = pids.begin();
831 it != pids.end(); ++it)
b30895fa
JS
832 ret = kill(*it, sig) ?: ret;
833 return ret;
834 }
26a39006 835
b30895fa
JS
836};
837static spawned_pids_t spawned_pids;
4cc40e82 838
13efed2a
JL
839/* Returns exit code of pid if terminated nicely, or 128+signal if terminated
840 * not nicely, or -1 if waitpid() failed. So if ret >= 0, it's the rc/signal */
01cc94dc
JS
841int
842stap_waitpid(int verbose, pid_t pid)
daa75206
JS
843{
844 int ret, status;
b30895fa
JS
845 if (verbose > 1 && spawned_pids.contains(pid))
846 clog << _F("Spawn waitpid call on unmanaged pid %d", pid) << endl;
01cc94dc
JS
847 ret = waitpid(pid, &status, 0);
848 if (ret == pid)
daa75206 849 {
01cc94dc 850 spawned_pids.erase(pid);
daa75206 851 ret = WIFEXITED(status) ? WEXITSTATUS(status) : 128 + WTERMSIG(status);
483cf56e 852 if (verbose > 1)
f245e619 853 clog << _F("Spawn waitpid result (0x%x): %d", (unsigned)status, ret) << endl;
daa75206
JS
854 }
855 else
856 {
857 if (verbose > 1)
b530b5b3 858 clog << _F("Spawn waitpid error (%d): %s", ret, strerror(errno)) << endl;
daa75206
JS
859 ret = -1;
860 }
20f90026 861 PROBE2(stap, stap_system__complete, ret, pid);
daa75206
JS
862 return ret;
863}
864
b63fab87
JS
// Create a pipe in PIPEFD and register in FA a dup2 of one end onto the
// child's descriptor CHILDFD: the read end when CHILDFD is 0, the
// write end otherwise.  Both ends are marked close-on-exec.
// Returns 0 on success; on failure both ends are closed (if the pipe
// was created) and -1 is returned.
static int
pipe_child_fd(posix_spawn_file_actions_t* fa, int pipefd[2], int childfd)
{
  if (pipe(pipefd) != 0)
    return -1;

  const int child_end = (childfd == 0) ? pipefd[0] : pipefd[1];

  bool ok = fcntl(pipefd[0], F_SETFD, FD_CLOEXEC) == 0;
  ok = ok && fcntl(pipefd[1], F_SETFD, FD_CLOEXEC) == 0;
  ok = ok && posix_spawn_file_actions_adddup2(fa, child_end, childfd) == 0;
  if (ok)
    return 0;

  close(pipefd[0]);
  close(pipefd[1]);
  return -1;
}
881
ff520ff4
JS
// Register in FA an open of /dev/null onto the child's descriptor
// CHILDFD: read-only when CHILDFD is 0, write-only otherwise.
// Returns the result of posix_spawn_file_actions_addopen().
static int
null_child_fd(posix_spawn_file_actions_t* fa, int childfd)
{
  const int flags = (childfd == 0) ? O_RDONLY : O_WRONLY;
  return posix_spawn_file_actions_addopen(fa, childfd, "/dev/null", flags, 0);
}
888
4cc40e82 889// Runs a command with a saved PID, so we can kill it from the signal handler
aeb9cc10 890pid_t
20f90026 891stap_spawn(int verbose, const vector<string>& args,
e4e3d6b7 892 posix_spawn_file_actions_t* fa, const vector<string>& envVec)
4cc40e82 893{
5def5d2a
LB
894 string::const_iterator it;
895 it = args[0].begin();
20f90026 896 string command;
5def5d2a 897 if(*it == '/' && (access(args[0].c_str(), X_OK)==-1)) //checking to see if staprun is executable
2713ea24
CM
898 // XXX PR13274 needs-session to use print_warning()
899 clog << _F("WARNING: %s is not executable (%s)", args[0].c_str(), strerror(errno)) << endl;
20f90026
JS
900 for (size_t i = 0; i < args.size(); ++i)
901 command += " " + args[i];
1acfc030 902 PROBE1(stap, stap_system__start, command.c_str());
db0a43c3 903 if (verbose > 1)
b530b5b3 904 clog << _("Running") << command << endl;
36ef6d6a 905
20f90026
JS
906 char const * argv[args.size() + 1];
907 for (size_t i = 0; i < args.size(); ++i)
908 argv[i] = args[i].c_str();
909 argv[args.size()] = NULL;
910
e4e3d6b7
CM
911 char** env;
912 bool allocated;
8a398d03
DB
913 // environ can be NULL. This has been observed when running under gdb.
914 if(envVec.empty() && environ != NULL)
e4e3d6b7
CM
915 {
916 env = environ;
917 allocated = false;
918 }
919 else
920 {
921 allocated = true;
922 env = new char*[envVec.size() + 1];
923
924 for (size_t i = 0; i < envVec.size(); ++i)
925 env[i] = (char*)envVec[i].c_str();
926 env[envVec.size()] = NULL;
927 }
928
20f90026
JS
929 pid_t pid = 0;
930 int ret = posix_spawnp(&pid, argv[0], fa, NULL,
e4e3d6b7
CM
931 const_cast<char * const *>(argv), env);
932 if (allocated)
933 delete[] env;
934
01cc94dc
JS
935 PROBE2(stap, stap_system__spawn, ret, pid);
936 if (ret != 0)
36ef6d6a 937 {
db0a43c3 938 if (verbose > 1)
b530b5b3 939 clog << _F("Spawn error (%d): %s", ret, strerror(ret)) << endl;
01cc94dc 940 pid = -1;
4cc40e82 941 }
01cc94dc
JS
942 else
943 spawned_pids.insert(pid);
944 return pid;
945}
946
b63fab87
JS
947// The API version of stap_spawn doesn't expose file_actions, for now.
948pid_t
20f90026 949stap_spawn(int verbose, const vector<string>& args)
b63fab87 950{
20f90026 951 return stap_spawn(verbose, args, NULL);
b63fab87
JS
952}
953
645383d5
JS
954pid_t
955stap_spawn_piped(int verbose, const vector<string>& args,
e96f2257 956 int *child_in, int *child_out, int* child_err)
645383d5
JS
957{
958 pid_t pid = -1;
e96f2257 959 int infd[2], outfd[2], errfd[2];
645383d5
JS
960 posix_spawn_file_actions_t fa;
961 if (posix_spawn_file_actions_init(&fa) != 0)
962 return -1;
963
e96f2257 964 if (child_in && pipe_child_fd(&fa, infd, 0) != 0)
645383d5 965 goto cleanup_fa;
e96f2257
JS
966 if (child_out && pipe_child_fd(&fa, outfd, 1) != 0)
967 goto cleanup_in;
645383d5
JS
968 if (child_err && pipe_child_fd(&fa, errfd, 2) != 0)
969 goto cleanup_out;
970
971 pid = stap_spawn(verbose, args, &fa);
972
973 if (child_err)
974 {
975 if (pid > 0)
976 *child_err = errfd[0];
977 else
978 close(errfd[0]);
979 close(errfd[1]);
980 }
981
982cleanup_out:
983 if (child_out)
984 {
985 if (pid > 0)
986 *child_out = outfd[0];
987 else
988 close(outfd[0]);
989 close(outfd[1]);
990 }
991
e96f2257
JS
992cleanup_in:
993 if (child_in)
994 {
995 if (pid > 0)
996 *child_in = infd[1];
997 else
998 close(infd[1]);
999 close(infd[0]);
1000 }
1001
645383d5
JS
1002cleanup_fa:
1003 posix_spawn_file_actions_destroy(&fa);
1004
1005 return pid;
1006}
1007
e4e3d6b7
CM
// Global set of supported localization variables.  Make changes here to
// add or remove variables.  List of variables from:
// http://publib.boulder.ibm.com/infocenter/tivihelp/v8r1/index.jsp?topic=/
// com.ibm.netcool_OMNIbus.doc_7.3.0/omnibus/wip/install/concept/omn_con_settingyourlocale.html
//
// The set is built once, by the initialization of the function-local
// static, rather than being filled in on first use.
const set<string>&
localization_variables()
{
  static const set<string> localeVars = {
    "LANG",
    "LC_ALL",
    "LC_CTYPE",
    "LC_COLLATE",
    "LC_MESSAGES",
    "LC_TIME",
    "LC_MONETARY",
    "LC_NUMERIC",
  };
  return localeVars;
}
1029
01cc94dc
JS
1030// Runs a command with a saved PID, so we can kill it from the signal handler,
1031// and wait for it to finish.
1032int
b13c6a37
JS
1033stap_system(int verbose, const string& description,
1034 const vector<string>& args,
ff520ff4 1035 bool null_out, bool null_err)
01cc94dc 1036{
ff520ff4
JS
1037 int ret = 0;
1038 posix_spawn_file_actions_t fa;
1039 if (posix_spawn_file_actions_init(&fa) != 0)
1040 return -1;
4cc40e82 1041
ff520ff4
JS
1042 if ((null_out && null_child_fd(&fa, 1) != 0) ||
1043 (null_err && null_child_fd(&fa, 2) != 0))
1044 ret = -1;
1045 else
1046 {
1047 pid_t pid = stap_spawn(verbose, args, &fa);
1048 ret = pid;
5def5d2a 1049 if (pid > 0){
ff520ff4 1050 ret = stap_waitpid(verbose, pid);
df324c9f
JS
1051
1052 // XXX PR13274 needs-session to use print_warning()
1053 if (ret > 128)
1054 clog << _F("WARNING: %s exited with signal: %d (%s)",
1055 description.c_str(), ret - 128, strsignal(ret - 128)) << endl;
1056 else if (ret > 0)
1057 clog << _F("WARNING: %s exited with status: %d",
1058 description.c_str(), ret) << endl;
5def5d2a 1059 }
ff520ff4
JS
1060 }
1061
1062 posix_spawn_file_actions_destroy(&fa);
1063 return ret;
20f90026
JS
1064}
1065
daa75206
JS
1066// Like stap_system, but capture stdout
1067int
20f90026 1068stap_system_read(int verbose, const vector<string>& args, ostream& out)
daa75206 1069{
645383d5 1070 int child_fd = -1;
e96f2257 1071 pid_t child = stap_spawn_piped(verbose, args, NULL, &child_fd);
645383d5 1072 if (child > 0)
daa75206 1073 {
645383d5
JS
1074 // read everything from the child
1075 stdio_filebuf<char> in(child_fd, ios_base::in);
1076 out << &in;
1077 return stap_waitpid(verbose, child);
daa75206 1078 }
645383d5 1079 return -1;
daa75206
JS
1080}
1081
1082
9a9be966
DS
1083std::pair<bool,int>
1084stap_fork_read(int verbose, ostream& out)
1085{
1086 int pipefd[2];
1087 if (pipe(pipefd) != 0)
1088 return make_pair(false, -1);
1089
1090 fflush(stdout); cout.flush();
1091
1092 if (verbose > 1)
1093 clog << _("Forking subprocess...") << endl;
1094
1095 pid_t child = fork();
1096 PROBE1(stap, stap_system__fork, child);
1097 // child < 0: fork failure
1098 if (child < 0)
1099 {
1100 if (verbose > 1)
1101 clog << _F("Fork error (%d): %s", child, strerror(errno)) << endl;
1102 close(pipefd[0]);
1103 close(pipefd[1]);
1104 return make_pair(false, -1);
1105 }
1106 // child == 0: we're the child
1107 else if (child == 0)
1108 {
1109 close(pipefd[0]);
1110 fcntl(pipefd[1], F_SETFD, FD_CLOEXEC);
1111 return make_pair(true, pipefd[1]);
1112 }
1113
1114 // child > 0: we're the parent
1115 spawned_pids.insert(child);
1116
1117 // read everything from the child
1118 close(pipefd[1]);
1119 stdio_filebuf<char> in(pipefd[0], ios_base::in);
1120 out << &in;
1121 return make_pair(false, stap_waitpid(verbose, child));
1122}
1123
1124
01cc94dc 1125// Send a signal to our spawned commands
4cc40e82
JS
1126int
1127kill_stap_spawn(int sig)
1128{
b30895fa 1129 return spawned_pids.killall(sig);
4cc40e82
JS
1130}
1131
c0d1b5a0 1132
8aabf152 1133
c0d1b5a0
FCE
1134void assert_regexp_match (const string& name, const string& value, const string& re)
1135{
1136 typedef map<string,regex_t*> cache;
1137 static cache compiled;
1138 cache::iterator it = compiled.find (re);
1139 regex_t* r = 0;
1140 if (it == compiled.end())
1141 {
1142 r = new regex_t;
1143 int rc = regcomp (r, re.c_str(), REG_ICASE|REG_NOSUB|REG_EXTENDED);
1d7ae21b 1144 assert (rc == 0);
c0d1b5a0
FCE
1145 compiled[re] = r;
1146 }
1147 else
1148 r = it->second;
1149
1150 // run regexec
1151 int rc = regexec (r, value.c_str(), 0, 0, 0);
1152 if (rc)
e2d0f787
JS
1153 throw runtime_error
1154 (_F("ERROR: Safety pattern mismatch for %s ('%s' vs. '%s') rc=%d",
1155 name.c_str(), value.c_str(), re.c_str(), rc));
c0d1b5a0
FCE
1156}
1157
1158
8aabf152
FCE
// Match 'value' against the POSIX extended regular expression 're'.
//
// Returns 0 on a match, otherwise the nonzero regexec() result, in which
// case 'matches' is left untouched.  On a match 'matches' is replaced by
// exactly ten entries: [0] is the whole match and [1]..[9] are the
// subexpressions; entries that regexec() did not fill in are "".
int regexp_match (const string& value, const string& re, vector<string>& matches)
{
  typedef map<string,regex_t*> cache; // separate cache because we use different regcomp options
  static cache compiled;
  cache::iterator it = compiled.find (re);
  regex_t* r = 0;
  if (it == compiled.end())
    {
      r = new regex_t;
      int rc = regcomp (r, re.c_str(), REG_EXTENDED); /* REG_ICASE? */
      assert (rc == 0);
      compiled[re] = r;
    }
  else
    r = it->second;

  // A block-scoped constant rather than a #define, so that the name does
  // not leak into the rest of the translation unit.
  // XXX: ideally, the number of actual subexpressions in re
  const size_t max_matches = 10;
  regmatch_t rm[max_matches];

  // run regexec
  int rc = regexec (r, value.c_str(), max_matches, rm, 0);
  if (rc) return rc;

  matches.clear();
  matches.reserve(max_matches);
  for (size_t i = 0; i < max_matches; i++)
    {
      if (rm[i].rm_so >= 0)
        matches.push_back(value.substr (rm[i].rm_so, rm[i].rm_eo-rm[i].rm_so));
      else
        matches.push_back("");
    }

  return 0;
}
1194
1195
37001baa
FCE
// Report whether 'str' holds an unescaped glob metacharacter: '*', '?'
// or '['.  A backslash followed by another character hides that
// character from the scan; a lone trailing backslash hides nothing.
bool contains_glob_chars (const string& str)
{
  const string::size_type len = str.size();
  string::size_type pos = 0;

  while (pos < len)
    {
      const char ch = str[pos];

      if (ch == '\\' && pos + 1 < len)
        {
          // PR13338: skip the escape backslash and the escaped character
          pos += 2;
          continue;
        }

      switch (ch)
        {
        case '*':
        case '?':
        case '[':
          return true;
        default:
          break;
        }
      ++pos;
    }

  return false;
}
1213
1214
// PR13338: glob metacharacters have to survive the recursive
// process("...*...") expansion process, so these helpers add and remove
// a protective backslash.
//
// Returns a copy of 'str' with a backslash placed in front of every
// '*', '?' and '['.
string escape_glob_chars (const string& str)
{
  string escaped;
  for (string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
      switch (*it)
        {
        case '*':
        case '?':
        case '[':
          escaped += '\\';
          break;
        default:
          break;
        }
      escaped += *it;
    }
  return escaped;
}
1229
5750ecc6
FCE
// Returns a copy of 'str' in which every backslash that is followed by
// another character has been dropped, keeping the character it
// protected.  A backslash that ends the string is kept as is.
string unescape_glob_chars (const string& str)
{
  string plain;
  const string::size_type len = str.size();

  for (string::size_type pos = 0; pos < len; ++pos)
    {
      // step over the escape backslash; the protected character is
      // then appended like any other
      if (str[pos] == '\\' && pos + 1 < len)
        ++pos;
      plain += str[pos];
    }

  return plain;
}
1247
a81fb5d4
JU
// PR23391: rewrite csh-style brace alternation into ksh-style pattern
// syntax: '{' becomes "@(", '}' becomes ')' and ',' becomes '|'.
// This is still incomplete, but it is enough to handle
// "__{ia32,x64}_sys_$syscall" functions.
string csh_to_ksh (const string& csh)
{
  string ksh;
  for (string::const_iterator it = csh.begin(); it != csh.end(); ++it)
    {
      switch (*it)
        {
        case '{':
          ksh += "@(";
          break;
        case '}':
          ksh += ')';
          break;
        case ',':
          ksh += '|';
          break;
        default:
          ksh += *it;
          break;
        }
    }
  return ksh;
}
1267
e207d98f
MW
// Report whether 'str' contains any character other than an
// alphanumeric (per isalnum) or an underscore.
bool identifier_string_needs_escape (const string& str)
{
  for (string::size_type i = 0; i < str.size (); i++)
    {
      // isalnum() is only defined for EOF and values representable as
      // unsigned char, so a plain (possibly negative) char must not be
      // passed to it directly.
      const unsigned char this_char = static_cast<unsigned char> (str[i]);
      if (! isalnum (this_char) && this_char != '_')
        return true;
    }

  return false;
}
1279
// Returns 'str' with every character that is neither alphanumeric (per
// isalnum) nor '_' replaced by "_<hex>_", where <hex> is the lowercase
// hexadecimal value of that character.  A string that needs no escaping
// comes back unchanged.
string escaped_identifier_string (const string &str)
{
  string op;
  op.reserve (str.size ());
  for (string::size_type i = 0; i < str.size (); i++)
    {
      const char this_char = str[i];
      // isalnum() must be given a value representable as unsigned char.
      if (! isalnum (static_cast<unsigned char> (this_char)) && this_char != '_')
        {
          // The value printed is the plain char converted to unsigned
          // int, exactly as before, so existing escaped names keep
          // their spelling.
          char b[32];
          snprintf (b, sizeof (b), "_%x_", static_cast<unsigned int> (this_char));
          op += b;
        }
      else
        op += this_char;
    }

  return op;
}
5750ecc6 1301
f44430b4
SM
// Returns the octal digit character ('0'..'7') for the value of 'c'
// modulo 8.
unsigned char
octal_character (unsigned c)
{
  const unsigned digit = c % 8;
  return '0' + digit;
}
1307
7c0f5bf8
YZ
// Append to 'o' the escaped spelling of character code 'c': a backslash
// sequence for the quotes, the backslash and the control characters
// listed below; the character itself when its code is below 256 and
// isprint() accepts it; otherwise a backslash followed by three octal
// digits (code/64, code/8 and code, each taken modulo 8).
static inline void
escaped_character_impl (unsigned c, ostringstream &o)
{
  static const struct { int code; const char *text; } named_escapes[] = {
    { '\'', "\\'"  },
    { '"',  "\\\"" },
    { '\n', "\\n"  },
    { '\t', "\\t"  },
    { '\v', "\\v"  },
    { '\b', "\\b"  },
    { '\r', "\\r"  },
    { '\f', "\\f"  },
    { '\a', "\\a"  },
    { '\\', "\\\\" },
  };

  const int oc = (int)c;

  for (size_t k = 0; k < sizeof (named_escapes) / sizeof (named_escapes[0]); k++)
    if (named_escapes[k].code == oc)
      {
        o << named_escapes[k].text;
        return;
      }

  if ((oc < 256) && isprint(oc))
    {
      o << (unsigned char) oc;
      return;
    }

  const unsigned parts[3] = { (unsigned) (oc / 64), (unsigned) (oc / 8), (unsigned) oc };
  o << '\\';
  for (int k = 0; k < 3; k++)
    o << (unsigned char) ('0' + parts[k] % 8);
}
1369
1370string
1371escaped_character (unsigned c)
1372{
1373 ostringstream o;
1374
1375 escaped_character_impl(c, o);
1376
f44430b4
SM
1377 return o.str();
1378}
1379
1380string
1381escaped_literal_string (const string& str)
1382{
7c0f5bf8
YZ
1383 ostringstream o;
1384
f44430b4
SM
1385 for (unsigned i = 0; i < str.size (); i++)
1386 {
7c0f5bf8 1387 escaped_character_impl((unsigned char)str[i], o);
f44430b4 1388 }
7c0f5bf8 1389 return o.str();
f44430b4
SM
1390}
1391
daa75206
JS
// Squash a machine name into the architecture name used here, returning
// names that match no rule unchanged.
//
// PR4186: Copy logic from coreutils uname (uname -i) to squash
// i?86->i386.  Actually, copy logic from linux top-level Makefile
// to squash uname -m -> $(SUBARCH).
//
// This logic needs to match the logic in the stap_get_arch shell
// function in stap-env.
//
// But: RHBZ669082 reminds us that this renaming post-dates some
// of the kernel versions we know and love.  So in buildrun.cxx
// we undo this renaming for ancient powerpc.
string
normalize_machine(const string& machine)
{
  // NB repeated: see also stap-env (stap_get_arch)

  // names that are replaced only on an exact match
  static const char* const exact_names[][2] = {
    { "i486",    "i386"    },
    { "i586",    "i386"    },
    { "i686",    "i386"    },
    { "sun4u",   "sparc64" },
    { "sa110",   "arm"     },
    { "s390x",   "s390"    },
    { "aarch64", "arm64"   },
    { "riscv64", "riscv"   },
  };
  // names that are replaced whenever they start with the given prefix
  static const char* const prefixes[][2] = {
    { "arm",  "arm"     },
    { "ppc",  "powerpc" },
    { "mips", "mips"    },
    { "sh2",  "sh"      },
    { "sh3",  "sh"      },
    { "sh4",  "sh"      },
  };

  for (size_t k = 0; k < sizeof (exact_names) / sizeof (exact_names[0]); k++)
    if (machine == exact_names[k][0])
      return exact_names[k][1];

  for (size_t k = 0; k < sizeof (prefixes) / sizeof (prefixes[0]); k++)
    {
      const string prefix = prefixes[k][0];
      if (machine.compare(0, prefix.size(), prefix) == 0)
        return prefixes[k][1];
    }

  // NB repeated: see also stap-env (stap_get_arch)
  return machine;
}
1424
081b45d1
MW
1425int
1426elf_class_from_normalized_machine (const string &machine)
1427{
1428 // Must match kernel machine architectures as used un tapset directory.
1429 // And must match normalization done in normalize_machine ().
1430 if (machine == "i386"
1431 || machine == "arm") // arm assumes 32-bit
1432 return ELFCLASS32;
1433 else if (machine == "s390" // powerpc and s390 always assume 64-bit,
1434 || machine == "powerpc" // see normalize_machine ().
1435 || machine == "x86_64"
7ddd76f3
WC
1436 || machine == "ia64"
1437 || machine == "arm64")
081b45d1
MW
1438 return ELFCLASS64;
1439
1440 cerr << _F("Unknown kernel machine architecture '%s', don't know elf class",
1441 machine.c_str()) << endl;
1442 return -1;
1443}
1444
aeb9cc10
DB
1445string
1446kernel_release_from_build_tree (const string &kernel_build_tree, int verbose)
1447{
1448 string version_file_name = kernel_build_tree + "/include/config/kernel.release";
1449 // The file include/config/kernel.release within the
1450 // build tree is used to pull out the version information
1451 ifstream version_file (version_file_name.c_str());
1452 if (version_file.fail ())
1453 {
1454 if (verbose > 1)
1455 //TRANSLATORS: Missing a file
1456 cerr << _F("Missing %s", version_file_name.c_str()) << endl;
1457 return "";
1458 }
1459
1460 string kernel_release;
1461 char c;
1462 while (version_file.get(c) && c != '\n')
1463 kernel_release.push_back(c);
1464
1465 return kernel_release;
1466}
8aabf152 1467
cde0f3ce 1468string autosprintf(const char* format, ...)
7a1513b1
FCE
1469{
1470 va_list args;
1471 char *str;
1472 va_start (args, format);
1473 int rc = vasprintf (&str, format, args);
1474 if (rc < 0)
33631d57
CM
1475 {
1476 va_end(args);
1477 return _F("autosprintf/vasprintf error %d", rc);
1478 }
7a1513b1
FCE
1479 string s = str;
1480 va_end (args);
1481 free (str);
1482 return s; /* by copy */
1483}
1484
// Returns the target of the /proc/self/exe symlink, or the literal
// string "/proc/self/exe" when the link cannot be read or its target
// does not fit in the local buffer.
string
get_self_path()
{
  const char *file = "/proc/self/exe";
  char buf[1024]; // This really should be enough for anybody...
  const size_t room = sizeof(buf) - 1;
  ssize_t len = readlink(file, buf, room);

  // readlink() truncates silently instead of failing, so a result that
  // fills all the room we offered may have been cut short; only a
  // strictly shorter result is known to be complete.
  if (len > 0 && static_cast<size_t>(len) < room)
    return string(buf, static_cast<size_t>(len));

  // otherwise the link is unreadable or the path is ridiculously large,
  // fall back to /proc/self/exe.
  return string(file);
}
1500
30b865ce 1501bool
a03a3744 1502is_valid_pid (pid_t pid, string& err_msg)
30b865ce
AJ
1503{
1504 err_msg = "";
b6eb07fd 1505 if (pid <= 0)
30b865ce 1506 {
b6eb07fd
AJ
1507 err_msg = _F("cannot probe pid %d: Invalid pid", pid);
1508 return false;
1509 }
1510 else if (kill(pid, 0) == -1)
1511 {
be7e131b
MC
1512 if (getuid() != 0)
1513 err_msg = _F("cannot probe pid %d: %s. Your uid=%d.", pid, strerror(errno), getuid());
1514 else
1515 err_msg = _F("cannot probe pid %d: %s", pid, strerror(errno));
30b865ce
AJ
1516 return false;
1517 }
1518 return true;
1519}
1520
cde0f3ce
JL
// String distance using a weighted Levenshtein algorithm.
//
// An insertion or a deletion costs 2.  A substitution costs 2, or 1 when
// the two characters differ only in case.  The border row and column
// start at the plain character count, so a comparison against an empty
// string yields the other string's length.
//
// Only two rows of the score table are kept at a time.
//
// TODO: Performance may be improved by adding a maximum distance
// parameter which would abort the operation if we know the final
// distance will be larger than the maximum.  This may entail maintaining
// another data structure, and thus the cost might outweigh the benefit
unsigned
levenshtein(const string& a, const string& b)
{
  const unsigned indel = 2; // insertion/deletion penalty

  // prev is the score row for a's prefix of length i-1, cur for length i
  vector<unsigned> prev(b.size()+1);
  vector<unsigned> cur(b.size()+1);

  // border values
  for (unsigned j = 0; j < prev.size(); j++)
    prev[j] = j;

  // the meat
  for (unsigned i = 1; i <= a.size(); i++)
    {
      cur[0] = i; // border value
      for (unsigned j = 1; j <= b.size(); j++)
        {
          if (a[i-1] == b[j-1]) // match
            cur[j] = prev[j-1];
          else // penalties open for adjustments
            {
              unsigned subpen = 2; // substitution penalty
              // check if they are upper/lowercase related; tolower()
              // must be given a value representable as unsigned char
              if (tolower(static_cast<unsigned char>(a[i-1]))
                  == tolower(static_cast<unsigned char>(b[j-1])))
                subpen = 1; // half penalty
              cur[j] = min(min(
                  prev[j-1] + subpen, // substitution
                  prev[j] + indel),   // deletion
                  cur[j-1] + indel);  // insertion
            }
        }
      prev.swap(cur);
    }

  return prev[b.size()];
}
3f95ed01 1558
593f09eb
JL
1559// Returns comma-separated list of set elements closest to the target string.
1560// Print a maximum amount of 'max' elements, with a maximum levenshtein score
1561// of 'threshold'.
1562string
1563levenshtein_suggest(const string& target, // string to match against
1564 const set<string>& elems, // elements to suggest from
1565 unsigned max, // max elements to print
1566 unsigned threshold) // max leven score to print
1567{
1568 // calculate leven score for each elem and put in map
1569 multimap<unsigned, string> scores;
1570 for (set<string>::const_iterator it = elems.begin();
1571 it != elems.end(); ++it)
1572 {
1062ce54
JL
1573 if (it->empty()) // skip empty strings
1574 continue;
1575
59b11ead
JL
1576 // Approximate levenshtein by size-difference only; real score
1577 // is at least this high
14f04522 1578 unsigned min_score = abs(static_cast<signed>(target.size()) - static_cast<signed>(it->size()));
59b11ead
JL
1579
1580 if (min_score > threshold) // min-score too high for threshold
1581 continue;
1582
1583 /* Check if we can skip calculating the score for this element. This works
1584 * on knowing two facts:
1585 * (1) We will only print the 'max' number of the top elements. The
1586 * current top 'max' candidates reside in the scores map already.
1587 * (2) The score will be AT LEAST the difference between the lengths of
1588 * the two strings.
1589 * So what we do is retrieve the WORST score of the current best
1590 * candidates by iterating through the map (which is ordered) and
1591 * retrieving the 'max-th' item and check if that's still better than the
1592 * BEST score we could possibly get from the two strings (by comparing
1593 * their lengths). If the 'max-th' item is indeed better, then we know
1594 * this element will NEVER make it to the terminal. So we just skip it and
1595 * move on. Quite tragic if you ask me...
1596 */
1597 unsigned maxth_score = std::numeric_limits<unsigned>::max();
1598 if (scores.size() >= max) // do we have at least 'max' items?
1599 {
1600 // retrieve 'max-th' item
1601 multimap<unsigned, string>::iterator itt = scores.begin();
1602 for (unsigned i = 0; i < max-1; i++) itt++; // will not go to .end()
1603 maxth_score = itt->first;
1604 }
1605
1606 if (min_score > maxth_score) // min-score too high for known candidates
1607 continue;
1608
593f09eb 1609 unsigned score = levenshtein(target, *it);
59b11ead
JL
1610
1611 if (score > maxth_score) // actual score too high for known candidates
1612 continue;
1613
1614 if (score > threshold) // actual score too high for threshold
1615 continue;
1616
1617 // a candidate!
1618 scores.insert(make_pair(score, *it));
593f09eb
JL
1619 }
1620
1621 string suggestions;
1622
1623 // Print out the top 'max' elements
1624 multimap<unsigned, string>::iterator it = scores.begin();
1625 for (unsigned i = 0; it != scores.end() && i < max; ++it, i++)
1626 suggestions += it->second + ", ";
1627 if (!suggestions.empty())
1628 suggestions.erase(suggestions.size()-2);
1629
1630 return suggestions;
1631}
1632
a3e980f9
FCE
1633string
1634levenshtein_suggest(const string& target, // string to match against
47d349b1 1635 const set<interned_string>& elems,// elements to suggest from
a3e980f9
FCE
1636 unsigned max, // max elements to print
1637 unsigned threshold) // max leven score to print
1638{
1639 set<string> elems2;
47d349b1 1640 for (set<interned_string>::const_iterator it = elems.begin();
a3e980f9
FCE
1641 it != elems.end();
1642 it++)
1643 elems2.insert(it->to_string());
1644
1645 return levenshtein_suggest (target, elems2, max, threshold);
1646
1647}
1648
1649
3f95ed01
JS
#ifndef HAVE_PPOLL
// This is a poor-man's ppoll, only used carefully by readers that need to be
// interruptible, like remote::run and mutator::run.  It does not provide the
// same guarantee of atomicity as on systems with a true ppoll.
//
// In our use, this would cause trouble if a signal came in any time from the
// moment we mask signals to prepare pollfds, to the moment we call poll in
// emulation here.  If there's no data on any of the pollfds, we would be
// stuck waiting indefinitely.
//
// Since this is mainly about responsiveness of CTRL-C cleanup, a missing
// timeout is replaced by a forced one-second timeout, so that we get a
// chance to notice there was an interrupt without too much delay.
int
ppoll(struct pollfd *fds, nfds_t nfds,
      const struct timespec *timeout_ts,
      const sigset_t *sigmask)
{
  int timeout_ms = 1000; // don't block forever...
  if (timeout_ts != NULL)
    timeout_ms = timeout_ts->tv_sec * 1000 + timeout_ts->tv_nsec / 1000000;

  // swap in the caller's mask for the duration of the poll only
  sigset_t saved_mask;
  sigprocmask(SIG_SETMASK, sigmask, &saved_mask);
  const int rc = poll(fds, nfds, timeout_ms);
  sigprocmask(SIG_SETMASK, &saved_mask, NULL);
  return rc;
}
#endif
1677
1678
01fb72a0
DS
1679int
1680read_from_file (const string &fname, int &data)
1681{
1682 // C++ streams may not set errno in the even of a failure. However if we
1683 // set it to 0 before each operation and it gets set during the operation,
1684 // then we can use its value in order to determine what happened.
1685 errno = 0;
1686 ifstream f (fname.c_str ());
1687 if (! f.good ())
1688 {
1689 clog << _F("Unable to open file '%s' for reading: ", fname.c_str());
1690 goto error;
1691 }
1692
1693 // Read the data;
1694 errno = 0;
1695 f >> data;
1696 if (f.fail ())
1697 {
1698 clog << _F("Unable to read from file '%s': ", fname.c_str());
1699 goto error;
1700 }
1701
1702 // NB: not necessary to f.close ();
1703 return 0; // Success
1704
1705 error:
1706 if (errno)
1707 clog << strerror (errno) << endl;
1708 else
1709 clog << _("unknown error") << endl;
1710 return 1; // Failure
1711}
1712
1713template <class T>
1714int
1715write_to_file (const string &fname, const T &data)
1716{
1717 // C++ streams may not set errno in the even of a failure. However if we
1718 // set it to 0 before each operation and it gets set during the operation,
1719 // then we can use its value in order to determine what happened.
1720 errno = 0;
1721 ofstream f (fname.c_str ());
1722 if (! f.good ())
1723 {
1724 clog << _F("Unable to open file '%s' for writing: ", fname.c_str());
1725 goto error;
1726 }
1727
1728 // Write the data;
1729 f << data;
1730 errno = 0;
1731 if (f.fail ())
1732 {
1733 clog << _F("Unable to write to file '%s': ", fname.c_str());
1734 goto error;
1735 }
1736
1737 // NB: not necessary to f.close ();
1738 return 0; // Success
1739
1740 error:
1741 if (errno)
1742 clog << strerror (errno) << endl;
1743 else
1744 clog << _("unknown error") << endl;
1745 return 1; // Failure
1746}
1747
1748// Let's go ahead an instantiate a few variants of the write_to_file()
1749// templated function.
1750template int write_to_file (const string &fname, const string &data);
586c8666 1751template int write_to_file (const string &fname, const int &data);
01fb72a0
DS
1752
1753int
1754flush_to_stream (const string &fname, ostream &o)
1755{
1756 // C++ streams may not set errno in the even of a failure. However if we
1757 // set it to 0 before each operation and it gets set during the operation,
1758 // then we can use its value in order to determine what happened.
1759 errno = 0;
1760 ifstream f (fname.c_str ());
1761 if (! f.good ())
1762 {
1763 clog << _F("Unable to open file '%s' for reading: ", fname.c_str());
1764 goto error;
1765 }
1766
1767 // Stream the data
1768
1769 // NB: o << f.rdbuf() misbehaves for some reason, appearing to close o,
1770 // which is unfortunate if o == clog or cout.
1771 while (1)
1772 {
1773 errno = 0;
1774 int c = f.get();
1775 if (f.eof ()) return 0; // normal exit
1776 if (! f.good()) break;
1777 o.put(c);
1778 if (! o.good()) break;
1779 }
1780
1781 // NB: not necessary to f.close ();
1782
1783 error:
1784 if (errno)
1785 clog << strerror (errno) << endl;
1786 else
1787 clog << _("unknown error") << endl;
1788 return 1; // Failure
1789}
1790
f199d198
SM
// Predicate for the trimming helpers: 1 when 'c' is not whitespace
// according to std::isspace, 0 when it is.
int
not_isspace(unsigned char c)
{
  if (std::isspace(c))
    return 0;
  return 1;
}
1796
1f4b9e55
DS
// trim from start (in place): removes every leading character that
// std::isspace accepts
void
ltrim(std::string &s)
{
  std::string::size_type keep_from = 0;
  while (keep_from < s.size()
         && std::isspace(static_cast<unsigned char>(s[keep_from])))
    ++keep_from;
  s.erase(0, keep_from);
}
1803
// trim from end (in place): removes every trailing character that
// std::isspace accepts
void
rtrim(std::string &s)
{
  std::string::size_type keep = s.size();
  while (keep > 0
         && std::isspace(static_cast<unsigned char>(s[keep - 1])))
    --keep;
  s.erase(keep);
}
1810
// trim from both ends (in place): removes leading and trailing
// characters that std::isspace accepts
void
trim(std::string &s)
{
  std::string::size_type first = 0;
  std::string::size_type last = s.size();

  while (first < last && std::isspace(static_cast<unsigned char>(s[first])))
    ++first;
  while (last > first && std::isspace(static_cast<unsigned char>(s[last - 1])))
    --last;

  // cut the tail first so that 'first' still indexes the same character
  s.erase(last);
  s.erase(0, first);
}
1818
109c21b7
DS
1819// Tries to determine the name and version of the running Linux OS
1820// distribution. Fills in the 'info' argument with (name, version)
1821// strings. Returns true if it was able to retrieve the information.
1822bool
1823get_distro_info(vector<string> &info)
1824{
1825 string name, version;
1826
1827 // Getting the distro name and version is harder than it should
1828 // be. We've got a multi-pronged strategy.
1829 //
1830 // (1) First, try the "lsb_release" executable, which may or may
1831 // not exist on the system.
1832 vector<string> cmd { "lsb_release", "--short", "--id" };
1833 stringstream out;
1834 int rc = stap_system_read(0, cmd, out);
1835 if (rc == 0) {
1836 name = out.str();
1837
1838 vector<string> cmd2 { "lsb_release", "--short", "--release" };
7561738b
DS
1839 stringstream out2;
1840 rc = stap_system_read(0, cmd2, out2);
109c21b7 1841 if (rc == 0) {
7561738b 1842 version = out2.str();
109c21b7
DS
1843 }
1844 }
1845
1846 // (2) Look for the /etc/os-release file.
1847 if (name.empty()) {
1848 ifstream infile;
1849 infile.open("/etc/os-release");
1850 if (infile.is_open()) {
1851 string line;
1852 while (getline(infile, line)) {
1853 vector<string> components;
1854 tokenize(line, components, "=");
75f9ecf4
DS
1855 if (components.empty())
1856 continue;
109c21b7
DS
1857 transform(components[0].begin(), components[0].end(),
1858 components[0].begin(), ::tolower);
1859 if (components[0] == "name") {
75f9ecf4
DS
1860 string::size_type pos = components[1].find(' ');
1861 if (pos == string::npos)
1862 name = components[1];
109c21b7
DS
1863 }
1864 else if (components[0] == "version_id") {
1865 version = components[1];
1866 }
75f9ecf4
DS
1867 else if (components[0] == "id" && name.empty()) {
1868 name = components[1];
1869 }
109c21b7
DS
1870 }
1871 infile.close();
1872 }
1873 }
1874
1875 // (3) Here we could look for /etc/*-release ('redhat', 'arch', 'gentoo',
1876 // etc.) or /etc/*_version ('debian', etc.), if needed.
1877
1878 info.clear();
75f9ecf4 1879 if (! name.empty()) {
1f4b9e55 1880 trim(name);
75f9ecf4 1881 // If the string is quoted, remove the quotes.
7a517c1f 1882 if (*name.begin() == '"') {
75f9ecf4
DS
1883 name.erase(0, 1);
1884 name.erase(name.size() - 1);
1885 trim(name);
1886 }
1887 }
1888 if (! version.empty()) {
1f4b9e55 1889 trim(version);
75f9ecf4 1890 // If the string is quoted, remove the quotes.
7a517c1f 1891 if (*version.begin() == '"') {
75f9ecf4
DS
1892 version.erase(0, 1);
1893 version.erase(version.size() - 1);
1894 trim(version);
1895 }
1896 }
109c21b7
DS
1897 if (! name.empty()) {
1898 info.push_back(name);
1899 info.push_back(version);
1900 return true;
1901 }
1902 return false;
1903}
1904
be7e131b
MC
1905// PR30321: Privilege separation
1906int
1907run_unprivileged(const std::string& build_as, uid_t build_as_uid, gid_t build_as_gid, int verbosity)
1908{
1909 if (build_as == "")
1910 return EXIT_SUCCESS;
1911
1912 int ret;
1913 ret = setregid(build_as_gid, build_as_gid);
1914 if (ret != 0) {
1915 clog << "ERROR: setregid() failed" << endl;
1916 clog << strerror (errno) << endl;
1917 return EXIT_FAILURE;
1918 }
1919 ret = setreuid(build_as_uid, build_as_uid);
1920 if (ret != 0) {
1921 clog << "ERROR: setreuid() failed" << endl;
1922 clog << strerror (errno) << endl;
1923 return EXIT_FAILURE;
1924 }
1925 if (verbosity > 2)
1926 cout << _F("Running passes 1-4 using user \"%s\" userid \"%d\" group id \"%d\"",
1927 build_as.c_str(), build_as_uid, build_as_gid) << "<<<" << endl;
1928 return EXIT_SUCCESS;
1929}
1930
1931
73267b89 1932/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.465435 seconds and 6 git commands to generate.