]>
sourceware.org Git - systemtap.git/blob - remote.cxx
1 // systemtap remote execution
2 // Copyright (C) 2010-2011 Red Hat Inc.
4 // This file is part of systemtap, and is free software. You can
5 // redistribute it and/or modify it under the terms of the GNU General
6 // Public License (GPL); either version 2, or (at your option) any
14 #include <sys/types.h>
17 #include <sys/socket.h>
35 // Decode URIs as per RFC 3986, though not bothering to be strict
39 string scheme
, authority
, path
, query
, fragment
;
40 bool has_authority
, has_query
, has_fragment
;
42 uri_decoder(const string
& uri
):
43 uri(uri
), has_authority(false), has_query(false), has_fragment(false)
46 "^([^:]+):(//[^/?#]*)?([^?#]*)(\\?[^#]*)?(#.*)?$";
48 vector
<string
> matches
;
49 if (regexp_match(uri
, re
, matches
) != 0)
50 throw runtime_error(_F("string doesn't appear to be a URI: %s", uri
.c_str()));
54 if (!matches
[2].empty())
57 authority
= matches
[2].substr(2);
62 if (!matches
[4].empty())
65 query
= matches
[4].substr(1);
68 if (!matches
[5].empty())
71 fragment
= matches
[5].substr(1);
77 // loopback target for running locally
78 class direct
: public remote
{
82 direct(systemtap_session
& s
): remote(s
), child(0) {}
86 args
= make_run_command(*s
);
87 if (! staprun_r_arg
.empty()) // PR13354
89 args
.push_back ("-r");
90 args
.push_back (staprun_r_arg
);
92 pid_t pid
= stap_spawn (s
->verbose
, args
);
104 int ret
= stap_waitpid(s
->verbose
, child
);
106 s
->print_warning(_F("%s exited with status: %d", args
.front().c_str(), ret
));
114 virtual ~direct() { finish(); }
118 class stapsh
: public remote
{
123 string remote_version
;
125 virtual void prepare_poll(vector
<pollfd
>& fds
)
127 if (fdout
>= 0 && OUT
)
129 pollfd p
= { fdout
, POLLIN
, 0 };
133 // need to send a signal?
134 if (fdin
>= 0 && IN
&& interrupts_sent
< pending_interrupts
)
136 pollfd p
= { fdin
, POLLOUT
, 0 };
141 virtual void handle_poll(vector
<pollfd
>& fds
)
143 for (unsigned i
=0; i
< fds
.size(); ++i
)
144 if (fds
[i
].fd
== fdin
|| fds
[i
].fd
== fdout
)
148 // need to send a signal?
149 if (fds
[i
].revents
& POLLOUT
&& IN
&&
150 interrupts_sent
< pending_interrupts
)
152 if (send_command("quit\n") == 0)
158 // have data to read?
159 if (fds
[i
].revents
& POLLIN
&& OUT
)
164 // If we have a line prefix, then read lines one at a
165 // time and copy out with the prefix.
167 while (fgets(buf
, sizeof(buf
), OUT
))
168 cout
<< prefix
<< buf
;
174 // Otherwise read an entire block of data at once.
175 size_t rc
= fread(buf
, 1, sizeof(buf
), OUT
);
178 // NB: The buf could contain binary data,
179 // including \0, so write as a block instead of
180 // the usual <<string.
189 if (err
|| fds
[i
].revents
& ~(POLLIN
|POLLOUT
))
196 // Some schemes like unix may have stdout and stderr mushed together.
197 // There shouldn't be anything except dbug messages on stderr before we
198 // actually start running, and there's no get_reply after that. So
199 // we'll just loop and skip those that start with "stapsh:".
201 while (fgets(reply
, sizeof(reply
), OUT
))
203 if (!startswith(reply
, "stapsh:"))
206 // Why not clog here? Well, once things get running we won't be
207 // able to distinguish stdout/err, so trying to fake it here would
208 // be less consistent than just keeping it merged.
212 // Reached EOF, nothing to reply...
216 int send_command(const string
& cmd
)
220 if (fputs(cmd
.c_str(), IN
) < 0 ||
226 int send_file(const string
& filename
, const string
& dest
)
229 FILE* f
= fopen(filename
.c_str(), "r");
234 rc
= fstat(fileno(f
), &fs
);
238 cmd
<< "file " << fs
.st_size
<< " " << dest
<< "\n";
239 rc
= send_command(cmd
.str());
243 while (!rc
&& i
< fs
.st_size
)
246 size_t r
= sizeof(buf
);
247 if (fs
.st_size
- i
< (off_t
)r
)
249 r
= fread(buf
, 1, r
, f
);
254 size_t w
= fwrite(buf
, 1, r
, IN
);
268 string reply
= get_reply();
275 clog
<< _("stapsh file ERROR: no reply") << endl
;
277 clog
<< _F("stapsh file replied %s", reply
.c_str());
285 static string
qpencode(const string
& str
)
288 o
<< setfill('0') << hex
;
289 for (const char* s
= str
.c_str(); *s
; ++s
)
290 if (*s
>= 33 && *s
<= 126 && *s
!= 61)
293 o
<< '=' << setw(2) << (unsigned)(unsigned char) *s
;
298 stapsh(systemtap_session
& s
)
299 : remote(s
), interrupts_sent(0),
300 fdin(-1), fdout(-1), IN(0), OUT(0)
303 virtual int prepare()
307 string localmodule
= s
->tmpdir
+ "/" + s
->module_name
+ ".ko";
308 string remotemodule
= s
->module_name
+ ".ko";
309 if ((rc
= send_file(localmodule
, remotemodule
)))
312 if (file_exists(localmodule
+ ".sgn") &&
313 (rc
= send_file(localmodule
+ ".sgn", remotemodule
+ ".sgn")))
316 if (!s
->uprobes_path
.empty())
318 string remoteuprobes
= basename(s
->uprobes_path
.c_str());
319 if ((rc
= send_file(s
->uprobes_path
, remoteuprobes
)))
322 if (file_exists(s
->uprobes_path
+ ".sgn") &&
323 (rc
= send_file(s
->uprobes_path
+ ".sgn", remoteuprobes
+ ".sgn")))
332 // Send the staprun args
333 // NB: The remote is left to decide its own staprun path
334 ostringstream
run("run", ios::out
| ios::ate
);
335 vector
<string
> cmd
= make_run_command(*s
, ".", remote_version
);
337 // PR13354: identify our remote index/url
338 if (strverscmp("1.7", remote_version
.c_str()) <= 0 && // -r supported?
339 ! staprun_r_arg
.empty())
341 cmd
.push_back ("-r");
342 cmd
.push_back (staprun_r_arg
);
345 for (unsigned i
= 1; i
< cmd
.size(); ++i
)
346 run
<< ' ' << qpencode(cmd
[i
]);
349 int rc
= send_command(run
.str());
353 string reply
= get_reply();
360 clog
<< _("stapsh run ERROR: no reply") << endl
;
362 clog
<< _F("stapsh run replied %s", reply
.c_str());
369 long flags
= fcntl(fdout
, F_GETFL
) | O_NONBLOCK
;
370 fcntl(fdout
, F_SETFL
, flags
);
373 // If run failed for any reason, then this
374 // connection is effectively dead to us.
383 if (OUT
) fclose(OUT
);
394 void set_child_fds(int in
, int out
)
396 if (fdin
>= 0 || fdout
>= 0 || IN
|| OUT
)
397 throw runtime_error(_("stapsh file descriptors already set"));
401 IN
= fdopen(fdin
, "w");
402 OUT
= fdopen(fdout
, "r");
404 throw runtime_error(_("invalid file descriptors for stapsh"));
406 if (send_command("stap " VERSION
"\n"))
407 throw runtime_error(_("error sending hello to stapsh"));
409 string reply
= get_reply();
411 throw runtime_error(_("error receiving hello from stapsh"));
413 // stapsh VERSION MACHINE RELEASE
414 vector
<string
> uname
;
415 tokenize(reply
, uname
, " \t\r\n");
416 if (uname
.size() != 4 || uname
[0] != "stapsh")
417 throw runtime_error(_("failed to get uname from stapsh"));
419 // We assume that later versions will know how to talk to us.
420 // Looking backward, we use this for make_run_command().
421 this->remote_version
= uname
[1];
423 this->s
= s
->clone(uname
[2], uname
[3]);
427 virtual ~stapsh() { close(); }
431 // direct_stapsh is meant only for testing, as a way to exercise the stapsh
432 // mechanisms without requiring test machines to have actual remote access.
433 class direct_stapsh
: public stapsh
{
437 direct_stapsh(systemtap_session
& s
)
438 : stapsh(s
), child(0)
442 cmd
.push_back(BINDIR
"/stapsh");
443 if (s
.perpass_verbose
[4] > 1)
445 if (s
.perpass_verbose
[4] > 2)
448 // mask signals while we spawn, so we can simulate manual signals to
449 // the "remote" target, as we must for the real ssh_remote case.
451 stap_sigmasker masked
;
452 child
= stap_spawn_piped(s
.verbose
, cmd
, &in
, &out
);
456 throw runtime_error(_("error launching stapsh"));
460 set_child_fds(in
, out
);
462 catch (runtime_error
&)
471 int rc
= stapsh::finish();
475 int rc2
= stap_waitpid(s
->verbose
, child
);
483 virtual ~direct_stapsh() { finish(); }
487 // Connect to an existing stapsh on a unix socket.
488 class unix_stapsh
: public stapsh
{
491 unix_stapsh(systemtap_session
& s
, const uri_decoder
& ud
)
495 server
.sun_family
= AF_UNIX
;
497 throw runtime_error(_("unix target requires a /path"));
498 if (ud
.path
.size() > sizeof(server
.sun_path
) - 1)
499 throw runtime_error(_("unix target /path is too long"));
500 strcpy(server
.sun_path
, ud
.path
.c_str());
502 if (ud
.has_authority
)
503 throw runtime_error(_("unix target doesn't support a hostname"));
505 throw runtime_error(_("unix target URI doesn't support a ?query"));
507 throw runtime_error(_("unix target URI doesn't support a #fragment"));
509 int fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
511 throw runtime_error(_("error opening a socket"));
513 if (connect(fd
, (struct sockaddr
*)&server
, SUN_LEN(&server
)) < 0)
515 const char *msg
= strerror(errno
);
517 throw runtime_error(_F("error connecting to socket: %s", msg
));
520 // Try to dup it, so class stapsh can have truly separate fds for its
521 // fdopen handles. If it fails for some reason, it can still work with
529 set_child_fds(fd
, fd2
);
531 catch (runtime_error
&)
543 virtual ~unix_stapsh() { finish(); }
547 // stapsh-based ssh_remote
548 class ssh_remote
: public stapsh
{
552 ssh_remote(systemtap_session
& s
): stapsh(s
), child(0) {}
554 int connect(const string
& host
, const string
& port
)
559 cmd
.push_back("ssh");
568 // This is crafted so that we get a silent failure with status 127 if
569 // the command is not found. The combination of -P and $cmd ensures
570 // that we pull the command out of the PATH, not aliases or such.
571 string stapsh_cmd
= "cmd=`type -P stapsh || exit 127` && exec \"$cmd\"";
572 if (s
->perpass_verbose
[4] > 1)
573 stapsh_cmd
.append(" -v");
574 if (s
->perpass_verbose
[4] > 2)
575 stapsh_cmd
.append(" -v");
576 // NB: We need to explicitly choose bash, as $SHELL may be weird...
577 cmd
.push_back("/bin/bash -c '" + stapsh_cmd
+ "'");
579 // mask signals while we spawn, so we can manually send even tty
580 // signals *through* ssh rather than to ssh itself
582 stap_sigmasker masked
;
583 child
= stap_spawn_piped(s
->verbose
, cmd
, &in
, &out
);
587 throw runtime_error(_("error launching stapsh"));
591 set_child_fds(in
, out
);
593 catch (runtime_error
&)
597 // ssh itself signals errors with 255
599 throw runtime_error(_("error establishing ssh connection"));
601 // If rc == 127, that's command-not-found, so we let ::create()
602 // try again in legacy mode. But only do this if there's a single
603 // remote, as the old code didn't handle ttys well with multiple
604 // remotes. Otherwise, throw up again. *barf*
605 if (rc
!= 127 || s
->remote_uris
.size() > 1)
614 int rc
= stapsh::finish();
618 int rc2
= stap_waitpid(s
->verbose
, child
);
625 static remote
* create(systemtap_session
& s
, const string
& host
);
626 static remote
* create(systemtap_session
& s
, const uri_decoder
& ud
);
628 virtual ~ssh_remote() { finish(); }
632 // ssh connection without stapsh, for legacy stap installations
633 // NB: ssh commands use a tty (-t) so signals are passed along to the remote.
634 // It does this by putting the local tty in raw mode, so it only works for tty
635 // signals, and only for a single remote at a time.
636 class ssh_legacy_remote
: public remote
{
638 vector
<string
> ssh_args
, scp_args
;
640 string host
, port
, tmpdir
;
643 ssh_legacy_remote(systemtap_session
& s
, const string
& host
, const string
& port
)
644 : remote(s
), host(host
), port(port
), child(0)
646 open_control_master();
651 catch (runtime_error
&)
653 close_control_master();
658 void open_control_master()
660 static unsigned index
= 0;
662 if (s
->tmpdir
.empty()) // sanity check, shouldn't happen
663 throw runtime_error(_("No tmpdir available for ssh control master"));
665 ssh_control
= s
->tmpdir
+ "/ssh_remote_control_" + lex_cast(++index
);
668 scp_args
.push_back("scp");
669 scp_args
.push_back("-q");
670 scp_args
.push_back("-o");
671 scp_args
.push_back("ControlPath=" + ssh_control
);
675 ssh_args
.push_back(host
);
679 scp_args
.push_back("-P");
680 scp_args
.push_back(port
);
681 ssh_args
.push_back("-p");
682 ssh_args
.push_back(port
);
685 // NB: ssh -f will stay in the foreground until authentication is
686 // complete and the control socket is created, so we know it's ready to
687 // go when stap_system returns.
688 vector
<string
> cmd
= ssh_args
;
692 int rc
= stap_system(s
->verbose
, cmd
);
694 throw runtime_error(_F("failed to create an ssh control master for %s : rc= %d",
698 clog
<< _F("Created ssh control master at %s",
699 lex_cast_qstring(ssh_control
).c_str()) << endl
;
702 void close_control_master()
704 if (ssh_control
.empty())
707 vector
<string
> cmd
= ssh_args
;
709 cmd
.push_back("exit");
710 int rc
= stap_system(s
->verbose
, cmd
, true, true);
712 cerr
<< _F("failed to stop the ssh control master for %s : rc=%d",
713 host
.c_str(), rc
) << endl
;
723 vector
<string
> uname
;
724 vector
<string
> cmd
= ssh_args
;
726 cmd
.push_back("uname -rm");
727 int rc
= stap_system_read(s
->verbose
, cmd
, out
);
729 tokenize(out
.str(), uname
, " \t\r\n");
730 if (uname
.size() != 2)
731 throw runtime_error(_F("failed to get uname from %s : rc= %d", host
.c_str(), rc
));
732 const string
& release
= uname
[0];
733 const string
& arch
= uname
[1];
734 // XXX need to deal with command-line vs. implied arch/release
735 this->s
= s
->clone(arch
, release
);
741 string localmodule
= s
->tmpdir
+ "/" + s
->module_name
+ ".ko";
744 // Make a remote tempdir.
748 vector
<string
> cmd
= ssh_args
;
750 cmd
.push_back("mktemp -d -t stapXXXXXX");
751 rc
= stap_system_read(s
->verbose
, cmd
, out
);
753 tokenize(out
.str(), vout
, "\r\n");
754 if (vout
.size() != 1)
756 cerr
<< _F("failed to make a tempdir on %s : rc=%d",
757 host
.c_str(), rc
) << endl
;
761 tmpmodule
= tmpdir
+ "/" + s
->module_name
+ ".ko";
764 // Transfer the module.
767 vector
<string
> cmd
= scp_args
;
768 cmd
.push_back(localmodule
);
769 cmd
.push_back(host
+ ":" + tmpmodule
);
770 rc
= stap_system(s
->verbose
, cmd
);
772 cerr
<< _F("failed to copy the module to %s : rc=%d",
773 host
.c_str(), rc
) << endl
;
776 // Transfer the module signature.
777 if (rc
== 0 && file_exists(localmodule
+ ".sgn"))
779 vector
<string
> cmd
= scp_args
;
780 cmd
.push_back(localmodule
+ ".sgn");
781 cmd
.push_back(host
+ ":" + tmpmodule
+ ".sgn");
782 rc
= stap_system(s
->verbose
, cmd
);
784 cerr
<< _F("failed to copy the module signature to %s : rc=%d",
785 host
.c_str(), rc
) << endl
;
788 // What about transferring uprobes.ko? In this ssh "legacy" mode, we
789 // don't know the remote systemtap version, but -uPATH wasn't added until
790 // 1.4. Rather than risking a getopt error, we'll just assume that
791 // this isn't supported. The remote will just have to provide its own
792 // uprobes.ko in SYSTEMTAP_RUNTIME or already loaded.
794 // Run the module on the remote.
796 vector
<string
> cmd
= ssh_args
;
798 // We don't know the actual version, but all <=1.3 are approx equal.
799 vector
<string
> staprun_cmd
= make_run_command(*s
, tmpdir
, "1.3");
800 staprun_cmd
[0] = "staprun"; // NB: The remote decides its own path
801 // NB: PR13354: we assume legacy installations don't have
802 // staprun -r support, so we ignore staprun_r_arg.
803 cmd
.push_back(cmdstr_join(staprun_cmd
));
804 pid_t pid
= stap_spawn(s
->verbose
, cmd
);
809 cerr
<< _F("failed to run the module on %s : ret=%d",
810 host
.c_str(), pid
) << endl
;
824 rc
= stap_waitpid(s
->verbose
, child
);
830 // Remove the tempdir.
831 // XXX need to make sure this runs even with e.g. CTRL-C exits
832 vector
<string
> cmd
= ssh_args
;
834 cmd
.push_back("rm -r " + cmdstr_quoted(tmpdir
));
835 int rc2
= stap_system(s
->verbose
, cmd
);
837 cerr
<< _F("failed to delete the tempdir on %s : rc=%d",
838 host
.c_str(), rc2
) << endl
;
844 close_control_master();
850 friend class ssh_remote
;
852 virtual ~ssh_legacy_remote()
854 close_control_master();
859 // Try to establish a stapsh connection to the remote, but fallback
860 // to the older mechanism if the command is not found.
862 ssh_remote::create(systemtap_session
& s
, const string
& target
)
864 string port
, host
= target
;
865 size_t i
= host
.find(':');
866 if (i
!= string::npos
)
868 port
= host
.substr(i
+ 1);
872 auto_ptr
<ssh_remote
> p (new ssh_remote(s
));
873 int rc
= p
->connect(host
, port
);
876 else if (rc
== 127) // stapsh command not found
877 return new ssh_legacy_remote(s
, host
, port
); // try legacy instead
882 ssh_remote::create(systemtap_session
& s
, const uri_decoder
& ud
)
884 if (!ud
.has_authority
|| ud
.authority
.empty())
885 throw runtime_error(_("ssh target requires a hostname"));
886 if (!ud
.path
.empty() && ud
.path
!= "/")
887 throw runtime_error(_("ssh target URI doesn't support a /path"));
889 throw runtime_error(_("ssh target URI doesn't support a ?query"));
891 throw runtime_error(_("ssh target URI doesn't support a #fragment"));
893 return create(s
, ud
.authority
);
898 remote::create(systemtap_session
& s
, const string
& uri
, int idx
)
903 if (uri
.find(':') != string::npos
)
905 const uri_decoder
ud(uri
);
907 // An ssh "host:port" is ambiguous with a URI "scheme:path".
908 // So if it looks like a number, just assume ssh.
909 if (!ud
.has_authority
&& !ud
.has_query
&&
910 !ud
.has_fragment
&& !ud
.path
.empty() &&
911 ud
.path
.find_first_not_of("1234567890") == string::npos
)
912 it
= ssh_remote::create(s
, uri
);
913 else if (ud
.scheme
== "direct")
915 else if (ud
.scheme
== "stapsh")
916 it
= new direct_stapsh(s
);
917 else if (ud
.scheme
== "unix")
918 it
= new unix_stapsh(s
, ud
);
919 else if (ud
.scheme
== "ssh")
920 it
= ssh_remote::create(s
, ud
);
922 throw runtime_error(_F("unrecognized URI scheme '%s' in remote: %s",
923 ud
.scheme
.c_str(), uri
.c_str()));
926 // XXX assuming everything else is ssh for now...
927 it
= ssh_remote::create(s
, uri
);
929 catch (std::runtime_error
& e
)
931 cerr
<< e
.what() << " on remote '" << uri
<< "'" << endl
;
935 if (it
&& idx
>= 0) // PR13354: remote metadata for staprun -r IDX:URI
938 r_arg
<< idx
<< ":" << uri
;
939 it
->staprun_r_arg
= r_arg
.str();
946 // This is a poor-man's ppoll, only used by remote::run below. It does not
947 // provide the same guarantee of atomicity as on systems with a true ppoll.
949 // In our use, this would cause trouble if a signal came in any time from the
950 // moment we mask signals to prepare pollfds, to the moment we call poll in
951 // emulation here. If there's no data on any of the pollfds, we will be stuck
952 // waiting indefinitely.
954 // Since this is mainly about responsiveness of CTRL-C cleanup, we'll just
955 // throw in a one-second forced timeout to ensure we have a chance to notice
956 // there was an interrupt without too much delay.
958 ppoll(struct pollfd
*fds
, nfds_t nfds
,
959 const struct timespec
*timeout_ts
,
960 const sigset_t
*sigmask
)
963 int timeout
= (timeout_ts
== NULL
) ? 1000 // don't block forever...
964 : (timeout_ts
->tv_sec
* 1000 + timeout_ts
->tv_nsec
/ 1000000);
965 sigprocmask(SIG_SETMASK
, sigmask
, &origmask
);
966 int rc
= poll(fds
, nfds
, timeout
);
967 sigprocmask(SIG_SETMASK
, &origmask
, NULL
);
973 remote::run(const vector
<remote
*>& remotes
)
975 // NB: the first failure "wins"
978 for (unsigned i
= 0; i
< remotes
.size() && !pending_interrupts
; ++i
)
980 remote
*r
= remotes
[i
];
981 r
->s
->verbose
= r
->s
->perpass_verbose
[4];
982 if (r
->s
->use_remote_prefix
)
983 r
->prefix
= lex_cast(i
) + ": ";
989 for (unsigned i
= 0; i
< remotes
.size() && !pending_interrupts
; ++i
)
991 rc
= remotes
[i
]->start();
996 // mask signals while we're preparing to poll
998 stap_sigmasker masked
;
1000 // polling loop for remotes that have fds to watch
1004 for (unsigned i
= 0; i
< remotes
.size(); ++i
)
1005 remotes
[i
]->prepare_poll (fds
);
1009 rc
= ppoll (&fds
[0], fds
.size(), NULL
, &masked
.old
);
1010 if (rc
< 0 && errno
!= EINTR
)
1013 for (unsigned i
= 0; i
< remotes
.size(); ++i
)
1014 remotes
[i
]->handle_poll (fds
);
1018 for (unsigned i
= 0; i
< remotes
.size(); ++i
)
1020 rc
= remotes
[i
]->finish();
1029 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
This page took 0.081562 seconds and 5 git commands to generate.