tapsets.cxx

   1 // tapset resolution
   2 // Copyright (C) 2005-2020 Red Hat Inc.
   3 // Copyright (C) 2005-2007 Intel Corporation.
   4 // Copyright (C) 2008 James.Bottomley@HansenPartnership.com
   5 //
   6 // This file is part of systemtap, and is free software.  You can
   7 // redistribute it and/or modify it under the terms of the GNU General
   8 // Public License (GPL); either version 2, or (at your option) any
   9 // later version.
  10
  11 #include "config.h"
  12 #include "staptree.h"
  13 #include "elaborate.h"
  14 #include "tapsets.h"
  15 #include "task_finder.h"
  16 #include "tapset-dynprobe.h"
  17 #include "translate.h"
  18 #include "session.h"
  19 #include "util.h"
  20 #include "buildrun.h"
  21 #include "dwarf_wrappers.h"
  22 #include "hash.h"
  23 #include "dwflpp.h"
  24 #include "setupdwfl.h"
  25 #include "loc2stap.h"
  26 #include "analysis.h"
  27 #include <gelf.h>
  28
  29 #include "sdt_types.h"
  30 #include "stringtable.h"
  31
  32 #include <cstdlib>
  33 #include <algorithm>
  34 #include <deque>
  35 #include <iostream>
  36 #include <fstream>
  37 #include <map>
  38 #include <set>
  39 #include <sstream>
  40 #include <stdexcept>
  41 #include <vector>
  42 #include <stack>
  43 #include <cstdarg>
  44 #include <cassert>
  45 #include <iomanip>
  46 #include <cerrno>
  47
  48 extern "C" {
  49 #include <fcntl.h>
  50 #include <elfutils/libdwfl.h>
  51 #include <elfutils/libdw.h>
  52 #include <dwarf.h>
  53 #include <elf.h>
  54 #include <obstack.h>
  55 #include <glob.h>
  56 #include <fnmatch.h>
  57 #include <stdio.h>
  58 #include <sys/types.h>
  59 #include <sys/stat.h>
  60 #include <math.h>
  61 #include <regex.h>
  62 #include <unistd.h>
  63
  64 #define __STDC_FORMAT_MACROS
  65 #include <inttypes.h>
  66 }
  67
  68 using namespace std;
  69 using namespace __gnu_cxx;
  70
  71 // for elf.h where PPC64_LOCAL_ENTRY_OFFSET isn't defined
  72 #ifndef PPC64_LOCAL_ENTRY_OFFSET
  73 #define STO_PPC64_LOCAL_BIT    5
  74 #define STO_PPC64_LOCAL_MASK   (7 << STO_PPC64_LOCAL_BIT)
  75 #define PPC64_LOCAL_ENTRY_OFFSET(other)                                 \
  76  (((1 << (((other) & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT)) >> 2) << 2)
  77 #endif
  78 // for elf.h where EF_PPC64_ABI isn't defined
  79 #ifndef EF_PPC64_ABI
  80 #define EF_PPC64_ABI 3
  81 #endif
  82
  83 // ------------------------------------------------------------------------
  84
  85 string
  86 common_probe_init (derived_probe* p)
  87 {
  88   assert(p->session_index != (unsigned)-1);
  89   return "(&stap_probes[" + lex_cast(p->session_index) + "])";
  90 }
  91
  92
  93 void
  94 common_probe_entryfn_prologue (systemtap_session& s,
  95                                string statestr, string statestr2, string probe,
  96                                string probe_type, bool overload_processing,
  97                                void (*declaration_callback)(systemtap_session& s, void *data),
  98                                void (*pre_context_callback)(systemtap_session& s, void *data),
  99                                void *callback_data)
 100 {
 101   if (s.runtime_usermode_p())
 102     {
 103       // If session_state() is NULL, then we haven't even initialized shm yet,
 104       // and there's *nothing* for the probe to do.  (even alibi is in shm)
 105       // So failure skips this whole block through the end of the epilogue.
 106       s.op->newline() << "if (likely(session_state())) {";
 107       s.op->indent(1);
 108     }
 109
 110   s.op->newline() << "#ifdef STP_ALIBI";
 111   s.op->newline() << "atomic_inc(probe_alibi(" << probe << "->index));";
 112   s.op->newline() << "#else";
 113
 114   if (s.runtime_usermode_p())
 115     s.op->newline() << "int _stp_saved_errno = errno;";
 116
 117   s.op->newline() << "struct context* __restrict__ c = NULL;";
 118   s.op->newline() << "#if !INTERRUPTIBLE";
 119   s.op->newline() << "unsigned long flags;";
 120   s.op->newline() << "#endif";
 121
 122   s.op->newline() << "#ifdef STP_TIMING";
 123   s.op->newline() << "Stat stat = probe_timing(" << probe << "->index);";
 124   s.op->newline() << "#endif";
 125   if (declaration_callback)
 126     declaration_callback(s, callback_data);
 127   if (overload_processing && !s.runtime_usermode_p())
 128     s.op->newline() << "#if defined(STP_TIMING) || defined(STP_OVERLOAD)";
 129   else
 130     s.op->newline() << "#ifdef STP_TIMING";
 131
 132   if (! s.runtime_usermode_p())
 133     {
 134       s.op->newline() << "#ifdef STP_TIMING_NSECS";
 135       s.op->newline() << "s64 cycles_atstart = ktime_get_ns ();";
 136       s.op->newline() << "#else";
 137       s.op->newline() << "cycles_t cycles_atstart = get_cycles ();";
 138       s.op->newline() << "#endif";
 139     }
 140   else
 141     {
 142     s.op->newline() << "struct timespec timespec_atstart;";
 143     s.op->newline() << "(void)clock_gettime(CLOCK_MONOTONIC, &timespec_atstart);";
 144     }
 145   s.op->newline() << "#endif";
 146
 147   s.op->newline() << "#if !INTERRUPTIBLE";
 148   if (pre_context_callback)
 149     pre_context_callback(s, callback_data);
 150   s.op->newline() << "local_irq_save (flags);";
 151   s.op->newline() << "#endif";
 152
 153   if (! s.runtime_usermode_p())
 154     {
 155       // Check for enough free enough stack space
 156       s.op->newline() << "if (unlikely ((((unsigned long) (& c)) & (THREAD_SIZE-1))"; // free space
 157       s.op->newline(1) << "< (MINSTACKSPACE + sizeof (struct thread_info)))) {"; // needed space
 158       // XXX: may need porting to platforms where task_struct is not
 159       // at bottom of kernel stack NB: see also
 160       // CONFIG_DEBUG_STACKOVERFLOW
 161       s.op->newline() << "atomic_inc (skipped_count());";
 162       s.op->newline() << "#ifdef STP_TIMING";
 163       s.op->newline() << "atomic_inc (skipped_count_lowstack());";
 164       s.op->newline() << "#endif";
 165       s.op->newline() << "goto probe_epilogue;";
 166       s.op->newline(-1) << "}";
 167     }
 168
 169   s.op->newline() << "{";
 170   s.op->newline(1) << "unsigned sess_state = atomic_read (session_state());";
 171   s.op->newline() << "#ifdef DEBUG_PROBES";
 172   s.op->newline() << "_stp_dbug(__FUNCTION__, __LINE__, \"session state: %d, "
 173     "expecting " << statestr << " (%d)"
 174     << (statestr2.empty() ? "" : string(" or ") + statestr2 + " (%d)")
 175     << "\", sess_state, " << statestr
 176     << (statestr2.empty() ? "" : string(", ") + statestr2)  << ");";
 177   s.op->newline() << "#endif";
 178   s.op->newline() << "if (sess_state != " << statestr
 179     << (statestr2.empty() ? "" : string(" && sess_state != ") + statestr2)
 180     << ")";
 181   s.op->newline() << "goto probe_epilogue;";
 182   s.op->newline(-1) << "}";
 183
 184   if (pre_context_callback)
 185     {
 186       s.op->newline() << "#if INTERRUPTIBLE";
 187       pre_context_callback(s, callback_data);
 188       s.op->newline() << "#endif";
 189     }
 190   s.op->newline() << "c = _stp_runtime_entryfn_get_context();";
 191   s.op->newline() << "if (!c) {";
 192   s.op->newline(1) << "#if !INTERRUPTIBLE";
 193   s.op->newline() << "atomic_inc (skipped_count());";
 194   s.op->newline() << "#endif";
 195   s.op->newline() << "#ifdef STP_TIMING";
 196   s.op->newline() << "atomic_inc (skipped_count_reentrant());";
 197   s.op->newline() << "#endif";
 198   s.op->newline() << "goto probe_epilogue;";
 199   s.op->newline(-1) << "}";
 200
 201   s.op->newline();
 202   s.op->newline() << "c->aborted = 0;";
 203   s.op->newline() << "c->locked = 0;";
 204   s.op->newline() << "c->last_stmt = 0;";
 205   s.op->newline() << "c->last_error = 0;";
 206   s.op->newline() << "c->nesting = -1;"; // NB: PR10516 packs locals[] tighter
 207   s.op->newline() << "c->uregs = 0;";
 208   s.op->newline() << "c->kregs = 0;";
 209   s.op->newline() << "c->sregs = 0;";
 210   s.op->newline() << "#if defined __ia64__";
 211   s.op->newline() << "c->unwaddr = 0;";
 212   s.op->newline() << "#endif";
 213   if (s.runtime_usermode_p())
 214     s.op->newline() << "c->probe_index = " << probe << "->index;";
 215   s.op->newline() << "c->probe_point = " << probe << "->pp;";
 216   s.op->newline() << "#ifdef STP_NEED_PROBE_NAME";
 217   s.op->newline() << "c->probe_name = " << probe << "->pn;";
 218   s.op->newline() << "#endif";
 219   s.op->newline() << "c->probe_type = " << probe_type << ";";
 220   // reset Individual Probe State union
 221   s.op->newline() << "memset(&c->ips, 0, sizeof(c->ips));";
 222   s.op->newline() << "c->user_mode_p = 0; c->full_uregs_p = 0; ";
 223   s.op->newline() << "#ifdef STAP_NEED_REGPARM"; // i386 or x86_64 register.stp
 224   s.op->newline() << "c->regparm = 0;";
 225   s.op->newline() << "#endif";
 226
 227   if(!s.suppress_time_limits){
 228     s.op->newline() << "#if INTERRUPTIBLE";
 229     s.op->newline() << "c->actionremaining = MAXACTION_INTERRUPTIBLE;";
 230     s.op->newline() << "#else";
 231     s.op->newline() << "c->actionremaining = MAXACTION;";
 232     s.op->newline() << "#endif";
 233   }
 234   // NB: The following would actually be incorrect.
 235   // That's because cycles_sum/cycles_base values are supposed to survive
 236   // between consecutive probes.  Periodically (STP_OVERLOAD_INTERVAL
 237   // cycles), the values will be reset.
 238   /*
 239   s.op->newline() << "#ifdef STP_OVERLOAD";
 240   s.op->newline() << "c->cycles_sum = 0;";
 241   s.op->newline() << "c->cycles_base = 0;";
 242   s.op->newline() << "#endif";
 243   */
 244
 245   s.op->newline() << "#if defined(STP_NEED_UNWIND_DATA)";
 246   s.op->newline() << "c->uwcache_user.state = uwcache_uninitialized;";
 247   s.op->newline() << "c->uwcache_kernel.state = uwcache_uninitialized;";
 248   s.op->newline() << "#endif";
 249
 250   s.op->newline() << "#if defined(STAP_NEED_CONTEXT_RETURNVAL)";
 251   s.op->newline() << "c->returnval_override_p = 0;";
 252   s.op->newline() << "c->returnval_override = 0;"; // unnecessary
 253   s.op->newline() << "#endif";
 254 }
 255
 256
 257 void
 258 common_probe_entryfn_epilogue (systemtap_session& s,
 259                                bool overload_processing,
 260                                bool schedule_work_safe)
 261 {
 262   if (!s.runtime_usermode_p()
 263       && schedule_work_safe)
 264     {
 265       // If a refresh is required, we can safely schedule_work() here
 266       s.op->newline( 0) <<  "if (atomic_cmpxchg(&need_module_refresh, 1, 0) == 1)";
 267       s.op->newline(+1) <<    "schedule_work(&module_refresher_work);";
 268       s.op->indent(-1);
 269     }
 270
 271   if (overload_processing && !s.runtime_usermode_p())
 272     s.op->newline() << "#if defined(STP_TIMING) || defined(STP_OVERLOAD)";
 273   else
 274     s.op->newline() << "#ifdef STP_TIMING";
 275   s.op->newline() << "{";
 276   s.op->indent(1);
 277   if (! s.runtime_usermode_p())
 278     {
 279       s.op->newline() << "#ifdef STP_TIMING_NSECS";
 280
 281       s.op->newline() << "s64 cycles_atend = ktime_get_ns ();";
 282       // NB: we truncate nsecs to 64 bits.  Perhaps it should be
 283       // fewer, if the hardware counter rolls over really quickly.  We
 284       // handle 64-bit wraparound here.
 285       s.op->newline() << "s64 cycles_elapsed = ((s64)cycles_atend > (s64)cycles_atstart)";
 286       s.op->newline(1) << "? ((s64)cycles_atend - (s64)cycles_atstart)";
 287       s.op->newline() << ": (~(s64)0) - (s64)cycles_atstart + (s64)cycles_atend + 1;";
 288
 289       s.op->newline(-1) << "#else";
 290
 291       s.op->newline() << "cycles_t cycles_atend = get_cycles ();";
 292       // NB: we truncate cycles counts to 32 bits.  Perhaps it should be
 293       // fewer, if the hardware counter rolls over really quickly.  We
 294       // handle 32-bit wraparound here.
 295       s.op->newline() << "int32_t cycles_elapsed = ((int32_t)cycles_atend > (int32_t)cycles_atstart)";
 296       s.op->newline(1) << "? ((int32_t)cycles_atend - (int32_t)cycles_atstart)";
 297       s.op->newline() << ": (~(int32_t)0) - (int32_t)cycles_atstart + (int32_t)cycles_atend + 1;";
 298
 299       s.op->newline() << "#endif";
 300       s.op->indent(-1);
 301     }
 302   else
 303     {
 304       s.op->newline() << "struct timespec timespec_atend, timespec_elapsed;";
 305       s.op->newline() << "long cycles_elapsed;";
 306       s.op->newline() << "(void)clock_gettime(CLOCK_MONOTONIC, &timespec_atend);";
 307       s.op->newline() << "_stp_timespec_sub(&timespec_atend, &timespec_atstart, &timespec_elapsed);";
 308       // 'cycles_elapsed' is really elapsed nanoseconds
 309       s.op->newline() << "cycles_elapsed = (timespec_elapsed.tv_sec * NSEC_PER_SEC) + timespec_elapsed.tv_nsec;";
 310     }
 311
 312   s.op->newline() << "#ifdef STP_TIMING";
 313   // STP_TIMING requires min, max, avg (and thus count and sum), but not variance.
 314   s.op->newline() << "if (likely (stat)) {";
 315   s.op->newline(1) << "preempt_disable();";
 316   s.op->newline() << "_stp_stat_add(stat, cycles_elapsed, 1, 1, 1, 1, 0);";
 317   s.op->newline() << "preempt_enable_no_resched();";
 318   s.op->newline(-1) << "}";
 319   s.op->newline() << "#endif";
 320
 321   if (overload_processing && !s.runtime_usermode_p())
 322     {
 323       s.op->newline() << "#ifdef STP_OVERLOAD";
 324       s.op->newline() << "{";
 325       // If the cycle count has wrapped (cycles_atend > cycles_base),
 326       // let's go ahead and pretend the interval has been reached.
 327       // This should reset cycles_base and cycles_sum.
 328       s.op->newline(1) << "cycles_t interval = (cycles_atend > c->cycles_base)";
 329       s.op->newline(1) << "? (cycles_atend - c->cycles_base)";
 330       s.op->newline() << ": (STP_OVERLOAD_INTERVAL + 1);";
 331       s.op->newline(-1) << "c->cycles_sum += cycles_elapsed;";
 332
 333       // If we've spent more than STP_OVERLOAD_THRESHOLD cycles in a
 334       // probe during the last STP_OVERLOAD_INTERVAL cycles, the probe
 335       // has overloaded the system and we need to quit.
 336       // NB: this is not suppressible via --suppress-runtime-errors,
 337       // because this is a system safety metric that we cannot trust
 338       // unprivileged users to override.
 339       s.op->newline() << "if (interval > STP_OVERLOAD_INTERVAL) {";
 340       s.op->newline(1) << "if (c->cycles_sum > STP_OVERLOAD_THRESHOLD) {";
 341       s.op->newline(1) << "_stp_error (\"probe overhead (%lld cycles) exceeded threshold (%lld cycles) in last"
 342                           " %lld cycles\", (long long) c->cycles_sum, STP_OVERLOAD_THRESHOLD, STP_OVERLOAD_INTERVAL);";
 343       s.op->newline() << "atomic_set (session_state(), STAP_SESSION_ERROR);";
 344       s.op->newline() << "atomic_inc (error_count());";
 345       s.op->newline(-1) << "}";
 346
 347       s.op->newline() << "c->cycles_base = cycles_atend;";
 348       s.op->newline() << "c->cycles_sum = 0;";
 349       s.op->newline(-1) << "}";
 350       s.op->newline(-1) << "}";
 351       s.op->newline() << "#endif";
 352     }
 353
 354   s.op->newline(-1) << "}";
 355   s.op->newline() << "#endif";
 356
 357   s.op->newline() << "c->probe_point = 0;"; // vacated
 358   s.op->newline() << "#ifdef STP_NEED_PROBE_NAME";
 359   s.op->newline() << "c->probe_name = 0;";
 360   s.op->newline() << "#endif";
 361   s.op->newline() << "c->probe_type = 0;";
 362
 363
 364   s.op->newline() << "if (unlikely (c->last_error)) {";
 365   s.op->indent(1);
 366   if (s.suppress_handler_errors) // PR 13306
 367     {
 368       s.op->newline() << "atomic_inc (error_count());";
 369     }
 370   else
 371     {
 372       s.op->newline() << "if (c->last_stmt != NULL)";
 373       s.op->newline(1) << "_stp_softerror (\"%s near %s\", c->last_error, c->last_stmt);";
 374       s.op->newline(-1) << "else";
 375       s.op->newline(1) << "_stp_softerror (\"%s\", c->last_error);";
 376       s.op->indent(-1);
 377       s.op->newline() << "atomic_inc (error_count());";
 378       s.op->newline() << "if (atomic_read (error_count()) > MAXERRORS) {";
 379       s.op->newline(1) << "atomic_set (session_state(), STAP_SESSION_ERROR);";
 380       s.op->newline() << "_stp_exit ();";
 381       s.op->newline(-1) << "}";
 382     }
 383
 384   s.op->newline(-1) << "}";
 385
 386
 387   s.op->newline(-1) << "probe_epilogue:"; // context is free
 388   s.op->indent(1);
 389
 390   if (! s.suppress_handler_errors) // PR 13306
 391     {
 392       // Check for excessive skip counts.
 393       s.op->newline() << "if (unlikely (atomic_read (skipped_count()) > MAXSKIPPED)) {";
 394       s.op->newline(1) << "if (unlikely (pseudo_atomic_cmpxchg(session_state(), STAP_SESSION_RUNNING, STAP_SESSION_ERROR) == STAP_SESSION_RUNNING))";
 395       s.op->newline() << "_stp_error (\"Skipped too many probes, check MAXSKIPPED or try again with stap -t for more details.\");";
 396       s.op->newline(-1) << "}";
 397     }
 398
 399   // We mustn't release the context until after all _stp_error(), so dyninst
 400   // mode can still access the log buffers stored therein.
 401   s.op->newline() << "_stp_runtime_entryfn_put_context(c);";
 402
 403   s.op->newline() << "#if !INTERRUPTIBLE";
 404   s.op->newline() << "local_irq_restore (flags);";
 405   s.op->newline() << "#endif";
 406
 407   if (s.runtime_usermode_p())
 408     {
 409       s.op->newline() << "errno = _stp_saved_errno;";
 410     }
 411
 412   s.op->newline() << "#endif // STP_ALIBI";
 413
 414   if (s.runtime_usermode_p())
 415     s.op->newline(-1) << "}";
 416 }
 417
 418
 419 // ------------------------------------------------------------------------
 420
 421 // ------------------------------------------------------------------------
 422 // kprobes (both dwarf based and non-dwarf based) probes
 423 // ------------------------------------------------------------------------
 424
 425
 426 struct generic_kprobe_derived_probe: public derived_probe
 427 {
 428   generic_kprobe_derived_probe(probe *base,
 429                                probe_point *location,
 430                                interned_string module,
 431                                interned_string section,
 432                                Dwarf_Addr addr,
 433                                bool has_return,
 434                                bool has_maxactive = false,
 435                                int64_t maxactive_val = 0,
 436                                interned_string symbol_name = "",
 437                                Dwarf_Addr offset = 0);
 438
 439   virtual void join_group(systemtap_session&) = 0;
 440
 441   interned_string module;
 442   interned_string section;
 443   Dwarf_Addr addr;
 444   bool has_return;
 445   bool has_maxactive;
 446   int64_t maxactive_val;
 447
 448   // PR18889: For modules, we have to probe using "symbol+offset"
 449   // instead of using an address, otherwise we can't probe the init
 450   // section. 'symbol_name' is the closest known symbol to 'addr' and
 451   // 'offset' is the offset from the symbol.
 452   interned_string symbol_name;
 453   Dwarf_Addr offset;
 454
 455   unsigned saved_longs, saved_strings;
 456   generic_kprobe_derived_probe* entry_handler;
 457
 458   std::string args_for_bpf() const;
 459   interned_string sym_name_for_bpf;
 460 };
 461
 462 generic_kprobe_derived_probe::generic_kprobe_derived_probe(probe *base,
 463                                                            probe_point *location,
 464                                                            interned_string module,
 465                                                            interned_string section,
 466                                                            Dwarf_Addr addr,
 467                                                            bool has_return,
 468                                                            bool has_maxactive,
 469                                                            int64_t maxactive_val,
 470                                                            interned_string symbol_name,
 471                                                            Dwarf_Addr offset) :
 472   derived_probe (base, location, true /* .components soon rewritten */ ),
 473   module(module), section(section), addr(addr), has_return(has_return),
 474   has_maxactive(has_maxactive), maxactive_val(maxactive_val),
 475   symbol_name(symbol_name), offset(offset),
 476   saved_longs(0), saved_strings(0), entry_handler(0)
 477 {
 478 }
 479
 480 // ------------------------------------------------------------------------
  481 //  Dwarf derived probes.  "We apologize for the inconvenience."
 482 // ------------------------------------------------------------------------
 483
 484 static const string TOK_KERNEL("kernel");
 485 static const string TOK_MODULE("module");
 486 static const string TOK_FUNCTION("function");
 487 static const string TOK_INLINE("inline");
 488 static const string TOK_CALL("call");
 489 static const string TOK_EXPORTED("exported");
 490 static const string TOK_RETURN("return");
 491 static const string TOK_MAXACTIVE("maxactive");
 492 static const string TOK_STATEMENT("statement");
 493 static const string TOK_ABSOLUTE("absolute");
 494 static const string TOK_PROCESS("process");
 495 static const string TOK_PROVIDER("provider");
 496 static const string TOK_MARK("mark");
 497 static const string TOK_TRACE("trace");
 498 static const string TOK_LABEL("label");
 499 static const string TOK_LIBRARY("library");
 500 static const string TOK_PLT("plt");
 501 static const string TOK_METHOD("method");
 502 static const string TOK_CLASS("class");;
 503 static const string TOK_CALLEE("callee");;
 504 static const string TOK_CALLEES("callees");;
 505 static const string TOK_NEAREST("nearest");;
 506
 507
 508
 509 struct dwarf_query; // forward decl
 510
 511 static int query_cu (Dwarf_Die * cudie, dwarf_query *q);
 512 static void query_addr(Dwarf_Addr addr, dwarf_query *q);
 513 static void query_plt_statement(dwarf_query *q);
 514
 515 struct
 516 symbol_table
 517 {
 518   module_info *mod_info;        // associated module
 519   unordered_multimap<interned_string, func_info*> map_by_name;
 520   multimap<Dwarf_Addr, func_info*> map_by_addr;
 521   unordered_map<interned_string, Dwarf_Addr> globals;
 522   unordered_map<interned_string, Dwarf_Addr> locals;
 523   // Section describing function descriptors.
 524   // Set to SHN_UNDEF if there is no such section.
 525   GElf_Word opd_section;
 526   void add_symbol(interned_string name, bool weak, bool descriptor,
 527                   Dwarf_Addr addr, Dwarf_Addr entrypc);
 528   enum info_status get_from_elf();
 529   void prepare_section_rejection(Dwfl_Module *mod);
 530   bool reject_section(GElf_Word section);
 531   void purge_syscall_stubs();
 532   set <func_info*> lookup_symbol(interned_string name);
 533   set <Dwarf_Addr> lookup_symbol_address(interned_string name);
 534   func_info *get_func_containing_address(Dwarf_Addr addr);
 535   func_info *get_first_func();
 536
 537   symbol_table(module_info *mi) : mod_info(mi), opd_section(SHN_UNDEF) {}
 538   ~symbol_table();
 539 };
 540
 541 static bool null_die(Dwarf_Die *die)
 542 {
 543   static Dwarf_Die null;
 544   return (!die || !memcmp(die, &null, sizeof(null)));
 545 }
 546
 547
 548 enum
 549 function_spec_type
 550   {
 551     function_alone,
 552     function_and_file,
 553     function_file_and_line
 554   };
 555
 556
 557 struct dwarf_builder;
 558 struct dwarf_var_expanding_visitor;
 559
 560
 561 // XXX: This class is a candidate for subclassing to separate
 562 // the relocation vs non-relocation variants.  Likewise for
 563 // kprobe vs kretprobe variants.
 564
 565 struct dwarf_derived_probe: public generic_kprobe_derived_probe
 566 {
 567   dwarf_derived_probe (interned_string function,
 568                        interned_string filename,
 569                        int line,
 570                        interned_string module,
 571                        interned_string section,
 572                        Dwarf_Addr dwfl_addr,
 573                        Dwarf_Addr addr,
 574                        dwarf_query & q,
 575                        Dwarf_Die* scope_die,
 576                        interned_string symbol_name = "",
 577                        Dwarf_Addr offset = 0);
 578
 579   interned_string path;
 580   bool has_process;
 581   bool has_library;
 582   // generic_kprobe_derived_probe_group::emit_module_decls uses this to emit sdt kprobe definition
 583   interned_string user_path;
 584   interned_string user_lib;
 585   bool access_vars;
 586
 587   void printsig (std::ostream &o) const;
 588   void printsig_nonest (std::ostream &o) const;
 589   virtual void join_group (systemtap_session& s);
 590   void emit_probe_local_init(systemtap_session& s, translator_output * o);
 591   void getargs(std::list<std::string> &arg_set) const;
 592
 593   void emit_privilege_assertion (translator_output*);
 594   void print_dupe_stamp(ostream& o);
 595
 596   // Pattern registration helpers.
 597   static void register_statement_variants(match_node * root,
 598                                          dwarf_builder * dw,
 599                                          privilege_t privilege);
 600   static void register_function_variants(match_node * root,
 601                                         dwarf_builder * dw,
 602                                         privilege_t privilege);
 603   static void register_function_and_statement_variants(systemtap_session& s,
 604                                                       match_node * root,
 605                                                       dwarf_builder * dw,
 606                                                       privilege_t privilege);
 607   static void register_sdt_variants(systemtap_session& s,
 608                                    match_node * root,
 609                                    dwarf_builder * dw);
 610   static void register_plt_variants(systemtap_session& s,
 611                                    match_node * root,
 612                                    dwarf_builder * dw);
 613   static void register_patterns(systemtap_session& s);
 614
 615 protected:
 616   dwarf_derived_probe(probe *base,
 617                       probe_point *location,
 618                       Dwarf_Addr addr,
 619                       bool has_return):
 620     generic_kprobe_derived_probe(base, location, "", "", addr, has_return),
 621     has_process(0), has_library(0),
 622     access_vars(false)
 623   {}
 624
 625 private:
 626   list<string> args;
 627   void saveargs(dwarf_query& q, Dwarf_Die* scope_die, Dwarf_Addr dwfl_addr);
 628 };
 629
 630
 631 struct uprobe_derived_probe: public dwarf_derived_probe
 632 {
 633   int pid; // 0 => unrestricted
 634
 635   interned_string build_id_val;
 636   GElf_Addr build_id_vaddr;
 637
 638   uprobe_derived_probe (interned_string function,
 639                         interned_string filename,
 640                         int line,
 641                         interned_string module,
 642                         interned_string section,
 643                         Dwarf_Addr dwfl_addr,
 644                         Dwarf_Addr addr,
 645                         dwarf_query & q,
 646                         Dwarf_Die* scope_die);
 647
 648   // alternate constructor for process(PID).statement(ADDR).absolute
 649   uprobe_derived_probe (probe *base,
 650                         probe_point *location,
 651                         int pid,
 652                         Dwarf_Addr addr,
 653                         bool has_return):
 654     dwarf_derived_probe(base, location, addr, has_return), pid(pid)
 655   {}
 656
 657   void join_group (systemtap_session& s);
 658
 659   void emit_privilege_assertion (translator_output*);
 660   void print_dupe_stamp(ostream& o) { print_dupe_stamp_unprivileged_process_owner (o); }
 661   void getargs(std::list<std::string> &arg_set) const;
 662   void saveargs(int nargs);
 663   void emit_perf_read_handler(systemtap_session& s, unsigned i);
 664
 665 private:
 666   list<string> args;
 667 };
 668
 669 struct generic_kprobe_derived_probe_group: public derived_probe_group
 670 {
 671   friend bool sort_for_bpf(systemtap_session& s,
 672                            generic_kprobe_derived_probe_group *ge,
 673                            sort_for_bpf_probe_arg_vector &v);
 674
 675 private:
 676   unordered_multimap<interned_string,generic_kprobe_derived_probe*> probes_by_module;
 677
 678 public:
 679   generic_kprobe_derived_probe_group() {}
 680   void enroll (generic_kprobe_derived_probe* probe);
 681   void emit_module_decls (systemtap_session& s);
 682   void emit_module_init (systemtap_session& s);
 683   void emit_module_refresh (systemtap_session& s);
 684   void emit_module_exit (systemtap_session& s);
 685   bool otf_supported (systemtap_session&) { return true; }
 686
 687   // workqueue handling not safe in kprobes context
 688   bool otf_safe_context (systemtap_session&) { return false; }
 689 };
 690
 691 // Helper struct to thread through the dwfl callbacks.
 692 struct base_query
 693 {
 694   base_query(dwflpp & dw, literal_map_t const & params);
 695   base_query(dwflpp & dw, interned_string module_val);
 696   virtual ~base_query() {}
 697
 698   systemtap_session & sess;
 699   dwflpp & dw;
 700
 701   // Used to keep track of which modules were visited during
 702   // iterate_over_modules()
 703   set<string> visited_modules;
 704
 705   // Parameter extractors.
 706   static bool has_null_param(literal_map_t const & params,
 707                              interned_string k);
 708   static bool get_string_param(literal_map_t const & params,
 709                                interned_string k, interned_string &v);
 710   static bool get_number_param(literal_map_t const & params,
 711                                interned_string k, int64_t & v);
 712   static bool get_number_param(literal_map_t const & params,
 713                                interned_string k, Dwarf_Addr & v);
 714   static void query_library_callback (base_query *me, const char *data);
 715   static void query_plt_callback (base_query *me, const char *link, size_t addr);
 716   virtual void query_library (const char *data) = 0;
 717   virtual void query_plt (const char *link, size_t addr) = 0;
 718
 719
 720   // Extracted parameters.
 721   bool has_kernel;
 722   bool has_module;
 723   bool has_process;
 724   bool has_library;
 725   bool has_plt;
 726   bool has_statement;
 727   interned_string  module_val;   // has_kernel => module_val = "kernel"
 728   interned_string  path;         // executable path if module is a .so
 729   interned_string  plt_val;      // has_plt => plt wildcard
 730   interned_string  build_id_val; // if non-empty, buildid that resulted in resolved path
 731   int64_t pid_val;
 732
 733   virtual void handle_query_module() = 0;
 734 };
 735
 736 base_query::base_query(dwflpp & dw, literal_map_t const & params):
 737   sess(dw.sess), dw(dw),
 738   has_kernel(false), has_module(false), has_process(false),
 739   has_library(false), has_plt(false), has_statement(false),
 740   pid_val(0)
 741 {
 742   has_kernel = has_null_param (params, TOK_KERNEL);
 743   if (has_kernel)
 744     module_val = "kernel";
 745
 746   has_module = get_string_param (params, TOK_MODULE, module_val);
 747   if (has_module)
 748     has_process = false;
 749   else
 750     {
 751       interned_string library_name;
 752       Dwarf_Addr statement_num_val;
 753       has_process =  derived_probe_builder::has_param(params, TOK_PROCESS);
 754       has_library = get_string_param (params, TOK_LIBRARY, library_name);
 755       if ((has_plt = has_null_param (params, TOK_PLT)))
 756         plt_val = "*";
 757       else has_plt = get_string_param (params, TOK_PLT, plt_val);
 758       has_statement = get_number_param(params, TOK_STATEMENT, statement_num_val);
 759
 760       if (has_process)
 761         {
 762           if (get_number_param(params, TOK_PROCESS, pid_val))
 763             {
 764               // check that the pid given corresponds to a running process
 765               string pid_err_msg;
 766               if (!is_valid_pid(pid_val, pid_err_msg))
 767                 throw SEMANTIC_ERROR(pid_err_msg);
 768
 769               string pid_path = string("/proc/") + lex_cast(pid_val) + "/exe";
 770               module_val = sess.sysroot + pid_path;
 771             }
 772           else
 773             {
 774               // reset the pid_val in case anything weird got written into it
 775               pid_val = 0;
 776               get_string_param(params, TOK_PROCESS, module_val);
 777
 778               if (is_build_id(module_val))
 779                 build_id_val = module_val;
 780             }
 781           module_val = find_executable (module_val, sess.sysroot, sess.sysenv);
 782           if (!is_fully_resolved(module_val, "", sess.sysenv))
 783             throw SEMANTIC_ERROR(_F("cannot find executable '%s'",
 784                                     module_val.to_string().c_str()));
 785         }
 786
 787       // Library probe? Let's target that instead if it is fully resolved (such
 788       // as what query_one_library() would have done for us). Otherwise, we
 789       // resort to iterate_over_libraries().
 790       if (has_library)
 791         {
 792           string library = find_executable (library_name, sess.sysroot,
 793                                             sess.sysenv, "LD_LIBRARY_PATH");
 794           if (is_fully_resolved(library, "", sess.sysenv, "LD_LIBRARY_PATH"))
 795             {
 796               path = path_remove_sysroot(sess, module_val);
 797               module_val = library;
 798             }
 799         }
 800     }
 801
 802   assert (has_kernel || has_process || has_module);
 803 }
 804
 805 base_query::base_query(dwflpp & dw, interned_string module_val)
 806   : sess(dw.sess), dw(dw),
 807     has_kernel(false), has_module(false), has_process(false),
 808     has_library(false), has_plt(false), has_statement(false),
 809     module_val(module_val), pid_val(0)
 810 {
 811   // NB: This uses '/' to distinguish between kernel modules and userspace,
 812   // which means that userspace modules won't get any PATH searching.
 813   if (module_val.find('/') == string::npos)
 814     {
 815       has_kernel = (module_val == TOK_KERNEL);
 816       has_module = !has_kernel;
 817       has_process = false;
 818     }
 819   else
 820     {
 821       has_kernel = has_module = false;
 822       has_process = true;
 823     }
 824 }
 825
 826 bool
 827 base_query::has_null_param(literal_map_t const & params,
 828                            interned_string k)
 829 {
 830   return derived_probe_builder::has_null_param(params, k);
 831 }
 832
 833
 834 bool
 835 base_query::get_string_param(literal_map_t const & params,
 836                              interned_string k, interned_string & v)
 837 {
 838   return derived_probe_builder::get_param (params, k, v);
 839 }
 840
 841
 842 bool
 843 base_query::get_number_param(literal_map_t const & params,
 844                              interned_string k, int64_t & v)
 845 {
 846   return derived_probe_builder::get_param (params, k, v);
 847 }
 848
 849
 850 bool
 851 base_query::get_number_param(literal_map_t const & params,
 852                              interned_string k, Dwarf_Addr & v)
 853 {
 854   int64_t value = 0;
 855   bool present = derived_probe_builder::get_param (params, k, value);
 856   if (present)
 857     v = (Dwarf_Addr) value;
 858   return present;
 859 }
 860
// Query state for resolving a function/statement/label/callee style probe
// point against one module.  Extends base_query with the parameters
// extracted by the constructor, the parsed function specification, and the
// bookkeeping used while derived probes are appended to 'results'.
//
// NB: member declaration order is also the initialization order used by the
// constructor's initializer list; keep the two in sync.
struct dwarf_query : public base_query
{
  dwarf_query(probe * base_probe,
              probe_point * base_loc,
              dwflpp & dw,
              literal_map_t const & params,
              vector<derived_probe *> & results,
              interned_string user_path,
              interned_string user_lib);

  // Caller-owned output vector; add_probe_point() pushes new probes here.
  vector<derived_probe *> & results;
  set<interned_string> inlined_non_returnable; // function names
  // Probe and probe point this query was created for; base_probe->tok is
  // used to attribute errors and warnings.
  probe * base_probe;
  probe_point * base_loc;
  // Copied from the constructor arguments of the same names.
  interned_string user_path;
  interned_string user_lib;

  set<string> visited_libraries;
  bool resolved_library;  // starts out false

  virtual void handle_query_module();
  void query_module_dwarf();
  void query_module_symtab();
  void query_library (const char *data);
  void query_plt (const char *entry, size_t addr);

  void add_probe_point(interned_string funcname,
                       interned_string filename,
                       int line,
                       Dwarf_Die *scope_die,
                       Dwarf_Addr addr);

  void mount_well_formed_probe_point();
  void unmount_well_formed_probe_point();
  // NOTE(review): presumably saved by mount_well_formed_probe_point() and
  // restored by unmount_well_formed_probe_point() -- confirm at definition.
  stack<pair<probe_point*, probe*> > previous_bases;

  void replace_probe_point_component_arg(interned_string functor,
                                         interned_string new_functor,
                                         int64_t new_arg,
                                         bool hex = false);
  void replace_probe_point_component_arg(interned_string functor,
                                         int64_t new_arg,
                                         bool hex = false);
  void replace_probe_point_component_arg(interned_string functor,
                                         interned_string new_functor,
                                         interned_string new_arg);
  void replace_probe_point_component_arg(interned_string functor,
                                         interned_string new_arg);
  void remove_probe_point_component(interned_string functor);

  // Track addresses we've already seen in a given module
  // (cleared by handle_query_module() for each new module)
  set<Dwarf_Addr> alias_dupes;

  // Track inlines we've already seen as well
  // NB: this can't be compared just by entrypc, as inlines can overlap
  set<inline_instance_info> inline_dupes;

  // Used in .callee[s] probes, when calling iterate_over_callees() (which
  // provides the actual stack). Retains the addrs of the callers unwind addr
  // where the callee is found. Specifies multiple callers. E.g. when a callee
  // at depth 2 is found, callers[1] has the addr of the caller, and callers[0]
  // has the addr of the caller's caller.
  stack<Dwarf_Addr> *callers;  // NULL after construction

  // Which forms of function()/statement() were given (string spec vs.
  // numeric address), and their values.
  bool has_function_str;
  bool has_statement_str;
  bool has_function_num;
  bool has_statement_num;
  interned_string statement_str_val;
  interned_string function_str_val;
  Dwarf_Addr statement_num_val;
  Dwarf_Addr function_num_val;

  // Presence of the corresponding value-less probe point components.
  bool has_call;
  bool has_exported;
  bool has_inline;
  bool has_return;

  bool has_nearest;

  bool has_maxactive;
  int64_t maxactive_val;

  bool has_label;
  interned_string label_val;

  bool has_callee;
  interned_string callee_val;

  // A bare .callees is recorded as has_callees_num with callees_num_val == 1.
  bool has_callees_num;
  int64_t callees_num_val;

  bool has_absolute;

  bool has_mark;  // the constructor always sets this to false

  // Results of parse_function_spec(): scope names, function and file
  // patterns, and the kind and values of the line specification.
  void parse_function_spec(const string & spec);
  function_spec_type spec_type;
  vector<string> scopes;
  interned_string function;
  interned_string file;
  lineno_t lineno_type;
  vector<int> linenos;

  // Holds the prologue end of the current function
  Dwarf_Addr prologue_end;

  set<string> filtered_srcfiles;

  // Map official entrypc -> func_info object
  inline_instance_map_t filtered_inlines;
  func_info_map_t filtered_functions;

  // Helper when we want to iterate over both
  base_func_info_map_t filtered_all();

  void query_module_functions ();

  interned_string final_function_name(interned_string final_func,
                                      interned_string final_file,
                                      int final_line);

  bool is_fully_specified_function();
};
 985
 986 uprobe_derived_probe::uprobe_derived_probe (interned_string function,
 987                         interned_string filename,
 988                         int line,
 989                         interned_string module,
 990                         interned_string section,
 991                         Dwarf_Addr dwfl_addr,
 992                         Dwarf_Addr addr,
 993                         dwarf_query & q,
 994                         Dwarf_Die* scope_die):
 995     dwarf_derived_probe(function, filename, line, module, section,
 996                         dwfl_addr, addr, q, scope_die),
 997     pid(q.pid_val), build_id_vaddr(0)
 998   {
 999     // Process parameter is given as a build-id
1000     if (q.build_id_val.size() > 0)
1001       {
1002         const unsigned char *bits;
1003         int len;
1004         GElf_Addr vaddr;
1005
1006         len = dwfl_module_build_id(q.dw.module, &bits, &vaddr);
1007         if (len > 0)
1008           {
1009             Dwarf_Addr reloc_vaddr = vaddr;
1010
1011             len = dwfl_module_relocate_address(q.dw.module, &reloc_vaddr);
1012             DWFL_ASSERT ("dwfl_module_relocate_address reloc_vaddr", len >= 0);
1013
1014             build_id_vaddr = reloc_vaddr;
1015             build_id_val = q.build_id_val;
1016           }
1017       }
1018   }
1019
1020 static void delete_session_module_cache (systemtap_session& s); // forward decl
1021
1022 struct dwarf_builder: public derived_probe_builder
1023 {
1024   map <string,dwflpp*> kern_dw; /* NB: key string could be a wildcard */
1025   map <string,dwflpp*> user_dw;
1026   interned_string user_path;
1027   interned_string user_lib;
1028
1029   // Holds modules to suggest functions from. NB: aggregates over
1030   // recursive calls to build() when deriving globby probes.
1031   set <string> modules_seen;
1032
1033   dwarf_builder() {}
1034
1035   dwflpp *get_kern_dw(systemtap_session& sess, const string& module, bool debuginfo_needed = true)
1036   {
1037     if (kern_dw[module] == 0)
1038       kern_dw[module] = new dwflpp(sess, module, true, debuginfo_needed); // might throw
1039     return kern_dw[module];
1040   }
1041
1042   dwflpp *get_user_dw(systemtap_session& sess, const string& module)
1043   {
1044     if (user_dw[module] == 0)
1045       user_dw[module] = new dwflpp(sess, module, false); // might throw
1046     return user_dw[module];
1047   }
1048
1049   /* NB: not virtual, so can be called from dtor too: */
1050   void dwarf_build_no_more (bool)
1051   {
1052     delete_map(kern_dw);
1053     delete_map(user_dw);
1054   }
1055
1056   void build_no_more (systemtap_session &s)
1057   {
1058     dwarf_build_no_more (s.verbose > 3);
1059     delete_session_module_cache (s);
1060   }
1061
1062   ~dwarf_builder()
1063   {
1064     dwarf_build_no_more (false);
1065   }
1066
1067   virtual void build(systemtap_session & sess,
1068                      probe * base,
1069                      probe_point * location,
1070                      literal_map_t const & parameters,
1071                      vector<derived_probe *> & finished_results);
1072
1073   virtual string name() { return "DWARF builder"; }
1074 };
1075
1076
1077 dwarf_query::dwarf_query(probe * base_probe,
1078                          probe_point * base_loc,
1079                          dwflpp & dw,
1080                          literal_map_t const & params,
1081                          vector<derived_probe *> & results,
1082                          interned_string user_path,
1083                          interned_string user_lib)
1084   : base_query(dw, params), results(results), base_probe(base_probe),
1085     base_loc(base_loc), user_path(user_path), user_lib(user_lib),
1086     resolved_library(false), callers(NULL),
1087     has_function_str(false), has_statement_str(false),
1088     has_function_num(false), has_statement_num(false),
1089     statement_num_val(0), function_num_val(0),
1090     has_call(false), has_exported(false), has_inline(false),
1091     has_return(false), has_nearest(false),
1092     has_maxactive(false), maxactive_val(0),
1093     has_label(false), has_callee(false),
1094     has_callees_num(false), callees_num_val(0),
1095     has_absolute(false), has_mark(false),
1096     spec_type(function_alone),
1097     lineno_type(ABSOLUTE),
1098     prologue_end(0)
1099 {
1100   // Reduce the query to more reasonable semantic values (booleans,
1101   // extracted strings, numbers, etc).
1102   has_function_str = get_string_param(params, TOK_FUNCTION, function_str_val);
1103   has_function_num = get_number_param(params, TOK_FUNCTION, function_num_val);
1104
1105   has_statement_str = get_string_param(params, TOK_STATEMENT, statement_str_val);
1106   has_statement_num = get_number_param(params, TOK_STATEMENT, statement_num_val);
1107
1108   has_label = get_string_param(params, TOK_LABEL, label_val);
1109   has_callee = get_string_param(params, TOK_CALLEE, callee_val);
1110   if (has_null_param(params, TOK_CALLEES))
1111     { // .callees ==> .callees(1) (also equivalent to .callee("*"))
1112       has_callees_num = true;
1113       callees_num_val = 1;
1114     }
1115   else
1116     {
1117       has_callees_num = get_number_param(params, TOK_CALLEES, callees_num_val);
1118       if (has_callees_num && callees_num_val < 1)
1119         throw SEMANTIC_ERROR(_(".callees(N) only acceptable for N >= 1"),
1120                              base_probe->tok);
1121     }
1122
1123   has_call = has_null_param(params, TOK_CALL);
1124   has_exported = has_null_param(params, TOK_EXPORTED);
1125   has_inline = has_null_param(params, TOK_INLINE);
1126   has_return = has_null_param(params, TOK_RETURN);
1127   has_nearest = has_null_param(params, TOK_NEAREST);
1128   has_maxactive = get_number_param(params, TOK_MAXACTIVE, maxactive_val);
1129   has_absolute = has_null_param(params, TOK_ABSOLUTE);
1130   has_mark = false;
1131
1132   if (has_function_str)
1133     parse_function_spec(function_str_val);
1134   else if (has_statement_str)
1135     parse_function_spec(statement_str_val);
1136 }
1137
1138
1139 void
1140 dwarf_query::query_module_dwarf()
1141 {
1142   if (has_function_num || has_statement_num)
1143     {
1144       // If we have module("foo").function(0xbeef) or
1145       // module("foo").statement(0xbeef), the address is relative
1146       // to the start of the module, so we seek the function
1147       // number plus the module's bias.
1148       Dwarf_Addr addr = has_function_num ?
1149         function_num_val : statement_num_val;
1150
1151       // These are raw addresses, we need to know what the elf_bias
1152       // is to feed it to libdwfl based functions.
1153       Dwarf_Addr elf_bias;
1154       Elf *elf = dwfl_module_getelf (dw.module, &elf_bias);
1155       assert(elf);
1156       addr += elf_bias;
1157       query_addr(addr, this);
1158     }
1159   else
1160     {
1161       // Otherwise if we have a function("foo") or statement("foo")
1162       // specifier, we have to scan over all the CUs looking for
1163       // the function(s) in question
1164       assert(has_function_str || has_statement_str);
1165
1166       // For simple cases, no wildcard and no source:line, we can do a very
1167       // quick function lookup in a module-wide cache.
1168       if (spec_type == function_alone &&
1169           !dw.name_has_wildcard(function) &&
1170           !startswith(function, "_Z"))
1171         query_module_functions();
1172       else
1173         dw.iterate_over_cus(&query_cu, this, false);
1174     }
1175 }
1176
1177 static void query_func_info (Dwarf_Addr entrypc, func_info & fi,
1178                                                         dwarf_query * q);
1179
1180 static void
1181 query_symtab_func_info (func_info & fi, dwarf_query * q)
1182 {
1183   assert(null_die(&fi.die));
1184
1185   Dwarf_Addr entrypc = fi.entrypc;
1186
1187   // Now compensate for the dw bias because the addresses come
1188   // from dwfl_module_symtab, so fi->entrypc is NOT a normal dw address.
1189   q->dw.get_module_dwarf(false, false);
1190   entrypc -= q->dw.module_bias;
1191
1192   // PR29676.  We consult the symbol tables of both the elf and
1193   // dwarf files. The 2 results can contain duplicates so
1194   // check results before continuing to create new probe points
1195   for(auto ddp_it = q->results.begin(); ddp_it != q->results.end(); ++ddp_it){
1196     dwarf_derived_probe *ddp = dynamic_cast<dwarf_derived_probe *> (*ddp_it);
1197     if(ddp && ddp->addr == entrypc)
1198       return;
1199   }
1200
1201   // If there are already probes in this module, lets not duplicate.
1202   // This can come from other weak symbols/aliases or existing
1203   // matches from Dwarf DIE functions.  Try to add this entrypc to the
1204   // collection, and only continue if it was new.
1205   if (q->alias_dupes.insert(entrypc).second)
1206     query_func_info(entrypc, fi, q);
1207 }
1208
1209 void
1210 dwarf_query::query_module_symtab()
1211 {
1212   // Get the symbol table if we don't already have it
1213   module_info *mi = dw.mod_info;
1214   if (mi->symtab_status == info_unknown)
1215     mi->get_symtab();
1216   if (mi->symtab_status == info_absent)
1217     return;
1218
1219   func_info *fi = NULL;
1220   symbol_table *sym_table = mi->sym_table;
1221
1222   if (has_function_str && spec_type == function_alone)
1223     {
1224       if (dw.name_has_wildcard(function_str_val))
1225         {
1226           for (auto iter = sym_table->map_by_addr.begin();
1227                iter != sym_table->map_by_addr.end();
1228                ++iter)
1229             {
1230               fi = iter->second;
1231               if (!null_die(&fi->die) // already handled in query_module_dwarf()
1232                   || fi->descriptor) // ppc opd (and also undefined symbols)
1233                 continue;
1234               if (dw.function_name_matches_pattern(fi->name, function_str_val))
1235                 query_symtab_func_info(*fi, this);
1236             }
1237         }
1238       else
1239         {
1240           const auto& fis = sym_table->lookup_symbol(function_str_val);
1241           for (auto it=fis.begin(); it!=fis.end(); ++it)
1242             {
1243               fi = *it;
1244               if (fi && null_die(&fi->die))
1245                 query_symtab_func_info(*fi, this);
1246             }
1247         }
1248     }
1249 }
1250
1251 void
1252 dwarf_query::handle_query_module()
1253 {
1254   if (has_plt && has_statement_num)
1255     {
1256       query_plt_statement (this);
1257       return;
1258     }
1259
1260   // PR25841.  We may only need dwarf depending on the context-related
1261   // constructs in the probe handler and/or transitively called
1262   // functions.  Otherwise, for some probe types (as per the former
1263   // assess_dbinfo_reqt()), we could fall back to query_module_symtab
1264   // (elf-only) and not bother look for / complain about absence of
1265   // dwarf.  But ... the worst case for probes where pure elf symbols are
1266   // enough is a warning that dwarf wasn't available.  Grin and bear it.
1267   dw.get_module_dwarf(false /* don't require */, true /* warn */);
1268
1269   // prebuild the symbol table to resolve aliases
1270   dw.mod_info->get_symtab();
1271
1272   // reset the dupe-checking for each new module
1273   alias_dupes.clear();
1274   inline_dupes.clear();
1275
1276   if (dw.mod_info->dwarf_status == info_present)
1277     query_module_dwarf();
1278
1279   // Consult the symbol table, asm and weak functions can show up
1280   // in the symbol table but not in dwarf and minidebuginfo is
1281   // located in the gnu_debugdata section, alias_dupes checking
1282   // is done before adding any probe points
1283   // PR29676.   Some probes require additional debuginfo
1284   // to expand wildcards (ex. .label, .callee). Since the debuginfo is
1285   // not available, don't bother looking in the symbol table for these results.
1286   // This can result in 0 results, if there is no dwarf info present
1287   if(!pending_interrupts && !(has_label || has_callee || has_callees_num))
1288     query_module_symtab();
1289 }
1290
1291
1292 void
1293 dwarf_query::parse_function_spec(const string & spec)
1294 {
1295   lineno_type = ABSOLUTE;
1296   size_t src_pos, line_pos, scope_pos;
1297
1298   // look for named scopes
1299   scope_pos = spec.rfind("::");
1300   if (scope_pos != string::npos)
1301     {
1302       tokenize_cxx(spec.substr(0, scope_pos), scopes);
1303       scope_pos += 2;
1304     }
1305   else
1306     scope_pos = 0;
1307
1308   // look for a source separator
1309   src_pos = spec.find('@', scope_pos);
1310   if (src_pos == string::npos)
1311     {
1312       function = spec.substr(scope_pos);
1313       spec_type = function_alone;
1314     }
1315   else
1316     {
1317       function = spec.substr(scope_pos, src_pos - scope_pos);
1318
1319       // look for a line-number separator
1320       line_pos = spec.find_first_of(":+", src_pos);
1321       if (line_pos == string::npos)
1322         {
1323           file = spec.substr(src_pos + 1);
1324           spec_type = function_and_file;
1325         }
1326       else
1327         {
1328           file = spec.substr(src_pos + 1, line_pos - src_pos - 1);
1329
1330           // classify the line spec
1331           spec_type = function_file_and_line;
1332           if (spec[line_pos] == '+')
1333             lineno_type = RELATIVE;
1334           else if (spec[line_pos + 1] == '*' &&
1335                    spec.length() == line_pos + 2)
1336             lineno_type = WILDCARD;
1337           else
1338             lineno_type = ABSOLUTE;
1339
1340           if (lineno_type != WILDCARD)
1341             try
1342               {
1343                 // try to parse N, N-M, or N,M,O,P, or combination thereof...
1344                 if (spec.find_first_of(",-", line_pos + 1) != string::npos)
1345                   {
1346                     lineno_type = ENUMERATED;
1347                     vector<string> sub_specs;
1348                     tokenize(spec.substr(line_pos + 1), sub_specs, ",");
1349                     for (auto line_spec = sub_specs.cbegin();
1350                          line_spec != sub_specs.cend(); ++line_spec)
1351                       {
1352                         vector<string> ranges;
1353                         tokenize(*line_spec, ranges, "-");
1354                         if (ranges.size() > 1)
1355                           {
1356                             int low = lex_cast<int>(ranges.front());
1357                             int high = lex_cast<int>(ranges.back());
1358                             for (int i = low; i <= high; i++)
1359                                 linenos.push_back(i);
1360                           }
1361                         else
1362                             linenos.push_back(lex_cast<int>(ranges.at(0)));
1363                       }
1364                     sort(linenos.begin(), linenos.end());
1365                   }
1366                 else
1367                   {
1368                     linenos.push_back(lex_cast<int>(spec.substr(line_pos + 1)));
1369                     linenos.push_back(lex_cast<int>(spec.substr(line_pos + 1)));
1370                   }
1371               }
1372             catch (runtime_error & exn)
1373               {
1374                 goto bad;
1375               }
1376         }
1377     }
1378
1379   if (function.empty() ||
1380       (spec_type != function_alone && file.empty()))
1381     goto bad;
1382
1383   if (sess.verbose > 2)
1384     {
1385       //clog << "parsed '" << spec << "'";
1386       clog << _F("parse '%s'", spec.c_str());
1387
1388       if (!scopes.empty())
1389         clog << ", scope '" << scopes[0] << "'";
1390       for (unsigned i = 1; i < scopes.size(); ++i)
1391         clog << "::'" << scopes[i] << "'";
1392
1393       clog << ", func '" << function << "'";
1394
1395       if (spec_type != function_alone)
1396         clog << ", file '" << file << "'";
1397
1398       if (spec_type == function_file_and_line)
1399         {
1400           clog << ", line ";
1401           switch (lineno_type)
1402             {
1403             case ABSOLUTE:
1404               clog << linenos[0];
1405               break;
1406
1407             case RELATIVE:
1408               clog << "+" << linenos[0];
1409               break;
1410
1411             case ENUMERATED:
1412               {
1413                 for (auto linenos_it = linenos.cbegin();
1414                      linenos_it != linenos.cend(); ++linenos_it)
1415                   {
1416                     auto range_it = linenos_it;
1417                     while ((range_it+1) != linenos.end() && *range_it + 1 == *(range_it+1))
1418                         ++range_it;
1419                     if (linenos_it == range_it)
1420                         clog << *linenos_it;
1421                     else
1422                         clog << *linenos_it << "-" << *range_it;
1423                     if (range_it + 1 != linenos.end())
1424                       clog << ",";
1425                     linenos_it = range_it;
1426                   }
1427                 }
1428               break;
1429
1430             case WILDCARD:
1431               clog << "*";
1432               break;
1433             }
1434         }
1435
1436       clog << endl;
1437     }
1438
1439   return;
1440
1441 bad:
1442   throw SEMANTIC_ERROR(_F("malformed specification '%s'", spec.c_str()),
1443                        base_probe->tok);
1444 }
1445
1446 string path_remove_sysroot(const systemtap_session& sess, const string& path)
1447 {
1448   size_t pos;
1449   string retval = path;
1450   if (!sess.sysroot.empty() &&
1451       (pos = retval.find(sess.sysroot)) != string::npos)
1452     retval.replace(pos, sess.sysroot.length(),
1453                    (*(sess.sysroot.end() - 1) == '/' ? "/": ""));
1454   return retval;
1455 }
1456
1457 /*
1458  * Convert 'Global Entry Point' to 'Local Entry Point'.
1459  *
1460  * if @gep contains next address after prologue, don't change it.
1461  *
1462  * For ELF ABI v2 on PPC64 LE, we need to adjust sym.st_value corresponding
1463  * to the bits of sym.st_other. These bits will tell us what's the offset
1464  * of the local entry point from the global entry point.
1465  *
1466  * st_other field is currently only used with ABIv2 on ppc64
1467  */
1468 static Dwarf_Addr
1469 get_lep(dwarf_query *q, Dwarf_Addr gep)
1470 {
1471   Dwarf_Addr bias;
1472   Dwfl_Module *mod = q->dw.module;
1473   Elf* elf = (dwarf_getelf (dwfl_module_getdwarf (mod, &bias))
1474              ?: dwfl_module_getelf (mod, &bias));
1475
1476   GElf_Ehdr ehdr_mem;
1477   GElf_Ehdr* em = gelf_getehdr (elf, &ehdr_mem);
1478   if (em == NULL)
1479     throw SEMANTIC_ERROR (_("Couldn't get elf header"));
1480
1481   if (!(em->e_machine == EM_PPC64) || !((em->e_flags & EF_PPC64_ABI) == 2))
1482     return gep;
1483
1484   int syments = dwfl_module_getsymtab(mod);
1485   for (int i = 1; i < syments; ++i)
1486     {
1487       GElf_Sym sym;
1488       GElf_Word section;
1489       GElf_Addr addr;
1490
1491 #if _ELFUTILS_PREREQ (0, 158)
1492       dwfl_module_getsym_info (mod, i, &sym, &addr, &section, NULL, NULL);
1493 #else
1494       dwfl_module_getsym (mod, i, &sym, &section);
1495       addr = sym.st_value;
1496 #endif
1497
1498       /*
1499        * Symbol table contains module_bias + offset. Substract module_bias
1500        * to compare offset with gep.
1501        */
1502       if ((addr - bias) == gep && (GELF_ST_TYPE(sym.st_info) == STT_FUNC)
1503           && sym.st_other)
1504         return gep + PPC64_LOCAL_ENTRY_OFFSET(sym.st_other);
1505     }
1506
1507   return gep;
1508 }
1509
1510 void
1511 dwarf_query::add_probe_point(interned_string dw_funcname,
1512                              interned_string filename,
1513                              int line,
1514                              Dwarf_Die* scope_die,
1515                              Dwarf_Addr addr)
1516 {
1517   interned_string reloc_section; // base section for relocation purposes
1518   Dwarf_Addr orig_addr = addr;
1519   Dwarf_Addr reloc_addr; // relocated
1520   interned_string module = dw.module_name; // "kernel" or other
1521   interned_string funcname = dw_funcname;
1522
1523   assert (! has_absolute); // already handled in dwarf_builder::build()
1524
1525   addr = get_lep(this, addr);
1526   reloc_addr = dw.relocate_address(addr, reloc_section);
1527
1528   // If we originally used the linkage name, then let's call it that way
1529   const char* linkage_name;
1530   if (!null_die(scope_die) && startswith (this->function, "_Z")
1531       && (linkage_name = dwarf_linkage_name (scope_die)))
1532     funcname = linkage_name;
1533
1534   if (sess.verbose > 1)
1535     {
1536       clog << _("probe ") << funcname << "@" << filename << ":" << line;
1537       if (string(module) == TOK_KERNEL)
1538         clog << _(" kernel");
1539       else if (has_module)
1540         clog << _(" module=") << module;
1541       else if (has_process)
1542         clog << _(" process=") << module;
1543       if (reloc_section != "") clog << " reloc=" << reloc_section;
1544       clog << " pc=0x" << hex << addr << dec;
1545     }
1546
1547   dwflpp::blocklisted_type blocklisted = dw.blocklisted_p (funcname, filename,
1548                                                            line, module, addr,
1549                                                            has_return);
1550   if (sess.verbose > 1)
1551     clog << endl;
1552
1553   if (module == TOK_KERNEL)
1554     {
1555       // PR 4224: adapt to relocatable kernel by subtracting the _stext address here.
1556       reloc_addr = addr - sess.sym_stext;
1557       reloc_section = "_stext"; // a message to runtime's _stp_module_relocate
1558     }
1559
1560   if (!blocklisted)
1561     {
1562       sess.unwindsym_modules.insert (module);
1563
1564       if (has_process)
1565         {
1566           string module_tgt = path_remove_sysroot(sess, module);
1567           results.push_back (new uprobe_derived_probe(funcname, filename, line,
1568                                                       module_tgt, reloc_section, addr, reloc_addr,
1569                                                       *this, scope_die));
1570         }
1571       else
1572         {
1573           assert (has_kernel || has_module);
1574
1575           // We could only convert probes in the module's .init
1576           // section to symbol+offset probes. However, the module
1577           // refresh code only expects to be called once on a module
1578           // load, so we'll go ahead and convert them all.
1579           if (has_module)
1580             {
1581               module_info *mi = dw.mod_info;
1582
1583               if (mi->symtab_status == info_unknown)
1584                 mi->get_symtab();
1585               if (mi->symtab_status == info_absent)
1586                 throw SEMANTIC_ERROR(_F("can't retrieve symbol table for function %s",
1587                                         module_val.to_string().c_str()));
1588
1589               symbol_table *sym_table = mi->sym_table;
1590               func_info *symbol = sym_table->get_func_containing_address(addr);
1591
1592               // Do not use LEP to find offset here. When 'symbol_name'
1593               // is used to register probe, kernel itself will find LEP.
1594               Dwarf_Addr offset = orig_addr - symbol->addr;
1595               results.push_back (new dwarf_derived_probe(funcname, filename,
1596                                                          line, module,
1597                                                          reloc_section, addr,
1598                                                          reloc_addr,
1599                                                          *this, scope_die,
1600                                                          symbol->name,
1601                                                          offset));
1602             }
1603           else
1604             results.push_back (new dwarf_derived_probe(funcname, filename,
1605                                                        line, module,
1606                                                        reloc_section, addr,
1607                                                        reloc_addr,
1608                                                        *this, scope_die));
1609         }
1610     }
1611   else
1612     {
1613       switch (blocklisted)
1614         {
1615         case dwflpp::blocklisted_section:
1616           sess.print_warning(_F("function %s is in blocklisted section",
1617                                 funcname.to_string().c_str()), base_probe->tok);
1618           break;
1619         case dwflpp::blocklisted_kprobes:
1620           sess.print_warning(_F("kprobes function %s is blocklisted",
1621                                 funcname.to_string().c_str()), base_probe->tok);
1622           break;
1623         case dwflpp::blocklisted_function_return:
1624           sess.print_warning(_F("function %s return probe is blocklisted",
1625                                 funcname.to_string().c_str()), base_probe->tok);
1626           break;
1627         case dwflpp::blocklisted_file:
1628           sess.print_warning(_F("function %s is in blocklisted file",
1629                                 funcname.to_string().c_str()), base_probe->tok);
1630           break;
1631         case dwflpp::blocklisted_function:
1632         default:
1633           sess.print_warning(_F("function %s is blocklisted",
1634                                 funcname.to_string().c_str()), base_probe->tok);
1635           break;
1636         }
1637     }
1638 }
1639
1640 void
1641 dwarf_query::mount_well_formed_probe_point()
1642 {
1643   interned_string module = dw.module_name;
1644   if (has_process)
1645     module = path_remove_sysroot(sess, module);
1646
1647   vector<probe_point::component*> comps;
1648   for (auto it = base_loc->components.begin();
1649        it != base_loc->components.end(); ++it)
1650     {
1651       if ((*it)->functor == TOK_PROCESS && this->build_id_val != "")
1652         comps.push_back(new probe_point::component((*it)->functor,
1653           new literal_string(this->build_id_val)));
1654       else if ((*it)->functor == TOK_PROCESS || (*it)->functor == TOK_MODULE)
1655         comps.push_back(new probe_point::component((*it)->functor,
1656           new literal_string(has_library ? path : module)));
1657       else
1658         comps.push_back(*it);
1659     }
1660
1661   probe_point *pp = new probe_point(*base_loc);
1662   pp->well_formed = true;
1663   pp->components = comps;
1664
1665   previous_bases.push(make_pair(base_loc, base_probe));
1666
1667   base_loc = pp;
1668   base_probe = new probe(base_probe, pp);
1669 }
1670
1671 void
1672 dwarf_query::unmount_well_formed_probe_point()
1673 {
1674   assert(!previous_bases.empty());
1675
1676   base_loc = previous_bases.top().first;
1677   base_probe = previous_bases.top().second;
1678
1679   previous_bases.pop();
1680 }
1681
1682 void
1683 dwarf_query::replace_probe_point_component_arg(interned_string functor,
1684                                                interned_string new_functor,
1685                                                int64_t new_arg,
1686                                                bool hex)
1687 {
1688   // only allow these operations if we're editing the well-formed loc
1689   assert(!previous_bases.empty());
1690
1691   for (auto it = base_loc->components.begin();
1692        it != base_loc->components.end(); ++it)
1693     if ((*it)->functor == functor)
1694       *it = new probe_point::component(new_functor,
1695               new literal_number(new_arg, hex));
1696 }
1697
1698 void
1699 dwarf_query::replace_probe_point_component_arg(interned_string functor,
1700                                                int64_t new_arg,
1701                                                bool hex)
1702 {
1703   replace_probe_point_component_arg(functor, functor, new_arg, hex);
1704 }
1705
1706 void
1707 dwarf_query::replace_probe_point_component_arg(interned_string functor,
1708                                                interned_string new_functor,
1709                                                interned_string new_arg)
1710 {
1711   // only allow these operations if we're editing the well-formed loc
1712   assert(!previous_bases.empty());
1713
1714   for (auto it = base_loc->components.begin();
1715        it != base_loc->components.end(); ++it)
1716     if ((*it)->functor == functor)
1717       *it = new probe_point::component(new_functor,
1718               new literal_string(new_arg));
1719 }
1720
1721 void
1722 dwarf_query::replace_probe_point_component_arg(interned_string functor,
1723                                                interned_string new_arg)
1724 {
1725   replace_probe_point_component_arg(functor, functor, new_arg);
1726 }
1727
1728 void
1729 dwarf_query::remove_probe_point_component(interned_string functor)
1730 {
1731   // only allow these operations if we're editing the well-formed loc
1732   assert(!previous_bases.empty());
1733
1734   vector<probe_point::component*> new_comps;
1735   for (auto it = base_loc->components.begin();
1736        it != base_loc->components.end(); ++it)
1737     if ((*it)->functor != functor)
1738       new_comps.push_back(*it);
1739
1740   base_loc->components = new_comps;
1741 }
1742
1743
1744 interned_string
1745 dwarf_query::final_function_name(interned_string final_func,
1746                                  interned_string final_file,
1747                                  int final_line)
1748 {
1749   string final_name = final_func;
1750   if (final_file != "")
1751     {
1752       final_name += ("@" + string(final_file));
1753       if (final_line > 0)
1754         final_name += (":" + lex_cast(final_line));
1755     }
1756   return final_name;
1757 }
1758
1759 bool
1760 dwarf_query::is_fully_specified_function()
1761 {
1762   // A fully specified function is one that was given using a .function() probe
1763   // by full name (no wildcards), and specific srcfile and decl_line.
1764   return (has_function_str
1765           && spec_type == function_file_and_line
1766           && !dw.name_has_wildcard(function)
1767           && filtered_srcfiles.size() == 1
1768           && !filtered_functions.empty()
1769           && lineno_type == ABSOLUTE
1770           && filtered_functions[0].decl_line == linenos[0]);
1771 }
1772
1773 base_func_info_map_t
1774 dwarf_query::filtered_all(void)
1775 {
1776   base_func_info_map_t r;
1777   for (auto f = filtered_functions.cbegin();
1778        f != filtered_functions.cend(); ++f)
1779     r.push_back(*f);
1780   for (auto i = filtered_inlines.cbegin();
1781        i != filtered_inlines.cend(); ++i)
1782     r.push_back(*i);
1783   return r;
1784 }
1785
1786 // The critical determining factor when interpreting a pattern
1787 // string is, perhaps surprisingly: "presence of a lineno". The
1788 // presence of a lineno changes the search strategy completely.
1789 //
1790 // Compare the two cases:
1791 //
1792 //   1. {statement,function}(foo@file.c:lineno)
1793 //      - find the files matching file.c
1794 //      - in each file, find the functions matching foo
1795 //      - query the file for line records matching lineno
1796 //      - iterate over the line records,
1797 //        - and iterate over the functions,
1798 //          - if(haspc(function.DIE, line.addr))
1799 //            - if looking for statements: probe(lineno.addr)
1800 //            - if looking for functions: probe(function.{entrypc,return,etc.})
1801 //
1802 //   2. {statement,function}(foo@file.c)
1803 //      - find the files matching file.c
1804 //      - in each file, find the functions matching foo
1805 //        - probe(function.{entrypc,return,etc.})
1806 //
1807 // Thus the first decision we make is based on the presence of a
1808 // lineno, and we enter entirely different sets of callbacks
1809 // depending on that decision.
1810 //
1811 // Note that the first case is a generalization of the second, in that
1812 // we could theoretically search through line records for matching
1813 // file names (a "table scan" in rdbms lingo).  Luckily, file names
1814 // are already cached elsewhere, so we can do an "index scan" as an
1815 // optimization.
1816
1817 static void
1818 query_statement (interned_string func,
1819                  interned_string file,
1820                  int line,
1821                  Dwarf_Die *scope_die,
1822                  Dwarf_Addr stmt_addr,
1823                  dwarf_query * q)
1824 {
1825   try
1826     {
1827       q->add_probe_point(func, file,
1828                          line, scope_die, stmt_addr);
1829     }
1830   catch (const semantic_error& e)
1831     {
1832       q->sess.print_error (e);
1833     }
1834 }
1835
1836 static void
1837 query_addr(Dwarf_Addr addr, dwarf_query *q)
1838 {
1839   assert(q->has_function_num || q->has_statement_num);
1840
1841   dwflpp &dw = q->dw;
1842
1843   if (q->sess.verbose > 2)
1844     clog << "query_addr 0x" << hex << addr << dec << endl;
1845
1846   // First pick which CU contains this address
1847   Dwarf_Die* cudie = dw.query_cu_containing_address(addr);
1848   if (!cudie) // address could be wildly out of range
1849     return;
1850   dw.focus_on_cu(cudie);
1851
1852   // Now compensate for the dw bias
1853   addr -= dw.module_bias;
1854
1855   // Per PR5787, we look up the scope die even for
1856   // statement_num's, for blocklist sensitivity and $var
1857   // resolution purposes.
1858
1859   // Find the scopes containing this address
1860   vector<Dwarf_Die> scopes = dw.getscopes(addr);
1861   if (scopes.empty())
1862     return;
1863
1864   // Look for the innermost containing function
1865   Dwarf_Die *fnscope = NULL;
1866   for (size_t i = 0; i < scopes.size(); ++i)
1867     {
1868       int tag = dwarf_tag(&scopes[i]);
1869       if ((tag == DW_TAG_subprogram && !q->has_inline) ||
1870           (tag == DW_TAG_inlined_subroutine &&
1871            !q->has_call && !q->has_return && !q->has_exported))
1872         {
1873           fnscope = &scopes[i];
1874           break;
1875         }
1876     }
1877   if (!fnscope)
1878     return;
1879   dw.focus_on_function(fnscope);
1880
1881   Dwarf_Die *scope = q->has_function_num ? fnscope : &scopes[0];
1882
1883   const char *file = dwarf_decl_file(fnscope) ?: "";
1884   int line;
1885   dwarf_decl_line(fnscope, &line);
1886
1887   // Function probes should reset the addr to the function entry
1888   // and possibly perform prologue searching
1889   if (q->has_function_num)
1890     {
1891       if (!dw.die_entrypc(fnscope, &addr))
1892         return;
1893       if (dwarf_tag(fnscope) == DW_TAG_subprogram &&
1894           q->sess.prologue_searching_mode != systemtap_session::prologue_searching_never &&
1895           (q->sess.prologue_searching_mode == systemtap_session::prologue_searching_always ||
1896            (q->has_process && !q->dw.has_valid_locs()))) // PR 6871 && PR 6941
1897         {
1898           func_info func;
1899           func.die = *fnscope;
1900           func.name = dw.function_name;
1901           func.decl_file = file;
1902           func.decl_line = line;
1903           func.entrypc = addr;
1904
1905           func_info_map_t funcs(1, func);
1906           dw.resolve_prologue_endings (funcs);
1907           q->prologue_end = funcs[0].prologue_end;
1908
1909           // PR13200: if it's a .return probe, we need to emit a *retprobe based
1910           // on the entrypc so here we only use prologue_end for non .return
1911           // probes (note however that .return probes still take advantage of
1912           // prologue_end: PR14436)
1913           if (!q->has_return)
1914             addr = funcs[0].prologue_end;
1915         }
1916     }
1917   else
1918     {
1919       Dwarf_Line *address_line = dwarf_getsrc_die(cudie, addr);
1920       Dwarf_Addr address_line_addr = addr;
1921       if (address_line)
1922         {
1923           file = DWARF_LINESRC(address_line);
1924           line = DWARF_LINENO(address_line);
1925           address_line_addr = DWARF_LINEADDR(address_line);
1926         }
1927
1928       // Verify that a raw address matches the beginning of a
1929       // statement. This is a somewhat lame check that the address
1930       // is at the start of an assembly instruction.  Mark probes are in the
1931       // middle of a macro and thus not strictly at a statement beginning.
1932       // Guru mode may override this check.
1933       if (!q->has_mark && (!address_line || address_line_addr != addr))
1934         {
1935           stringstream msg;
1936           msg << _F("address %#" PRIx64 " does not match the beginning of a statement",
1937                     addr);
1938           if (address_line)
1939             msg << _F(" (try %#" PRIx64 ")", address_line_addr);
1940           else
1941             msg << _F(" (no line info found for '%s', in module '%s')",
1942                       dw.cu_name().c_str(), dw.module_name.c_str());
1943           if (! q->sess.guru_mode)
1944             throw SEMANTIC_ERROR(msg.str());
1945           else
1946            q->sess.print_warning(msg.str());
1947         }
1948     }
1949
1950   // We're ready to build a probe, but before, we need to create the final,
1951   // well-formed version of this location with all the components filled in
1952   q->mount_well_formed_probe_point();
1953   q->replace_probe_point_component_arg(TOK_FUNCTION, addr, true /* hex */ );
1954   q->replace_probe_point_component_arg(TOK_STATEMENT, addr, true /* hex */ );
1955
1956   // Build a probe at this point
1957   query_statement(dw.function_name, file, line, scope, addr, q);
1958
1959   q->unmount_well_formed_probe_point();
1960 }
1961
1962 static void
1963 query_plt_statement(dwarf_query *q)
1964 {
1965   assert (q->has_plt && q->has_statement_num);
1966
1967   Dwarf_Addr addr = q->statement_num_val;
1968   if (q->sess.verbose > 2)
1969     clog << "query_plt_statement 0x" << hex << addr << dec << endl;
1970
1971   // First adjust the raw address to dwfl's elf bias.
1972   Dwarf_Addr elf_bias;
1973   Elf *elf = dwfl_module_getelf (q->dw.module, &elf_bias);
1974   assert(elf);
1975   addr += elf_bias;
1976
1977   // Now compensate for the dw bias
1978   q->dw.get_module_dwarf(false, false);
1979   addr -= q->dw.module_bias;
1980
1981   // Create the final well-formed probe point
1982   q->mount_well_formed_probe_point();
1983   q->replace_probe_point_component_arg(TOK_STATEMENT, q->statement_num_val, true /* hex */ );
1984
1985   // We remove the .plt part here, since if the user provided a .plt probe, then
1986   // the higher-level probe point is already well-formed. On the other hand, if
1987   // the user provides a .plt(PATTERN).statement(0xABCD), the PATTERN is
1988   // irrelevant (we won't iterate over plts) so just take it out.
1989   q->remove_probe_point_component(TOK_PLT);
1990
1991   // Build a probe at this point
1992   query_statement(q->plt_val, NULL, -1, NULL, addr, q);
1993
1994   q->unmount_well_formed_probe_point();
1995 }
1996
1997 static void
1998 query_label (const base_func_info& func,
1999              char const * label,
2000              char const * file,
2001              int line,
2002              Dwarf_Die *scope_die,
2003              Dwarf_Addr stmt_addr,
2004              dwarf_query * q)
2005 {
2006   assert (q->has_statement_str || q->has_function_str);
2007
2008   // weed out functions whose decl_file isn't one of
2009   // the source files that we actually care about
2010   if (q->spec_type != function_alone &&
2011       q->filtered_srcfiles.count(file) == 0)
2012     return;
2013
2014   // Create the final well-formed probe
2015   interned_string canon_func = q->final_function_name(func.name, file ?: "", line);
2016
2017   q->mount_well_formed_probe_point();
2018   q->replace_probe_point_component_arg(TOK_FUNCTION, canon_func);
2019   q->replace_probe_point_component_arg(TOK_LABEL, label);
2020
2021   query_statement(func.name, file, line, scope_die, stmt_addr, q);
2022
2023   q->unmount_well_formed_probe_point();
2024 }
2025
2026 static void
2027 query_callee (base_func_info& callee,
2028               base_func_info& caller,
2029               stack<Dwarf_Addr> *callers,
2030               dwarf_query * q)
2031 {
2032   assert (q->has_function_str);
2033   assert (q->has_callee || q->has_callees_num);
2034
2035   // OK, we found a callee for a targeted caller. To help users see the
2036   // derivation, we add the well-formed form .function(caller).callee(callee).
2037
2038   interned_string canon_caller = q->final_function_name(caller.name, caller.decl_file,
2039                                                         caller.decl_line);
2040   interned_string canon_callee = q->final_function_name(callee.name, callee.decl_file,
2041                                                         callee.decl_line);
2042
2043   q->mount_well_formed_probe_point();
2044   q->replace_probe_point_component_arg(TOK_FUNCTION, canon_caller);
2045   q->replace_probe_point_component_arg(TOK_CALLEES, TOK_CALLEE, canon_callee);
2046   q->replace_probe_point_component_arg(TOK_CALLEE, canon_callee);
2047
2048   // Pass on the callers we'll need to add checks for
2049   q->callers = callers;
2050
2051   query_statement(callee.name, callee.decl_file,
2052                   callee.decl_line,
2053                   &callee.die, callee.entrypc, q);
2054
2055   q->unmount_well_formed_probe_point();
2056 }
2057
2058 static void
2059 query_inline_instance_info (inline_instance_info & ii,
2060                             dwarf_query * q)
2061 {
2062   try
2063     {
2064       assert (! q->has_return); // checked by caller already
2065       assert (q->has_function_str || q->has_statement_str);
2066
2067       if (q->sess.verbose>2)
2068         clog << _F("querying entrypc %#" PRIx64 " of instance of inline '%s'\n",
2069                    ii.entrypc, ii.name.to_string().c_str());
2070
2071       interned_string canon_func = q->final_function_name(ii.name, ii.decl_file,
2072                                                           ii.decl_line);
2073
2074       q->mount_well_formed_probe_point();
2075       q->replace_probe_point_component_arg(TOK_FUNCTION, canon_func);
2076       q->replace_probe_point_component_arg(TOK_STATEMENT, canon_func);
2077
2078       query_statement (ii.name, ii.decl_file, ii.decl_line,
2079                        &ii.die, ii.entrypc, q);
2080
2081       q->unmount_well_formed_probe_point();
2082     }
2083   catch (semantic_error &e)
2084     {
2085       q->sess.print_error (e);
2086     }
2087 }
2088
2089 static void
2090 query_func_info (Dwarf_Addr entrypc,
2091                  func_info & fi,
2092                  dwarf_query * q)
2093 {
2094   assert(q->has_function_str || q->has_statement_str);
2095
2096   try
2097     {
2098       interned_string canon_func = q->final_function_name(fi.name, fi.decl_file,
2099                                                           fi.decl_line);
2100
2101       q->mount_well_formed_probe_point();
2102       q->replace_probe_point_component_arg(TOK_FUNCTION, canon_func);
2103       q->replace_probe_point_component_arg(TOK_STATEMENT, canon_func);
2104
2105       // If it's a .return probe, we need to emit a *retprobe based on the
2106       // entrypc (PR13200). Note however that if prologue_end is valid,
2107       // dwarf_derived_probe will still take advantage of it by creating a new
2108       // probe there if necessary to pick up target vars (PR14436).
2109       if (fi.prologue_end == 0 || q->has_return)
2110         {
2111           q->prologue_end = fi.prologue_end;
2112           query_statement (fi.name, fi.decl_file, fi.decl_line,
2113                            &fi.die, entrypc, q);
2114         }
2115       else
2116         {
2117           query_statement (fi.name, fi.decl_file, fi.decl_line,
2118                            &fi.die, fi.prologue_end, q);
2119         }
2120
2121       q->unmount_well_formed_probe_point();
2122     }
2123   catch (semantic_error &e)
2124     {
2125       q->sess.print_error (e);
2126     }
2127 }
2128
2129 static void
2130 query_srcfile_line (Dwarf_Addr addr, int lineno, dwarf_query * q)
2131 {
2132   assert (q->has_statement_str || q->has_function_str);
2133   assert (q->spec_type == function_file_and_line);
2134
2135   auto bfis = q->filtered_all();
2136   for (auto i = bfis.begin(); i != bfis.end(); ++i)
2137     {
2138       if (q->sess.verbose>3)
2139         clog << _F("checking DIE (dieoffset: %#" PRIx64 ") "
2140                    "against scope address %#" PRIx64 "\n",
2141                    dwarf_dieoffset(& i->die),
2142                    addr);
2143
2144       if (q->dw.die_has_pc (i->die, addr))
2145         {
2146           if (q->sess.verbose>3)
2147             clog << _("filtered DIE lands on srcfile\n");
2148           Dwarf_Die scope;
2149           q->dw.inner_die_containing_pc(i->die, addr, scope);
2150
2151           interned_string canon_func = q->final_function_name(i->name, i->decl_file,
2152                                                               lineno /* NB: not i->decl_line */ );
2153
2154           if (q->has_nearest && (q->lineno_type == ABSOLUTE ||
2155                                  q->lineno_type == RELATIVE))
2156             {
2157               int lineno_nearest = q->linenos[0];
2158               if (q->lineno_type == RELATIVE)
2159                 lineno_nearest += i->decl_line;
2160               interned_string canon_func_nearest = q->final_function_name(i->name,
2161                                                                           i->decl_file,
2162                                                                           lineno_nearest);
2163               q->mount_well_formed_probe_point();
2164               q->replace_probe_point_component_arg(TOK_STATEMENT, canon_func_nearest);
2165             }
2166
2167           q->mount_well_formed_probe_point();
2168           q->replace_probe_point_component_arg(TOK_FUNCTION, canon_func);
2169           q->replace_probe_point_component_arg(TOK_STATEMENT, canon_func);
2170
2171           query_statement (i->name, i->decl_file,
2172                            lineno, // NB: not q->line !
2173                            &scope, addr, q);
2174
2175           q->unmount_well_formed_probe_point();
2176           if (q->has_nearest && (q->lineno_type == ABSOLUTE ||
2177                                  q->lineno_type == RELATIVE))
2178             q->unmount_well_formed_probe_point();
2179         }
2180     }
2181 }
2182
2183 bool
2184 inline_instance_info::operator<(const inline_instance_info& other) const
2185 {
2186   if (entrypc != other.entrypc)
2187     return entrypc < other.entrypc;
2188
2189   if (decl_line != other.decl_line)
2190     return decl_line < other.decl_line;
2191
2192   int cmp = name.compare(other.name);
2193   if (!cmp) // tiebreaker
2194     cmp = decl_file.compare(other.decl_file);
2195
2196   return cmp < 0;
2197 }
2198
2199
2200 static int
2201 query_dwarf_inline_instance (Dwarf_Die * die, dwarf_query * q)
2202 {
2203   assert (q->has_statement_str || q->has_function_str);
2204   assert (!q->has_call && !q->has_return && !q->has_exported);
2205
2206   try
2207     {
2208       if (q->sess.verbose>2)
2209         clog << _F("selected inline instance of %s\n", q->dw.function_name.c_str());
2210
2211       Dwarf_Addr entrypc;
2212       if (q->dw.die_entrypc (die, &entrypc))
2213         {
2214           // PR12609: The tails of partially-inlined functions show up
2215           // in the query_dwarf_func() path, not here.  The heads do
2216           // come here, and should be processed here.
2217
2218           inline_instance_info inl;
2219           inl.die = *die;
2220           inl.name = q->dw.function_name;
2221           inl.entrypc = entrypc;
2222           const char* df;
2223           q->dw.function_file (&df);
2224           inl.decl_file = df ?: "";
2225           q->dw.function_line (&inl.decl_line);
2226
2227           // make sure that this inline hasn't already
2228           // been matched from a different CU
2229           if (q->inline_dupes.insert(inl).second)
2230             {
2231               if (q->sess.verbose>3)
2232                 clog << _F("added to filtered_inlines (dieoffset: %#" PRIx64 ")\n",
2233                            dwarf_dieoffset(&inl.die));
2234
2235               q->filtered_inlines.push_back(inl);
2236             }
2237         }
2238       return DWARF_CB_OK;
2239     }
2240   catch (const semantic_error& e)
2241     {
2242       q->sess.print_error (e);
2243       return DWARF_CB_ABORT;
2244     }
2245 }
2246
2247 static int
2248 query_dwarf_func (Dwarf_Die * func, dwarf_query * q)
2249 {
2250   assert (q->has_statement_str || q->has_function_str);
2251
2252   // weed out functions whose decl_file isn't one of
2253   // the source files that we actually care about
2254   string decl_file = dwarf_decl_file(func)?:"";
2255
2256   if (q->sess.verbose>4)
2257     clog << _F("querying dwarf func in file %s count %zu (func dieoffset: %#" PRIx64 ")\n",
2258                decl_file.c_str(),
2259                q->filtered_srcfiles.count(decl_file),
2260                dwarf_dieoffset(func));
2261
2262   if (q->spec_type != function_alone &&
2263       decl_file != "" && // do not skip decl_file-free DIEs; could be artificial/LTO?
2264       q->filtered_srcfiles.count(decl_file) == 0)
2265     return DWARF_CB_OK;
2266
2267   try
2268     {
2269       q->dw.focus_on_function (func);
2270
2271       if (!q->dw.function_scope_matches(q->scopes))
2272         return DWARF_CB_OK;
2273
2274       // make sure that this function address hasn't
2275       // already been matched under an aliased name
2276       Dwarf_Addr addr;
2277       if (!q->dw.func_is_inline() &&
2278           dwarf_entrypc(func, &addr) == 0 &&
2279           !q->alias_dupes.insert(addr).second)
2280         return DWARF_CB_OK;
2281
2282       if (q->dw.func_is_inline () && (! q->has_call) && (! q->has_return) && (! q->has_exported))
2283         {
2284           if (q->sess.verbose>3)
2285             clog << _F("checking instances of inline %s\n", q->dw.function_name.c_str());
2286           q->dw.iterate_over_inline_instances (query_dwarf_inline_instance, q);
2287         }
2288       else if (q->dw.func_is_inline () && (q->has_return)) // PR 11553
2289         {
2290           q->inlined_non_returnable.insert (q->dw.function_name);
2291         }
2292       else if (!q->dw.func_is_inline () && (! q->has_inline))
2293         {
2294           if (q->has_exported && !q->dw.func_is_exported ())
2295             return DWARF_CB_OK;
2296           if (q->sess.verbose>2)
2297             clog << _F("selected function %s\n", q->dw.function_name.c_str());
2298
2299
2300           func_info func;
2301           q->dw.function_die (&func.die);
2302           func.name = q->dw.function_name;
2303           const char *df;
2304           q->dw.function_file (&df);
2305           func.decl_file = df ?: "";
2306           q->dw.function_line (&func.decl_line);
2307
2308           Dwarf_Addr entrypc;
2309           if (q->dw.function_entrypc (&entrypc))
2310             {
2311               func.entrypc = entrypc;
2312
2313               // PR12609: handle partial-inlined functions.  These look
2314               // like normal inlined instances in DWARF (so come through
2315               // here), but in fact are common/tail parts of a normal
2316               // inlined function instance.  They do not represent entry
2317               // points, so we filter them out.  DWARF/gcc doesn't leave
2318               // any attributes to identify these from there, so we look
2319               // up the ELF symbol name and rely on a heuristic.
2320               GElf_Sym sym;
2321               GElf_Off off = 0;
2322               Dwarf_Addr elf_bias;
2323               Elf *elf = dwfl_module_getelf (q->dw.module, &elf_bias);
2324               assert(elf);
2325
2326               const char *name = dwfl_module_addrinfo (q->dw.module, entrypc + elf_bias,
2327                                                        &off, &sym, NULL, NULL, NULL);
2328
2329               if (q->sess.verbose>3)
2330                       clog << _F("%s = dwfl_module_addrinfo(entrypc=%p + %p)\n",
2331                                  name, (void*)entrypc, (void *)elf_bias);
2332               if (name != NULL && strstr(name, ".part.") != NULL)
2333                 {
2334                   if (q->sess.verbose>2)
2335                     clog << _F("skipping partially-inlined instance "
2336                                "%s at %p\n", name, (void*)entrypc);
2337                   return DWARF_CB_OK;
2338                 }
2339
2340               if (q->sess.verbose>3)
2341                 clog << _F("added to filtered_functions (dieoffset: %#" PRIx64 ")\n",
2342                            dwarf_dieoffset(&func.die));
2343
2344               q->filtered_functions.push_back (func);
2345             }
2346           /* else this function is fully inlined, just ignore it */
2347         }
2348       return DWARF_CB_OK;
2349     }
2350   catch (const semantic_error& e)
2351     {
2352       q->sess.print_error (e);
2353       return DWARF_CB_ABORT;
2354     }
2355 }
2356
2357 static int
2358 query_cu (Dwarf_Die * cudie, dwarf_query * q)
2359 {
2360   assert (q->has_statement_str || q->has_function_str);
2361
2362   if (pending_interrupts) return DWARF_CB_ABORT;
2363
2364   try
2365     {
2366       q->dw.focus_on_cu (cudie);
2367
2368       if (false && q->sess.verbose>2)
2369         clog << _F("focused on CU '%s', in module '%s'\n",
2370                    q->dw.cu_name().c_str(), q->dw.module_name.c_str());
2371
2372       q->filtered_srcfiles.clear();
2373       q->filtered_functions.clear();
2374       q->filtered_inlines.clear();
2375
2376       // In this path, we find "abstract functions", record
2377       // information about them, and then (depending on lineno
2378       // matching) possibly emit one or more of the function's
2379       // associated addresses. Unfortunately the control of this
2380       // cannot easily be turned inside out.
2381
2382       if (q->spec_type != function_alone)
2383         {
2384           // If we have a pattern string with a filename, we need
2385           // to elaborate the srcfile mask in question first.
2386           q->dw.collect_srcfiles_matching (q->file, q->filtered_srcfiles);
2387
2388           // If we have a file pattern and *no* srcfile matches, there's
2389           // no need to look further into this CU, so skip.
2390           if (q->filtered_srcfiles.empty())
2391             return DWARF_CB_OK;
2392         }
2393
2394       // Pick up [entrypc, name, DIE] tuples for all the functions
2395       // matching the query, and fill in the prologue endings of them
2396       // all in a single pass.
2397       q->dw.iterate_over_functions (query_dwarf_func, q, q->function);
2398
2399       if (!q->filtered_functions.empty() &&
2400           !q->has_statement_str && // PR 2608
2401           q->sess.prologue_searching_mode != systemtap_session::prologue_searching_never &&
2402            (q->sess.prologue_searching_mode == systemtap_session::prologue_searching_always ||
2403             (q->has_process && !q->dw.has_valid_locs()))) // PR 6871 && PR 6941
2404         q->dw.resolve_prologue_endings (q->filtered_functions);
2405
2406       if (q->has_label)
2407         {
2408           enum lineno_t lineno_type = WILDCARD;
2409           if (q->spec_type == function_file_and_line)
2410             lineno_type = q->lineno_type;
2411           auto bfis = q->filtered_all();
2412           for (auto i = bfis.begin(); i != bfis.end(); ++i)
2413             q->dw.iterate_over_labels (&i->die, q->label_val, *i, q->linenos,
2414                                        lineno_type, q, query_label);
2415         }
2416       else if (q->has_callee || q->has_callees_num)
2417         {
2418           // .callee(str) --> str, .callees[(N)] --> "*"
2419           string callee_val = q->has_callee ? q->callee_val : "*";
2420           int64_t callees_num_val = q->has_callees_num ? q->callees_num_val : 1;
2421
2422           // NB: We filter functions that do not match the file here rather than
2423           // in query_callee because we only want the filtering to apply to the
2424           // first level, not to callees that are recursed into if
2425           // callees_num_val > 1.
2426           auto bfis = q->filtered_all();
2427           for (auto i = bfis.begin(); i != bfis.end(); ++i)
2428             {
2429               if (q->spec_type != function_alone &&
2430                   q->filtered_srcfiles.count(i->decl_file) == 0)
2431                 continue;
2432               q->dw.iterate_over_callees (&i->die, callee_val,
2433                                           callees_num_val,
2434                                           q, query_callee, *i);
2435             }
2436         }
2437       else if (q->spec_type == function_file_and_line
2438               // User specified function, file and lineno, but if they match
2439               // exactly a specific function in a specific line at a specific
2440               // decl_line, the user doesn't actually want to probe a lineno,
2441               // but rather the function itself. So let fall through to
2442               // query_func_info/query_inline_instance_info in final else.
2443                && !q->is_fully_specified_function()
2444                && !q->has_function_str)
2445         {
2446           auto bfis = q->filtered_all();
2447
2448           for (auto srcfile = q->filtered_srcfiles.cbegin();
2449                srcfile != q->filtered_srcfiles.cend(); ++srcfile)
2450             q->dw.iterate_over_srcfile_lines(srcfile->c_str(), q->linenos,
2451                                              q->lineno_type, bfis,
2452                                              query_srcfile_line,
2453                                              q->has_nearest, q);
2454         }
2455       else
2456         {
2457           // .statement(...:NN) often gets mixed up with .function(...:NN)
2458           if (q->spec_type == function_file_and_line
2459               && !q->is_fully_specified_function()
2460               && q->has_function_str)
2461             q->sess.print_warning (_("For probing a particular line, use a "
2462                                      ".statement() probe, not .function()"),
2463                                    q->base_probe->tok);
2464
2465           // Otherwise, simply probe all resolved functions.
2466           for (auto i = q->filtered_functions.begin();
2467                i != q->filtered_functions.end(); ++i)
2468             query_func_info (i->entrypc, *i, q);
2469
2470           // And all inline instances (if we're not excluding inlines with ".call")
2471           if (! q->has_call)
2472             for (auto i = q->filtered_inlines.begin();
2473                  i != q->filtered_inlines.end(); ++i)
2474               query_inline_instance_info (*i, q);
2475         }
2476       return DWARF_CB_OK;
2477     }
2478   catch (const semantic_error& e)
2479     {
2480       // q->sess.print_error (e);
2481       throw;
2482       // return DWARF_CB_ABORT;
2483     }
2484 }
2485
2486
2487 void
2488 dwarf_query::query_module_functions ()
2489 {
2490   try
2491     {
2492       filtered_srcfiles.clear();
2493       filtered_functions.clear();
2494       filtered_inlines.clear();
2495
2496       // Collect all module functions so we know which CUs are interesting
2497       int rc = dw.iterate_single_function(query_dwarf_func, this, function);
2498       if (rc != DWARF_CB_OK)
2499         return;
2500
2501       set<void*> used_cus; // by cu->addr
2502       vector<Dwarf_Die> cus;
2503       Dwarf_Die cu_mem;
2504
2505       auto bfis = filtered_all();
2506       for (auto i = bfis.begin(); i != bfis.end(); ++i)
2507         if (dwarf_diecu(&i->die, &cu_mem, NULL, NULL) &&
2508             used_cus.insert(cu_mem.addr).second)
2509           cus.push_back(cu_mem);
2510
2511       // Reset the dupes since we didn't actually collect them the first time
2512       alias_dupes.clear();
2513       inline_dupes.clear();
2514
2515       // Run the query again on the individual CUs
2516       for (auto i = cus.begin(); i != cus.end(); ++i){
2517         rc = query_cu(&*i, this);
2518         if (rc != DWARF_CB_OK)
2519           return;
2520       }
2521     }
2522   catch (const semantic_error& e)
2523     {
2524       sess.print_error (e);
2525     }
2526 }
2527
2528
2529 static bool
2530 validate_module_elf (systemtap_session& sess,
2531                      Dwfl_Module *mod, const char *name,  base_query *q)
2532 {
2533   // Validate the machine code in this elf file against the
2534   // session machine.  This is important, in case the wrong kind
2535   // of debuginfo is being automagically processed by elfutils.
2536   // While we can tell i686 apart from x86-64, unfortunately
2537   // we can't help confusing i586 vs i686 (both EM_386).
2538   //
2539   // In case of a mismatch, soft-reject (ignore it with a warning).
2540   // This is important in case of probing by buildid or mass
2541   // debuginfod where some random architecture's module might come
2542   // back.
2543
2544   Dwarf_Addr bias;
2545   // We prefer dwfl_module_getdwarf to dwfl_module_getelf here,
2546   // because dwfl_module_getelf can force costly section relocations
2547   // we don't really need, while either will do for this purpose.
2548   Elf* elf = (dwarf_getelf (dwfl_module_getdwarf (mod, &bias))
2549                   ?: dwfl_module_getelf (mod, &bias));
2550
2551   GElf_Ehdr ehdr_mem;
2552   GElf_Ehdr* em = gelf_getehdr (elf, &ehdr_mem);
2553   if (em == 0) { DWFL_ASSERT ("dwfl_getehdr", dwfl_errno()); }
2554   assert(em);
2555   int elf_machine = em->e_machine;
2556   const char* debug_filename = "";
2557   const char* main_filename = "";
2558   (void) dwfl_module_info (mod, NULL, NULL,
2559                                NULL, NULL, NULL,
2560                                & main_filename,
2561                                & debug_filename);
2562   const string& sess_machine = q->sess.architecture;
2563
2564   string expect_machine; // to match sess.machine (i.e., kernel machine)
2565   string expect_machine2;
2566
2567   // NB: See also the 'uname -m' squashing done in main.cxx.
2568   switch (elf_machine)
2569     {
2570       // x86 and ppc are bi-architecture; a 64-bit kernel
2571       // can normally run either 32-bit or 64-bit *userspace*.
2572     case EM_386:
2573       expect_machine = "i?86";
2574       if (! q->has_process) break; // 32-bit kernel/module
2575       /* Fallthrough */
2576     case EM_X86_64:
2577       expect_machine2 = "x86_64";
2578       break;
2579     case EM_PPC:
2580     case EM_PPC64:
2581       expect_machine = "powerpc";
2582       break;
2583     case EM_S390: expect_machine = "s390"; break;
2584     case EM_IA_64: expect_machine = "ia64"; break;
2585     case EM_ARM: expect_machine = "arm*"; break;
2586     case EM_AARCH64: expect_machine = "arm64"; break;
2587     case EM_MIPS: expect_machine = "mips"; break;
2588     case EM_RISCV: expect_machine = "riscv"; break;
2589       // XXX: fill in some more of these
2590     default: expect_machine = "?"; break;
2591     }
2592
2593   if (! debug_filename) debug_filename = main_filename;
2594   if (! debug_filename) debug_filename = name;
2595
2596   if (fnmatch (expect_machine.c_str(), sess_machine.c_str(), 0) != 0 &&
2597       fnmatch (expect_machine2.c_str(), sess_machine.c_str(), 0) != 0)
2598     {
2599       sess.print_warning (_F("ELF machine %s|%s (code %d) mismatch with target %s in '%s'",
2600                              expect_machine.c_str(), expect_machine2.c_str(), elf_machine,
2601                              sess_machine.c_str(), debug_filename));
2602       return false;
2603     }
2604
2605   if (q->sess.verbose>2)
2606     clog << _F("focused on module '%s' = [%#" PRIx64 "-%#" PRIx64 ", bias %#" PRIx64
2607                " file %s ELF machine %s|%s (code %d)\n",
2608                q->dw.module_name.c_str(), q->dw.module_start, q->dw.module_end,
2609                q->dw.module_bias, debug_filename, expect_machine.c_str(),
2610                expect_machine2.c_str(), elf_machine);
2611
2612   return true;
2613 }
2614
2615
2616
2617 static Dwarf_Addr
2618 lookup_symbol_address (Dwfl_Module *m, const char* wanted)
2619 {
2620   int syments = dwfl_module_getsymtab(m);
2621   assert(syments);
2622   for (int i = 1; i < syments; ++i)
2623     {
2624       GElf_Sym sym;
2625       const char *name = dwfl_module_getsym(m, i, &sym, NULL);
2626       if (name != NULL && strcmp(name, wanted) == 0)
2627         return sym.st_value;
2628     }
2629
2630   return 0;
2631 }
2632
2633
2634
2635 static int
2636 query_module (Dwfl_Module *mod,
2637               void **,
2638               const char *name,
2639               Dwarf_Addr addr,
2640               base_query *q)
2641 {
2642   try
2643     {
2644       module_info* mi = q->sess.module_cache->cache[name];
2645       if (mi == 0)
2646         {
2647           mi = q->sess.module_cache->cache[name] = new module_info(name);
2648
2649           mi->mod = mod;
2650           mi->addr = addr;
2651
2652           const char* debug_filename = "";
2653           const char* main_filename = "";
2654           (void) dwfl_module_info (mod, NULL, NULL,
2655                                    NULL, NULL, NULL,
2656                                    & main_filename,
2657                                    & debug_filename);
2658
2659           if (debug_filename || main_filename)
2660             {
2661               mi->elf_path = debug_filename ?: main_filename;
2662             }
2663           else if (name == TOK_KERNEL)
2664             {
2665               mi->dwarf_status = info_absent;
2666             }
2667         }
2668       // OK, enough of that module_info caching business.
2669
2670       q->dw.focus_on_module(mod, mi);
2671
2672       // If we have enough information in the pattern to skip a module and
2673       // the module does not match that information, return early.
2674       if (!q->dw.module_name_matches(q->module_val))
2675         return pending_interrupts ? DWARF_CB_ABORT : DWARF_CB_OK;
2676
2677       // Don't allow module("*kernel*") type expressions to match the
2678       // elfutils module "kernel", which we refer to in the probe
2679       // point syntax exclusively as "kernel.*".
2680       if (q->dw.module_name == TOK_KERNEL && ! q->has_kernel)
2681         return pending_interrupts ? DWARF_CB_ABORT : DWARF_CB_OK;
2682
2683       if (mod)
2684         {
2685           if (! validate_module_elf(q->sess, mod, name, q))
2686             return DWARF_CB_OK;
2687         }
2688       else
2689         assert(q->has_kernel);   // and no vmlinux to examine
2690
2691       if (q->sess.verbose>2)
2692         cerr << _F("focused on module '%s'\n", q->dw.module_name.c_str());
2693
2694
2695       // Collect a few kernel addresses.  XXX: these belong better
2696       // to the sess.module_info["kernel"] struct.
2697       if (q->dw.module_name == TOK_KERNEL)
2698         {
2699           if (! q->sess.sym_kprobes_text_start)
2700             q->sess.sym_kprobes_text_start = lookup_symbol_address (mod, "__kprobes_text_start");
2701           if (! q->sess.sym_kprobes_text_end)
2702             q->sess.sym_kprobes_text_end = lookup_symbol_address (mod, "__kprobes_text_end");
2703           if (! q->sess.sym_stext)
2704             q->sess.sym_stext = lookup_symbol_address (mod, "_stext");
2705         }
2706
2707       // If there is a .library component, then q->path will hold the path to
2708       // the executable if the library was fully resolved. If not (e.g. not
2709       // absolute, or globby), resort to iterate_over_libraries().
2710       if (q->has_library && q->path.empty())
2711         q->dw.iterate_over_libraries (&q->query_library_callback, q);
2712       // .plt is translated to .plt.statement(N).  We only want to iterate for the
2713       // .plt case
2714       else if (q->has_plt && ! q->has_statement)
2715         {
2716           q->dw.iterate_over_plt (q, &q->query_plt_callback);
2717           q->visited_modules.insert(name);
2718         }
2719       else
2720         {
2721           // search the module for matches of the probe point.
2722           q->handle_query_module();
2723           q->visited_modules.insert(name);
2724         }
2725
2726       // If we know that there will be no more matches, abort early.
2727       if (q->dw.module_name_final_match(q->module_val) || pending_interrupts)
2728         return DWARF_CB_ABORT;
2729       else
2730         return DWARF_CB_OK;
2731     }
2732   catch (const semantic_error& e)
2733     {
2734       // q->sess.print_error (e);
2735       // return DWARF_CB_ABORT;
2736       throw;
2737     }
2738 }
2739
2740
// Callback trampoline for dwflpp::iterate_over_libraries (see its use in
// query_module): forwards the library name DATA to the query_library()
// member of the query object ME.
void
base_query::query_library_callback (base_query *me, const char *data)
{
  me->query_library (data);
}
2746
2747
2748 probe*
2749 build_library_probe(dwflpp& dw,
2750                     const string& library,
2751                     probe *base_probe,
2752                     probe_point *base_loc)
2753 {
2754   probe_point* specific_loc = new probe_point(*base_loc);
2755   vector<probe_point::component*> derived_comps;
2756
2757   // Create new probe point for the matching library. This is what will be
2758   // shown in listing mode. Also replace the process(str) with the real
2759   // absolute path rather than keeping what the user typed in.
2760   for (auto it = specific_loc->components.begin();
2761        it != specific_loc->components.end(); ++it)
2762     if ((*it)->functor == TOK_PROCESS)
2763       derived_comps.push_back(new probe_point::component(TOK_PROCESS,
2764           new literal_string(path_remove_sysroot(dw.sess, dw.module_name))));
2765     else if ((*it)->functor == TOK_LIBRARY)
2766       derived_comps.push_back(new probe_point::component(TOK_LIBRARY,
2767           new literal_string(path_remove_sysroot(dw.sess, library)),
2768           true /* from_glob */ ));
2769     else
2770       derived_comps.push_back(*it);
2771   probe_point* derived_loc = new probe_point(*specific_loc);
2772   derived_loc->components = derived_comps;
2773   return new probe (new probe (base_probe, specific_loc), derived_loc);
2774 }
2775
2776 bool
2777 query_one_library (const char *library, dwflpp & dw,
2778     const string user_lib, probe * base_probe, probe_point *base_loc,
2779     vector<derived_probe *> & results)
2780 {
2781   if (dw.function_name_matches_pattern(library, "*" + user_lib))
2782     {
2783       string library_path = find_executable (library, "", dw.sess.sysenv,
2784                                              "LD_LIBRARY_PATH");
2785       probe *new_base = build_library_probe(dw, library_path,
2786                                             base_probe, base_loc);
2787
2788       // We pass true for the optional parameter of derive_probes() here to
2789       // indicate that we don't mind if the probe doesn't resolve. This is
2790       // because users expect wildcarded probe points to only apply to a subset
2791       // of matching libraries, in the sense of "any", rather than "all", just
2792       // like module("*") and process("*"). See also dwarf_builder::build().
2793       derive_probes(dw.sess, new_base, results, true /* optional */ );
2794
2795       if (dw.sess.verbose > 2)
2796         clog << _("module=") << library_path << endl;
2797       return true;
2798     }
2799   return false;
2800 }
2801
2802
2803 void
2804 dwarf_query::query_library (const char *library)
2805 {
2806   visited_libraries.insert(library);
2807   if (query_one_library (library, dw, user_lib, base_probe, base_loc, results))
2808     resolved_library = true;
2809 }
2810
2811 struct plt_expanding_visitor: public var_expanding_visitor
2812 {
2813   plt_expanding_visitor(systemtap_session&s, const string & entry):
2814     var_expanding_visitor (s),
2815     entry (entry)
2816   {
2817   }
2818   const string & entry;
2819
2820   void visit_target_symbol (target_symbol* e);
2821 };
2822
2823
2824 void
2825 base_query::query_plt_callback (base_query *me, const char *entry, size_t address)
2826 {
2827   if (me->dw.function_name_matches_pattern (entry, me->plt_val))
2828     me->query_plt (entry, address);
2829   me->dw.mod_info->plt_funcs.insert(entry);
2830 }
2831
2832
2833 void
2834 query_one_plt (const char *entry, long addr, dwflpp & dw,
2835     probe * base_probe, probe_point *base_loc,
2836     vector<derived_probe *> & results, base_query *q)
2837 {
2838       interned_string module = dw.module_name;
2839       if (q->has_process)
2840         module = path_remove_sysroot(dw.sess, module);
2841
2842       probe_point* specific_loc = new probe_point(*base_loc);
2843       specific_loc->well_formed = true;
2844
2845       vector<probe_point::component*> derived_comps;
2846
2847       if (dw.sess.verbose > 2)
2848         clog << _F("plt entry=%s\n", entry);
2849
2850       for (auto it = specific_loc->components.begin();
2851            it != specific_loc->components.end(); ++it)
2852         if ((*it)->functor == TOK_PROCESS)
2853           {
2854             // Replace with fully resolved path
2855             *it = new probe_point::component(TOK_PROCESS,
2856                     new literal_string(q->has_library ? q->path : module));
2857             derived_comps.push_back(*it);
2858           }
2859         else if ((*it)->functor == TOK_PLT)
2860           {
2861             // Replace possibly globby component
2862             *it = new probe_point::component(TOK_PLT,
2863                                              new literal_string(string(entry)));
2864             derived_comps.push_back(*it);
2865             derived_comps.push_back(new probe_point::component(TOK_STATEMENT,
2866                                                                new literal_number(addr, true)));
2867           }
2868         else
2869           derived_comps.push_back(*it);
2870       probe_point* derived_loc = new probe_point(*specific_loc);
2871       derived_loc->components = derived_comps;
2872       probe *new_base = new probe (new probe (base_probe, specific_loc),
2873                                    derived_loc);
2874       string e = string(entry);
2875       plt_expanding_visitor pltv (dw.sess, e);
2876       var_expand_const_fold_loop (dw.sess, new_base->body, pltv);
2877
2878       literal_map_t params;
2879       for (unsigned i = 0; i < derived_loc->components.size(); ++i)
2880        {
2881           probe_point::component *c = derived_loc->components[i];
2882           params[c->functor] = c->arg;
2883        }
2884       dwarf_query derived_q(new_base, derived_loc, dw, params, results, "", "");
2885       dw.iterate_over_modules<base_query>(&query_module, &derived_q);
2886 }
2887
2888
// Handle one matching PLT entry (see base_query::query_plt_callback):
// delegates to query_one_plt() with this query's dwflpp, base probe, base
// location and result vector.  NB: ADDRESS arrives as a size_t and is
// passed on as query_one_plt's `long addr' parameter.
void
dwarf_query::query_plt (const char *entry, size_t address)
{
  query_one_plt (entry, address, dw, base_probe, base_loc, results, this);
}
2894
2895 // This would more naturally fit into elaborate.cxx:semantic_pass_symbols,
2896 // but the needed declaration for module_cache is not available there.
2897 // Nor for that matter in session.cxx.  Only in this CU is that field ever
2898 // set (in query_module() above), so we clean it up here too.
2899 static void
2900 delete_session_module_cache (systemtap_session& s)
2901 {
2902   if (s.module_cache) {
2903     if (s.verbose > 3)
2904       clog << _("deleting module_cache") << endl;
2905     delete s.module_cache;
2906     s.module_cache = 0;
2907   }
2908 }
2909
2910
2911 struct dwarf_var_expanding_visitor: public var_expanding_visitor
2912 {
2913   dwarf_query & q;
2914   Dwarf_Die *scope_die;
2915   Dwarf_Addr addr;
2916   block *add_block;
2917   block *add_call_probe; // synthesized from .return probes with saved $vars
2918   // NB: tids are not always collected in add_block & add_call_probe, because
2919   // gen_kretprobe_saved_return doesn't need them.  Thus we need these extra
2920   // *_tid bools for gen_mapped_saved_return to tell what's there.
2921   bool add_block_tid, add_call_probe_tid;
2922   unsigned saved_longs, saved_strings; // data saved within kretprobes
2923   unordered_map<Dwarf_Addr, block *> entry_probes;
2924   unordered_map<std::string, expression *> return_ts_map;
2925   vector<Dwarf_Die> scopes;
2926   // probe counter name -> pointer of associated probe
2927   std::set<std::string> perf_counter_refs;
2928   bool visited;
2929
2930   dwarf_var_expanding_visitor(dwarf_query & q, Dwarf_Die *sd, Dwarf_Addr a):
2931     var_expanding_visitor(q.sess),
2932     q(q), scope_die(sd), addr(a), add_block(NULL), add_call_probe(NULL),
2933     add_block_tid(false), add_call_probe_tid(false),
2934     saved_longs(0), saved_strings(0), visited(false) {}
2935   expression* gen_mapped_saved_return(expression* e, const string& name);
2936   expression* gen_kretprobe_saved_return(expression* e);
2937   void visit_target_symbol_saved_return (target_symbol* e);
2938   void visit_target_symbol_context (target_symbol* e);
2939   void visit_target_symbol (target_symbol* e);
2940   void visit_atvar_op (atvar_op* e);
2941   void visit_cast_op (cast_op* e);
2942   void visit_entry_op (entry_op* e);
2943   void visit_perf_op (perf_op* e);
2944
2945 private:
2946   vector<Dwarf_Die>& getscopes(target_symbol *e);
2947 };
2948
2949
// Definition of the static data member var_expanding_visitor::tick, which
// starts out at zero.  NOTE(review): its users lie outside this part of the
// file; presumably a counter shared by all visitors -- confirm.
unsigned var_expanding_visitor::tick = 0;
2951
2952
// Construct a visitor for session S: the update_visitor base is given the
// session's verbosity, `sess' is bound to S, and `op' (the operator pointer
// that rewrite_lvalue() sets while visiting an lvalue) is value-initialized.
var_expanding_visitor::var_expanding_visitor (systemtap_session& s):
  update_visitor(s.verbose), sess(s), op()
{
  // FIXME: for the time being, by default we only support plain '$foo
  // = bar', not '+=' or any other op= variant. This is fixable, but a
  // bit ugly.
  //
  // If derived classes desire to add additional operator support, add
  // new operators to this list in the derived class constructor.
  valid_ops.insert ("=");
}
2964
2965
2966 void
2967 var_expanding_visitor::provide_lvalue_call(functioncall* fcall)
2968 {
2969   // Provide the functioncall to our parent, so that it can be used to
2970   // substitute for the assignment node immediately above us.
2971   assert(!target_symbol_setter_functioncalls.empty());
2972   *(target_symbol_setter_functioncalls.top()) = fcall;
2973 }
2974
2975
2976 bool
2977 var_expanding_visitor::rewrite_lvalue(const token* tok, interned_string& eop,
2978                                       expression*& lvalue, expression*& rvalue)
2979 {
2980   // Our job would normally be to require() the left and right sides
2981   // into a new assignment. What we're doing is slightly trickier:
2982   // we're pushing a functioncall** onto a stack, and if our left
2983   // child sets the functioncall* for that value, we're going to
2984   // assume our left child was a target symbol -- transformed into a
2985   // set_target_foo(value) call, and it wants to take our right child
2986   // as the argument "value".
2987   //
2988   // This is why some people claim that languages with
2989   // constructor-decomposing case expressions have a leg up on
2990   // visitors.
2991
2992   functioncall *fcall = NULL;
2993
2994   // Let visit_target_symbol know what operator it should handle.
2995   interned_string* old_op = op;
2996   op = & eop;
2997
2998   target_symbol_setter_functioncalls.push (&fcall);
2999   replace (lvalue);
3000   target_symbol_setter_functioncalls.pop ();
3001   replace (rvalue);
3002
3003   op = old_op;
3004
3005   if (fcall != NULL)
3006     {
3007       // Our left child is informing us that it was a target variable
3008       // and it has been replaced with a set_target_foo() function
3009       // call; we are going to provide that function call -- with the
3010       // right child spliced in as sole argument -- in place of
3011       // ourselves, in the var expansion we're in the middle of making.
3012
3013       if (valid_ops.find (eop) == valid_ops.end ())
3014         {
3015           // Build up a list of supported operators.
3016           string ops;
3017           int valid_ops_size = 0;
3018           for (auto i = valid_ops.begin(); i != valid_ops.end(); i++)
3019           {
3020             ops += " " + *i + ",";
3021             valid_ops_size++;
3022           }
3023           ops.resize(ops.size() - 1);   // chop off the last ','
3024
3025           // Throw the error.
3026           throw SEMANTIC_ERROR (_NF("Only the following assign operator is implemented on target variables: %s",
3027                                             "Only the following assign operators are implemented on target variables: %s",
3028                                            valid_ops_size, ops.c_str()), tok);
3029
3030         }
3031
3032       assert (lvalue == fcall);
3033       if (rvalue)
3034         fcall->args.push_back (rvalue);
3035       provide (fcall);
3036       return true;
3037     }
3038   else
3039     return false;
3040 }
3041
3042
3043 void
3044 var_expanding_visitor::visit_assignment (assignment* e)
3045 {
3046   if (!rewrite_lvalue (e->tok, e->op, e->left, e->right))
3047     provide (e);
3048 }
3049
3050
3051 void
3052 var_expanding_visitor::visit_pre_crement (pre_crement* e)
3053 {
3054   expression *dummy = NULL;
3055   if (!rewrite_lvalue (e->tok, e->op, e->operand, dummy))
3056     provide (e);
3057 }
3058
3059
3060 void
3061 var_expanding_visitor::visit_post_crement (post_crement* e)
3062 {
3063   expression *dummy = NULL;
3064   if (!rewrite_lvalue (e->tok, e->op, e->operand, dummy))
3065     provide (e);
3066 }
3067
3068
3069 void
3070 var_expanding_visitor::visit_delete_statement (delete_statement* s)
3071 {
3072   string fakeop = "delete";
3073   interned_string fopr = fakeop;
3074   expression *dummy = NULL;
3075   if (!rewrite_lvalue (s->tok, fopr, s->value, dummy))
3076     provide (s);
3077 }
3078
3079
3080 void
3081 var_expanding_visitor::visit_defined_op (defined_op* e)
3082 {
3083   expression * const old_operand = e->operand;
3084   bool resolved = true;
3085
3086   defined_ops.push (e);
3087   try {
3088     replace (e->operand);
3089
3090     // NB: Formerly, we had some curious cases to consider here, depending on what
3091     // various visit_target_symbol() implementations do for successful or
3092     // erroneous resolutions.  Some would signal a visit_target_symbol failure
3093     // with an exception, with a set flag within the target_symbol, or nothing
3094     // at all.
3095     //
3096     // Now, failures always have to be signalled with a
3097     // saved_conversion_error being chained to the target_symbol.
3098     // Successes have to result in an attempted rewrite of the
3099     // target_symbol (via provide()).
3100     //
3101     // Edna Mode: "no capes".  fche: "no exceptions".  reality: not that simple
3102
3103     // dwarf stuff: success: rewrites to a function; failure: retains target_symbol, sets saved_conversion_error
3104     //
3105     // sdt-kprobes sdt.h: success: string or functioncall; failure: semantic_error
3106     //
3107     // sdt-uprobes: success: string or no op; failure: no op; expect derived/synthetic
3108     //              dwarf probe to take care of it.
3109     //              But this is rather unhelpful.  So we rig the sdt_var_expanding_visitor
3110     //              to pass through @defined() to the synthetic dwarf probe.
3111     //
3112     // utrace: success: rewrites to function; failure: semantic_error
3113     //
3114     // procfs: success: rewrites to function; failure: semantic_error
3115     //
3116     // ... but @defined() can nest other types of expressions too, for better or for worse,
3117     // which can result in semantic_error.
3118
3119     target_symbol* tsym = dynamic_cast<target_symbol*> (e->operand);
3120     if (tsym && tsym->saved_conversion_error) // failing
3121       resolved = false;
3122     else if (e->operand == old_operand) // unresolved but not marked failing
3123       {
3124         // There are some visitors that won't touch certain target_symbols,
3125         // e.g. dwarf_var_expanding_visitor won't resolve @cast.  We should
3126         // leave it for now so some other visitor can have a chance.
3127         defined_ops.pop ();
3128         provide (e);
3129         return;
3130       }
3131     else // resolved, rewritten to some other expression type
3132       resolved = true;
3133   } catch (const semantic_error& e) {
3134     // some uncooperative value like @perf("NO_SUCH_VALUE")
3135     resolved = false;
3136   }
3137   defined_ops.pop ();
3138
3139   if (sess.verbose>2)
3140     clog << _("Resolving ") << *e << ": " << resolved << endl;
3141
3142   literal_number* ln = new literal_number (resolved ? 1 : 0);
3143   ln->tok = e->tok;
3144   abort_provide (ln); // PR20672; stop updating visitor
3145 }
3146
3147
3148 // Traverse a staptree*, looking for any operation that requires probe
3149 // context to work
3150 struct context_op_finder: public traversing_visitor
3151 {
3152 public:
3153   bool context_op_p;
3154   context_op_finder(): context_op_p(false) {}
3155
3156   void visit_target_symbol (target_symbol* e)
3157   { context_op_p = true; traversing_visitor::visit_target_symbol(e); }
3158   void visit_defined_op (defined_op* e)
3159   { context_op_p = true; traversing_visitor::visit_defined_op(e); }
3160   void visit_atvar_op (atvar_op* e)
3161   { context_op_p = true; traversing_visitor::visit_atvar_op(e); }
3162   void visit_cast_op (cast_op* e) // if module is specified, not a context_op_p
3163   { if (e->module == "") context_op_p = true; traversing_visitor::visit_cast_op(e); }
3164   void visit_autocast_op (autocast_op* e) // XXX do these show up early?
3165   { context_op_p = true; traversing_visitor::visit_autocast_op(e); }
3166   void visit_perf_op (perf_op* e)
3167   { context_op_p = true; traversing_visitor::visit_perf_op(e); }
3168 };
3169
3170
3171 void
3172 var_expanding_visitor::visit_functioncall (functioncall* e)
3173 {
3174   update_visitor::visit_functioncall(e); // for arguments etc.
3175
3176   if (strverscmp(sess.compatible.c_str(), "4.3") >= 0 && // PR25841 behaviour
3177       e->referents.size() == 0 && // first time seeing this functioncall
3178       sess.symbol_resolver && // from some sort of symbol-resolution context
3179       sess.symbol_resolver->current_probe) // prevent being called from semantic_pass_symbols function-only loop
3180     {
3181       // need to early resolve
3182       auto refs = sess.symbol_resolver->find_functions (e, e->function, e->args.size (), e->tok);
3183
3184       vector<functiondecl*> copyrefs;
3185       for (auto ri = refs.begin(); ri != refs.end(); ri++)
3186         {
3187           auto r = *ri;
3188           // We accumulate these functiondecls, so we don't recurse infinitely.
3189           // Recursive functions will be handled correctly though because the second
3190           // time we clone, the first clone will be found & reused.
3191           if (early_resolution_in_progress.find(r) != early_resolution_in_progress.end())
3192             continue;
3193
3194           context_op_finder cop;
3195           r->body->visit(& cop);
3196           if (cop.context_op_p) // need to clone
3197             {
3198               r->cloned_p = true; // so don't warn about elision later
3199
3200               if (sess.verbose > 2)
3201                 clog << _("need a clone of context-op function ") << *r->tok << endl;
3202
3203               // check if we already cloned it, e.g. if we have two
3204               // calls to the same function from a probe.
3205               string clone_function_name = string("__clone_") +
3206                 sess.symbol_resolver->current_probe->name() + string("_of_") + string(r->name);
3207
3208               auto johnny = sess.functions.find(clone_function_name);
3209               if (johnny != sess.functions.end())
3210                 {
3211                   if (sess.verbose > 3)
3212                     clog << _("reusing previous clone") << endl;
3213                   e->function = johnny->first; // overwrite functioncall name for -p2 disambiguation
3214                   copyrefs.push_back(johnny->second);
3215                   continue;
3216                 }
3217
3218               // nope, must make a new clone
3219
3220               auto nf = new functiondecl();
3221               nf->synthetic = true;
3222               nf->tok = r->tok;
3223               // nf->unmangled_name = r->unmangled_name;
3224               nf->unmangled_name = nf->name = clone_function_name;
3225               nf->mangle_oldstyle = r->mangle_oldstyle;
3226               nf->has_next = r->has_next;
3227               nf->priority = r->priority;
3228               for (auto ji = r->formal_args.begin(); ji != r->formal_args.end(); ji++)
3229                 {
3230                   auto j = *ji;
3231                   auto v = new vardecl();
3232                   v->type = pe_unknown; // = j->type anyway; we're before type inference
3233                   v->tok = j->tok;
3234                   v->name = j->name;
3235                   v->unmangled_name = j->unmangled_name;
3236                   nf->formal_args.push_back (v);
3237                 }
3238               // leave empty locals, unused_locals -- they'll be filled soon
3239
3240               // deep_copy the body then process it recursively
3241               nf->body = deep_copy_visitor::deep_copy(r->body);
3242               early_resolution_in_progress.insert(r);
3243               require (nf->body, false); // process it recursively
3244               early_resolution_in_progress.erase(r);
3245
3246               sess.functions.insert(make_pair(nf->name, nf));
3247               e->function = nf->name; // overwrite functioncall name for -p2 disambiguation
3248               copyrefs.push_back(nf);
3249
3250               if (sess.verbose > 3) {
3251                 clog << _("clone: ");
3252                 nf->print(clog);
3253                 clog << endl;
3254               }
3255             }
3256           else
3257             copyrefs = refs; // already added into s.functions[]
3258         }
3259
3260       e->referents = copyrefs;
3261     }
3262
3263   else if (strverscmp(sess.compatible.c_str(), "4.3") >= 0 && // PR25841 behaviour
3264            e->referents.size() != 0) // second or later time calling
3265     {
3266       for (auto ri = e->referents.begin(); ri != e->referents.end(); ri++)
3267         {
3268           auto r = *ri;
3269           if (early_resolution_in_progress.find(r) != early_resolution_in_progress.end())
3270             {
3271               // already warned earlier
3272               continue;
3273             }
3274
3275           early_resolution_in_progress.insert(r);
3276           require (r->body, false); // process it recursively
3277           early_resolution_in_progress.erase(r);
3278         }
3279     }
3280 }
3281
3282
3283 struct dwarf_pretty_print
3284 {
3285   dwarf_pretty_print (dwflpp& dw, vector<Dwarf_Die>& scopes, Dwarf_Addr pc,
3286                       const string& local, bool userspace_p,
3287                       const target_symbol& e, bool lvalue):
3288     dw(dw), local(local), scopes(scopes), pc(pc),
3289     pointer(NULL), pointer_type(),
3290     userspace_p(userspace_p), deref_p(true)
3291   {
3292     init_ts (e);
3293     dw.type_die_for_local (scopes, pc, local, ts, &base_type, lvalue);
3294   }
3295
3296   dwarf_pretty_print (dwflpp& dw, Dwarf_Die *scope_die, Dwarf_Addr pc,
3297                       bool userspace_p, const target_symbol& e, bool lvalue):
3298     dw(dw), scopes(1, *scope_die), pc(pc),
3299     pointer(NULL), pointer_type(),
3300     userspace_p(userspace_p), deref_p(true)
3301   {
3302     init_ts (e);
3303     dw.type_die_for_return (&scopes[0], pc, ts, &base_type, lvalue);
3304   }
3305
3306   dwarf_pretty_print (dwflpp& dw, Dwarf_Die *type_die, expression* pointer,
3307                       bool deref_p, bool userspace_p, const target_symbol& e,
3308                       bool lvalue):
3309     dw(dw), pc(0), pointer(pointer), pointer_type(*type_die),
3310     userspace_p(userspace_p), deref_p(deref_p)
3311   {
3312     init_ts (e);
3313     dw.type_die_for_pointer (type_die, ts, &base_type, lvalue);
3314   }
3315
3316   functioncall* expand ();
3317   ~dwarf_pretty_print () { delete ts; }
3318
3319 private:
3320   dwflpp& dw;
3321   target_symbol* ts;
3322   bool print_full;
3323   Dwarf_Die base_type;
3324
3325   string local;
3326   vector<Dwarf_Die> scopes;
3327   Dwarf_Addr pc;
3328
3329   expression* pointer;
3330   Dwarf_Die pointer_type;
3331
3332   const bool userspace_p, deref_p;
3333
3334   void recurse (Dwarf_Die* type, target_symbol* e,
3335                 print_format* pf, bool top=false);
3336   void recurse_bitfield (Dwarf_Die* type, target_symbol* e,
3337                          print_format* pf);
3338   void recurse_base (Dwarf_Die* type, target_symbol* e,
3339                      print_format* pf);
3340   void recurse_array (Dwarf_Die* type, target_symbol* e,
3341                       print_format* pf, bool top);
3342   void recurse_pointer (Dwarf_Die* type, target_symbol* e,
3343                         print_format* pf, bool top);
3344   void recurse_struct (Dwarf_Die* type, target_symbol* e,
3345                        print_format* pf, bool top);
3346   void recurse_struct_members (Dwarf_Die* type, target_symbol* e,
3347                                print_format* pf, int& count);
3348   bool print_chars (Dwarf_Die* type, target_symbol* e, print_format* pf);
3349
3350   void init_ts (const target_symbol& e);
3351   expression* deref (target_symbol* e);
3352   bool push_deref (print_format* pf, const string& fmt, target_symbol* e);
3353 };
3354
3355
3356 void
3357 dwarf_pretty_print::init_ts (const target_symbol& e)
3358 {
3359   // Work with a new target_symbol so we can modify arguments
3360   ts = new target_symbol (e);
3361
3362   if (ts->addressof)
3363     throw SEMANTIC_ERROR(_("cannot take address of pretty-printed variable"), ts->tok);
3364
3365   size_t depth = ts->pretty_print_depth ();
3366   if (depth == 0)
3367     throw SEMANTIC_ERROR(_("invalid target_symbol for pretty-print"), ts->tok);
3368   print_full = depth > 1;
3369   ts->components.pop_back();
3370 }
3371
3372
3373 functioncall*
3374 dwarf_pretty_print::expand ()
3375 {
3376   static unsigned tick = 0;
3377
3378   // function pretty_print_X([pointer], [arg1, arg2, ...]) {
3379   //   try {
3380   //     return sprintf("{.foo=...}", (ts)->foo, ...)
3381   //   } catch {
3382   //     return "ERROR"
3383   //   }
3384   // }
3385
3386   // Create the function decl and call.
3387
3388   string fhash = detox_path(string(ts->tok->location.file->name));
3389   functiondecl *fdecl = new functiondecl;
3390   fdecl->tok = ts->tok;
3391   fdecl->synthetic = true;
3392   fdecl->unmangled_name = fdecl->name = "__private_" + fhash
3393     + "_dwarf_pretty_print_" + lex_cast(tick++);
3394   fdecl->type = pe_string;
3395
3396   functioncall* fcall = new functioncall;
3397   fcall->referents.push_back(fdecl); // may be needed for post-pass2a sym resolution; autocast08.stp
3398   fcall->tok = ts->tok;
3399   fcall->function = fdecl->name;
3400   fcall->type = pe_string;
3401
3402   // If there's a <pointer>, replace it with a new var and make that
3403   // the first function argument.
3404   if (pointer)
3405     {
3406       vardecl *v = new vardecl;
3407       v->type = pe_long;
3408       v->name = v->unmangled_name = "pointer";
3409       v->tok = ts->tok;
3410       v->synthetic = true;
3411       fdecl->formal_args.push_back (v);
3412       fcall->args.push_back (pointer);
3413
3414       symbol* sym = new symbol;
3415       sym->tok = ts->tok;
3416       sym->name = v->name;
3417       pointer = sym;
3418     }
3419
3420   // For each expression argument, replace it with a function argument.
3421   for (unsigned i = 0; i < ts->components.size(); ++i)
3422     if (ts->components[i].type == target_symbol::comp_expression_array_index)
3423       {
3424         vardecl *v = new vardecl;
3425         v->type = pe_long;
3426         v->unmangled_name = v->name = "index" + lex_cast(i);
3427         v->tok = ts->tok;
3428         fdecl->formal_args.push_back (v);
3429         fcall->args.push_back (ts->components[i].expr_index);
3430
3431         symbol* sym = new symbol;
3432         sym->tok = ts->tok;
3433         sym->name = v->name;
3434         ts->components[i].expr_index = sym;
3435       }
3436
3437   // Create the return sprintf.
3438   print_format* pf = print_format::create(ts->tok, "sprintf");
3439   return_statement* rs = new return_statement;
3440   rs->tok = ts->tok;
3441   rs->value = pf;
3442
3443   // Recurse into the actual values.
3444   recurse (&base_type, ts, pf, true);
3445   pf->components = print_format::string_to_components(pf->raw_components);
3446
3447   // Create the try-catch net
3448   try_block* tb = new try_block;
3449   tb->tok = ts->tok;
3450   tb->try_block = rs;
3451   tb->catch_error_var = 0;
3452   return_statement* rs2 = new return_statement;
3453   rs2->tok = ts->tok;
3454   rs2->value = new literal_string (string("ERROR"));
3455   rs2->value->tok = ts->tok;
3456   tb->catch_block = rs2;
3457   fdecl->body = tb;
3458
3459   fdecl->join (dw.sess);
3460   return fcall;
3461 }
3462
3463
3464 void
3465 dwarf_pretty_print::recurse (Dwarf_Die* start_type, target_symbol* e,
3466                              print_format* pf, bool top)
3467 {
3468   // deal with initial void* pointers
3469   if (!deref_p && null_die(start_type))
3470     {
3471       push_deref (pf, "%p", e);
3472       return;
3473     }
3474
3475   Dwarf_Die type;
3476   dw.resolve_unqualified_inner_typedie (start_type, &type, e);
3477
3478   switch (dwarf_tag(&type))
3479     {
3480     default:
3481       // XXX need a warning?
3482       // throw semantic_error ("unsupported type (tag " + lex_cast(dwarf_tag(&type))
3483       //                       + ") for " + dwarf_type_name(&type), e->tok);
3484       pf->raw_components.append("?");
3485       break;
3486
3487     case DW_TAG_enumeration_type:
3488     case DW_TAG_base_type:
3489       recurse_base (&type, e, pf);
3490       break;
3491
3492     case DW_TAG_array_type:
3493       recurse_array (&type, e, pf, top);
3494       break;
3495
3496     case DW_TAG_pointer_type:
3497     case DW_TAG_reference_type:
3498     case DW_TAG_rvalue_reference_type:
3499       recurse_pointer (&type, e, pf, top);
3500       break;
3501
3502     case DW_TAG_subroutine_type:
3503       push_deref (pf, "<function>:%p", e);
3504       break;
3505
3506     case DW_TAG_union_type:
3507     case DW_TAG_structure_type:
3508     case DW_TAG_class_type:
3509       recurse_struct (&type, e, pf, top);
3510       break;
3511     }
3512 }
3513
3514
3515 // Bit fields are handled as a special-case combination of recurse() and
3516 // recurse_base(), only called from recurse_struct_members().  The main
3517 // difference is that the value is always printed numerically, even if the
3518 // underlying type is a char.
3519 void
3520 dwarf_pretty_print::recurse_bitfield (Dwarf_Die* start_type, target_symbol* e,
3521                                       print_format* pf)
3522 {
3523   Dwarf_Die type;
3524   dw.resolve_unqualified_inner_typedie (start_type, &type, e);
3525
3526   int tag = dwarf_tag(&type);
3527   if (tag != DW_TAG_base_type && tag != DW_TAG_enumeration_type)
3528     {
3529       // XXX need a warning?
3530       // throw semantic_error ("unsupported bitfield type (tag " + lex_cast(tag)
3531       //                       + ") for " + dwarf_type_name(&type), e->tok);
3532       pf->raw_components.append("?");
3533       return;
3534     }
3535
3536   Dwarf_Attribute attr;
3537   Dwarf_Word encoding = (Dwarf_Word) -1;
3538   dwarf_formudata (dwarf_attr_integrate (&type, DW_AT_encoding, &attr),
3539                    &encoding);
3540   switch (encoding)
3541     {
3542     case DW_ATE_float:
3543     case DW_ATE_complex_float:
3544       // XXX need a warning?
3545       // throw semantic_error ("unsupported bitfield type (encoding " + lex_cast(encoding)
3546       //                       + ") for " + dwarf_type_name(&type), e->tok);
3547       pf->raw_components.append("?");
3548       break;
3549
3550     case DW_ATE_unsigned:
3551     case DW_ATE_unsigned_char:
3552       push_deref (pf, "%u", e);
3553       break;
3554
3555     case DW_ATE_signed:
3556     case DW_ATE_signed_char:
3557     default:
3558       push_deref (pf, "%i", e);
3559       break;
3560     }
3561 }
3562
3563
3564 void
3565 dwarf_pretty_print::recurse_base (Dwarf_Die* type, target_symbol* e,
3566                                   print_format* pf)
3567 {
3568   Dwarf_Attribute attr;
3569   Dwarf_Word encoding = (Dwarf_Word) -1;
3570   dwarf_formudata (dwarf_attr_integrate (type, DW_AT_encoding, &attr),
3571                    &encoding);
3572   switch (encoding)
3573     {
3574     case DW_ATE_float:
3575     case DW_ATE_complex_float:
3576       // XXX need a warning?
3577       // throw semantic_error ("unsupported type (encoding " + lex_cast(encoding)
3578       //                       + ") for " + dwarf_type_name(type), e->tok);
3579       pf->raw_components.append("?");
3580       break;
3581
3582     case DW_ATE_UTF: // XXX need to add unicode to _stp_vsprint_char
3583     case DW_ATE_signed_char:
3584     case DW_ATE_unsigned_char:
3585       // Use escapes to make sure that non-printable characters
3586       // don't interrupt our stream (especially '\0' values).
3587       push_deref (pf, "'%#c'", e);
3588       break;
3589
3590     case DW_ATE_unsigned:
3591       push_deref (pf, "%u", e);
3592       break;
3593
3594     case DW_ATE_signed:
3595     default:
3596       push_deref (pf, "%i", e);
3597       break;
3598     }
3599 }
3600
3601
3602 void
3603 dwarf_pretty_print::recurse_array (Dwarf_Die* type, target_symbol* e,
3604                                    print_format* pf, bool top)
3605 {
3606   if (!top && !print_full)
3607     {
3608       pf->raw_components.append("[...]");
3609       return;
3610     }
3611
3612   Dwarf_Die childtype;
3613   dwarf_attr_die (type, DW_AT_type, &childtype);
3614
3615   if (print_chars (&childtype, e, pf))
3616     return;
3617
3618   pf->raw_components.append("[");
3619
3620   // We print the array up to the first 5 elements.
3621   // XXX how can we determine the array size?
3622   // ... for now, just print the first element
3623   // NB: limit to 32 args; see PR10750 and c_unparser::visit_print_format.
3624   unsigned i, size = 1;
3625   for (i=0; i < size && i < 5 && pf->args.size() < 32; ++i)
3626     {
3627       if (i > 0)
3628         pf->raw_components.append(", ");
3629       target_symbol* e2 = new target_symbol(*e);
3630       e2->components.push_back (target_symbol::component(e->tok, i));
3631       recurse (&childtype, e2, pf);
3632     }
3633   if (i < size || 1/*XXX until real size is known */)
3634     pf->raw_components.append(", ...");
3635   pf->raw_components.append("]");
3636 }
3637
3638
3639 void
3640 dwarf_pretty_print::recurse_pointer (Dwarf_Die* type, target_symbol* e,
3641                                      print_format* pf, bool top)
3642 {
3643   // We chase to top-level pointers, but leave the rest alone
3644   bool void_p = true;
3645   Dwarf_Die pointee;
3646   if (dwarf_attr_die (type, DW_AT_type, &pointee))
3647     {
3648       try
3649         {
3650           dw.resolve_unqualified_inner_typedie (&pointee, &pointee, e);
3651           void_p = false;
3652         }
3653       catch (const semantic_error&) {}
3654     }
3655
3656   if (!void_p)
3657     {
3658       if (print_chars (&pointee, e, pf))
3659         return;
3660
3661       if (top)
3662         {
3663           recurse (&pointee, e, pf, top);
3664           return;
3665         }
3666     }
3667
3668   push_deref (pf, "%p", e);
3669 }
3670
3671
3672 void
3673 dwarf_pretty_print::recurse_struct (Dwarf_Die* type, target_symbol* e,
3674                                     print_format* pf, bool top)
3675 {
3676   if (dwarf_hasattr(type, DW_AT_declaration))
3677     {
3678       Dwarf_Die *resolved = dw.declaration_resolve(type);
3679       if (!resolved)
3680         {
3681           // could be an error, but for now just stub it
3682           // throw semantic_error ("unresolved " + dwarf_type_name(type), e->tok);
3683           pf->raw_components.append("{...}");
3684           return;
3685         }
3686       type = resolved;
3687     }
3688
3689   int count = 0;
3690   pf->raw_components.append("{");
3691   if (top || print_full)
3692     recurse_struct_members (type, e, pf, count);
3693   else
3694     pf->raw_components.append("...");
3695   pf->raw_components.append("}");
3696 }
3697
3698
3699 void
3700 dwarf_pretty_print::recurse_struct_members (Dwarf_Die* type, target_symbol* e,
3701                                             print_format* pf, int& count)
3702 {
3703   /* With inheritance, a subclass may mask member names of parent classes, so
3704    * our search among the inheritance tree must be breadth-first rather than
3705    * depth-first (recursive).  The type die is still our starting point.  When
3706    * we encounter a masked name, just skip it. */
3707   set<string> dupes;
3708   deque<Dwarf_Die> inheritees(1, *type);
3709   for (; !inheritees.empty(); inheritees.pop_front())
3710     {
3711       Dwarf_Die child, childtype, import;
3712       if (dwarf_child (&inheritees.front(), &child) == 0)
3713         do
3714           {
3715             target_symbol* e2 = e;
3716
3717             // skip static members
3718             if (dwarf_hasattr(&child, DW_AT_declaration))
3719               continue;
3720
3721             int tag = dwarf_tag (&child);
3722
3723             /* Pretend imported units contain members by recursing into
3724                struct_member printing with the same count. */
3725             if (tag == DW_TAG_imported_unit
3726                 && dwarf_attr_die (&child, DW_AT_import, &import))
3727               recurse_struct_members (&import, e2, pf, count);
3728
3729             if (tag != DW_TAG_member && tag != DW_TAG_inheritance)
3730               continue;
3731
3732             dwarf_attr_die (&child, DW_AT_type, &childtype);
3733
3734             if (tag == DW_TAG_inheritance)
3735               {
3736                 inheritees.push_back(childtype);
3737                 continue;
3738               }
3739
3740             int childtag = dwarf_tag (&childtype);
3741             const char *member = dwarf_diename (&child);
3742
3743             // "_vptr.foo" members are C++ virtual function tables,
3744             // which (generally?) aren't interesting for users.
3745             if (member && startswith(member, "_vptr."))
3746               continue;
3747
3748             // skip inheritance-masked duplicates
3749             if (member && !dupes.insert(member).second)
3750               continue;
3751
3752             if (++count > 1)
3753               pf->raw_components.append(", ");
3754
3755             // NB: limit to 32 args; see PR10750 and c_unparser::visit_print_format.
3756             if (pf->args.size() >= 32)
3757               {
3758                 pf->raw_components.append("...");
3759                 break;
3760               }
3761
3762             if (member)
3763               {
3764                 pf->raw_components.append(".");
3765                 pf->raw_components.append(member);
3766
3767                 e2 = new target_symbol(*e);
3768                 e2->components.push_back (target_symbol::component(e->tok, member));
3769               }
3770             else if (childtag == DW_TAG_union_type)
3771               pf->raw_components.append("<union>");
3772             else if (childtag == DW_TAG_structure_type)
3773               pf->raw_components.append("<class>");
3774             else if (childtag == DW_TAG_class_type)
3775               pf->raw_components.append("<struct>");
3776             pf->raw_components.append("=");
3777
3778             if (dwarf_hasattr_integrate (&child, DW_AT_bit_offset)
3779                 || dwarf_hasattr_integrate (&child, DW_AT_data_bit_offset))
3780               recurse_bitfield (&childtype, e2, pf);
3781             else
3782               recurse (&childtype, e2, pf);
3783           }
3784         while (dwarf_siblingof (&child, &child) == 0);
3785     }
3786 }
3787
3788
3789 bool
3790 dwarf_pretty_print::print_chars (Dwarf_Die* start_type, target_symbol* e,
3791                                  print_format* pf)
3792 {
3793   Dwarf_Die type;
3794   dw.resolve_unqualified_inner_typedie (start_type, &type, e);
3795
3796   Dwarf_Attribute attr;
3797   Dwarf_Word encoding = (Dwarf_Word) -1;
3798   dwarf_formudata (dwarf_attr_integrate (&type, DW_AT_encoding, &attr),
3799                    &encoding);
3800   switch (encoding)
3801     {
3802     case DW_ATE_UTF:
3803     case DW_ATE_signed_char:
3804     case DW_ATE_unsigned_char:
3805       break;
3806     default:
3807       return false;
3808     }
3809
3810   string function = userspace_p ? "user_string_quoted" : "kernel_or_user_string_quoted";
3811   Dwarf_Word size = (Dwarf_Word) -1;
3812   dwarf_formudata (dwarf_attr_integrate (&type, DW_AT_byte_size, &attr), &size);
3813   switch (size)
3814     {
3815     case 1:
3816       break;
3817     case 2:
3818       function += "_utf16";
3819       break;
3820     case 4:
3821       function += "_utf32";
3822       break;
3823     default:
3824       return false;
3825     }
3826
3827   if (push_deref (pf, "%s", e))
3828     {
3829       // steal the last arg for a string access
3830       assert (!pf->args.empty());
3831       functioncall* fcall = new functioncall;
3832       fcall->tok = e->tok;
3833       fcall->function = function;
3834       fcall->args.push_back (pf->args.back());
3835       pf->args.back() = fcall;
3836     }
3837   return true;
3838 }
3839
3840 struct target_bitfield_remover: public update_visitor
3841 {
3842   void visit_target_bitfield(target_bitfield *);
3843 };
3844
3845 void target_bitfield_remover::visit_target_bitfield(target_bitfield *e)
3846 {
3847   replace (e->base);
3848
3849   expression *ret;
3850   if (e->signed_p)
3851     {
3852       binary_expression *ls = new binary_expression;
3853       ls->tok = e->tok;
3854       ls->op = "<<";
3855       ls->left = e->base;
3856       ls->right = new literal_number(64 - e->offset - e->size);
3857
3858       binary_expression *rs = new binary_expression;
3859       rs->tok = e->tok;
3860       rs->op = ">>";
3861       rs->left = ls;
3862       rs->right = new literal_number(64 - e->size);
3863
3864       ret = rs;
3865     }
3866   else
3867     {
3868       binary_expression *rs = new binary_expression;
3869       rs->tok = e->tok;
3870       rs->op = ">>";
3871       rs->left = e->base;
3872       rs->right = new literal_number(e->offset);
3873
3874       uint64_t field = ((uint64_t)2 << (e->size - 1)) - 1;
3875       binary_expression *msk = new binary_expression;
3876       msk->tok = e->tok;
3877       msk->op = "&";
3878       msk->left = rs;
3879       msk->right = new literal_number(field);
3880
3881       ret = msk;
3882     }
3883   provide (ret);
3884 }
3885
// PR10601: adapt to kernel-vs-userspace loc2c-runtime

// Preprocessor text mapping the generic register/deref names onto the
// kernel variants.
static const string EMBEDDED_FETCH_DEREF_KERNEL =
  "\n"
  "#define fetch_register k_fetch_register\n"
  "#define store_register k_store_register\n"
  "#define deref kderef\n"
  "#define store_deref store_kderef\n";

// Preprocessor text mapping the generic register/deref names onto the
// userspace variants.
static const string EMBEDDED_FETCH_DEREF_USER =
  "\n"
  "#define fetch_register u_fetch_register\n"
  "#define store_register u_store_register\n"
  "#define deref uderef\n"
  "#define store_deref store_uderef\n";

// Select the userspace (U true) or kernel (U false) mapping.
#define EMBEDDED_FETCH_DEREF(U) \
  (U ? EMBEDDED_FETCH_DEREF_USER : EMBEDDED_FETCH_DEREF_KERNEL)

// Preprocessor text undoing either of the mappings above.
static const string EMBEDDED_FETCH_DEREF_DONE =
  "\n"
  "#undef fetch_register\n"
  "#undef store_register\n"
  "#undef deref\n"
  "#undef store_deref\n";
3907
3908 static functioncall*
3909 synthetic_embedded_deref_call(dwflpp& dw, location_context &ctx,
3910                               const std::string &function_name,
3911                               Dwarf_Die *function_type,
3912                               bool userspace_p, bool lvalue_p,
3913                               expression *pointer = NULL)
3914 {
3915   target_symbol *e = ctx.e;
3916   const target_symbol *e_orig = ctx.e_orig;
3917   const token *tok = e->tok;
3918
3919   assert (e != NULL);
3920   assert (e_orig != NULL);
3921
3922   // Synthesize a functiondecl to contain an expression.
3923   string fhash = detox_path(string(tok->location.file->name));
3924   functiondecl *fdecl = new functiondecl;
3925   fdecl->synthetic = true;
3926   fdecl->tok = tok;
3927   fdecl->unmangled_name = fdecl->name = "__private_" + fhash + function_name;
3928   // The fdecl type is generic, but we'll be detailed on the fcall below.
3929   fdecl->type = pe_long;
3930   fdecl->type_details = make_shared<exp_type_dwarf>(&dw, function_type,
3931                                                     userspace_p, e->addressof);
3932   // Synthesize a functioncall.
3933   functioncall* fcall = new functioncall;
3934   fcall->tok = tok;
3935   fcall->referents.push_back(fdecl); // may be needed for post-pass2a sym resolution; autocast08.stp
3936   fcall->function = fdecl->name;
3937   fcall->type = fdecl->type;
3938   fcall->type_details = fdecl->type_details;
3939
3940   // ??? Once upon a time we explicitly marked functions with
3941   // /* unprivileged */, /* pure */, and /* stable */.  Now that we
3942   // have the // function body as staptree nodes, we simply deduce
3943   // the properties from the nodes.
3944
3945   // If this code snippet uses a precomputed pointer,
3946   // pass that as the first argument.
3947   if (pointer)
3948     {
3949       assert(ctx.pointer);
3950       fdecl->formal_args.push_back(ctx.pointer);
3951       fcall->args.push_back(pointer);
3952     }
3953
3954   // Any non-literal indexes need to be passed as arguments too.
3955   if (!e->components.empty())
3956     {
3957       fdecl->formal_args.insert(fdecl->formal_args.end(),
3958                                 ctx.indicies.begin(),
3959                                 ctx.indicies.end()); // indexN..M
3960
3961       assert (e->components.size() == e_orig->components.size());
3962       for (unsigned i = 0; i < e->components.size(); ++i)
3963         if (e->components[i].type == target_symbol::comp_expression_array_index)
3964           fcall->args.push_back(e_orig->components[i].expr_index); // the original index expression
3965     }
3966
3967   // If this code snippet is assigning to an lvalue,
3968   // add a final argument for the rvalue.
3969   expression *ref_exp = ctx.locations.back()->program; // contains rewritten
3970   if (ref_exp == 0) // e.g. if saw ->type == loc_noncontinguous
3971     throw SEMANTIC_ERROR(_("no usable location for symbol [error::dwarf]"), e->tok);
3972
3973   //check if it's a 32-bit float; if it is do the conversion from f32 to f64
3974   int typetag = dwarf_tag (function_type);
3975   if (typetag == DW_TAG_base_type)
3976     {
3977       Dwarf_Attribute encoding_attr;
3978       Dwarf_Word encoding = (Dwarf_Word) -1;
3979       dwarf_formudata (dwarf_attr_integrate (function_type, DW_AT_encoding, &encoding_attr),
3980                          & encoding);
3981
3982       Dwarf_Attribute size_attr;
3983       Dwarf_Word byte_size;
3984       if (dwarf_attr_integrate (function_type, DW_AT_byte_size, &size_attr) == NULL
3985           || dwarf_formudata (&size_attr, &byte_size) != 0)
3986         {
3987           throw (SEMANTIC_ERROR
3988                  (_F("cannot get byte_size attribute for type %s: %s",
3989                      dwarf_diename (function_type) ?: "<anonymous>",
3990                      dwarf_errmsg (-1)), e->tok));
3991         }
3992       if (byte_size > 8)
3993             throw (SEMANTIC_ERROR
3994                    ("cannot process >64-bit values", e->tok));
3995
3996       if (encoding == DW_ATE_float
3997           && byte_size == 4)
3998         {
3999           if (lvalue_p) {
4000             throw (SEMANTIC_ERROR
4001                    ("cannot assign yet to 32-bit float", e->tok));
4002           } else {
4003             functioncall* conv_fcall = new functioncall();
4004             conv_fcall->function = "fp32_to_fp64";
4005             conv_fcall->tok = tok;
4006             conv_fcall->type = pe_long;
4007             conv_fcall->type_details = fcall->type_details;
4008             //conv_fcall->referents = 0;
4009             conv_fcall->args.push_back(fcall);
4010             fcall = conv_fcall;
4011           }
4012         }
4013     }
4014
4015   if (lvalue_p)
4016     {
4017       // NB: We don't know the value for fcall argument yet.
4018       // (see target_symbol_setter_functioncalls)
4019
4020       vardecl *rvalue = new vardecl;
4021       rvalue->type = pe_long;
4022       rvalue->name = rvalue->unmangled_name = "rvalue";
4023       rvalue->tok = tok;
4024
4025       fdecl->formal_args.push_back(rvalue);
4026
4027       symbol *sym = new symbol;
4028       sym->name = rvalue->name;
4029       sym->tok = rvalue->tok;
4030       sym->type = pe_long;
4031       // sym->referent = rvalue;
4032       expression *rhs = sym;
4033
4034       // Expand bitfield writes.
4035       if (target_bitfield *bf = dynamic_cast<target_bitfield *>(ref_exp))
4036         {
4037           uint64_t field = ((uint64_t)2 << (bf->size - 1)) - 1;
4038
4039           ref_exp = bf->base;
4040           if (target_deref *dr = dynamic_cast<target_deref *>(ref_exp))
4041             {
4042               // Compute the address for a deref only once.  This is
4043               // particularly important when the address itself is a deref.
4044               expression *addr = ctx.save_expression (dr->addr);
4045               dr->addr = addr;
4046             }
4047
4048           binary_expression *msk = new binary_expression;
4049           msk->tok = tok;
4050           msk->op = "&";
4051           msk->left = sym;
4052           msk->right = new literal_number(field);
4053
4054           binary_expression *sft = new binary_expression;
4055           sft->tok = tok;
4056           sft->op = "<<";
4057           sft->left = msk;
4058           sft->right = new literal_number(bf->offset);
4059
4060           binary_expression *clr = new binary_expression;
4061           clr->tok = tok;
4062           clr->op = "&";
4063           clr->left = deep_copy_visitor::deep_copy(ref_exp);
4064           clr->right = new literal_number(~(field << bf->offset));
4065
4066           binary_expression *ior = new binary_expression;
4067           ior->tok = tok;
4068           ior->op = "|";
4069           ior->left = clr;
4070           ior->right = sft;
4071
4072           rhs = ior;
4073         }
4074
4075       assignment *a = new assignment;
4076       a->tok = tok;
4077       a->op = "=";
4078       a->left = ref_exp;
4079       a->right = rhs;
4080
4081       ref_exp = a;
4082     }
4083
4084   // Expand bitfield reads.
4085   target_bitfield_remover().replace(ref_exp);
4086
4087   fdecl->locals = ctx.locals;
4088
4089   block *blk = new block;
4090   blk->tok = tok;
4091   fdecl->body = blk;
4092
4093   for (auto i = ctx.evals.begin(); i != ctx.evals.end(); ++i)
4094     blk->statements.push_back(*i);
4095
4096   return_statement *ret = new return_statement;
4097   ret->tok = tok;
4098   ret->value = ref_exp;
4099   blk->statements.push_back(ret);
4100
4101   // Add the synthesized decl to the session now.
4102   fdecl->join (dw.sess);
4103
4104   return fcall;
4105 }
4106
4107 expression*
4108 dwarf_pretty_print::deref (target_symbol* e)
4109 {
4110   static unsigned tick = 0;
4111
4112   if (!deref_p)
4113     {
4114       assert (pointer && e->components.empty());
4115       return pointer;
4116     }
4117
4118   bool lvalue_p = false;
4119
4120   location_context ctx(e, pointer);
4121   ctx.pc = pc;
4122   ctx.userspace_p = userspace_p;
4123
4124   Dwarf_Die endtype;
4125   if (pointer)
4126     dw.literal_stmt_for_pointer (ctx, &pointer_type, ctx.e, lvalue_p, &endtype);
4127   else if (!local.empty())
4128     dw.literal_stmt_for_local (ctx, scopes, local, ctx.e, lvalue_p, &endtype);
4129   else
4130     dw.literal_stmt_for_return (ctx, &scopes[0], ctx.e, lvalue_p, &endtype);
4131
4132   string name = "_dwarf_pretty_print_deref_" + lex_cast(tick++);
4133   return synthetic_embedded_deref_call(dw, ctx, name, &endtype, userspace_p,
4134                                        lvalue_p, pointer);
4135 }
4136
4137
4138 bool
4139 dwarf_pretty_print::push_deref (print_format* pf, const string& fmt,
4140                                 target_symbol* e)
4141 {
4142   expression* e2 = NULL;
4143   try
4144     {
4145       e2 = deref (e);
4146     }
4147   catch (const semantic_error&)
4148     {
4149       pf->raw_components.append ("?");
4150       return false;
4151     }
4152   pf->raw_components.append (fmt);
4153   pf->args.push_back (e2);
4154   return true;
4155 }
4156
4157
4158 void
4159 dwarf_var_expanding_visitor::visit_target_symbol_saved_return (target_symbol* e)
4160 {
4161   // Get the full name of the target symbol.
4162   stringstream ts_name_stream;
4163   e->print(ts_name_stream);
4164   string ts_name = ts_name_stream.str();
4165
4166   // Check and make sure we haven't already seen this target
4167   // variable in this return probe.  If we have, just return our
4168   // last replacement.
4169   auto i = return_ts_map.find(ts_name);
4170   if (i != return_ts_map.end())
4171     {
4172       provide (i->second);
4173       return;
4174     }
4175
4176   // Attempt the expansion directly first, so if there's a problem with the
4177   // variable we won't have a bogus entry probe lying around.  Like in
4178   // saveargs(), we pretend for a moment that we're not in a .return.
4179   expression *repl = e;
4180   {
4181     save_and_restore<bool> temp_return (& q.has_return, false);
4182     replace (repl);
4183   }
4184
4185   // If it's still a target_symbol, then it couldn't be resolved.  It may
4186   // not have a saved_conversion_error yet, e.g. for null_die(scope_die),
4187   // but we know it's not worth making that bogus entry anyway.
4188   if (dynamic_cast<target_symbol*>(repl))
4189     {
4190       provide (repl);
4191       return;
4192     }
4193
4194   expression *exp;
4195   if (!q.has_process &&
4196       strverscmp(q.sess.kernel_base_release.c_str(), "2.6.25") >= 0)
4197     exp = gen_kretprobe_saved_return(repl);
4198   else
4199     exp = gen_mapped_saved_return(repl, e->sym_name());
4200
4201   // Propagate the DWARF type to the expression in the return probe.
4202   if (repl->type_details && !exp->type_details)
4203     exp->type_details = repl->type_details;
4204
4205   // Provide the variable to our parent so it can be used as a
4206   // substitute for the target symbol.
4207   provide (exp);
4208
4209   // Remember this replacement since we might be able to reuse
4210   // it later if the same return probe references this target
4211   // symbol again.
4212   return_ts_map[ts_name] = exp;
4213 }
4214
4215 static expression*
4216 gen_mapped_saved_return(systemtap_session &sess, expression* e,
4217                         const string& name,
4218                         block *& add_block, bool& add_block_tid,
4219                         block *& add_call_probe, bool& add_call_probe_tid)
4220 {
4221   static unsigned tick = 0;
4222
4223   // We've got to do several things here to handle target
4224   // variables in return probes.
4225
4226   // (1) Synthesize two global arrays.  One is the cache of the
4227   // target variable and the other contains a thread specific
4228   // nesting level counter.  The arrays will look like
4229   // this:
4230   //
4231   //   _entry_tvar_{name}_{num}
4232   //   _entry_tvar_{name}_{num}_ctr
4233
4234   string aname = (string("__global_entry_tvar_")
4235                   + name
4236                   + "_" + lex_cast(tick++));
4237   vardecl* vd = new vardecl;
4238   vd->name = vd->unmangled_name = aname;
4239   vd->synthetic = true;
4240   vd->tok = e->tok;
4241   sess.globals.push_back (vd);
4242
4243   string ctrname = aname + "_ctr";
4244   vd = new vardecl;
4245   vd->name = vd->unmangled_name = ctrname;
4246   vd->tok = e->tok;
4247   vd->synthetic = true;
4248   sess.globals.push_back (vd);
4249
4250   // (2) Create a new code block we're going to insert at the
4251   // beginning of this probe to get the cached value into a
4252   // temporary variable.  We'll replace the target variable
4253   // reference with the temporary variable reference.  The code
4254   // will look like this:
4255   //
4256   //   _entry_tvar_tid = tid()
4257   //   _entry_tvar_{name}_{num}_tmp
4258   //       = _entry_tvar_{name}_{num}[_entry_tvar_tid,
4259   //                    _entry_tvar_{name}_{num}_ctr[_entry_tvar_tid]]
4260   //   delete _entry_tvar_{name}_{num}[_entry_tvar_tid,
4261   //                    _entry_tvar_{name}_{num}_ctr[_entry_tvar_tid]--]
4262   //   if (! _entry_tvar_{name}_{num}_ctr[_entry_tvar_tid])
4263   //       delete _entry_tvar_{name}_{num}_ctr[_entry_tvar_tid]
4264
4265   // (2a) Synthesize the tid temporary expression, which will look
4266   // like this:
4267   //
4268   //   _entry_tvar_tid = tid()
4269   symbol* tidsym = new symbol;
4270   tidsym->name = string("_entry_tvar_tid");
4271   tidsym->tok = e->tok;
4272
4273   if (add_block == NULL)
4274     {
4275       add_block = new block;
4276       add_block->tok = e->tok;
4277     }
4278
4279   if (!add_block_tid)
4280     {
4281       // Synthesize a functioncall to grab the thread id.
4282       functioncall* fc = new functioncall;
4283       fc->tok = e->tok;
4284       fc->function = string("tid");
4285
4286       // Assign the tid to '_entry_tvar_tid'.
4287       assignment* a = new assignment;
4288       a->tok = e->tok;
4289       a->op = "=";
4290       a->left = tidsym;
4291       a->right = fc;
4292
4293       expr_statement* es = new expr_statement;
4294       es->tok = e->tok;
4295       es->value = a;
4296       add_block->statements.push_back (es);
4297       add_block_tid = true;
4298     }
4299
4300   // (2b) Synthesize an array reference and assign it to a
4301   // temporary variable (that we'll use as replacement for the
4302   // target variable reference).  It will look like this:
4303   //
4304   //   _entry_tvar_{name}_{num}_tmp
4305   //       = _entry_tvar_{name}_{num}[_entry_tvar_tid,
4306   //                    _entry_tvar_{name}_{num}_ctr[_entry_tvar_tid]]
4307
4308   arrayindex* ai_tvar_base = new arrayindex;
4309   ai_tvar_base->tok = e->tok;
4310
4311   symbol* sym = new symbol;
4312   sym->name = aname;
4313   sym->tok = e->tok;
4314   ai_tvar_base->base = sym;
4315
4316   ai_tvar_base->indexes.push_back(tidsym);
4317
4318   // We need to create a copy of the array index in its current
4319   // state so we can have 2 variants of it (the original and one
4320   // that post-decrements the second index).
4321   arrayindex* ai_tvar = new arrayindex;
4322   arrayindex* ai_tvar_postdec = new arrayindex;
4323   *ai_tvar = *ai_tvar_base;
4324   *ai_tvar_postdec = *ai_tvar_base;
4325
4326   // Synthesize the
4327   // "_entry_tvar_{name}_{num}_ctr[_entry_tvar_tid]" used as the
4328   // second index into the array.
4329   arrayindex* ai_ctr = new arrayindex;
4330   ai_ctr->tok = e->tok;
4331
4332   sym = new symbol;
4333   sym->name = ctrname;
4334   sym->tok = e->tok;
4335   ai_ctr->base = sym;
4336   ai_ctr->indexes.push_back(tidsym);
4337   ai_tvar->indexes.push_back(ai_ctr);
4338
4339   symbol* tmpsym = new symbol;
4340   tmpsym->name = aname + "_tmp";
4341   tmpsym->tok = e->tok;
4342
4343   assignment* a = new assignment;
4344   a->tok = e->tok;
4345   a->op = "=";
4346   a->left = tmpsym;
4347   a->right = ai_tvar;
4348
4349   expr_statement* es = new expr_statement;
4350   es->tok = e->tok;
4351   es->value = a;
4352
4353   add_block->statements.push_back (es);
4354
4355   // (2c) Add a post-decrement to the second array index and
4356   // delete the array value.  It will look like this:
4357   //
4358   //   delete _entry_tvar_{name}_{num}[_entry_tvar_tid,
4359   //                    _entry_tvar_{name}_{num}_ctr[_entry_tvar_tid]--]
4360
4361   post_crement* pc = new post_crement;
4362   pc->tok = e->tok;
4363   pc->op = "--";
4364   pc->operand = ai_ctr;
4365   ai_tvar_postdec->indexes.push_back(pc);
4366
4367   delete_statement* ds = new delete_statement;
4368   ds->tok = e->tok;
4369   ds->value = ai_tvar_postdec;
4370
4371   add_block->statements.push_back (ds);
4372
4373   // (2d) Delete the counter value if it is 0.  It will look like
4374   // this:
4375   //   if (! _entry_tvar_{name}_{num}_ctr[_entry_tvar_tid])
4376   //       delete _entry_tvar_{name}_{num}_ctr[_entry_tvar_tid]
4377
4378   ds = new delete_statement;
4379   ds->tok = e->tok;
4380   ds->value = ai_ctr;
4381
4382   unary_expression *ue = new unary_expression;
4383   ue->tok = e->tok;
4384   ue->op = "!";
4385   ue->operand = ai_ctr;
4386
4387   if_statement *ifs = new if_statement;
4388   ifs->tok = e->tok;
4389   ifs->condition = ue;
4390   ifs->thenblock = ds;
4391   ifs->elseblock = NULL;
4392
4393   add_block->statements.push_back (ifs);
4394
4395   // (3) We need an entry probe that saves the value for us in the
4396   // global array we created.  Create the entry probe, which will
4397   // look like this:
4398   //
4399   //   probe kernel.function("{function}").call {
4400   //     _entry_tvar_tid = tid()
4401   //     _entry_tvar_{name}_{num}[_entry_tvar_tid,
4402   //                       ++_entry_tvar_{name}_{num}_ctr[_entry_tvar_tid]]
4403   //       = ${param}
4404   //   }
4405
4406   if (add_call_probe == NULL)
4407     {
4408       add_call_probe = new block;
4409       add_call_probe->tok = e->tok;
4410     }
4411
4412   if (!add_call_probe_tid)
4413     {
4414       // Synthesize a functioncall to grab the thread id.
4415       functioncall* fc = new functioncall;
4416       fc->tok = e->tok;
4417       fc->function = string("tid");
4418
4419       // Assign the tid to '_entry_tvar_tid'.
4420       assignment* a = new assignment;
4421       a->tok = e->tok;
4422       a->op = "=";
4423       a->left = tidsym;
4424       a->right = fc;
4425
4426       expr_statement* es = new expr_statement;
4427       es->tok = e->tok;
4428       es->value = a;
4429       add_call_probe = new block(add_call_probe, es);
4430       add_call_probe_tid = true;
4431     }
4432
4433   // Save the value, like this:
4434   //     _entry_tvar_{name}_{num}[_entry_tvar_tid,
4435   //                       ++_entry_tvar_{name}_{num}_ctr[_entry_tvar_tid]]
4436   //       = ${param}
4437   arrayindex* ai_tvar_preinc = new arrayindex;
4438   *ai_tvar_preinc = *ai_tvar_base;
4439
4440   pre_crement* preinc = new pre_crement;
4441   preinc->tok = e->tok;
4442   preinc->op = "++";
4443   preinc->operand = ai_ctr;
4444   ai_tvar_preinc->indexes.push_back(preinc);
4445
4446   a = new assignment;
4447   a->tok = e->tok;
4448   a->op = "=";
4449   a->left = ai_tvar_preinc;
4450   a->right = e;
4451
4452   es = new expr_statement;
4453   es->tok = e->tok;
4454   es->value = a;
4455
4456   add_call_probe = new block(add_call_probe, es);
4457
4458   // (4) Provide the '_entry_tvar_{name}_{num}_tmp' variable to
4459   // our parent so it can be used as a substitute for the target
4460   // symbol.
4461   delete ai_tvar_base;
4462   return tmpsym;
4463 }
4464
4465
4466 expression*
4467 dwarf_var_expanding_visitor::gen_mapped_saved_return(expression* e,
4468                                                      const string& name)
4469 {
4470     return ::gen_mapped_saved_return(q.sess, e, name, add_block,
4471                                      add_block_tid, add_call_probe,
4472                                      add_call_probe_tid);
4473 }
4474
4475
4476 expression*
4477 dwarf_var_expanding_visitor::gen_kretprobe_saved_return(expression* e)
4478 {
4479   // The code for this is simple.
4480   //
4481   // .call:
4482   //   _set_kretprobe_long(index, $value)
4483   //
4484   // .return:
4485   //   _get_kretprobe_long(index)
4486   //
4487   // (or s/long/string/ for things like $$parms)
4488
4489   unsigned index;
4490   string setfn, getfn;
4491
4492   // We need the caller to predetermine the type of the expression!
4493   switch (e->type)
4494     {
4495     case pe_string:
4496       index = saved_strings++;
4497       setfn = "_set_kretprobe_string";
4498       getfn = "_get_kretprobe_string";
4499       break;
4500     case pe_long:
4501       index = saved_longs++;
4502       setfn = "_set_kretprobe_long";
4503       getfn = "_get_kretprobe_long";
4504       break;
4505     default:
4506       throw SEMANTIC_ERROR(_("unknown type to save in kretprobe"), e->tok);
4507     }
4508
4509   // Create the entry code
4510   //   _set_kretprobe_{long|string}(index, $value)
4511
4512   if (add_call_probe == NULL)
4513     {
4514       add_call_probe = new block;
4515       add_call_probe->tok = e->tok;
4516     }
4517
4518   functioncall* set_fc = new functioncall;
4519   set_fc->tok = e->tok;
4520   set_fc->function = setfn;
4521   set_fc->args.push_back(new literal_number(index));
4522   set_fc->args.back()->tok = e->tok;
4523   set_fc->args.push_back(e);
4524
4525   expr_statement* set_es = new expr_statement;
4526   set_es->tok = e->tok;
4527   set_es->value = set_fc;
4528
4529   add_call_probe->statements.push_back(set_es);
4530
4531   // Create the return code
4532   //   _get_kretprobe_{long|string}(index)
4533
4534   functioncall* get_fc = new functioncall;
4535   get_fc->tok = e->tok;
4536   get_fc->function = getfn;
4537   get_fc->args.push_back(new literal_number(index));
4538   get_fc->args.back()->tok = e->tok;
4539
4540   return get_fc;
4541 }
4542
4543 void
4544 dwarf_var_expanding_visitor::visit_target_symbol_context (target_symbol* e)
4545 {
4546   if (pending_interrupts) {
4547     provide(e);
4548     return;
4549   }
4550
4551   if (null_die(scope_die)) {
4552     literal_string *empty = new literal_string(string(""));
4553     empty->tok = e->tok;
4554     provide(empty);
4555     return;
4556   }
4557
4558   target_symbol *tsym = new target_symbol(*e);
4559
4560   bool pretty = e->check_pretty_print ();
4561   string format = pretty ? "=%s" : "=%#x";
4562
4563   // Convert $$parms to sprintf of a list of parms and active local vars
4564   // which we recursively evaluate
4565
4566   print_format* pf = print_format::create(e->tok, "sprintf");
4567
4568   if (q.has_return && (e->name == "$$return"))
4569     {
4570       tsym->name = "$return";
4571
4572       // Ignore any variable that isn't accessible.
4573       tsym->saved_conversion_error = 0;
4574       expression *texp = tsym;
4575       replace (texp); // NB: throws nothing ...
4576       if (tsym->saved_conversion_error) // ... but this is how we know it happened.
4577         {
4578
4579         }
4580       else
4581         {
4582           pf->raw_components += "return";
4583           pf->raw_components += format;
4584           pf->args.push_back(texp);
4585         }
4586     }
4587   else
4588     {
4589       // non-.return probe: support $$parms, $$vars, $$locals
4590       bool first = true;
4591       Dwarf_Die result;
4592       vector<Dwarf_Die> scopes = q.dw.getscopes(scope_die);
4593       for (unsigned i = 0; i < scopes.size(); ++i)
4594         {
4595           if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit)
4596             break; // we don't want file-level variables
4597           if (dwarf_child (&scopes[i], &result) == 0)
4598             do
4599               {
4600                 switch (dwarf_tag (&result))
4601                   {
4602                   case DW_TAG_variable:
4603                     if (e->name == "$$parms")
4604                       continue;
4605                     break;
4606                   case DW_TAG_formal_parameter:
4607                     if (e->name == "$$locals")
4608                       continue;
4609                     break;
4610
4611                   default:
4612                     continue;
4613                   }
4614
4615                 const char *diename = dwarf_diename (&result);
4616                 if (! diename) continue;
4617
4618                 if (! first)
4619                   pf->raw_components += " ";
4620                 pf->raw_components += diename;
4621                 first = false;
4622
4623                 // Write a placeholder for ugly aggregates
4624                 Dwarf_Die type;
4625                 if (!pretty && dwarf_attr_die(&result, DW_AT_type, &type))
4626                   {
4627                     q.dw.resolve_unqualified_inner_typedie(&type, &type, e);
4628                     switch (dwarf_tag(&type))
4629                       {
4630                       case DW_TAG_union_type:
4631                       case DW_TAG_structure_type:
4632                       case DW_TAG_class_type:
4633                         pf->raw_components += "={...}";
4634                         continue;
4635
4636                       case DW_TAG_array_type:
4637                         pf->raw_components += "=[...]";
4638                         continue;
4639                       }
4640                   }
4641
4642                 tsym->name = string("$") + diename;
4643
4644                 // Ignore any variable that isn't accessible.
4645                 tsym->saved_conversion_error = 0;
4646                 expression *texp = tsym;
4647                 replace (texp); // NB: throws nothing ...
4648                 if (tsym->saved_conversion_error) // ... but this is how we know it happened.
4649                   {
4650                     if (q.sess.verbose>2)
4651                       {
4652                         for (const semantic_error *c = tsym->saved_conversion_error;
4653                              c != 0;
4654                              c = c->get_chain()) {
4655                             clog << _("variable location problem [man error::dwarf]: ") << c->what() << endl;
4656                         }
4657                       }
4658
4659                     pf->raw_components += "=?";
4660                   }
4661                 else
4662                   {
4663                     pf->raw_components += format;
4664                     pf->args.push_back(texp);
4665                   }
4666               }
4667             while (dwarf_siblingof (&result, &result) == 0);
4668         }
4669     }
4670
4671   pf->components = print_format::string_to_components(pf->raw_components);
4672   pf->type = pe_string;
4673   provide (pf);
4674 }
4675
4676
4677 void
4678 dwarf_var_expanding_visitor::visit_atvar_op (atvar_op *e)
4679 {
4680   // Fill in our current module context if needed
4681   if (e->module.empty())
4682     e->module = q.dw.module_name;
4683
4684   if (e->module == q.dw.module_name && e->cu_name.empty())
4685     {
4686       // process like any other local
4687       // e->sym_name() will do the right thing
4688       visit_target_symbol(e);
4689       return;
4690     }
4691
4692   var_expanding_visitor::visit_atvar_op(e);
4693 }
4694
4695
4696 void
4697 dwarf_var_expanding_visitor::visit_target_symbol (target_symbol *e)
4698 {
4699   assert(e->name.size() > 0 && (e->name[0] == '$' || e->name == "@var"));
4700   visited = true;
4701   bool defined_being_checked = (defined_ops.size() > 0 && (defined_ops.top()->operand == e));
4702   // In this mode, we avoid hiding errors or generating extra code such as for .return saved $vars
4703
4704   try
4705     {
4706       bool lvalue = is_active_lvalue(e);
4707       if (lvalue && !q.sess.guru_mode)
4708         throw SEMANTIC_ERROR(_("write to target variable not permitted; need stap -g"), e->tok);
4709
4710       // XXX: process $context vars should be writable
4711
4712       // See if we need to generate a new probe to save/access function
4713       // parameters from a return probe.  PR 1382.
4714       if (q.has_return
4715           && !defined_being_checked
4716           && (strverscmp(sess.compatible.c_str(), "4.1") < 0 || e->name != "@var")
4717           && e->name != "$return" // not the special return-value variable handled below
4718           && e->name != "$$return") // nor the other special variable handled below
4719         {
4720           if (lvalue)
4721             throw SEMANTIC_ERROR(_("write to target variable not permitted in .return probes"), e->tok);
4722           // PR14924: discourage this syntax
4723           stringstream expr;
4724           e->print(expr);
4725           q.sess.print_warning(_F("confusing usage, value is captured as @entry(%s) in .return probe [man stapprobes] RETURN PROBES", expr.str().c_str()), e->tok);
4726           visit_target_symbol_saved_return(e);
4727           return;
4728         }
4729
4730       if (e->name == "$$vars" || e->name == "$$parms" || e->name == "$$locals"
4731           || (q.has_return && (e->name == "$$return")))
4732         {
4733           if (lvalue)
4734             throw SEMANTIC_ERROR(_("cannot write to context variable"), e->tok);
4735
4736           if (e->addressof)
4737             throw SEMANTIC_ERROR(_("cannot take address of context variable"), e->tok);
4738
4739           e->assert_no_components("dwarf", true);
4740
4741           visit_target_symbol_context(e);
4742           return;
4743         }
4744
4745       // Everything else (pretty-printed vars, and context vars) require a
4746       // scope_die in which to search for them. If produce an error.
4747       if (null_die(scope_die))
4748         throw SEMANTIC_ERROR(_F("debuginfo scope not found for module '%s', cannot resolve context variable [man error::dwarf]",
4749                                 q.dw.module_name.c_str()), e->tok);
4750
4751       if (e->check_pretty_print (lvalue))
4752         {
4753           if (q.has_return && (e->name == "$return"))
4754             {
4755               dwarf_pretty_print dpp (q.dw, scope_die, addr,
4756                                       q.has_process, *e, lvalue);
4757               dpp.expand()->visit(this);
4758             }
4759           else
4760             {
4761               dwarf_pretty_print dpp (q.dw, getscopes(e), addr,
4762                                       e->sym_name(),
4763                                       q.has_process, *e, lvalue);
4764               dpp.expand()->visit(this);
4765             }
4766           return;
4767         }
4768
4769       bool userspace_p = q.has_process;
4770       location_context ctx(e);
4771       ctx.pc = addr;
4772       ctx.userspace_p = userspace_p;
4773
4774       // NB: pass the ctx.e (copied/rewritten veraion e, not orig_e),
4775       // so [x] index expressions have their intra-synthetic-function names
4776       Dwarf_Die endtype;
4777       if (q.has_return && (e->name == "$return"))
4778         q.dw.literal_stmt_for_return (ctx, scope_die, ctx.e, lvalue, &endtype);
4779       else
4780         q.dw.literal_stmt_for_local (ctx, getscopes(e), e->sym_name(),
4781                                      ctx.e, lvalue, &endtype);
4782
4783       // Now that have location information check if change to variable has any effect
4784       if (lvalue) {
4785         if (q.has_kernel &&
4786             (q.sess.kernel_config["CONFIG_RETPOLINE"] == string("y") ||
4787              q.sess.kernel_config["CONFIG_MITIGATION_RETPOLINE"] == string("y")))
4788           q.sess.print_warning(_F("liveness analysis skipped on CONFIG_RETPOLINE kernel %s",
4789                                   q.dw.mod_info->elf_path.c_str()), e->tok);
4790
4791         else if (liveness(q.sess, e, q.dw.mod_info->elf_path, addr, ctx) < 0) {
4792           q.sess.print_warning(_F("write at %p will have no effect",
4793                                   (void *)addr), e->tok);
4794         }
4795       }
4796
4797       q.dw.sess.globals.insert(q.dw.sess.globals.end(),
4798                               ctx.globals.begin(),
4799                               ctx.globals.end());
4800
4801       for (auto it = ctx.entry_probes.begin(); it != ctx.entry_probes.end(); ++it)
4802         {
4803           auto res = entry_probes.find(it->first);
4804           if (res == entry_probes.end())
4805             entry_probes.insert(std::pair<Dwarf_Addr, block *>(it->first, it->second));
4806           else
4807             res->second = new block(res->second, it->second);
4808         }
4809
4810       string fname = (string(lvalue ? "_dwarf_tvar_set" : "_dwarf_tvar_get")
4811                       + "_" + escaped_identifier_string (e->sym_name())
4812                       + "_" + lex_cast(tick++));
4813
4814       functioncall* n = synthetic_embedded_deref_call(q.dw, ctx, fname,
4815                                                       &endtype, userspace_p,
4816                                                       lvalue);
4817
4818       if (lvalue)
4819         provide_lvalue_call (n);
4820
4821       provide(n); // allow recursion to $var1[$var2] subexpressions
4822     }
4823   catch (const semantic_error& er)
4824     {
4825       // We suppress this error message, and pass the unresolved
4826       // target_symbol to the next pass.  We hope that this value ends
4827       // up not being referenced after all, so it can be optimized out
4828       // quietly.
4829       if (sess.verbose > 3)
4830         clog << "chaining to " << *e->tok << endl
4831              << sess.build_error_msg(er) << endl;
4832       e->chain (er);
4833       provide (e);
4834     }
4835 }
4836
4837
4838 void
4839 dwarf_var_expanding_visitor::visit_cast_op (cast_op *e)
4840 {
4841   // Fill in our current module context if needed
4842   if (e->module.empty())
4843     {
4844       // Backward compatibility for @cast() ops, sans module string,
4845       // which expanded to "kernel" rather than to the current
4846       // function/probe context.
4847       if (strverscmp(sess.compatible.c_str(), "4.3") < 0)
4848         e->module = "kernel";
4849       else
4850         e->module = q.dw.module_name;
4851     }
4852
4853   var_expanding_visitor::visit_cast_op(e);
4854 }
4855
4856
4857 void
4858 dwarf_var_expanding_visitor::visit_entry_op (entry_op *e)
4859 {
4860   expression *repl = e;
4861   bool defined_being_checked = (defined_ops.size() > 0 && (defined_ops.top()->operand == e));
4862   // In this mode, we avoid hiding errors or generating extra code such as for .return saved $vars
4863
4864   if (q.has_return)
4865     {
4866       // NB: don't expand the operand here, as if it weren't a return
4867       // probe.  The original operand expression is transcribed into
4868       // the synthetic .call probe that gen_mapped_saved_return calls.
4869       // If we were to expand it here, we may e.g. map @perf("...") to
4870       // __perf_read_... prematurely & incorrectly.  PR20416
4871
4872       // NB: but ... we sort of want to do a trial-expansion, just to
4873       // see if the contents are rejected, e.g. with a $var-undefined
4874       // error, so that the failure can propagate back up to a containing
4875       // @defined().  PR20821
4876
4877       if (defined_being_checked)
4878         {
4879           save_and_restore<bool> temp_return (& q.has_return, false);
4880           replace (e->operand); // don't generate any @entry machinery!
4881
4882           // propagate the replaced operand upward; it may be a
4883           // target_symbol and have a saved_conversion_error; we
4884           // also don't want to expand @defined(@entry(...)) into
4885           // a full synthetic probe goo.
4886           repl = e->operand;
4887         }
4888       else
4889         {
4890           // XXX it would be nice to use gen_kretprobe_saved_return when available,
4891           // but it requires knowing the types already, which is problematic for
4892           // arbitrary expressons.
4893
4894           repl = gen_mapped_saved_return (e->operand, "entry");
4895         }
4896     }
4897   provide (repl);
4898 }
4899
4900
4901 void
4902 dwarf_var_expanding_visitor::visit_perf_op (perf_op *e)
4903 {
4904   string e_lit_val = e->operand->value;
4905
4906   add_block = new block;
4907   add_block->tok = e->tok;
4908
4909   systemtap_session &s = this->q.sess;
4910   // Find the associated perf.counter probe
4911   auto it = s.perf_counters.begin();
4912   for (; it != s.perf_counters.end(); it++)
4913     if ((*it).first == e_lit_val)
4914       {
4915         // if perf .process("name") omitted, then set it to this process name
4916         if ((*it).second.length() == 0)
4917           (*it).second = this->q.user_path;
4918         if ((*it).second == this->q.user_path)
4919           break;
4920       }
4921
4922   if (it != s.perf_counters.end())
4923     {
4924       perf_counter_refs.insert((*it).first);
4925       // __perf_read_N is assigned in the probe prologue
4926       symbol* sym = new symbol;
4927       sym->tok = e->tok;
4928       sym->name = "__perf_read_" + (*it).first;
4929       provide (sym);
4930     }
4931   else
4932     throw SEMANTIC_ERROR(_F("perf counter '%s' not defined", e_lit_val.c_str()));
4933 }
4934
4935
4936 vector<Dwarf_Die>&
4937 dwarf_var_expanding_visitor::getscopes(target_symbol *e)
4938 {
4939   if (scopes.empty())
4940     {
4941       if(!null_die(scope_die))
4942         scopes = q.dw.getscopes(scope_die);
4943       if (scopes.empty())
4944         //throw semantic_error (_F("unable to find any scopes containing %d", addr), e->tok);
4945         //                        ((scope_die == NULL) ? "" : (string (" in ") + (dwarf_diename(scope_die) ?: "<unknown>") + "(" + (dwarf_diename(q.dw.cu) ?: "<unknown>") ")" ))
4946         throw SEMANTIC_ERROR ("unable to find any scopes containing "
4947                               + lex_cast_hex(addr)
4948                               + (null_die(scope_die) ? ""
4949                                  : (string (" in ")
4950                                     + (dwarf_diename(scope_die) ?: "<unknown>")
4951                                     + "(" + (dwarf_diename(q.dw.cu) ?: "<unknown>")
4952                                     + ")"))
4953                               + " while searching for local '"
4954                               + e->sym_name() + "'",
4955                               e->tok);
4956     }
4957   return scopes;
4958 }
4959
4960
// Visitor that expands @cast() operators (cast_op nodes) into synthesized
// accessor function calls; see visit_cast_op.
struct dwarf_cast_expanding_visitor: public var_expanding_visitor
{
  // Source of the kernel/user dwflpp objects used to look up types.
  dwarf_builder& db;
  // Maps an original "<header>" / "kernel<header>" module spec to the module
  // path it was resolved to, so each header is only processed once.
  map<string,string> compiled_headers;

  dwarf_cast_expanding_visitor(systemtap_session& s, dwarf_builder& db):
    var_expanding_visitor(s), db(db) {}
  // Expand one @cast expression, trying each ':'-separated module in turn.
  void visit_cast_op (cast_op* e);
  // Rewrite a header-style module spec in place to a typequery module path;
  // other module names are left untouched.
  void filter_special_modules(string& module);
};
4971
4972
// Query state for resolving a single @cast() against one module.
struct dwarf_cast_query : public base_query
{
  cast_op& e;              // the @cast expression being resolved
  const bool lvalue;       // true when the expression is being written to
  const bool userspace_p;  // true when the module is a userspace module
  functioncall*& result;   // output slot; stays NULL until resolution succeeds

  dwarf_cast_query(dwflpp& dw, const string& module, cast_op& e, bool lvalue,
                   const bool userspace_p, functioncall*& result):
    base_query(dw, module), e(e), lvalue(lvalue),
    userspace_p(userspace_p), result(result) {}

  // Attempt the type lookup and expansion; sets 'result' on success.
  void handle_query_module();
  // Libraries and PLT entries are not relevant to casts: deliberate no-ops.
  void query_library (const char *) {}
  void query_plt (const char *, size_t) {}
};
4989
4990
4991 void
4992 dwarf_cast_query::handle_query_module()
4993 {
4994   static unsigned tick = 0;
4995
4996   if (result)
4997     return;
4998
4999   // look for the type in any CU
5000   Dwarf_Die* type_die = NULL;
5001   string tns = e.type_name;
5002
5003   if (startswith(tns, "class "))
5004     {
5005       // normalize to match dwflpp::global_alias_caching_callback
5006       string struct_name = "struct " + (string)e.type_name.substr(6);
5007       type_die = dw.declaration_resolve_other_cus(struct_name);
5008     }
5009   else
5010     type_die = dw.declaration_resolve_other_cus(tns);
5011
5012   // NB: We now index the types as "struct name"/"union name"/etc. instead of
5013   // just "name".  But since we didn't require users to be explicit before, and
5014   // actually sort of discouraged it, we must be flexible now.  So if a lookup
5015   // fails with a bare name, try augmenting it.
5016   if (!type_die &&
5017       !startswith(tns, "class ") &&
5018       !startswith(tns, "struct ") &&
5019       !startswith(tns, "union ") &&
5020       !startswith(tns, "enum "))
5021     {
5022       type_die = dw.declaration_resolve_other_cus("struct " + tns);
5023       if (!type_die)
5024         type_die = dw.declaration_resolve_other_cus("union " + tns);
5025       if (!type_die)
5026         type_die = dw.declaration_resolve_other_cus("enum " + tns);
5027     }
5028
5029   if (!type_die)
5030     return;
5031
5032   location_context ctx(&e, e.operand);
5033   ctx.userspace_p = userspace_p;
5034
5035   // ctx may require extra information for --runtime=bpf
5036   symbol *s;
5037   bpf_context_vardecl *v;
5038   if ((s = dynamic_cast<symbol *>(e.operand))
5039       && (v = dynamic_cast<bpf_context_vardecl *>(s->referent)))
5040     ctx.adapt_pointer_to_bpf(v->size, v->offset, v->is_signed);
5041
5042   Dwarf_Die endtype;
5043   bool ok = false;
5044
5045   try
5046     {
5047       Dwarf_Die cu_mem;
5048       dw.focus_on_cu(dwarf_diecu(type_die, &cu_mem, NULL, NULL));
5049
5050       if (e.check_pretty_print (lvalue))
5051         {
5052           dwarf_pretty_print dpp(dw, type_die, e.operand, true, userspace_p,
5053                                  e, lvalue);
5054           result = dpp.expand();
5055           return;
5056         }
5057
5058       ok = dw.literal_stmt_for_pointer (ctx, type_die, ctx.e, lvalue, &endtype);
5059     }
5060   catch (const semantic_error& er)
5061     {
5062       // NB: we can have multiple errors, since a @cast
5063       // may be attempted using several different modules:
5064       //     @cast(ptr, "type", "module1:module2:...")
5065       e.chain (er);
5066     }
5067
5068   if (!ok)
5069     return;
5070
5071   string fname = (string(lvalue ? "_dwarf_cast_set" : "_dwarf_cast_get")
5072                   + "_" + e.sym_name()
5073                   + "_" + lex_cast(tick++));
5074   result = synthetic_embedded_deref_call(dw, ctx, fname, &endtype,
5075                                          userspace_p, lvalue, e.operand);
5076 }
5077
5078
5079 void dwarf_cast_expanding_visitor::filter_special_modules(string& module)
5080 {
5081   // look for "<path/to/header>" or "kernel<path/to/header>"
5082   // for those cases, build a module including that header
5083   if (module[module.size() - 1] == '>' &&
5084       (module[0] == '<' || startswith(module, "kernel<")))
5085     {
5086       string header = module;
5087       auto it = compiled_headers.find(header);
5088       if (it != compiled_headers.end())
5089         {
5090           module = it->second;
5091           return;
5092         }
5093
5094       string cached_module;
5095       if (sess.use_cache)
5096         {
5097           // see if the cached module exists
5098           cached_module = find_typequery_hash(sess, module);
5099           if (!cached_module.empty() && !sess.poison_cache)
5100             {
5101               int fd = open(cached_module.c_str(), O_RDONLY);
5102               if (fd != -1)
5103                 {
5104                   if (sess.verbose > 2)
5105                     //TRANSLATORS: Here we're using a cached module.
5106                     clog << _("Pass 2: using cached ") << cached_module << endl;
5107                   compiled_headers[header] = module = cached_module;
5108                   close(fd);
5109                   return;
5110                 }
5111             }
5112         }
5113
5114       // no cached module, time to make it
5115       if (make_typequery(sess, module) == 0)
5116         {
5117           // try to save typequery in the cache
5118           if (sess.use_cache)
5119             copy_file(module, cached_module, sess.verbose > 2);
5120           compiled_headers[header] = module;
5121         }
5122     }
5123 }
5124
5125
5126 void dwarf_cast_expanding_visitor::visit_cast_op (cast_op* e)
5127 {
5128   bool lvalue = is_active_lvalue(e);
5129   if (lvalue && !sess.guru_mode)
5130     throw SEMANTIC_ERROR(_("write to @cast context variable not permitted; need stap -g"), e->tok);
5131
5132
5133   if (strverscmp(sess.compatible.c_str(), "4.3") < 0) // PR25841 - no need to sub "kernel"
5134     if (e->module.empty())
5135       e->module = "kernel"; // "*" may also be reasonable to search all kernel modules
5136
5137   functioncall* result = NULL;
5138
5139   // split the module string by ':' for alternatives
5140   vector<string> modules;
5141   tokenize(e->module, modules, ":");
5142   bool userspace_p=false; // PR10601
5143   for (unsigned i = 0; !result && i < modules.size(); ++i)
5144     {
5145       string& module = modules[i];
5146       filter_special_modules(module);
5147
5148       // NB: This uses '/' to distinguish between kernel modules and userspace,
5149       // which means that userspace modules won't get any PATH searching.
5150       dwflpp* dw;
5151       try
5152         {
5153           userspace_p=is_user_module (module);
5154           if (! userspace_p)
5155             {
5156               // kernel or kernel module target
5157               dw = db.get_kern_dw(sess, module);
5158             }
5159           else
5160             {
5161               module = find_executable (module, "", sess.sysenv); // canonicalize it
5162               dw = db.get_user_dw(sess, module);
5163             }
5164         }
5165       catch (const semantic_error& er)
5166         {
5167           /* ignore and go to the next module */
5168           continue;
5169         }
5170
5171       dwarf_cast_query q (*dw, module, *e, lvalue, userspace_p, result);
5172       dw->iterate_over_modules<base_query>(&query_module, &q);
5173     }
5174
5175   if (!result)
5176     {
5177       // We pass the unresolved cast_op to the next pass, and hope
5178       // that this value ends up not being referenced after all, so
5179       // it can be optimized out quietly.
5180       provide (e);
5181       return;
5182     }
5183
5184   if (lvalue)
5185     provide_lvalue_call (result);
5186
5187   result->visit (this);
5188 }
5189
5190
5191 static bool resolve_pointer_type(Dwarf_Die& die, bool& isptr);
5192
5193 exp_type_dwarf::exp_type_dwarf(dwflpp* dw, Dwarf_Die* die,
5194                                bool userspace_p, bool addressof):
5195   dw(dw), die(*die), userspace_p(userspace_p), is_pointer(false)
5196 {
5197   // is_pointer tells us whether a value is a pointer to the given type, so we
5198   // can dereference it; otherwise it will be treated as an end point.
5199   if (addressof)
5200     // we're already looking at the pointed-to type
5201     is_pointer = true;
5202   else
5203     // use the same test as tracepoints to see what we have
5204     resolve_pointer_type(this->die, is_pointer);
5205 }
5206
5207
5208 functioncall *
5209 exp_type_dwarf::expand(autocast_op* e, bool lvalue)
5210 {
5211   static unsigned tick = 0;
5212
5213   try
5214     {
5215       // make sure we're not dereferencing base types or void
5216       bool deref_p = is_pointer && !null_die(&die);
5217       if (!deref_p)
5218         e->assert_no_components("autocast", true);
5219
5220       if (lvalue && !dw->sess.guru_mode)
5221         throw SEMANTIC_ERROR(_("write not permitted; need stap -g"), e->tok);
5222
5223       if (e->components.empty())
5224         {
5225           if (e->addressof)
5226             throw SEMANTIC_ERROR(_("cannot take address of tracepoint variable"), e->tok);
5227
5228           // no components and no addressof?  how did this autocast come to be?
5229           throw SEMANTIC_ERROR(_("internal error: no-op autocast encountered"), e->tok);
5230         }
5231
5232       Dwarf_Die cu_mem;
5233       if (!null_die(&die))
5234         dw->focus_on_cu(dwarf_diecu(&die, &cu_mem, NULL, NULL));
5235
5236       if (e->check_pretty_print (lvalue))
5237         {
5238           dwarf_pretty_print dpp(*dw, &die, e->operand, deref_p, userspace_p,
5239                                  *e, lvalue);
5240           return dpp.expand();
5241         }
5242
5243       location_context ctx(e, e->operand);
5244       ctx.userspace_p = userspace_p;
5245       Dwarf_Die endtype;
5246
5247       dw->literal_stmt_for_pointer (ctx, &die, ctx.e, lvalue, &endtype);
5248
5249       string fname = (string(lvalue ? "_dwarf_autocast_set"
5250                              : "_dwarf_autocast_get")
5251                       + "_" + lex_cast(tick++));
5252
5253       return synthetic_embedded_deref_call(*dw, ctx, fname, &endtype,
5254                                            userspace_p, lvalue, e->operand);
5255     }
5256   catch (const semantic_error &er)
5257     {
5258       if (dw->sess.verbose > 3)
5259         clog << "chaining to " << *e->tok << endl
5260              << dw->sess.build_error_msg(er) << endl;
5261       e->chain (er);
5262       return NULL;
5263     }
5264 }
5265
5266
5267 void exp_type_dwarf::print(ostream& o) const
5268 {
5269   o << "dwarf=" << dwarf_type_name((Dwarf_Die*) & die);
5270 }
5271
5272
5273
// Visitor that expands @var() operators (atvar_op nodes) into synthesized
// accessor function calls; see visit_atvar_op.
struct dwarf_atvar_expanding_visitor: public var_expanding_visitor
{
  // Source of the kernel/user dwflpp objects used to look up variables.
  dwarf_builder& db;

  dwarf_atvar_expanding_visitor(systemtap_session& s, dwarf_builder& db):
    var_expanding_visitor(s), db(db) {}
  // Expand one @var expression, trying each ':'-separated module in turn.
  void visit_atvar_op (atvar_op* e);
};
5282
5283
// Query state for resolving a single @var() against one module.
struct dwarf_atvar_query: public base_query
{
  atvar_op& e;                     // the @var expression being resolved
  const bool userspace_p, lvalue;  // userspace module? / being written to?
  functioncall*& result;           // output slot; set by atvar_query_cu on success
  unsigned& tick;                  // counter used to make synthesized function names unique
  const string cu_name_pattern;    // "*/" + e.cu_name; fnmatch pattern for CU names

  dwarf_atvar_query(dwflpp& dw, const string& module, atvar_op& e,
                    const bool userspace_p, const bool lvalue,
                    functioncall*& result,
                    unsigned& tick):
    base_query(dw, module), e(e),
    userspace_p(userspace_p), lvalue(lvalue), result(result),
    tick(tick), cu_name_pattern(string("*/") + (string)e.cu_name) {}

  // Walk the module's CUs looking for the variable.
  void handle_query_module ();
  // Libraries and PLT entries are not relevant to @var: deliberate no-ops.
  void query_library (const char *) {}
  void query_plt (const char *, size_t) {}
  // Per-CU callback; returns DWARF_CB_ABORT once a result has been produced,
  // DWARF_CB_OK to keep iterating.
  static int atvar_query_cu (Dwarf_Die *cudie, dwarf_atvar_query *q);
};
5305
5306
5307 int
5308 dwarf_atvar_query::atvar_query_cu (Dwarf_Die * cudie, dwarf_atvar_query *q)
5309 {
5310   if (! q->e.cu_name.empty())
5311     {
5312       const char *die_name = dwarf_diename(cudie) ?: "";
5313       string cns = q->e.cu_name;
5314       if (strcmp(die_name, cns.c_str()) != 0 // Perfect match
5315           && fnmatch(q->cu_name_pattern.c_str(), die_name, 0) != 0)
5316         {
5317           return DWARF_CB_OK;
5318         }
5319     }
5320
5321   try
5322     {
5323       vector<Dwarf_Die>  scopes(1, *cudie);
5324
5325       q->dw.focus_on_cu (cudie);
5326
5327       if (q->e.check_pretty_print (q->lvalue))
5328         {
5329           dwarf_pretty_print dpp (q->dw, scopes, 0, q->e.sym_name(),
5330                                   q->userspace_p, q->e, q->lvalue);
5331           q->result = dpp.expand();
5332           return DWARF_CB_ABORT;
5333         }
5334
5335       location_context ctx(&q->e);
5336       ctx.userspace_p = q->userspace_p;
5337       Dwarf_Die endtype;
5338
5339       bool ok = q->dw.literal_stmt_for_local (ctx, scopes, q->e.sym_name(),
5340                                               ctx.e, q->lvalue, &endtype);
5341
5342       if (!ok)
5343         return DWARF_CB_OK;
5344
5345       string fname = (string(q->lvalue ? "_dwarf_tvar_set"
5346                                        : "_dwarf_tvar_get")
5347                       + "_" + q->e.sym_name()
5348                       + "_" + lex_cast(q->tick++));
5349
5350       q->result = synthetic_embedded_deref_call (q->dw, ctx, fname, &endtype,
5351                                                  q->userspace_p, q->lvalue);
5352     }
5353   catch (const semantic_error& er)
5354     {
5355       if (q->sess.verbose > 3)
5356         clog << "chaining to " << q->e.tok << endl
5357              << q->sess.build_error_msg(er) << endl;
5358       q->e.chain (er);
5359       return DWARF_CB_OK;
5360     }
5361
5362   if (q->result) {
5363       return DWARF_CB_ABORT;
5364   }
5365
5366   return DWARF_CB_OK;
5367 }
5368
5369
5370 void
5371 dwarf_atvar_query::handle_query_module ()
5372 {
5373
5374   dw.iterate_over_cus(atvar_query_cu, this, false);
5375 }
5376
5377
5378 void
5379 dwarf_atvar_expanding_visitor::visit_atvar_op (atvar_op* e)
5380 {
5381   const bool lvalue = is_active_lvalue(e);
5382   if (lvalue && !sess.guru_mode)
5383     throw SEMANTIC_ERROR(_("write to @var variable not permitted; "
5384                            "need stap -g"), e->tok);
5385
5386   if (strverscmp(sess.compatible.c_str(), "4.3") < 0) // PR25841 - no need to sub "kernel"
5387     if (e->module.empty())
5388       e->module = "kernel";
5389
5390   functioncall* result = NULL;
5391
5392   // split the module string by ':' for alternatives
5393   vector<string> modules;
5394   tokenize(e->module, modules, ":");
5395   bool userspace_p = false;
5396   for (unsigned i = 0; !result && i < modules.size(); ++i)
5397     {
5398       string& module = modules[i];
5399
5400       dwflpp* dw;
5401       try
5402         {
5403           userspace_p = is_user_module(module);
5404           if (!userspace_p)
5405             {
5406               // kernel or kernel module target
5407               dw = db.get_kern_dw(sess, module);
5408             }
5409           else
5410             {
5411               module = find_executable(module, "", sess.sysenv);
5412               dw = db.get_user_dw(sess, module);
5413             }
5414         }
5415       catch (const semantic_error& er)
5416         {
5417           /* ignore and go to the next module */
5418           continue;
5419         }
5420
5421       dwarf_atvar_query q (*dw, module, *e, userspace_p, lvalue, result, tick);
5422       dw->iterate_over_modules<base_query>(&query_module, &q);
5423
5424       if (result)
5425         {
5426           sess.unwindsym_modules.insert(module);
5427
5428           if (lvalue)
5429             provide_lvalue_call (result);
5430
5431           result->visit(this);
5432           return;
5433         }
5434
5435       /* Unable to find the variable in the current module, so we chain
5436        * an error in atvar_op */
5437       string esn = e->sym_name();
5438       string mn = module;
5439       string cun = e->cu_name;
5440       semantic_error  er(ERR_SRC, _F("unable to find global '%s' in %s%s%s",
5441                                      esn.c_str(), mn.c_str(),
5442                                      cun.empty() ? "" : _(", in "),
5443                                      cun.c_str()));
5444       if (sess.verbose > 3)
5445         clog << "chaining to " << *e->tok << endl
5446              << sess.build_error_msg(er) << endl;
5447       e->chain (er);
5448     }
5449
5450   provide(e);
5451 }
5452
5453
5454 void
5455 dwarf_derived_probe::printsig (ostream& o) const
5456 {
5457   // Instead of just printing the plain locations, we add a PC value
5458   // as a comment as a way of telling e.g. apart multiple inlined
5459   // function instances.  This is distinct from the verbose/clog
5460   // output, since this part goes into the cache hash calculations.
5461   sole_location()->print (o);
5462   if (symbol_name != "")
5463     o << " /* pc=<" << symbol_name << "+" << offset << "> */";
5464   else
5465     o << " /* pc=" << section << "+0x" << hex << addr << dec << " */";
5466
5467   printsig_nested (o);
5468 }
5469
5470
5471 void
5472 dwarf_derived_probe::printsig_nonest (ostream& o) const
5473 {
5474   sole_location()->print (o);
5475   if (symbol_name != "")
5476     o << " /* pc=<" << symbol_name << "+" << offset << "> */";
5477   else
5478     o << " /* pc=" << section << "+0x" << hex << addr << dec << " */";
5479 }
5480
5481
5482 void
5483 dwarf_derived_probe::join_group (systemtap_session& s)
5484 {
5485   // skip probes which are paired entry-handlers
5486   if (!has_return && (saved_longs || saved_strings))
5487     return;
5488
5489   if (! s.generic_kprobe_derived_probes)
5490     s.generic_kprobe_derived_probes = new generic_kprobe_derived_probe_group ();
5491   s.generic_kprobe_derived_probes->enroll (this);
5492   this->group = s.generic_kprobe_derived_probes;
5493   if (has_return && entry_handler)
5494     entry_handler->group = s.generic_kprobe_derived_probes;
5495 }
5496
5497
5498 static bool
5499 kernel_supports_inode_uprobes(systemtap_session& s)
5500 {
5501   // The arch-supports is new to the builtin inode-uprobes, so it makes a
5502   // reasonable indicator of the new API.  Else we'll need an autoconf...
5503   // see also buildrun.cxx:kernel_built_uprobs()
5504   return (s.kernel_config["CONFIG_ARCH_SUPPORTS_UPROBES"] == "y"
5505           && s.kernel_config["CONFIG_UPROBES"] == "y");
5506 }
5507
5508
5509 static bool
5510 kernel_supports_inode_uretprobes(systemtap_session& s)
5511 {
5512   // We need inode-uprobes first, then look for a sign of uretprobes.  The only
5513   // non-static function at present is arch_uretprobe_hijack_return_addr.
5514   return kernel_supports_inode_uprobes(s) &&
5515     (s.kernel_functions.count("arch_uretprobe_hijack_return_addr") > 0);
5516 }
5517
5518
5519 void
5520 check_process_probe_kernel_support(systemtap_session& s)
5521 {
5522   // We don't have utrace.  For process probes that aren't
5523   // uprobes-based, we just need the task_finder.  The task_finder
5524   // needs CONFIG_TRACEPOINTS and specific tracepoints.  There is a
5525   // specific autoconf test for its needs.
5526   //
5527   // We'll just require CONFIG_TRACEPOINTS here as a quick-and-dirty
5528   // approximation.
5529   if (! s.need_uprobes && s.kernel_config["CONFIG_TRACEPOINTS"] == "y")
5530     return;
5531
5532   // For uprobes-based process probes, we need the task_finder plus
5533   // the builtin inode-uprobes.
5534   if (s.need_uprobes
5535       && s.kernel_config["CONFIG_TRACEPOINTS"] == "y"
5536       && kernel_supports_inode_uprobes(s))
5537     return;
5538
5539   throw SEMANTIC_ERROR (_("process probes not available without kernel CONFIG_TRACEPOINTS/CONFIG_ARCH_SUPPORTS_UPROBES/CONFIG_UPROBES"));
5540 }
5541
5542
5543 dwarf_derived_probe::dwarf_derived_probe(interned_string funcname,
5544                                          interned_string filename,
5545                                          int line,
5546                                          // module & section specify a relocation
5547                                          // base for <addr>, unless section==""
5548                                          // (equivalently module=="kernel")
5549                                          // for userspace, it's a full path, for
5550                                          // modules, it's either a full path, or
5551                                          // the basename (e.g. 'btrfs')
5552                                          interned_string module,
5553                                          interned_string section,
5554                                          // NB: dwfl_addr is the virtualized
5555                                          // address for this symbol.
5556                                          Dwarf_Addr dwfl_addr,
5557                                          // addr is the section-offset for
5558                                          // actual relocation.
5559                                          Dwarf_Addr addr,
5560                                          dwarf_query& q,
5561                                          Dwarf_Die* scope_die /* may be null */,
5562                                          interned_string symbol_name,
5563                                          Dwarf_Addr offset)
5564   : generic_kprobe_derived_probe (q.base_probe, q.base_loc, module, section,
5565                                   addr, q.has_return,
5566                                   q.has_maxactive, q.maxactive_val, "", offset),
5567     path (q.path),
5568     has_process (q.has_process),
5569     has_library (q.has_library),
5570     user_path (q.user_path),
5571     user_lib (q.user_lib),
5572     access_vars(false)
5573 {
5574   // If we were given a fullpath to a kernel module, then get the simple name
5575   if (q.has_module && is_fully_resolved(module, q.dw.sess.sysroot, q.dw.sess.sysenv))
5576     this->module = modname_from_path(module);
5577
5578   if (q.has_module && symbol_name != "")
5579     this->symbol_name = lex_cast(this->module) + ":" + lex_cast(symbol_name);
5580
5581   if (q.sess.runtime_mode == systemtap_session::bpf_runtime && q.has_return)
5582     this->sym_name_for_bpf = funcname;
5583
5584   if (user_lib.size() != 0)
5585     has_library = true;
5586
5587   if (q.has_process)
5588     {
5589       // We may receive probes on two types of ELF objects: ET_EXEC or ET_DYN.
5590       // ET_EXEC ones need no further relocation on the addr(==dwfl_addr), whereas
5591       // ET_DYN ones do (addr += run-time mmap base address).  We tell these apart
5592       // by the incoming section value (".absolute" vs. ".dynamic").
5593       // XXX Assert invariants here too?
5594
5595       // inode-uprobes needs an offset rather than an absolute VM address.
5596       // ditto for userspace runtimes (dyninst)
5597       if ((kernel_supports_inode_uprobes(q.dw.sess) || q.dw.sess.runtime_usermode_p()) &&
5598           section == ".absolute" && addr == dwfl_addr &&
5599           addr >= q.dw.module_start && addr < q.dw.module_end)
5600         this->addr = addr - q.dw.module_start;
5601     }
5602   else
5603     {
5604       // Assert kernel relocation invariants
5605       if (section == "" && dwfl_addr != addr) // addr should be absolute
5606         throw SEMANTIC_ERROR (_("missing relocation basis"), tok);
5607       if (section != "" && dwfl_addr == addr) // addr should be an offset
5608         throw SEMANTIC_ERROR (_("inconsistent relocation address"), tok);
5609     }
5610
5611   // XXX: hack for strange g++/gcc's
5612 #ifndef USHRT_MAX
5613 #define USHRT_MAX 32767
5614 #endif
5615
5616   // Range limit maxactive() value
5617   if (has_maxactive && (maxactive_val < 0 || maxactive_val > USHRT_MAX))
5618     throw SEMANTIC_ERROR (_F("maxactive value out of range [0,%s]",
5619                           lex_cast(USHRT_MAX).c_str()), q.base_loc->components.front()->tok);
5620
5621   // Expand target variables in the probe body. Even if the scope_die is
5622   // invalid, we still want to expand things such as $$vars/$$parms/etc...
5623   // (PR15999, PR16473). Access to specific context vars e.g. $argc will not be
5624   // expanded and will produce an error during the typeresolution_info pass.
5625   {
5626       // PR14436: if we're expanding target variables in the probe body of a
5627       // .return probe, we need to make the expansion at the postprologue addr
5628       // instead (if any), which is then also the spot where the entry handler
5629       // probe is placed. (Note that at this point, a nonzero prologue_end
5630       // implies that it should be used, i.e. code is unoptimized).
5631       Dwarf_Addr handler_dwfl_addr = dwfl_addr;
5632       if (q.prologue_end != 0 && q.has_return)
5633         {
5634           handler_dwfl_addr = q.prologue_end;
5635           if (q.sess.verbose > 2)
5636             clog << _F("expanding .return vars at prologue_end (0x%s) "
5637                        "rather than entrypc (0x%s)\n",
5638                        lex_cast_hex(handler_dwfl_addr).c_str(),
5639                        lex_cast_hex(dwfl_addr).c_str());
5640         }
5641
5642       // PR20672, there may be @defined()-guarded @entry() expressions
5643       // in the tree.  If any @defined() maps to false, the visitor
5644       // needs to abort so that subsequent @entry()'s are not
5645       // processed (to generate synthetic .call etc. probes).  We do a
5646       // a mini relaxation loop here.
5647       dwarf_var_expanding_visitor v (q, scope_die, handler_dwfl_addr);
5648       if (q.sess.symbol_resolver)
5649         q.sess.symbol_resolver->current_probe = this;
5650       var_expand_const_fold_loop (q.sess, this->body, v);
5651
5652       // Propagate perf.counters so we can emit later
5653       this->perf_counter_refs = v.perf_counter_refs;
5654       // Emit local var used to save the perf counter read value
5655       for (auto pcii = v.perf_counter_refs.begin();
5656            pcii != v.perf_counter_refs.end(); pcii++)
5657         {
5658           // Find the associated perf counter probe
5659           for (auto it = q.sess.perf_counters.begin();
5660                it != q.sess.perf_counters.end();
5661                it++)
5662             if ((*it).first == (*pcii))
5663               {
5664                 vardecl* vd = new vardecl;
5665                 vd->name = vd->unmangled_name = "__perf_read_" + (*it).first;
5666                 vd->tok = this->tok;
5667                 vd->set_arity(0, this->tok);
5668                 vd->type = pe_long;
5669                 vd->synthetic = true;
5670                 this->locals.push_back (vd);
5671                 break;
5672               }
5673         }
5674
5675       if (!q.has_process)
5676         access_vars = v.visited;
5677
5678       // If during target-variable-expanding the probe, we added a new block
5679       // of code, add it to the start of the probe.
5680       if (v.add_block)
5681         this->body = new block(v.add_block, this->body);
5682
5683       // If when target-variable-expanding the probe, we need to synthesize a
5684       // sibling function-entry probe.  We don't go through the whole probe derivation
5685       // business (PR10642) that could lead to wildcard/alias resolution, or for that
5686       // dwarf-induced duplication.
5687       if (v.add_call_probe)
5688         {
5689           assert (q.has_return && !q.has_call);
5690
5691           // We temporarily replace q.base_probe.
5692           save_and_restore<statement*> tmp_body (&q.base_probe->body, v.add_call_probe);
5693           save_and_restore<bool> tmp_return (&q.has_return, false);
5694           save_and_restore<bool> tmp_call (&q.has_call, true);
5695
5696           // NB: any moved @entry(EXPR) bits will be expanded during this
5697           // nested *derived_probe ctor for the synthetic .call probe.
5698           // PR20416
5699           if (q.has_process)
5700             {
5701               // Place handler probe at the same addr as where the vars were
5702               // expanded (which may not be the same addr as the one for the
5703               // main retprobe, PR14436).
5704               Dwarf_Addr handler_addr = addr;
5705               if (handler_dwfl_addr != dwfl_addr)
5706                 // adjust section offset by prologue_end-entrypc
5707                 handler_addr += handler_dwfl_addr - dwfl_addr;
5708               entry_handler = new uprobe_derived_probe (funcname, filename,
5709                                                         line, module, section,
5710                                                         handler_dwfl_addr,
5711                                                         handler_addr, q,
5712                                                         scope_die);
5713             }
5714           else
5715             entry_handler = new dwarf_derived_probe (funcname, filename, line,
5716                                                      module, section, dwfl_addr,
5717                                                      addr, q, scope_die);
5718
5719           entry_handler->synthetic = true;
5720
5721           saved_longs = entry_handler->saved_longs = v.saved_longs;
5722           saved_strings = entry_handler->saved_strings = v.saved_strings;
5723
5724           q.results.push_back (entry_handler);
5725         }
5726
5727       for (auto it = v.entry_probes.begin(); it != v.entry_probes.end(); ++it)
5728         {
5729           save_and_restore<statement*> tmp_body (&q.base_probe->body, it->second);
5730           save_and_restore<bool> tmp_function_num (&q.has_function_num, true);
5731           query_addr (it->first, &q);
5732         }
5733
5734       // Save the local variables for listing mode. If the scope_die is null,
5735       // local vars aren't accessible, so no need to invoke saveargs (PR10820).
5736       if (!null_die(scope_die) &&
5737           (q.sess.dump_mode == systemtap_session::dump_matched_probes_vars ||
5738           q.sess.language_server_mode))
5739         saveargs(q, scope_die, dwfl_addr);
5740   }
5741
5742   // Reset the sole element of the "locations" vector as a
5743   // "reverse-engineered" form of the incoming (q.base_loc) probe
5744   // point.  This allows a user to see what function / file / line
5745   // number any particular match of the wildcards.
5746
5747   vector<probe_point::component*> comps;
5748   if (q.has_kernel)
5749     comps.push_back (new probe_point::component(TOK_KERNEL));
5750   else if(q.has_module)
5751     comps.push_back (new probe_point::component(TOK_MODULE, new literal_string(module)));
5752   else if(q.has_process && q.build_id_val != "") // for stap -vL process("buildid").function() etc. probes
5753     comps.push_back (new probe_point::component(TOK_PROCESS, new literal_string(q.build_id_val)));
5754   else if(q.has_process)
5755     comps.push_back (new probe_point::component(TOK_PROCESS, new literal_string(module)));
5756   else
5757     assert (0);
5758
5759   string fn_or_stmt;
5760   if (q.has_function_str || q.has_function_num)
5761     fn_or_stmt = TOK_FUNCTION;
5762   else
5763     fn_or_stmt = TOK_STATEMENT;
5764
5765   if (q.has_function_str || q.has_statement_str)
5766       {
5767         interned_string retro_name = q.final_function_name(funcname, filename, line);
5768         comps.push_back
5769           (new probe_point::component
5770            (fn_or_stmt, new literal_string (retro_name)));
5771       }
5772   else if (q.has_function_num || q.has_statement_num)
5773     {
5774       Dwarf_Addr retro_addr;
5775       if (q.has_function_num)
5776         retro_addr = q.function_num_val;
5777       else
5778         retro_addr = q.statement_num_val;
5779       comps.push_back (new probe_point::component
5780                        (fn_or_stmt,
5781                         new literal_number(retro_addr, true)));
5782
5783       if (q.has_absolute)
5784         comps.push_back (new probe_point::component (TOK_ABSOLUTE));
5785     }
5786
5787   if (q.has_call)
5788       comps.push_back (new probe_point::component(TOK_CALL));
5789   if (q.has_exported)
5790       comps.push_back (new probe_point::component(TOK_EXPORTED));
5791   if (q.has_inline)
5792       comps.push_back (new probe_point::component(TOK_INLINE));
5793   if (has_return)
5794     comps.push_back (new probe_point::component(TOK_RETURN));
5795   if (has_maxactive)
5796     comps.push_back (new probe_point::component
5797                      (TOK_MAXACTIVE, new literal_number(maxactive_val)));
5798
5799   // Overwrite it.
5800   this->sole_location()->components = comps;
5801
5802   // if it's a .callee[s[(N)]] call, add checks to the probe body so that the
5803   // user body is only 'triggered' when called from q.callers[N-1], which
5804   // itself is called from q.callers[N-2], etc... I.E.
5805   // callees(N) --> N elements in q.callers --> N checks against [u]stack(0..N-1)
5806   if ((q.has_callee || q.has_callees_num) && q.callers && !q.callers->empty())
5807     {
5808       if (q.sess.verbose > 2)
5809         clog << _F("adding caller checks for callee %s\n",
5810                    funcname.to_string().c_str());
5811
5812       // Copy the stack and empty it out
5813       stack<Dwarf_Addr> callers(*q.callers);
5814       for (unsigned level = 1; !callers.empty(); level++,
5815                                                  callers.pop())
5816         {
5817           Dwarf_Addr caller = callers.top();
5818
5819           // We first need to make the caller addr relocatable
5820           interned_string caller_section;
5821           Dwarf_Addr caller_reloc;
5822           if (module == TOK_KERNEL)
5823             { // allow for relocatable kernel (see also add_probe_point())
5824               caller_reloc = caller - q.sess.sym_stext;
5825               caller_section = "_stext";
5826             }
5827           else
5828             caller_reloc = q.dw.relocate_address(caller,
5829                                                  caller_section);
5830
5831           if (q.sess.verbose > 2)
5832             clog << _F("adding caller check [u]stack(%d) == reloc(0x%s)\n",
5833                        level, lex_cast_hex(caller_reloc).c_str());
5834
5835           // We want to add a statement like this:
5836           // if (!_caller_match(user, mod, sec, addr)) next;
5837           // Something similar is done in semantic_pass_conditions()
5838
5839           functioncall* check = new functioncall();
5840           check->tok = this->tok;
5841           check->function = "_caller_match";
5842           check->args.push_back(new literal_number(q.has_process));
5843           check->args[0]->tok = this->tok;
5844           // For callee .return probes, the callee is popped off stack
5845           // so we don't want to match the frame below the caller
5846           if (q.has_return)
5847             check->args.push_back(new literal_number(level-1));
5848           else
5849             check->args.push_back(new literal_number(level));
5850           check->args[1]->tok = this->tok;
5851           check->args.push_back(new literal_string(this->module));
5852           check->args[2]->tok = this->tok;
5853           check->args.push_back(new literal_string(caller_section));
5854           check->args[3]->tok = this->tok;
5855           check->args.push_back(new literal_number(caller_reloc, true /* hex */));
5856           check->args[4]->tok = this->tok;
5857
5858           unary_expression* notexp = new unary_expression();
5859           notexp->tok = this->tok;
5860           notexp->op = "!";
5861           notexp->operand = check;
5862
5863           if_statement* ifs = new if_statement();
5864           ifs->tok = this->tok;
5865           ifs->thenblock = new next_statement();
5866           ifs->thenblock->tok = this->tok;
5867           ifs->elseblock = NULL;
5868           ifs->condition = notexp;
5869
5870           this->body = new block(ifs, this->body);
5871         }
5872     }
5873 }
5874
5875
5876 void
5877 dwarf_derived_probe::saveargs(dwarf_query& q, Dwarf_Die* scope_die,
5878                               Dwarf_Addr dwfl_addr)
5879 {
5880   if (null_die(scope_die))
5881     return;
5882
5883   bool verbose = q.sess.verbose > 2;
5884
5885   if (verbose)
5886     clog << _F("saveargs: examining '%s' (dieoffset: %#" PRIx64 ")\n", (dwarf_diename(scope_die)?: "unknown"), dwarf_dieoffset(scope_die));
5887
5888   if (has_return)
5889     {
5890       /* Only save the return value if it has a type. */
5891       string type_name;
5892       Dwarf_Die type_die;
5893       if (dwarf_attr_die (scope_die, DW_AT_type, &type_die) &&
5894           dwarf_type_name(&type_die, type_name))
5895         args.push_back("$return:"+type_name);
5896
5897       else if (verbose)
5898         clog << _F("saveargs: failed to retrieve type name for return value (dieoffset: %s)\n",
5899                    lex_cast_hex(dwarf_dieoffset(scope_die)).c_str());
5900     }
5901
5902   Dwarf_Die arg;
5903   vector<Dwarf_Die> scopes = q.dw.getscopes(scope_die);
5904   for (unsigned i = 0; i < scopes.size(); ++i)
5905     {
5906       if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit)
5907         break; // we don't want file-level variables
5908       if (dwarf_child (&scopes[i], &arg) == 0)
5909         do
5910           {
5911             switch (dwarf_tag (&arg))
5912               {
5913               case DW_TAG_variable:
5914               case DW_TAG_formal_parameter:
5915                 break;
5916
5917               default:
5918                 continue;
5919               }
5920
5921             /* Ignore this local if it has no name. */
5922             const char *arg_name = dwarf_diename (&arg);
5923             if (!arg_name)
5924               {
5925                 if (verbose)
5926                   clog << _F("saveargs: failed to retrieve name for local (dieoffset: %s)\n",
5927                              lex_cast_hex(dwarf_dieoffset(&arg)).c_str());
5928                 continue;
5929               }
5930
5931             if (verbose)
5932               clog << _F("saveargs: finding location for local '%s' (dieoffset: %s)\n",
5933                          arg_name, lex_cast_hex(dwarf_dieoffset(&arg)).c_str());
5934
5935             /* Ignore this local if it has no location (or not at this PC). */
5936             /* NB: It still may not be directly accessible, e.g. if it is an
5937              * aggregate type, implicit_pointer, etc., but the user can later
5938              * figure out how to access the interesting parts. */
5939
5940             /* XXX: Perhaps saveargs() / listings-mode should work by synthesizing
5941              * several synthetic
5942              *     probe foo { $var }
5943              * probes, testing them for overall resolvability.
5944              */
5945
5946             Dwarf_Attribute attr_mem;
5947             if (!dwarf_attr_integrate (&arg, DW_AT_const_value, &attr_mem))
5948               {
5949                 Dwarf_Op *expr;
5950                 size_t len;
5951                 if (!dwarf_attr_integrate (&arg, DW_AT_location, &attr_mem))
5952                   {
5953                     if (verbose)
5954                       clog << _F("saveargs: failed to resolve the location for local '%s' (dieoffset: %s)\n",
5955                                   arg_name, lex_cast_hex(dwarf_dieoffset(&arg)).c_str());
5956                     continue;
5957                   }
5958                 else if (!(dwarf_getlocation_addr(&attr_mem, dwfl_addr, &expr,
5959                                                   &len, 1) == 1 && len > 0))
5960                   {
5961                     Dwarf_Addr dwfl_addr2 = q.dw.pr15123_retry_addr (dwfl_addr, & arg);
5962                     if (!dwfl_addr2 || (!(dwarf_getlocation_addr(&attr_mem, dwfl_addr2, &expr,
5963                                                                  &len, 1) == 1 && len > 0))) {
5964                       if (verbose)
5965                         clog << _F("saveargs: local '%s' (dieoffset: %s) is not available at this address (%s)\n",
5966                                    arg_name, lex_cast_hex(dwarf_dieoffset(&arg)).c_str(), lex_cast_hex(dwfl_addr).c_str());
5967                       continue;
5968                     }
5969                   }
5970               }
5971
5972             /* Ignore this local if it has no type. */
5973             string type_name;
5974             Dwarf_Die type_die;
5975             if (!dwarf_attr_die (&arg, DW_AT_type, &type_die) ||
5976                 !dwarf_type_name(&type_die, type_name))
5977               {
5978                 if (verbose)
5979                   clog << _F("saveargs: failed to retrieve type name for local '%s' (dieoffset: %s)\n",
5980                              arg_name, lex_cast_hex(dwarf_dieoffset(&arg)).c_str());
5981                 continue;
5982               }
5983
5984             /* This local looks good -- save it! */
5985             args.push_back("$"+string(arg_name)+":"+type_name);
5986           }
5987         while (dwarf_siblingof (&arg, &arg) == 0);
5988     }
5989 }
5990
5991
5992 void
5993 dwarf_derived_probe::getargs(std::list<std::string> &arg_set) const
5994 {
5995   arg_set.insert(arg_set.end(), args.begin(), args.end());
5996 }
5997
5998
5999 void
6000 dwarf_derived_probe::emit_privilege_assertion (translator_output* o)
6001 {
6002   if (has_process)
6003     {
6004       // These probes are allowed for unprivileged users, but only in the
6005       // context of processes which they own.
6006       emit_process_owner_assertion (o);
6007       return;
6008     }
6009
6010   // Other probes must contain the default assertion which aborts
6011   // if executed by an unprivileged user.
6012   derived_probe::emit_privilege_assertion (o);
6013 }
6014
6015
6016 void
6017 dwarf_derived_probe::print_dupe_stamp(ostream& o)
6018 {
6019   if (has_process)
6020     {
6021       // These probes are allowed for unprivileged users, but only in the
6022       // context of processes which they own.
6023       print_dupe_stamp_unprivileged_process_owner (o);
6024       return;
6025     }
6026
6027   // Other probes must contain the default dupe stamp
6028   derived_probe::print_dupe_stamp (o);
6029 }
6030
6031
6032 void
6033 dwarf_derived_probe::register_statement_variants(match_node * root,
6034                                                  dwarf_builder * dw,
6035                                                  privilege_t privilege)
6036 {
6037   root
6038     ->bind_privilege(privilege)
6039     ->bind(dw);
6040   root->bind(TOK_NEAREST)
6041     ->bind_privilege(privilege)
6042     ->bind(dw);
6043 }
6044
6045 void
6046 dwarf_derived_probe::register_function_variants(match_node * root,
6047                                                 dwarf_builder * dw,
6048                                                 privilege_t privilege)
6049 {
6050   root
6051     ->bind_privilege(privilege)
6052     ->bind(dw);
6053   root->bind(TOK_CALL)
6054     ->bind_privilege(privilege)
6055     ->bind(dw);
6056   root->bind(TOK_EXPORTED)
6057     ->bind_privilege(privilege)
6058     ->bind(dw);
6059   root->bind(TOK_RETURN)
6060     ->bind_privilege(privilege)
6061     ->bind(dw);
6062
6063   // For process probes / uprobes, .maxactive() is unused.
6064   if (! pr_contains (privilege, pr_stapusr))
6065     {
6066       root->bind(TOK_RETURN)
6067         ->bind_num(TOK_MAXACTIVE)->bind(dw);
6068     }
6069 }
6070
6071 void
6072 dwarf_derived_probe::register_function_and_statement_variants(
6073   systemtap_session& s,
6074   match_node * root,
6075   dwarf_builder * dw,
6076   privilege_t privilege
6077 )
6078 {
6079   // Here we match 4 forms:
6080   //
6081   // .function("foo")
6082   // .function(0xdeadbeef)
6083   // .statement("foo")
6084   // .statement(0xdeadbeef)
6085
6086   match_node *fv_root = root->bind_str(TOK_FUNCTION);
6087   register_function_variants(fv_root, dw, privilege);
6088   // ROOT.function("STRING") always gets the .inline and .label variants.
6089   fv_root->bind(TOK_INLINE)
6090     ->bind_privilege(privilege)
6091     ->bind(dw);
6092   fv_root->bind_str(TOK_LABEL)
6093     ->bind_privilege(privilege)
6094     ->bind(dw);
6095   fv_root->bind_str(TOK_CALLEE)
6096     ->bind_privilege(privilege)
6097     ->bind(dw);
6098   fv_root->bind_str(TOK_CALLEE)
6099     ->bind(TOK_RETURN)
6100     ->bind_privilege(privilege)
6101     ->bind(dw);
6102   fv_root->bind_str(TOK_CALLEE)
6103     ->bind(TOK_CALL)
6104     ->bind_privilege(privilege)
6105     ->bind(dw);
6106   fv_root->bind(TOK_CALLEES)
6107     ->bind_privilege(privilege)
6108     ->bind(dw);
6109   fv_root->bind_num(TOK_CALLEES)
6110     ->bind_privilege(privilege)
6111     ->bind(dw);
6112
6113   fv_root = root->bind_num(TOK_FUNCTION);
6114   register_function_variants(fv_root, dw, privilege);
6115   // ROOT.function(NUMBER).inline is deprecated in release 1.7 and removed thereafter.
6116   if (strverscmp(s.compatible.c_str(), "1.7") <= 0)
6117     {
6118       fv_root->bind(TOK_INLINE)
6119         ->bind_privilege(privilege)
6120         ->bind(dw);
6121     }
6122
6123   register_statement_variants(root->bind_str(TOK_STATEMENT), dw, privilege);
6124   register_statement_variants(root->bind_num(TOK_STATEMENT), dw, privilege);
6125 }
6126
6127 void
6128 dwarf_derived_probe::register_sdt_variants(systemtap_session&,
6129                                            match_node * root,
6130                                            dwarf_builder * dw)
6131 {
6132   root->bind_str(TOK_MARK)
6133     ->bind_privilege(pr_all)
6134     ->bind(dw);
6135   root->bind_str(TOK_PROVIDER)->bind_str(TOK_MARK)
6136     ->bind_privilege(pr_all)
6137     ->bind(dw);
6138 }
6139
6140 void
6141 dwarf_derived_probe::register_plt_variants(systemtap_session&,
6142                                            match_node * root,
6143                                            dwarf_builder * dw)
6144 {
6145   root->bind(TOK_PLT)
6146     ->bind_privilege(pr_all)
6147     ->bind(dw);
6148   root->bind_str(TOK_PLT)
6149     ->bind_privilege(pr_all)
6150     ->bind(dw);
6151
6152   root->bind(TOK_PLT)
6153     ->bind(TOK_RETURN)
6154     ->bind_privilege(pr_all)
6155     ->bind(dw);
6156   root->bind_str(TOK_PLT)
6157     ->bind(TOK_RETURN)
6158     ->bind_privilege(pr_all)
6159     ->bind(dw);
6160 }
6161
6162 void
6163 dwarf_derived_probe::register_patterns(systemtap_session& s)
6164 {
6165   match_node* root = s.pattern_root;
6166   dwarf_builder *dw = new dwarf_builder();
6167
6168   update_visitor *filter = new dwarf_cast_expanding_visitor(s, *dw);
6169   s.code_filters.push_back(filter);
6170
6171   filter = new dwarf_atvar_expanding_visitor(s, *dw);
6172   s.code_filters.push_back(filter);
6173
6174   register_function_and_statement_variants(s, root->bind(TOK_KERNEL), dw, pr_privileged);
6175   register_function_and_statement_variants(s, root->bind_str(TOK_MODULE), dw, pr_privileged);
6176   root->bind(TOK_KERNEL)->bind_num(TOK_STATEMENT)->bind(TOK_ABSOLUTE)
6177     ->bind(dw);
6178
6179   match_node* uprobes[] = {
6180       root->bind(TOK_PROCESS),
6181       root->bind_str(TOK_PROCESS),
6182       root->bind_num(TOK_PROCESS),
6183       root->bind(TOK_PROCESS)->bind_str(TOK_LIBRARY),
6184       root->bind_str(TOK_PROCESS)->bind_str(TOK_LIBRARY),
6185   };
6186   for (size_t i = 0; i < sizeof(uprobes) / sizeof(*uprobes); ++i)
6187     {
6188       register_function_and_statement_variants(s, uprobes[i], dw, pr_all);
6189       register_sdt_variants(s, uprobes[i], dw);
6190       register_plt_variants(s, uprobes[i], dw);
6191     }
6192 }
6193
6194 void
6195 dwarf_derived_probe::emit_probe_local_init(systemtap_session& s, translator_output * o)
6196 {
6197   if (perf_counter_refs.size())
6198     {
6199       o->newline() << "{";
6200       o->indent(1);
6201       unsigned ref_idx = 0;
6202       for (auto pcii = perf_counter_refs.begin();
6203            pcii != perf_counter_refs.end();
6204            pcii++)
6205         {
6206           // Find the associated perf.counter probe
6207           unsigned i = 0;
6208
6209           for (auto it=s.perf_counters.begin() ;
6210                it != s.perf_counters.end();
6211                it++, i++)
6212             {
6213               if ((*it).first == (*pcii))
6214                 {
6215                   // copy the perf counter values over
6216                   //
6217                   // NB: We'd like to simplify here. Right now we read
6218                   // the perf counters into "values", then copy that
6219                   // into the locals. We should be able to remove the
6220                   // locals, but the 'symbol' class isn't designed to
6221                   // point to the context structure itself, but the
6222                   // locals inside the context structure.
6223                   o->newline() << "l->l___perf_read_" + (*it).first
6224                     + " = (int64_t)c->perf_read_values["
6225                     + lex_cast(ref_idx) + "];";
6226                   ref_idx++;
6227                   break;
6228                 }
6229             }
6230         }
6231       o->newline(-1) << "}";
6232     }
6233
6234   if (access_vars)
6235     {
6236       // if accessing $variables, emit bsp cache setup for speeding up
6237       o->newline() << "#if defined __ia64__";
6238       o->newline() << "bspcache(c->unwaddr, c->kregs);";
6239       o->newline() << "#endif";
6240     }
6241 }
6242
6243 // ------------------------------------------------------------------------
6244
6245 void
6246 generic_kprobe_derived_probe_group::enroll (generic_kprobe_derived_probe* p)
6247 {
6248   probes_by_module.insert (make_pair (p->module, p));
6249
6250   // XXX: probes put at the same address (or symbol_name+offset)
6251   // should all share a single kprobe/kretprobe, and have their
6252   // handlers executed sequentially.
6253 }
6254
6255
6256 void
6257 generic_kprobe_derived_probe_group::emit_module_decls (systemtap_session& s)
6258 {
6259   if (probes_by_module.empty()) return;
6260
6261   s.op->newline() << "/* ---- dwarf and non-dwarf kprobe-based probes ---- */";
6262
6263   // FIXME: we could do the same thing (finding stats for the embedded
6264   // strings) for 'symbol_name'...
6265
6266   // Let's find some stats for the embedded strings.  Maybe they
6267   // are small and uniform enough to justify putting char[MAX]'s into
6268   // the array instead of relocated char*'s.
6269   size_t module_name_max = 0, section_name_max = 0;
6270   size_t module_name_tot = 0, section_name_tot = 0;
6271   size_t all_name_cnt = probes_by_module.size(); // for average
6272   for (auto it = probes_by_module.begin(); it != probes_by_module.end(); it++)
6273     {
6274       generic_kprobe_derived_probe* p = it->second;
6275 #define DOIT(var,expr) do {                             \
6276         size_t var##_size = (expr) + 1;                 \
6277         var##_max = max (var##_max, var##_size);        \
6278         var##_tot += var##_size; } while (0)
6279       DOIT(module_name, p->module.size());
6280       DOIT(section_name, p->section.size());
6281 #undef DOIT
6282     }
6283
6284   // Decide whether it's worthwhile to use char[] or char* by comparing
6285   // the amount of average waste (max - avg) to the relocation data size
6286   // (3 native long words).
6287 #define CALCIT(var)                                                     \
6288   if ((var##_name_max-(var##_name_tot/all_name_cnt)) < (3 * sizeof(void*))) \
6289     {                                                                   \
6290       s.op->newline() << "#define STAP_KPROBE_PROBE_STR_" << #var << " " \
6291                       << "const char " << #var                          \
6292                       << "[" << var##_name_max << "]";                 \
6293       if (s.verbose > 2) clog << "stap_kprobe_probe " << #var            \
6294                               << "[" << var##_name_max << "]" << endl;  \
6295     }                                                                   \
6296   else                                                                  \
6297     {                                                                   \
6298       s.op->newline() << "#define STAP_KPROBE_PROBE_STR_" << #var << " " \
6299                       << "const char * const " << #var << "";          \
6300       if (s.verbose > 2) clog << "stap_kprobe_probe *" << #var << endl;  \
6301     }
6302
6303   CALCIT(module);
6304   CALCIT(section);
6305
6306 #undef CALCIT
6307
6308   s.op->newline() << "#include \"linux/kprobes.c\"";
6309
6310 #define UNDEFIT(var) s.op->newline() << "#undef STAP_KPROBE_PROBE_STR_" << #var
6311   UNDEFIT(module);
6312   UNDEFIT(section);
6313 #undef UNDEFIT
6314
6315   // Emit an array of kprobe/kretprobe pointers
6316   s.op->newline() << "#if defined(STAPCONF_UNREGISTER_KPROBES)";
6317   s.op->newline() << "static void * stap_unreg_kprobes[" << probes_by_module.size() << "];";
6318   s.op->newline() << "#endif";
6319
6320   // Emit the actual probe list.
6321
6322   // NB: we used to plop a union { struct kprobe; struct kretprobe } into
6323   // struct stap_kprobe_probe, but it being initialized data makes it add
6324   // hundreds of bytes of padding per stap_kprobe_probe.  (PR5673)
6325   s.op->newline() << "static struct stap_kprobe stap_kprobes[" << probes_by_module.size() << "];";
6326   // NB: bss!
6327
6328   s.op->newline() << "static struct stap_kprobe_probe stap_kprobe_probes[] = {";
6329   s.op->indent(1);
6330
6331   size_t stap_kprobe_idx = 0;
6332   for (auto it = probes_by_module.begin(); it != probes_by_module.end(); it++)
6333     {
6334       generic_kprobe_derived_probe* p = it->second;
6335       s.op->newline() << "{";
6336       if (p->has_return)
6337         s.op->line() << " .return_p=1,";
6338       if (p->has_maxactive)
6339         {
6340           s.op->line() << " .maxactive_p=1,";
6341           assert (p->maxactive_val >= 0 && p->maxactive_val <= USHRT_MAX);
6342           s.op->line() << " .maxactive_val=" << p->maxactive_val << ",";
6343         }
6344       if (p->saved_longs || p->saved_strings)
6345         {
6346           if (p->saved_longs)
6347             s.op->line() << " .saved_longs=" << p->saved_longs << ",";
6348           if (p->saved_strings)
6349             s.op->line() << " .saved_strings=" << p->saved_strings << ",";
6350           if (p->entry_handler)
6351             s.op->line() << " .entry_probe=" << common_probe_init (p->entry_handler) << ",";
6352         }
6353       if (p->locations[0]->optional)
6354         s.op->line() << " .optional_p=1,";
6355       s.op->line() << " .address=(unsigned long)0x" << hex << p->addr << dec << "ULL,";
6356       s.op->line() << " .module=\"" << p->module << "\",";
6357       s.op->line() << " .section=\"" << p->section << "\",";
6358       s.op->line() << " .probe=" << common_probe_init (p) << ",";
6359       s.op->line() << " .kprobe=&stap_kprobes[" << stap_kprobe_idx++ << "],";
6360       if (!p->symbol_name.empty())
6361         {
6362           // After kernel commit 4982223e51, module notifiers are
6363           // being called too early. So, we have to switch to using
6364           // symbol+offset probing for modules.
6365           if (! p->section.empty())
6366             s.op->newline(-1) << "#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)";
6367           else
6368             s.op->indent(-1);
6369           s.op->newline() << " .symbol_name=\"" << p->symbol_name << "\",";
6370           s.op->line() << " .offset=(unsigned int)" << p->offset << ",";
6371           if (! p->section.empty())
6372             s.op->newline() << "#endif";
6373           s.op->newline(1);
6374         }
6375       s.op->line() << " },";
6376     }
6377
6378   s.op->newline(-1) << "};";
6379
6380   // Emit the kprobes callback function
6381   s.op->newline();
6382   s.op->newline() << "static int enter_kprobe_probe (struct kprobe *inst,";
6383   s.op->line() << " struct pt_regs *regs) {";
6384   // NB: as of PR5673, the kprobe|kretprobe union struct is in BSS
6385   s.op->newline(1) << "int kprobe_idx = ((uintptr_t)inst-(uintptr_t)stap_kprobes)/sizeof(struct stap_kprobe);";
6386   // Check that the index is plausible
6387   s.op->newline() << "struct stap_kprobe_probe *skp = &stap_kprobe_probes[";
6388   s.op->line() << "((kprobe_idx >= 0 && kprobe_idx < " << probes_by_module.size() << ")?";
6389   s.op->line() << "kprobe_idx:0)"; // NB: at least we avoid memory corruption
6390   // XXX: it would be nice to give a more verbose error though; BUG_ON later?
6391   s.op->line() << "];";
6392   common_probe_entryfn_prologue (s, "STAP_SESSION_RUNNING", "", "skp->probe",
6393                                  "stp_probe_type_kprobe");
6394   s.op->newline() << "c->kregs = regs;";
6395
6396   // Make it look like the IP is set as it wouldn't have been replaced
6397   // by a breakpoint instruction when calling real probe handler. Reset
6398   // IP regs on return, so we don't confuse kprobes. PR10458
6399   s.op->newline() << "{";
6400   s.op->indent(1);
6401   s.op->newline() << "unsigned long kprobes_ip = REG_IP(c->kregs);";
6402   s.op->newline() << "SET_REG_IP(regs, (unsigned long) inst->addr);";
6403   s.op->newline() << "(*skp->probe->ph) (c);";
6404   s.op->newline() << "SET_REG_IP(regs, kprobes_ip);";
6405   s.op->newline(-1) << "}";
6406
6407   common_probe_entryfn_epilogue (s, true, otf_safe_context(s));
6408   s.op->newline() << "return 0;";
6409   s.op->newline(-1) << "}";
6410
6411   // Same for kretprobes
6412   s.op->newline();
6413   s.op->newline() << "static int enter_kretprobe_common (struct kretprobe_instance *inst,";
6414   s.op->line() << " struct pt_regs *regs, int entry) {";
6415   s.op->newline(1) << "struct kretprobe *krp = get_kretprobe(inst);";
6416
6417   // NB: as of PR5673, the kprobe|kretprobe union struct is in BSS
6418   s.op->newline() << "int kprobe_idx = ((uintptr_t)krp-(uintptr_t)stap_kprobes)/sizeof(struct stap_kprobe);";
6419   // Check that the index is plausible
6420   s.op->newline() << "struct stap_kprobe_probe *skp = &stap_kprobe_probes[";
6421   s.op->line() << "((kprobe_idx >= 0 && kprobe_idx < " << probes_by_module.size() << ")?";
6422   s.op->line() << "kprobe_idx:0)"; // NB: at least we avoid memory corruption
6423   // XXX: it would be nice to give a more verbose error though; BUG_ON later?
6424   s.op->line() << "];";
6425
6426   s.op->newline() << "const struct stap_probe *sp = entry ? skp->entry_probe : skp->probe;";
6427   s.op->newline() << "if (sp) {";
6428   s.op->indent(1);
6429   common_probe_entryfn_prologue (s, "STAP_SESSION_RUNNING", "", "sp",
6430                                  "stp_probe_type_kretprobe");
6431   s.op->newline() << "c->kregs = regs;";
6432
6433   // for assisting runtime's backtrace logic and accessing kretprobe data packets
6434   s.op->newline() << "c->ips.krp.pi = inst;";
6435   s.op->newline() << "c->ips.krp.pi_longs = skp->saved_longs;";
6436
6437   // Make it look like the IP is set as it wouldn't have been replaced
6438   // by a breakpoint instruction when calling real probe handler. Reset
6439   // IP regs on return, so we don't confuse kprobes. PR10458
6440   s.op->newline() << "{";
6441   s.op->newline(1) << "unsigned long kprobes_ip = REG_IP(c->kregs);";
6442   s.op->newline() << "if (entry)";
6443   s.op->newline(1) << "SET_REG_IP(regs, (unsigned long) get_kretprobe(inst)->kp.addr);";
6444   s.op->newline(-1) << "else";
6445   s.op->newline(1) << "SET_REG_IP(regs, (unsigned long) _stp_ret_addr_r(inst));";
6446   s.op->newline(-1) << "(sp->ph) (c);";
6447   s.op->newline() << "SET_REG_IP(regs, kprobes_ip);";
6448   s.op->newline(-1) << "}";
6449
6450   common_probe_entryfn_epilogue (s, true, otf_safe_context(s));
6451   s.op->newline(-1) << "}";
6452   s.op->newline() << "return 0;";
6453   s.op->newline(-1) << "}";
6454
6455   s.op->newline();
6456 }
6457
6458
6459 void
6460 generic_kprobe_derived_probe_group::emit_module_init (systemtap_session& s)
6461 {
6462   if (probes_by_module.empty()) return;
6463
6464   s.op->newline() << "/* ---- dwarf and non-dwarf kprobe-based probes ---- */";
6465
6466   // We'll let stapkp_init() handle reporting errors by setting probe_point to
6467   // NULL.
6468   s.op->newline() << "probe_point = NULL;";
6469
6470   s.op->newline() << "rc = stapkp_init( "
6471                                      << "stap_kprobe_probes, "
6472                                      << "ARRAY_SIZE(stap_kprobe_probes));";
6473 }
6474
6475 std::string
6476 generic_kprobe_derived_probe::args_for_bpf() const
6477 {
6478   std::stringstream o;
6479
6480   if (has_return)
6481     o << "kretprobe/" << sym_name_for_bpf;
6482   else
6483     o << "kprobe/" << "0x" << std::hex << addr;
6484
6485   return o.str();
6486 }
6487
6488 bool
6489 sort_for_bpf(systemtap_session& s __attribute__ ((unused)),
6490              generic_kprobe_derived_probe_group *ge,
6491              sort_for_bpf_probe_arg_vector &v)
6492 {
6493   if (!ge || ge->probes_by_module.empty())
6494     return false;
6495
6496   for (auto i = ge->probes_by_module.begin();
6497        i != ge->probes_by_module.end(); ++i)
6498     {
6499       generic_kprobe_derived_probe *p = i->second;
6500       v.push_back(std::pair<derived_probe *, std::string>
6501                   (p, p->args_for_bpf()));
6502     }
6503
6504   return true;
6505 }
6506
6507 void
6508 generic_kprobe_derived_probe_group::emit_module_refresh (systemtap_session& s)
6509 {
6510   if (probes_by_module.empty()) return;
6511
6512   s.op->newline() << "/* ---- dwarf and non-dwarf kprobe-based probes ---- */";
6513
6514   s.op->newline() << "stapkp_refresh( "
6515                                    << "modname, "
6516                                    << "stap_kprobe_probes, "
6517                                    << "ARRAY_SIZE(stap_kprobe_probes));";
6518 }
6519
6520 void
6521 generic_kprobe_derived_probe_group::emit_module_exit (systemtap_session& s)
6522 {
6523   if (probes_by_module.empty()) return;
6524
6525   s.op->newline() << "/* ---- dwarf and non-dwarf kprobe-based probes ---- */";
6526
6527   s.op->newline() << "stapkp_exit( "
6528                                 << "stap_kprobe_probes, "
6529                                 << "ARRAY_SIZE(stap_kprobe_probes));";
6530 }
6531
6532 // ------------------------------------------------------------------------
6533
6534 static void sdt_v3_tokenize(const string& str, vector<string>& tokens)
6535 {
6536   string::size_type pos;
6537   string::size_type lastPos = str.find_first_not_of(" ", 0);
6538   string::size_type nextAt = str.find("@", lastPos);
6539
6540   if (nextAt == string::npos)
6541     {
6542       // PR13934: Assembly probes are not forced to use the N@OP form.
6543       // In this case, N is inferred to be the native word size.  Since we
6544       // don't have a nice delimiter, just split it on spaces.  SDT-asm authors
6545       // then must not put any spaces in arguments, to avoid ambiguity.
6546       tokenize(str, tokens, " ");
6547       return;
6548     }
6549
6550   while (lastPos != string::npos)
6551    {
6552      pos = nextAt + 1;
6553      nextAt = str.find("@", pos);
6554      if (nextAt == string::npos)
6555        pos = string::npos;
6556      else
6557        pos = str.rfind(" ", nextAt);
6558
6559      tokens.push_back(str.substr(lastPos, pos - lastPos));
6560      lastPos = str.find_first_not_of(" ", pos);
6561    }
6562 }
6563
6564
6565 struct sdt_uprobe_var_expanding_visitor: public var_expanding_visitor
6566 {
6567   enum regwidths {QI, QIh, HI, SI, DI};
6568   sdt_uprobe_var_expanding_visitor(systemtap_session& s,
6569                                    dwflpp& dw,
6570                                    int elf_machine,
6571                                    interned_string process_name,
6572                                    interned_string provider_name,
6573                                    interned_string probe_name,
6574                                    stap_sdt_probe_type probe_type,
6575                                    interned_string arg_string,
6576                                    int ac):
6577     var_expanding_visitor (s), dw (dw), elf_machine (elf_machine),
6578     process_name (process_name), provider_name (provider_name),
6579     probe_name (probe_name), probe_type (probe_type), arg_count ((unsigned) ac)
6580   {
6581     // sanity check that we're not somehow here for a kernel probe
6582     assert(is_user_module(process_name));
6583
6584     build_dwarf_registers();
6585
6586     need_debug_info = false;
6587     if (probe_type == uprobe3_type)
6588       {
6589         sdt_v3_tokenize(arg_string, arg_tokens);
6590         assert(arg_count <= 12);
6591       }
6592     else
6593       {
6594         tokenize(arg_string, arg_tokens, " ");
6595         assert(arg_count <= 10);
6596       }
6597   }
6598
6599   dwflpp& dw;
6600   int elf_machine;
6601   interned_string process_name;
6602   interned_string provider_name;
6603   interned_string probe_name;
6604   stap_sdt_probe_type probe_type;
6605   unsigned arg_count;
6606   vector<string> arg_tokens;
6607
6608   map<string, pair<unsigned,int> > dwarf_regs;
6609   string regnames;
6610   string percent_regnames;
6611
6612   bool need_debug_info;
6613
6614   void build_dwarf_registers();
6615   void visit_target_symbol (target_symbol* e);
6616   unsigned get_target_symbol_argno_and_validate (target_symbol* e);
6617   long parse_out_arg_precision(string& asmarg);
6618   char parse_out_arg_type(string& asmarg);
6619   expression* try_parse_arg_literal (target_symbol *e,
6620                                      const string& asmarg,
6621                                      long precision);
6622   expression* try_parse_arg_register (target_symbol *e,
6623                                       const string& asmarg,
6624                                       long precision);
6625   expression* try_parse_arg_offset_register (target_symbol *e,
6626                                              const string& asmarg,
6627                                              long precision);
6628   expression* try_parse_arg_register_pair (target_symbol *e,
6629                                            const string& asmarg,
6630                                            long precision);
6631   expression* try_parse_arg_effective_addr (target_symbol *e,
6632                                             const string& asmarg,
6633                                             long precision);
6634   expression* try_parse_arg_varname (target_symbol *e,
6635                                      const string& asmarg,
6636                                      long precision);
6637   void visit_target_symbol_arg (target_symbol* e);
6638   void visit_target_symbol_context (target_symbol* e);
6639   void visit_atvar_op (atvar_op* e);
6640   void visit_cast_op (cast_op* e);
6641 };
6642
6643 void
6644 sdt_uprobe_var_expanding_visitor::build_dwarf_registers ()
6645 {
6646   /* Register name mapping table depends on the elf machine of this particular
6647      probe target process/file, not upon the host.  So we can't just
6648      #ifdef _i686_ etc. */
6649
6650 #define DRI(name,num,width)  dwarf_regs[name]=make_pair(num,width)
6651   if (elf_machine == EM_X86_64) {
6652     DRI ("%rax", 0, DI); DRI ("%eax", 0, SI); DRI ("%ax", 0, HI);
6653        DRI ("%al", 0, QI); DRI ("%ah", 0, QIh);
6654     DRI ("%rdx", 1, DI); DRI ("%edx", 1, SI); DRI ("%dx", 1, HI);
6655        DRI ("%dl", 1, QI); DRI ("%dh", 1, QIh);
6656     DRI ("%rcx", 2, DI); DRI ("%ecx", 2, SI); DRI ("%cx", 2, HI);
6657        DRI ("%cl", 2, QI); DRI ("%ch", 2, QIh);
6658     DRI ("%rbx", 3, DI); DRI ("%ebx", 3, SI); DRI ("%bx", 3, HI);
6659        DRI ("%bl", 3, QI); DRI ("%bh", 3, QIh);
6660     DRI ("%rsi", 4, DI); DRI ("%esi", 4, SI); DRI ("%si", 4, HI);
6661        DRI ("%sil", 4, QI);
6662     DRI ("%rdi", 5, DI); DRI ("%edi", 5, SI); DRI ("%di", 5, HI);
6663        DRI ("%dil", 5, QI);
6664     DRI ("%rbp", 6, DI); DRI ("%ebp", 6, SI); DRI ("%bp", 6, HI);
6665        DRI ("%bpl", 6, QI);
6666     DRI ("%rsp", 7, DI); DRI ("%esp", 7, SI); DRI ("%sp", 7, HI);
6667        DRI ("%spl", 7, QI);
6668     DRI ("%r8", 8, DI); DRI ("%r8d", 8, SI); DRI ("%r8w", 8, HI);
6669        DRI ("%r8b", 8, QI);
6670     DRI ("%r9", 9, DI); DRI ("%r9d", 9, SI); DRI ("%r9w", 9, HI);
6671        DRI ("%r9b", 9, QI);
6672     DRI ("%r10", 10, DI); DRI ("%r10d", 10, SI); DRI ("%r10w", 10, HI);
6673        DRI ("%r10b", 10, QI);
6674     DRI ("%r11", 11, DI); DRI ("%r11d", 11, SI); DRI ("%r11w", 11, HI);
6675        DRI ("%r11b", 11, QI);
6676     DRI ("%r12", 12, DI); DRI ("%r12d", 12, SI); DRI ("%r12w", 12, HI);
6677        DRI ("%r12b", 12, QI);
6678     DRI ("%r13", 13, DI); DRI ("%r13d", 13, SI); DRI ("%r13w", 13, HI);
6679        DRI ("%r13b", 13, QI);
6680     DRI ("%r14", 14, DI); DRI ("%r14d", 14, SI); DRI ("%r14w", 14, HI);
6681        DRI ("%r14b", 14, QI);
6682     DRI ("%r15", 15, DI); DRI ("%r15d", 15, SI); DRI ("%r15w", 15, HI);
6683        DRI ("%r15b", 15, QI);
6684     DRI ("%rip", 16, DI); DRI ("%eip", 16, SI); DRI ("%ip", 16, HI);
6685     DRI ("%xmm0", 17, DI); DRI ("%xmm1", 18, DI);  DRI ("%xmm2", 19, DI); DRI ("%xmm3", 20, DI);
6686     DRI ("%xmm4", 21, DI); DRI ("%xmm5", 22, DI);  DRI ("%xmm6", 23, DI); DRI ("%xmm7", 24, DI);
6687     DRI ("%xmm8", 25, DI); DRI ("%xmm9", 26, DI);  DRI ("%xmm10", 27, DI); DRI ("%xmm11", 28, DI);
6688     DRI ("%xmm12", 29, DI); DRI ("%xmm13", 30, DI);  DRI ("%xmm14", 31, DI); DRI ("%xmm15", 32, DI);
6689     DRI ("%st0", 33, DI); DRI ("%st1", 34, DI);  DRI ("%st2", 35, DI); DRI ("%st3", 36, DI);
6690     DRI ("%st4", 37, DI); DRI ("%st5", 38, DI);  DRI ("%st6", 39, DI); DRI ("%st7", 40, DI);
6691   } else if (elf_machine == EM_386) {
6692     DRI ("%eax", 0, SI); DRI ("%ax", 0, HI); DRI ("%al", 0, QI);
6693        DRI ("%ah", 0, QIh);
6694     DRI ("%ecx", 1, SI); DRI ("%cx", 1, HI); DRI ("%cl", 1, QI);
6695        DRI ("%ch", 1, QIh);
6696     DRI ("%edx", 2, SI); DRI ("%dx", 2, HI); DRI ("%dl", 2, QI);
6697        DRI ("%dh", 2, QIh);
6698     DRI ("%ebx", 3, SI); DRI ("%bx", 3, HI); DRI ("%bl", 3, QI);
6699        DRI ("%bh", 3, QIh);
6700     DRI ("%esp", 4, SI); DRI ("%sp", 4, HI);
6701     DRI ("%ebp", 5, SI); DRI ("%bp", 5, HI);
6702     DRI ("%esi", 6, SI); DRI ("%si", 6, HI); DRI ("%sil", 6, QI);
6703     DRI ("%edi", 7, SI); DRI ("%di", 7, HI); DRI ("%dil", 7, QI);
6704   } else if (elf_machine == EM_PPC || elf_machine == EM_PPC64) {
6705     DRI ("%r0", 0, DI);
6706     DRI ("%r1", 1, DI);
6707     DRI ("%r2", 2, DI);
6708     DRI ("%r3", 3, DI);
6709     DRI ("%r4", 4, DI);
6710     DRI ("%r5", 5, DI);
6711     DRI ("%r6", 6, DI);
6712     DRI ("%r7", 7, DI);
6713     DRI ("%r8", 8, DI);
6714     DRI ("%r9", 9, DI);
6715     DRI ("%r10", 10, DI);
6716     DRI ("%r11", 11, DI);
6717     DRI ("%r12", 12, DI);
6718     DRI ("%r13", 13, DI);
6719     DRI ("%r14", 14, DI);
6720     DRI ("%r15", 15, DI);
6721     DRI ("%r16", 16, DI);
6722     DRI ("%r17", 17, DI);
6723     DRI ("%r18", 18, DI);
6724     DRI ("%r19", 19, DI);
6725     DRI ("%r20", 20, DI);
6726     DRI ("%r21", 21, DI);
6727     DRI ("%r22", 22, DI);
6728     DRI ("%r23", 23, DI);
6729     DRI ("%r24", 24, DI);
6730     DRI ("%r25", 25, DI);
6731     DRI ("%r26", 26, DI);
6732     DRI ("%r27", 27, DI);
6733     DRI ("%r28", 28, DI);
6734     DRI ("%r29", 29, DI);
6735     DRI ("%r30", 30, DI);
6736     DRI ("%r31", 31, DI);
6737     // PR11821: unadorned register "names" without -mregnames
6738     DRI ("0", 0, DI);
6739     DRI ("1", 1, DI);
6740     DRI ("2", 2, DI);
6741     DRI ("3", 3, DI);
6742     DRI ("4", 4, DI);
6743     DRI ("5", 5, DI);
6744     DRI ("6", 6, DI);
6745     DRI ("7", 7, DI);
6746     DRI ("8", 8, DI);
6747     DRI ("9", 9, DI);
6748     DRI ("10", 10, DI);
6749     DRI ("11", 11, DI);
6750     DRI ("12", 12, DI);
6751     DRI ("13", 13, DI);
6752     DRI ("14", 14, DI);
6753     DRI ("15", 15, DI);
6754     DRI ("16", 16, DI);
6755     DRI ("17", 17, DI);
6756     DRI ("18", 18, DI);
6757     DRI ("19", 19, DI);
6758     DRI ("20", 20, DI);
6759     DRI ("21", 21, DI);
6760     DRI ("22", 22, DI);
6761     DRI ("23", 23, DI);
6762     DRI ("24", 24, DI);
6763     DRI ("25", 25, DI);
6764     DRI ("26", 26, DI);
6765     DRI ("27", 27, DI);
6766     DRI ("28", 28, DI);
6767     DRI ("29", 29, DI);
6768     DRI ("30", 30, DI);
6769     DRI ("31", 31, DI);
6770   } else if (elf_machine == EM_S390) {
6771     DRI ("%r0", 0, DI);
6772     DRI ("%r1", 1, DI);
6773     DRI ("%r2", 2, DI);
6774     DRI ("%r3", 3, DI);
6775     DRI ("%r4", 4, DI);
6776     DRI ("%r5", 5, DI);
6777     DRI ("%r6", 6, DI);
6778     DRI ("%r7", 7, DI);
6779     DRI ("%r8", 8, DI);
6780     DRI ("%r9", 9, DI);
6781     DRI ("%r10", 10, DI);
6782     DRI ("%r11", 11, DI);
6783     DRI ("%r12", 12, DI);
6784     DRI ("%r13", 13, DI);
6785     DRI ("%r14", 14, DI);
6786     DRI ("%r15", 15, DI);
6787     DRI ("%f0", 16, DI);
6788     DRI ("%f1", 17, DI);
6789     DRI ("%f2", 18, DI);
6790     DRI ("%f3", 19, DI);
6791     DRI ("%f4", 20, DI);
6792     DRI ("%f5", 21, DI);
6793     DRI ("%f6", 22, DI);
6794     DRI ("%f7", 23, DI);
6795     DRI ("%f8", 24, DI);
6796     DRI ("%f9", 25, DI);
6797     DRI ("%f10", 26, DI);
6798     DRI ("%f11", 27, DI);
6799     DRI ("%f12", 28, DI);
6800     DRI ("%f13", 29, DI);
6801     DRI ("%f14", 30, DI);
6802     DRI ("%f15", 31, DI);
6803 } else if (elf_machine == EM_ARM) {
6804     DRI ("r0", 0, SI);
6805     DRI ("r1", 1, SI);
6806     DRI ("r2", 2, SI);
6807     DRI ("r3", 3, SI);
6808     DRI ("r4", 4, SI);
6809     DRI ("r5", 5, SI);
6810     DRI ("r6", 6, SI);
6811     DRI ("r7", 7, SI);
6812     DRI ("r8", 8, SI);
6813     DRI ("r9", 9, SI);
6814     DRI ("r10", 10, SI); DRI ("sl", 10, SI);
6815     DRI ("fp", 11, SI);
6816     DRI ("ip", 12, SI);
6817     DRI ("sp", 13, SI);
6818     DRI ("lr", 14, SI);
6819     DRI ("pc", 15, SI);
6820   } else if (elf_machine == EM_AARCH64) {
6821     DRI ("x0", 0, DI); DRI ("w0", 0, SI);
6822     DRI ("x1", 1, DI); DRI ("w1", 1, SI);
6823     DRI ("x2", 2, DI); DRI ("w2", 2, SI);
6824     DRI ("x3", 3, DI); DRI ("w3", 3, SI);
6825     DRI ("x4", 4, DI); DRI ("w4", 4, SI);
6826     DRI ("x5", 5, DI); DRI ("w5", 5, SI);
6827     DRI ("x6", 6, DI); DRI ("w6", 6, SI);
6828     DRI ("x7", 7, DI); DRI ("w7", 7, SI);
6829     DRI ("x8", 8, DI); DRI ("w8", 8, SI);
6830     DRI ("x9", 9, DI); DRI ("w9", 9, SI);
6831     DRI ("x10", 10, DI); DRI ("w10", 10, SI);
6832     DRI ("x11", 11, DI); DRI ("w11", 11, SI);
6833     DRI ("x12", 12, DI); DRI ("w12", 12, SI);
6834     DRI ("x13", 13, DI); DRI ("w13", 13, SI);
6835     DRI ("x14", 14, DI); DRI ("w14", 14, SI);
6836     DRI ("x15", 15, DI); DRI ("w15", 15, SI);
6837     DRI ("x16", 16, DI); DRI ("w16", 16, SI);
6838     DRI ("x17", 17, DI); DRI ("w17", 17, SI);
6839     DRI ("x18", 18, DI); DRI ("w18", 18, SI);
6840     DRI ("x19", 19, DI); DRI ("w19", 19, SI);
6841     DRI ("x20", 20, DI); DRI ("w20", 20, SI);
6842     DRI ("x21", 21, DI); DRI ("w21", 21, SI);
6843     DRI ("x22", 22, DI); DRI ("w22", 22, SI);
6844     DRI ("x23", 23, DI); DRI ("w23", 23, SI);
6845     DRI ("x24", 24, DI); DRI ("w24", 24, SI);
6846     DRI ("x25", 25, DI); DRI ("w25", 25, SI);
6847     DRI ("x26", 26, DI); DRI ("w26", 26, SI);
6848     DRI ("x27", 27, DI); DRI ("w27", 27, SI);
6849     DRI ("x28", 28, DI); DRI ("w28", 28, SI);
6850     DRI ("x29", 29, DI); DRI ("w29", 29, SI);
6851     DRI ("x30", 30, DI); DRI ("w30", 30, SI);
6852     DRI ("sp", 31, DI);
6853     DRI ("v0", 64, DI); DRI ("v1", 65, DI);  DRI ("v2", 66, DI); DRI ("v3", 67, DI);
6854     DRI ("v4", 68, DI); DRI ("v5", 69, DI);  DRI ("v6", 70, DI); DRI ("v7", 71, DI);
6855     DRI ("v8", 72, DI); DRI ("v9", 73, DI);  DRI ("v10", 74, DI); DRI ("v11", 75, DI);
6856     DRI ("v12", 76, DI); DRI ("v13", 77, DI);  DRI ("v14", 78, DI); DRI ("v15", 79, DI);
6857     DRI ("v16", 80, DI); DRI ("v17", 81, DI);  DRI ("v18", 82, DI); DRI ("v19", 83, DI);
6858     DRI ("v20", 84, DI); DRI ("v21", 85, DI);  DRI ("v22", 86, DI); DRI ("v23", 87, DI);
6859     DRI ("v24", 88, DI); DRI ("25", 89, DI);  DRI ("v26", 90, DI); DRI ("v27", 91, DI);
6860     DRI ("v28", 92, DI); DRI ("v29", 93, DI);  DRI ("v30", 94, DI); DRI ("v31", 95, DI);
6861   } else if (elf_machine == EM_RISCV) {
6862     Dwarf_Addr bias;
6863     Elf* elf = (dwfl_module_getelf (dw.mod_info->mod, &bias));
6864     enum regwidths riscv_reg_width =
6865         (gelf_getclass (elf) == ELFCLASS32) ? SI : DI;
6866     DRI ("x0", 0, riscv_reg_width); DRI ("zero", 0, riscv_reg_width);
6867     DRI ("x1", 1, riscv_reg_width); DRI ("ra", 1, riscv_reg_width);
6868     DRI ("x2", 2, riscv_reg_width); DRI ("sp", 2, riscv_reg_width);
6869     DRI ("x3", 3, riscv_reg_width); DRI ("gp", 3, riscv_reg_width);
6870     DRI ("x4", 4, riscv_reg_width); DRI ("tp", 4, riscv_reg_width);
6871     DRI ("x5", 5, riscv_reg_width); DRI ("t0", 5, riscv_reg_width);
6872     DRI ("x6", 6, riscv_reg_width); DRI ("t1", 6, riscv_reg_width);
6873     DRI ("x7", 7, riscv_reg_width); DRI ("t2", 7, riscv_reg_width);
6874     DRI ("x8", 8, riscv_reg_width); DRI ("s0", 8, riscv_reg_width); DRI ("fp", 8, riscv_reg_width);
6875     DRI ("x9", 9, riscv_reg_width); DRI ("s1", 9, riscv_reg_width);
6876     DRI ("x10", 10, riscv_reg_width); DRI ("a0", 10, riscv_reg_width);
6877     DRI ("x11", 11, riscv_reg_width); DRI ("a1", 11, riscv_reg_width);
6878     DRI ("x12", 12, riscv_reg_width); DRI ("a2", 12, riscv_reg_width);
6879     DRI ("x13", 13, riscv_reg_width); DRI ("a3", 13, riscv_reg_width);
6880     DRI ("x14", 14, riscv_reg_width); DRI ("a4", 14, riscv_reg_width);
6881     DRI ("x15", 15, riscv_reg_width); DRI ("a5", 15, riscv_reg_width);
6882     DRI ("x16", 16, riscv_reg_width); DRI ("a6", 16, riscv_reg_width);
6883     DRI ("x17", 17, riscv_reg_width); DRI ("a7", 17, riscv_reg_width);
6884     DRI ("x18", 18, riscv_reg_width); DRI ("s2", 18, riscv_reg_width);
6885     DRI ("x19", 19, riscv_reg_width); DRI ("s3", 19, riscv_reg_width);
6886     DRI ("x20", 20, riscv_reg_width); DRI ("s4", 20, riscv_reg_width);
6887     DRI ("x21", 21, riscv_reg_width); DRI ("s5", 21, riscv_reg_width);
6888     DRI ("x22", 22, riscv_reg_width); DRI ("s6", 22, riscv_reg_width);
6889     DRI ("x23", 23, riscv_reg_width); DRI ("s7", 23, riscv_reg_width);
6890     DRI ("x24", 24, riscv_reg_width); DRI ("s8", 24, riscv_reg_width);
6891     DRI ("x25", 25, riscv_reg_width); DRI ("s9", 25, riscv_reg_width);
6892     DRI ("x26", 26, riscv_reg_width); DRI ("s10", 26, riscv_reg_width);
6893     DRI ("x27", 27, riscv_reg_width); DRI ("s11", 27, riscv_reg_width);
6894     DRI ("x28", 28, riscv_reg_width); DRI ("t3", 28, riscv_reg_width);
6895     DRI ("x29", 29, riscv_reg_width); DRI ("t4", 29, riscv_reg_width);
6896     DRI ("x30", 30, riscv_reg_width); DRI ("t5", 30, riscv_reg_width);
6897     DRI ("x31", 31, riscv_reg_width); DRI ("t6", 31, riscv_reg_width);
6898   } else if (elf_machine == EM_MIPS) {
6899     Dwarf_Addr bias;
6900     Elf* elf = (dwfl_module_getelf (dw.mod_info->mod, &bias));
6901     enum regwidths mips_reg_width =
6902         (gelf_getclass (elf) == ELFCLASS32) ? SI : DI;
6903     DRI ("$zero", 0, mips_reg_width);
6904     DRI ("$at", 1, mips_reg_width);
6905     DRI ("$v0", 2, mips_reg_width);
6906     DRI ("$v1", 3, mips_reg_width);
6907     DRI ("$a0", 4, mips_reg_width);
6908     DRI ("$a1", 5, mips_reg_width);
6909     DRI ("$a2", 6, mips_reg_width);
6910     DRI ("$a3", 7, mips_reg_width);
6911     DRI ("$a4", 8, mips_reg_width);
6912     DRI ("$a5", 9, mips_reg_width);
6913     DRI ("$a6", 10, mips_reg_width);
6914     DRI ("$a7", 11, mips_reg_width);
6915     DRI ("$t0", 12, mips_reg_width);
6916     DRI ("$t1", 13, mips_reg_width);
6917     DRI ("$t2", 14, mips_reg_width);
6918     DRI ("$t3", 15, mips_reg_width);
6919     DRI ("$s0", 16, mips_reg_width);
6920     DRI ("$s1", 17, mips_reg_width);
6921     DRI ("$s2", 18, mips_reg_width);
6922     DRI ("$s3", 19, mips_reg_width);
6923     DRI ("$s4", 20, mips_reg_width);
6924     DRI ("$s5", 21, mips_reg_width);
6925     DRI ("$s6", 22, mips_reg_width);
6926     DRI ("$s7", 23, mips_reg_width);
6927     DRI ("$t8", 24, mips_reg_width);
6928     DRI ("$t9", 25, mips_reg_width);
6929     DRI ("$k0", 26, mips_reg_width);
6930     DRI ("$k1", 27, mips_reg_width);
6931     DRI ("$gp", 28, mips_reg_width);
6932     DRI ("$sp", 29, mips_reg_width);
6933     DRI ("$s8", 30, mips_reg_width);
6934     DRI ("$fp", 30, mips_reg_width);
6935     DRI ("$ra", 31, mips_reg_width);
6936
6937     DRI ("$0", 0, mips_reg_width);
6938     DRI ("$1", 1, mips_reg_width);
6939     DRI ("$2", 2, mips_reg_width);
6940     DRI ("$3", 3, mips_reg_width);
6941     DRI ("$4", 4, mips_reg_width);
6942     DRI ("$5", 5, mips_reg_width);
6943     DRI ("$6", 6, mips_reg_width);
6944     DRI ("$7", 7, mips_reg_width);
6945     DRI ("$8", 8, mips_reg_width);
6946     DRI ("$9", 9, mips_reg_width);
6947     DRI ("$10", 10, mips_reg_width);
6948     DRI ("$11", 11, mips_reg_width);
6949     DRI ("$12", 12, mips_reg_width);
6950     DRI ("$13", 13, mips_reg_width);
6951     DRI ("$14", 14, mips_reg_width);
6952     DRI ("$15", 15, mips_reg_width);
6953     DRI ("$16", 16, mips_reg_width);
6954     DRI ("$17", 17, mips_reg_width);
6955     DRI ("$18", 18, mips_reg_width);
6956     DRI ("$19", 19, mips_reg_width);
6957     DRI ("$20", 20, mips_reg_width);
6958     DRI ("$21", 21, mips_reg_width);
6959     DRI ("$22", 22, mips_reg_width);
6960     DRI ("$23", 23, mips_reg_width);
6961     DRI ("$24", 24, mips_reg_width);
6962     DRI ("$25", 25, mips_reg_width);
6963     DRI ("$26", 26, mips_reg_width);
6964     DRI ("$27", 27, mips_reg_width);
6965     DRI ("$28", 28, mips_reg_width);
6966     DRI ("$29", 29, mips_reg_width);
6967     DRI ("$30", 30, mips_reg_width);
6968     DRI ("$31", 31, mips_reg_width);
6969   } else if (arg_count) {
6970     /* permit this case; just fall back to dwarf */
6971   }
6972 #undef DRI
6973
6974   // Build regex pieces out of the known dwarf_regs.  We keep two separate
6975   // lists: ones with the % prefix (and thus unambigiuous even despite PR11821),
6976   // and ones with no prefix (and thus only usable in unambiguous contexts).
6977   for (auto ri = dwarf_regs.cbegin(); ri != dwarf_regs.cend(); ri++)
6978     {
6979       string regname = ri->first;
6980       assert (regname != "");
6981       // for register names starting with '$' convert the dollar to a
6982       // '\$' as otherwise the regexp tries to match end-of-line
6983       if (regname[0]=='$')
6984         regname = string("\\")+regname;
6985       regnames += string("|")+regname;
6986       if (regname[0]=='%')
6987         percent_regnames += string("|")+regname;
6988     }
6989
6990   // clip off leading |
6991   if (regnames != "")
6992     regnames = regnames.substr(1);
6993   if (percent_regnames != "")
6994     percent_regnames = percent_regnames.substr(1);
6995 }
6996
6997 void
6998 sdt_uprobe_var_expanding_visitor::visit_target_symbol_context (target_symbol* e)
6999 {
7000   if (e->addressof)
7001     throw SEMANTIC_ERROR(_("cannot take address of context variable"), e->tok);
7002
7003   if (e->name == "$$name")
7004     {
7005       literal_string *myname = new literal_string (probe_name);
7006       myname->tok = e->tok;
7007       provide(myname);
7008       return;
7009     }
7010
7011   else if (e->name == "$$provider")
7012     {
7013       literal_string *myname = new literal_string (provider_name);
7014       myname->tok = e->tok;
7015       provide(myname);
7016       return;
7017     }
7018
7019   else if (e->name == "$$vars" || e->name == "$$parms")
7020     {
7021       e->assert_no_components("sdt", true);
7022
7023       // Convert $$vars to sprintf of a list of vars which we recursively evaluate
7024
7025       print_format* pf = print_format::create(e->tok, "sprintf");
7026
7027       for (unsigned i = 1; i <= arg_count; ++i)
7028         {
7029           if (i > 1)
7030             pf->raw_components += " ";
7031           target_symbol *tsym = new target_symbol;
7032           tsym->tok = e->tok;
7033           tsym->name = "$arg" + lex_cast(i);
7034           pf->raw_components += tsym->name;
7035           tsym->components = e->components;
7036
7037           expression *texp = require<expression> (tsym);
7038           if (e->check_pretty_print ())
7039             pf->raw_components += "=%s";
7040           else
7041             pf->raw_components += "=%#x";
7042           pf->args.push_back(texp);
7043         }
7044
7045       pf->components = print_format::string_to_components(pf->raw_components);
7046       provide (pf);
7047     }
7048   else
7049     assert(0); // shouldn't get here
7050 }
7051
7052 unsigned
7053 sdt_uprobe_var_expanding_visitor::get_target_symbol_argno_and_validate (target_symbol *e)
7054 {
7055   // parsing
7056   unsigned argno = 0;
7057   if (startswith(e->name, "$arg"))
7058     {
7059       try
7060         {
7061           argno = lex_cast<unsigned>(e->name.substr(4).to_string());
7062         }
7063       catch (const runtime_error& f)
7064         {
7065           // non-integral $arg suffix: e.g. $argKKKSDF
7066           argno = 0;
7067         }
7068     }
7069
7070   // validation
7071   if (arg_count == 0 || // a sdt.h variant without .probe-stored arg_count
7072       argno < 1 || argno > arg_count) // a $argN with out-of-range N
7073     {
7074       // NB: Either
7075       // 1) uprobe1_type $argN or $FOO (we don't know the arg_count)
7076       // 2) uprobe2_type $FOO (no probe args)
7077       // both of which get resolved later.
7078       // Throw it now, and it might be resolved by DWARF later.
7079       need_debug_info = true;
7080       throw SEMANTIC_ERROR(_("target-symbol requires debuginfo"), e->tok);
7081     }
7082   assert (arg_tokens.size() >= argno);
7083   return argno;
7084 }
7085
7086 long
7087 sdt_uprobe_var_expanding_visitor::parse_out_arg_precision(string& asmarg)
7088 {
7089   long precision;
7090   if (asmarg.find('@') != string::npos)
7091     {
7092       long at_or_type = asmarg.find_first_of("@f");
7093       precision = lex_cast<int>(asmarg.substr(0, at_or_type));
7094       asmarg = asmarg.substr(at_or_type);
7095     }
7096   else
7097     {
7098       // V1/V2 do not have precision field so default to signed long
7099       // V3 asm does not have precision field so default to unsigned long
7100       if (probe_type == uprobe3_type)
7101         precision = sizeof(long); // this is an asm probe
7102       else
7103         precision = -sizeof(long);
7104     }
7105   return precision;
7106 }
7107
7108 char
7109 sdt_uprobe_var_expanding_visitor::parse_out_arg_type(string& asmarg)
7110 {
7111   // Reference: __builtin_classify_type
7112   char type;
7113   if (asmarg.find('@') != string::npos)
7114     {
7115       type = asmarg[0];
7116       asmarg = asmarg.substr(asmarg.find('@')+1);
7117     }
7118   else
7119     type = 'i';
7120   return type;
7121 }
7122
7123 expression*
7124 sdt_uprobe_var_expanding_visitor::try_parse_arg_literal (target_symbol *e,
7125                                                          const string& asmarg,
7126                                                          long precision)
7127 {
7128   expression *argexpr = NULL;
7129
7130   // Here, we test for a numeric literal.
7131   // Only accept (signed) decimals throughout. XXX
7132
7133   // PR11821.  NB: on powerpc, literals are not prefixed with $,
7134   // so this regex does not match.  But that's OK, since without
7135   // -mregnames, we can't tell them apart from register numbers
7136   // anyway.  With -mregnames, we could, if gcc somehow
7137   // communicated to us the presence of that option, but alas it
7138   // doesn't.  http://gcc.gnu.org/PR44995.
7139   vector<string> matches;
7140   string regexp;
7141
7142   if (elf_machine == EM_AARCH64 || elf_machine == EM_MIPS) {
7143     regexp = "^([-]?[0-9][0-9]*)$";
7144   } else {
7145     regexp = "^[i\\$#]([-]?[0-9][0-9]*)$";
7146   }
7147
7148   if (!regexp_match (asmarg, regexp, matches)) {
7149       string sn =matches[1];
7150       int64_t n;
7151
7152       // We have to pay attention to the size & sign, as gcc sometimes
7153       // propagates constants that don't quite match, like a negative
7154       // value to fill an unsigned type.
7155       // NB: let it throw if something happens
7156       switch (precision)
7157         {
7158         case -1: n = lex_cast<  int8_t>(sn); break;
7159         case  1: n = lex_cast< uint8_t>(sn); break;
7160         case -2: n = lex_cast< int16_t>(sn); break;
7161         case  2: n = lex_cast<uint16_t>(sn); break;
7162         case -4: n = lex_cast< int32_t>(sn); break;
7163         case  4: n = lex_cast<uint32_t>(sn); break;
7164         default:
7165         case -8: n = lex_cast< int64_t>(sn); break;
7166         case  8: n = lex_cast<uint64_t>(sn); break;
7167         }
7168
7169       literal_number* ln = new literal_number(n);
7170       ln->tok = e->tok;
7171       argexpr = ln;
7172     }
7173
7174   return argexpr;
7175 }
7176
7177 expression*
7178 sdt_uprobe_var_expanding_visitor::try_parse_arg_register (target_symbol *e,
7179                                                           const string& asmarg,
7180                                                           long precision)
7181 {
7182   expression *argexpr = NULL;
7183
7184   // test for REGISTER
7185   // NB: Because PR11821, we must use percent_regnames here.
7186   string regexp;
7187   if (elf_machine == EM_PPC || elf_machine == EM_PPC64
7188      || elf_machine == EM_ARM || elf_machine == EM_AARCH64
7189      || elf_machine == EM_RISCV)
7190     regexp = "^(" + regnames + ")$";
7191   else
7192     regexp = "^(" + percent_regnames + ")$";
7193
7194   vector<string> matches;
7195   if (!regexp_match(asmarg, regexp, matches))
7196     {
7197       string regname = matches[1];
7198       auto ri = dwarf_regs.find (regname);
7199       if (ri != dwarf_regs.end()) // known register
7200         {
7201           embedded_expr *get_arg1 = new embedded_expr;
7202           string width_adjust;
7203           switch (ri->second.second)
7204             {
7205             case QI: width_adjust = ") & 0xff)"; break;
7206             case QIh: width_adjust = ">>8) & 0xff)"; break;
7207             case HI:
7208               // preserve 16 bit register signness
7209               width_adjust = ") & 0xffff)";
7210               if (precision < 0)
7211                 width_adjust += " << 48 >> 48";
7212               break;
7213             case SI:
7214               // preserve 32 bit register signness
7215               width_adjust = ") & 0xffffffff)";
7216               if (precision < 0)
7217                 width_adjust += " << 32 >> 32";
7218               break;
7219             default: width_adjust = "))";
7220             }
7221           string type = "";
7222           if (probe_type == uprobe3_type)
7223             type = (precision < 0
7224                     ? "(int" : "(uint") + lex_cast(abs(precision) * 8) + "_t)";
7225           type = type + "((";
7226           get_arg1->tok = e->tok;
7227           get_arg1->code = string("/* unprivileged */ /* pure */")
7228             + string(" ((int64_t)") + type
7229             + string("u_fetch_register(")
7230             + lex_cast(dwarf_regs[regname].first) + string("))")
7231             + width_adjust;
7232           argexpr = get_arg1;
7233         }
7234     }
7235   return argexpr;
7236 }
7237
// Map an argument precision (byte count, negative meaning signed) to the
// name of the user-memory reader function to call for it.  Precisions
// other than +-1, +-2, +-4 and +-8 map to "user_long".
static string
precision_to_function(long precision)
{
  // 1-byte and 8-byte reads use the same function for either sign.
  if (precision == 1 || precision == -1)
    return "user_int8";
  if (precision == 8 || precision == -8)
    return "user_int64";

  // 2-byte and 4-byte reads have distinct signed/unsigned readers.
  if (precision == 2)
    return "user_uint16";
  if (precision == -2)
    return "user_int16";
  if (precision == 4)
    return "user_uint32";
  if (precision == -4)
    return "user_int32";

  return "user_long";
}
7259
7260 expression*
7261 sdt_uprobe_var_expanding_visitor::try_parse_arg_offset_register (target_symbol *e,
7262                                                                  const string& asmarg,
7263                                                                  long precision)
7264 {
7265   expression *argexpr = NULL;
7266
7267   // test for OFFSET(REGISTER) where OFFSET is +-N+-N+-N
7268   // NB: Despite PR11821, we can use regnames here, since the parentheses
7269   // make things unambiguous. (Note: gdb/stap-probe.c also parses this)
7270   // On ARM test for [REGISTER, OFFSET]
7271
7272   string regexp;
7273   int reg, offset1;
7274   if (elf_machine == EM_ARM || elf_machine == EM_AARCH64)
7275     {
7276       regexp = "^\\[(" + regnames + ")(,[ ]*[#]?([+-]?[0-9]+)([+-][0-9]*)?([+-][0-9]*)?)?\\]$";
7277       reg = 1;
7278       offset1 = 3;
7279     }
7280   else
7281     {
7282       regexp = "^([+-]?[0-9]*)([+-][0-9]*)?([+-][0-9]*)?[(](" + regnames + ")[)]$";
7283       reg = 4;
7284       offset1 = 1;
7285     }
7286
7287   vector<string> matches;
7288   if (!regexp_match(asmarg, regexp, matches))
7289     {
7290       string regname;
7291       int64_t disp = 0;
7292       if (matches[reg].length())
7293         regname = matches[reg];
7294       if (dwarf_regs.find (regname) == dwarf_regs.end())
7295         throw SEMANTIC_ERROR(_F("unrecognized register '%s'", regname.c_str()));
7296
7297       for (int i=offset1; i <= (offset1 + 2); i++)
7298         if (matches[i].length())
7299           // should decode positive/negative hex/decimal
7300           // NB: let it throw if something happens
7301           disp += lex_cast<int64_t>(matches[i]);
7302
7303       // synthesize user_long(%{fetch_register(R)%} + D)
7304       embedded_expr *get_arg1 = new embedded_expr;
7305       get_arg1->tok = e->tok;
7306       get_arg1->code = string("/* unprivileged */ /* pure */")
7307         + string("u_fetch_register(")
7308         + lex_cast(dwarf_regs[regname].first) + string(")");
7309       // XXX: may we ever need to cast that to a narrower type?
7310
7311       literal_number* inc = new literal_number(disp);
7312       inc->tok = e->tok;
7313
7314       binary_expression *be = new binary_expression;
7315       be->tok = e->tok;
7316       be->left = get_arg1;
7317       be->op = "+";
7318       be->right = inc;
7319
7320       functioncall *fc = new functioncall;
7321       fc->function = precision_to_function(precision);
7322       fc->tok = e->tok;
7323       fc->args.push_back(be);
7324
7325       argexpr = fc;
7326     }
7327
7328   return argexpr;
7329 }
7330
7331 expression*
7332 sdt_uprobe_var_expanding_visitor::try_parse_arg_register_pair (target_symbol *e,
7333                                                                const string& asmarg,
7334                                                                long precision)
7335 {
7336
7337   // BZ1613157: for powerpc, accept "R,R", as an alias of "(Ra,Rb)"
7338   if (sess.architecture.substr(0,7) == "powerpc")
7339     {
7340       // test for BASE_REGISTER,INDEX_REGISTER
7341       string regexp = "^(" + regnames + "),(" + regnames + ")$";
7342       vector<string> matches;
7343       if (!regexp_match(asmarg, regexp, matches))
7344         {
7345           // delegate to parenthetic syntax
7346           return try_parse_arg_effective_addr (e, string("(")+asmarg+string(")"), precision);
7347         }
7348     }
7349   else if (elf_machine == EM_AARCH64) // BZ1788648
7350     {
7351       // test for [BASE_REGISTER, INDEX_REGISTER]
7352       string regexp = "^\\[(" + regnames + "), (" + regnames + ")\\]$";
7353       vector<string> matches;
7354       if (!regexp_match(asmarg, regexp, matches))
7355         {
7356           // delegate to parenthetic syntax
7357           string regnames = asmarg.substr(1, asmarg.length()-2); // trim the []
7358           return try_parse_arg_effective_addr (e, string("(")+regnames+string(")"), precision); // add the ()
7359         }
7360     }
7361
7362   return NULL;
7363 }
7364
7365 expression*
7366 sdt_uprobe_var_expanding_visitor::try_parse_arg_effective_addr (target_symbol *e,
7367                                                                 const string& asmarg,
7368                                                                 long precision)
7369 {
7370   expression *argexpr = NULL;
7371
7372   // test for OFFSET(BASE_REGISTER,INDEX_REGISTER[,SCALE]) where OFFSET is +-N+-N+-N
7373   // NB: Despite PR11821, we can use regnames here, since the parentheses
7374   // make things unambiguous. (Note: gdb/stap-probe.c also parses this)
7375   string regexp = "^([+-]?[0-9]*)([+-][0-9]*)?([+-][0-9]*)?[(](" + regnames + "),[ ]?(" +
7376                                                                    regnames + ")(,[1248])?[)]$";
7377   vector<string> matches;
7378   if (!regexp_match(asmarg, regexp, matches))
7379     {
7380       string baseregname;
7381       string indexregname;
7382       int64_t disp = 0;
7383       short scale = 1;
7384
7385       if (matches[6].length())
7386         // NB: let it throw if we can't cast
7387         scale = lex_cast<short>(matches[6].substr(1)); // NB: skip the comma!
7388
7389       if (matches[4].length())
7390         baseregname = matches[4];
7391       if (dwarf_regs.find (baseregname) == dwarf_regs.end())
7392         throw SEMANTIC_ERROR(_F("unrecognized base register '%s'", baseregname.c_str()));
7393
7394       if (matches[5].length())
7395         indexregname = matches[5];
7396       if (dwarf_regs.find (indexregname) == dwarf_regs.end())
7397         throw SEMANTIC_ERROR(_F("unrecognized index register '%s'", indexregname.c_str()));
7398
7399       for (int i = 1; i <= 3; i++) // up to three OFFSET terms
7400         if (matches[i].length())
7401           // should decode positive/negative hex/decimal
7402           // NB: let it throw if something happens
7403           disp += lex_cast<int64_t>(matches[i]);
7404
7405       // synthesize user_long(%{fetch_register(R1)+fetch_register(R2)*N%} + D)
7406
7407       embedded_expr *get_arg1 = new embedded_expr;
7408       string regfn = "u_fetch_register";
7409
7410       get_arg1->tok = e->tok;
7411       get_arg1->code = string("/* unprivileged */ /* pure */")
7412         + regfn + string("(")+lex_cast(dwarf_regs[baseregname].first)+string(")")
7413         + string("+(")
7414         + regfn + string("(")+lex_cast(dwarf_regs[indexregname].first)+string(")")
7415         + string("*")
7416         + lex_cast(scale)
7417         + string(")");
7418
7419       // NB: could plop this +DISPLACEMENT bit into the embedded-c expression too
7420       literal_number* inc = new literal_number(disp);
7421       inc->tok = e->tok;
7422
7423       binary_expression *be = new binary_expression;
7424       be->tok = e->tok;
7425       be->left = get_arg1;
7426       be->op = "+";
7427       be->right = inc;
7428
7429       functioncall *fc = new functioncall;
7430       fc->function = precision_to_function(precision);
7431       fc->tok = e->tok;
7432       fc->args.push_back(be);
7433
7434       argexpr = fc;
7435     }
7436
7437   return argexpr;
7438 }
7439
7440
7441 expression*
7442 sdt_uprobe_var_expanding_visitor::try_parse_arg_varname (target_symbol *e,
7443                                                          const string& asmarg,
7444                                                          long precision)
7445 {
7446   static unsigned tick = 0;
7447   expression *argexpr = NULL;
7448
7449   // test for [OFF+]VARNAME[+OFF][(REGISTER)], where VARNAME is a variable
7450   // name. NB: Despite PR11821, we can use regnames here, since the parentheses
7451   // make things unambiguous.
7452   string regex = "^(([0-9]+)[+])?([a-zA-Z_][a-zA-Z0-9_]*)([+][0-9]+)?([(]("
7453                  + regnames + ")[)])?$";
7454   vector<string> matches;
7455   if (!regexp_match(asmarg, regex, matches))
7456     {
7457       assert(matches.size() >= 4);
7458       interned_string varname = matches[3];
7459
7460       // OFF can be before VARNAME (put in matches[2]) or after (put in
7461       // matches[4]) (or both?). Seems like in most cases it comes after,
7462       // unless the code was compiled with -fPIC.
7463       int64_t offset = 0;
7464       if (!matches[2].empty())
7465         offset += lex_cast<int64_t>(matches[2]);
7466       if (matches.size() >= 5 && !matches[4].empty())
7467         offset += lex_cast<int64_t>(matches[4]);
7468
7469       string regname;
7470       if (matches.size() >= 7)
7471         regname = matches[6];
7472
7473       // If it's just VARNAME, then proceed. If it's VARNAME(REGISTER), then
7474       // only proceed if it's RIP-relative addressing on x86_64.
7475       if (regname.empty() || (regname == "%rip" && elf_machine == EM_X86_64))
7476         {
7477           dw.mod_info->get_symtab();
7478           if (dw.mod_info->symtab_status != info_present)
7479             throw SEMANTIC_ERROR(_("can't retrieve symbol table"));
7480
7481           assert(dw.mod_info->sym_table);
7482           unordered_map<interned_string, Dwarf_Addr>& globals = dw.mod_info->sym_table->globals;
7483           unordered_map<interned_string, Dwarf_Addr>& locals = dw.mod_info->sym_table->locals;
7484           Dwarf_Addr addr = 0;
7485
7486           // check symtab locals then globals
7487           if (locals.count(varname))
7488             addr = locals[varname];
7489           if (globals.count(varname))
7490             addr = globals[varname];
7491
7492           if (addr)
7493             {
7494               // add whatever offset is in the operand
7495               addr += offset;
7496
7497               // adjust for dw bias because relocate_address() expects a
7498               // libdw address and this addr is from the symtab
7499               dw.get_module_dwarf(false, false);
7500               addr -= dw.module_bias;
7501
7502               interned_string reloc_section;
7503               Dwarf_Addr reloc_addr = dw.relocate_address(addr, reloc_section);
7504
7505               // OK, we have an address for the variable. Let's create a
7506               // function that will just relocate it at runtime, and then
7507               // call user_[u]int*() on the address it returns.
7508
7509               functioncall *user_int_call = new functioncall;
7510               user_int_call->function = precision_to_function(precision);
7511               user_int_call->tok = e->tok;
7512
7513               string fhash = detox_path(string(e->tok->location.file->name));
7514               functiondecl *get_addr_decl = new functiondecl;
7515               get_addr_decl->tok = e->tok;
7516               get_addr_decl->synthetic = true;
7517               get_addr_decl->unmangled_name = get_addr_decl->name =
7518                 "__private_" + fhash + "_sdt_arg_get_addr_" + lex_cast(tick++);
7519               get_addr_decl->type = pe_long;
7520
7521               // build _stp_umodule_relocate(module, addr, current)
7522               stringstream ss;
7523               ss << " /* unprivileged */ /* pure */ /* pragma:vma */" << endl;
7524               ss << "STAP_RETURN(_stp_umodule_relocate(";
7525                 ss << "\"" << path_remove_sysroot(sess, process_name) << "\", ";
7526                 ss << "0x" << hex << reloc_addr << dec << ", ";
7527                 ss << "current";
7528               ss << "));" << endl;
7529
7530               embeddedcode *ec = new embeddedcode;
7531               ec->tok = e->tok;
7532               ec->code = ss.str();
7533               get_addr_decl->body = ec;
7534               get_addr_decl->join(sess);
7535
7536               functioncall *get_addr_call = new functioncall;
7537               get_addr_call->tok = e->tok;
7538               get_addr_call->function = get_addr_decl->name;
7539               user_int_call->args.push_back(get_addr_call);
7540
7541               argexpr = user_int_call;
7542             }
7543         }
7544     }
7545
7546   return argexpr;
7547 }
7548
7549 void
7550 sdt_uprobe_var_expanding_visitor::visit_target_symbol_arg (target_symbol *e)
7551 {
7552   try
7553     {
7554       unsigned argno = get_target_symbol_argno_and_validate(e); // the N in $argN
7555       string asmarg = arg_tokens[argno-1];   // $arg1 => arg_tokens[0]
7556
7557       // Now we try to parse this thing, which is an assembler operand
7558       // expression.  If we can't, we warn, back down to need_debug_info
7559       // and hope for the best.  Here is the syntax for a few architectures.
7560       // Note that the power iN syntax is only for V3 sdt.h; gcc emits the i.
7561       //
7562       //        literal reg reg      reg+     base+index*size+ VAR VAR+off RIP-relative
7563       //                    indirect offset   offset                       VAR+off
7564       // x86    $N      %rR (%rR)    N(%rR)   O(%bR,%iR,S)     var var+off var+off(%rip)
7565       // x86_64 $N      %rR (%rR)    N(%rR)   O(%bR,%iR,S)     var var+off var+off(%rip)
7566       // power  iN      R   (R)      N(R)     R,R
7567       // ia64   N       rR  [r16]
7568       // s390   N       %rR 0(rR)    N(r15)
7569       // arm    #N      rR  [rR]     [rR, #N]
7570       // arm64  N       rR  [rR]     [rR, N]
7571       // mips   N       $r           N($r)
7572       // riscv  N       r            N(r)
7573
7574       expression* argexpr = 0; // filled in in case of successful parse
7575
7576       // Parse (and remove from asmarg) the leading length
7577       long precision = parse_out_arg_precision(asmarg);
7578       char type __attribute__ ((unused));
7579       type = parse_out_arg_type(asmarg);
7580
7581       try
7582         {
7583           if ((argexpr = try_parse_arg_literal(e, asmarg, precision)) != NULL)
7584             goto matched;
7585
7586           // all other matches require registers
7587           if (regnames == "")
7588             throw SEMANTIC_ERROR("no registers to use for parsing");
7589
7590           if ((argexpr = try_parse_arg_register(e, asmarg, precision)) != NULL)
7591             goto matched;
7592           if ((argexpr = try_parse_arg_offset_register(e, asmarg, precision)) != NULL)
7593             goto matched;
7594           if ((argexpr = try_parse_arg_register_pair(e, asmarg, precision)) != NULL)
7595             goto matched;
7596           if ((argexpr = try_parse_arg_effective_addr(e, asmarg, precision)) != NULL)
7597             goto matched;
7598           if ((argexpr = try_parse_arg_varname(e, asmarg, precision)) != NULL)
7599             goto matched;
7600         }
7601       catch (const semantic_error& er)
7602         {
7603           if (sess.verbose > 3)
7604             clog << "chaining to " << *e->tok << endl
7605                  << sess.build_error_msg(er) << endl;
7606           e->chain(er);
7607         }
7608
7609       // The asmarg operand was not recognized.  Back down to dwarf.
7610       if (! sess.suppress_warnings)
7611         {
7612           if (probe_type == UPROBE3_TYPE)
7613             sess.print_warning (_F("Can't parse SDT_V3 operand '%s' "
7614                                    "[man error::sdt]", asmarg.c_str()),
7615                                 e->tok);
7616           else // must be *PROBE2; others don't get asm operands
7617             sess.print_warning (_F("Downgrading SDT_V2 probe argument to "
7618                                    "dwarf, can't parse '%s' [man error::sdt]",
7619                                    asmarg.c_str()),
7620                                 e->tok);
7621         }
7622
7623       need_debug_info = true;
7624       throw SEMANTIC_ERROR(_("SDT asm not understood, requires debuginfo "
7625                              "[man error::sdt]"), e->tok);
7626
7627       /* NOTREACHED */
7628
7629     matched:
7630       assert (argexpr != 0);
7631
7632       if (sess.verbose > 2)
7633         //TRANSLATORS: We're mapping the operand to a new expression*.
7634         clog << _F("mapped asm operand %s to ", asmarg.c_str()) << *argexpr << endl;
7635
7636       if (e->components.empty()) // We have a scalar
7637         {
7638           if (e->addressof)
7639             throw SEMANTIC_ERROR(_("cannot take address of sdt variable"), e->tok);
7640           provide (argexpr);
7641         }
7642       else  // $var->foo
7643         {
7644           cast_op *cast = new cast_op;
7645           cast->name = "@cast";
7646           cast->tok = e->tok;
7647           cast->operand = argexpr;
7648           cast->components = e->components;
7649           cast->type_name = (string)probe_name + "_arg" + lex_cast(argno);
7650           cast->module = process_name;
7651           cast->visit(this);
7652         }
7653     }
7654   catch (const semantic_error &er)
7655     {
7656       if (sess.verbose > 3)
7657         clog << "chaining to " << *e->tok << endl
7658              << sess.build_error_msg(er) << endl;
7659       e->chain (er);
7660       provide (e);
7661     }
7662 }
7663
7664
7665 void
7666 sdt_uprobe_var_expanding_visitor::visit_target_symbol (target_symbol* e)
7667 {
7668   try
7669     {
7670       assert(e->name.size() > 0
7671              && (e->name[0] == '$' || e->name == "@var"));
7672
7673       if (e->name == "$$name" || e->name == "$$provider" || e->name == "$$parms" || e->name == "$$vars")
7674         visit_target_symbol_context (e);
7675       else
7676         visit_target_symbol_arg (e);
7677     }
7678   catch (const semantic_error &er)
7679     {
7680       if (sess.verbose > 3)
7681         clog << "chaining to " << *e->tok << endl
7682              << sess.build_error_msg(er) << endl;
7683       e->chain (er);
7684       provide (e);
7685     }
7686 }
7687
7688
7689 void
7690 sdt_uprobe_var_expanding_visitor::visit_atvar_op (atvar_op* e)
7691 {
7692   need_debug_info = true;
7693
7694   // Fill in our current module context if needed
7695   if (e->module.empty())
7696     e->module = process_name;
7697
7698   var_expanding_visitor::visit_atvar_op(e);
7699 }
7700
7701
7702 void
7703 sdt_uprobe_var_expanding_visitor::visit_cast_op (cast_op* e)
7704 {
7705   // Fill in our current module context if needed
7706   if (e->module.empty())
7707     e->module = process_name;
7708
7709   var_expanding_visitor::visit_cast_op(e);
7710 }
7711
7712
7713 void
7714 plt_expanding_visitor::visit_target_symbol (target_symbol *e)
7715 {
7716   try
7717     {
7718       if (e->name == "$$name")
7719         {
7720           literal_string *myname = new literal_string (entry);
7721           myname->tok = e->tok;
7722           provide(myname);
7723           return;
7724         }
7725
7726       // variable not found -> throw a semantic error
7727       // (only to be caught right away, but this may be more complex later...)
7728       string alternatives = "$$name";
7729       throw SEMANTIC_ERROR(_F("unable to find plt variable '%s' (alternatives: %s)",
7730                               e->name.to_string().c_str(), alternatives.c_str()), e->tok);
7731     }
7732   catch (const semantic_error &er)
7733     {
7734       if (sess.verbose > 3)
7735         clog << "chaining to " << *e->tok << endl
7736              << sess.build_error_msg(er) << endl;
7737       e->chain (er);
7738       provide (e);
7739     }
7740 }
7741
7742
7743 struct sdt_query : public base_query
7744 {
7745   sdt_query(probe * base_probe, probe_point * base_loc,
7746             dwflpp & dw, literal_map_t const & params,
7747             vector<derived_probe *> & results, const string user_lib);
7748
7749   void query_library (const char *data);
7750   set<string> visited_libraries;
7751   bool resolved_library;
7752
7753   void query_plt (const char *, size_t) {}
7754   void handle_query_module();
7755
7756 private:
7757   stap_sdt_probe_type probe_type;
7758   enum { probe_section=0, note_section=1, unknown_section=-1 } probe_loc;
7759   probe * base_probe;
7760   probe_point * base_loc;
7761   literal_map_t const & params;
7762   vector<derived_probe *> & results;
7763   interned_string pp_mark;
7764   interned_string pp_provider;
7765   string user_lib;
7766
7767   set<string> probes_handled;
7768
7769   Elf_Data *pdata;
7770   size_t probe_scn_offset;
7771   size_t probe_scn_addr;
7772   uint64_t arg_count;
7773   GElf_Addr base;
7774   GElf_Addr pc;
7775   string arg_string;
7776   string probe_name;
7777   string provider_name;
7778   GElf_Addr semaphore_load_offset;
7779   Dwarf_Addr semaphore;
7780
7781   bool init_probe_scn();
7782   bool get_next_probe();
7783   void iterate_over_probe_entries();
7784   void handle_probe_entry();
7785
7786   static void setup_note_probe_entry_callback (sdt_query *me,
7787                                                const string& scn_name,
7788                                                const string& note_name,
7789                                                int type,
7790                                                const char *data,
7791                                                size_t len);
7792   void setup_note_probe_entry (const string& scn_name,
7793                                const string& note_name, int type,
7794                                const char *data, size_t len);
7795
7796   void record_semaphore(vector<derived_probe *> & results, unsigned start);
7797   probe* convert_location();
7798   bool have_uprobe() {return probe_type == uprobe1_type || probe_type == uprobe2_type || probe_type == uprobe3_type;}
7799   bool have_debuginfo_uprobe(bool need_debug_info)
7800   {return probe_type == uprobe1_type
7801       || ((probe_type == uprobe2_type || probe_type == uprobe3_type)
7802           && need_debug_info);}
7803   bool have_debuginfoless_uprobe() {return probe_type == uprobe2_type || probe_type == uprobe3_type;}
7804 };
7805
7806
7807 sdt_query::sdt_query(probe * base_probe, probe_point * base_loc,
7808                      dwflpp & dw, literal_map_t const & params,
7809                      vector<derived_probe *> & results, const string user_lib):
7810   base_query(dw, params), resolved_library(false),
7811   probe_type(unknown_probe_type), probe_loc(unknown_section),
7812   base_probe(base_probe), base_loc(base_loc), params(params), results(results),
7813   user_lib(user_lib), pdata(0), probe_scn_offset(0), probe_scn_addr(0), arg_count(0),
7814   base(0), pc(0), semaphore_load_offset(0), semaphore(0)
7815 {
7816   assert(get_string_param(params, TOK_MARK, pp_mark));
7817   get_string_param(params, TOK_PROVIDER, pp_provider); // pp_provider == "" -> unspecified
7818
7819   // PR10245: permit usage of dtrace-y "-" separator in marker name;
7820   // map it to double-underscores.
7821   size_t pos = 0;
7822   string pp_mark2 = pp_mark; // copy for string replacement processing
7823   while (1) // there may be more than one
7824     {
7825       size_t i = pp_mark2.find("-", pos);
7826       if (i == string::npos) break;
7827       pp_mark2.replace (i, 1, "__");
7828       pos = i+1; // resume searching after the inserted __
7829     }
7830   pp_mark = pp_mark2;
7831
7832   // XXX: same for pp_provider?
7833 }
7834
7835
7836 void
7837 sdt_query::handle_probe_entry()
7838 {
7839   if (! have_uprobe()
7840       && !probes_handled.insert(probe_name).second)
7841     return;
7842
7843   if (sess.verbose > 3)
7844     {
7845       //TRANSLATORS: Describing what probe type (kprobe or uprobe) the probe
7846       //TRANSLATORS: is matched to.
7847       clog << _F("matched probe_name %s probe type ", probe_name.c_str());
7848       switch (probe_type)
7849         {
7850         case uprobe1_type:
7851           clog << "uprobe1 at 0x" << hex << pc << dec << endl;
7852           break;
7853         case uprobe2_type:
7854           clog << "uprobe2 at 0x" << hex << pc << dec << endl;
7855           break;
7856         case uprobe3_type:
7857           clog << "uprobe3 at 0x" << hex << pc << dec << endl;
7858           break;
7859         default:
7860           clog << "unknown!" << endl;
7861           break;
7862         }
7863     }
7864
7865   // Extend the derivation chain
7866   probe *new_base = convert_location();
7867   probe_point *new_location = new_base->locations[0];
7868
7869   bool need_debug_info = false;
7870
7871   // We could get the Elf* from either dwarf_getelf(dwfl_module_getdwarf(...))
7872   // or dwfl_module_getelf(...).  We only need it for the machine type, which
7873   // should be the same.  The bias is used for relocating debuginfoless probes,
7874   // though, so that must come from the possibly-prelinked ELF file, not DWARF.
7875   Dwarf_Addr bias;
7876   Elf* elf = dwfl_module_getelf (dw.mod_info->mod, &bias);
7877
7878   /* Figure out the architecture of this particular ELF file.  The
7879      dwarfless register-name mappings depend on it. */
7880   GElf_Ehdr ehdr_mem;
7881   GElf_Ehdr* em = gelf_getehdr (elf, &ehdr_mem);
7882   if (em == 0) { DWFL_ASSERT ("dwfl_getehdr", dwfl_errno()); }
7883   assert(em);
7884   int elf_machine = em->e_machine;
7885   sdt_uprobe_var_expanding_visitor svv (sess, dw, elf_machine, module_val,
7886                                         provider_name, probe_name, probe_type,
7887                                         arg_string, arg_count);
7888   if (sess.symbol_resolver) // trigger an early var_expanding_visitor::visit_functioncall pass
7889     sess.symbol_resolver->current_probe = new_base;
7890   // We can't do this the normal DWARF PR25841 way, because here we
7891   // don't have the derived_probe yet, just a new copy of a new base
7892   // probe.  Yet we can't wait to do this mapping until later, because
7893   // we need to know the need_debug_info flag as a prerequisite.  XXX:
7894   // maybe we could split this visitor into a need_debug_info
7895   // calculator, and do $$name/etc.  expansion later on the
7896   // uprobe_derived_probes ... but they may be hiding in this->results
7897   // or odd places.
7898   var_expand_const_fold_loop (sess, new_base->body, svv);
7899
7900   need_debug_info = svv.need_debug_info;
7901
7902   // XXX: why not derive_probes() in the uprobes case too?
7903   literal_map_t params;
7904   for (unsigned i = 0; i < new_location->components.size(); ++i)
7905    {
7906       probe_point::component *c = new_location->components[i];
7907       params[c->functor] = c->arg;
7908    }
7909
7910   unsigned prior_results_size = results.size();
7911   dwarf_query q(new_base, new_location, dw, params, results, "", "");
7912   q.has_mark = true; // enables mid-statement probing
7913
7914   // V1 probes always need dwarf info
7915   // V2+ probes need dwarf info in case of a variable reference
7916   if (have_debuginfo_uprobe(need_debug_info))
7917     dw.iterate_over_modules<base_query>(&query_module, &q);
7918
7919   // For V2+ probes, if variable references weren't used or failed (PR14369),
7920   // then try with the more direct approach.  Unresolved $vars might still
7921   // cause their own error, but this gives them a chance to be optimized out.
7922   if (have_debuginfoless_uprobe() && results.size() == prior_results_size)
7923     {
7924       string section;
7925       Dwarf_Addr reloc_addr = q.statement_num_val + bias;
7926       if (dwfl_module_relocations (q.dw.mod_info->mod) > 0)
7927         {
7928           dwfl_module_relocate_address (q.dw.mod_info->mod, &reloc_addr);
7929           section = ".dynamic";
7930         }
7931       else
7932         section = ".absolute";
7933
7934       uprobe_derived_probe* p =
7935         new uprobe_derived_probe ("", "", 0,
7936                                   path_remove_sysroot(sess,q.module_val),
7937                                   section,
7938                                   q.statement_num_val, reloc_addr, q, 0);
7939       p->saveargs (arg_count);
7940       results.push_back (p);
7941     }
7942   sess.unwindsym_modules.insert (dw.module_name);
7943   record_semaphore(results, prior_results_size);
7944 }
7945
7946
7947 void
7948 sdt_query::handle_query_module()
7949 {
7950   if (!init_probe_scn())
7951     return;
7952
7953   if (sess.verbose > 3)
7954     clog << "TOK_MARK: " << pp_mark << " TOK_PROVIDER: " << pp_provider << endl;
7955
7956   if (probe_loc == note_section)
7957     {
7958       GElf_Shdr shdr_mem;
7959       GElf_Shdr *shdr = dw.get_section (".stapsdt.base", &shdr_mem);
7960
7961       // The 'base' lets us adjust the hardcoded addresses in notes for prelink
7962       // effects.  The 'semaphore_load_offset' is the load address of the .probes
7963       // section so the semaphore can be converted to a section offset if needed.
7964       if (shdr)
7965         {
7966           base = shdr->sh_addr;
7967           shdr = dw.get_section (".probes", &shdr_mem);
7968           if (shdr)
7969             semaphore_load_offset = shdr->sh_addr - shdr->sh_offset;
7970         }
7971       else
7972         base = semaphore_load_offset = 0;
7973
7974       dw.iterate_over_notes (this, &sdt_query::setup_note_probe_entry_callback);
7975     }
7976   else if (probe_loc == probe_section)
7977     iterate_over_probe_entries ();
7978 }
7979
7980
7981 bool
7982 sdt_query::init_probe_scn()
7983 {
7984   Elf* elf;
7985   GElf_Shdr shdr_mem;
7986
7987   GElf_Shdr *shdr = dw.get_section (".note.stapsdt", &shdr_mem);
7988   if (shdr)
7989     {
7990       probe_loc = note_section;
7991       return true;
7992     }
7993
7994   shdr = dw.get_section (".probes", &shdr_mem, &elf);
7995   if (shdr)
7996     {
7997       pdata = elf_getdata_rawchunk (elf, shdr->sh_offset, shdr->sh_size, ELF_T_BYTE);
7998       probe_scn_offset = 0;
7999       probe_scn_addr = shdr->sh_addr;
8000       assert (pdata != NULL);
8001       if (sess.verbose > 4)
8002         clog << "got .probes elf scn_addr@0x" << probe_scn_addr << ", size: "
8003              << pdata->d_size << endl;
8004       probe_loc = probe_section;
8005       return true;
8006     }
8007   else
8008     return false;
8009 }
8010
8011 void
8012 sdt_query::setup_note_probe_entry_callback (sdt_query *me,
8013                                             const string& scn_name,
8014                                             const string& note_name, int type,
8015                                             const char *data, size_t len)
8016 {
8017   me->setup_note_probe_entry (scn_name, note_name, type, data, len);
8018 }
8019
8020
8021 void
8022 sdt_query::setup_note_probe_entry (const string& scn_name,
8023                                    const string& note_name, int type,
8024                                    const char *data, size_t len)
8025 {
8026   if (scn_name.compare(".note.stapsdt"))
8027     return;
8028 #define _SDT_NOTE_NAME "stapsdt"
8029   if (note_name.compare(_SDT_NOTE_NAME))
8030     return;
8031 #define _SDT_NOTE_TYPE 3
8032   if (type != _SDT_NOTE_TYPE)
8033     return;
8034
8035   // we found a probe entry
8036   union
8037   {
8038     Elf64_Addr a64[3];
8039     Elf32_Addr a32[3];
8040   } buf;
8041   Dwarf_Addr bias;
8042   Elf* elf = (dwfl_module_getelf (dw.mod_info->mod, &bias));
8043   Elf_Data dst =
8044     {
8045       &buf, ELF_T_ADDR, EV_CURRENT,
8046       gelf_fsize (elf, ELF_T_ADDR, 3, EV_CURRENT), 0, 0
8047     };
8048   assert (dst.d_size <= sizeof buf);
8049
8050   if (len < dst.d_size + 3)
8051     return;
8052
8053   Elf_Data src =
8054     {
8055       (void *) data, ELF_T_ADDR, EV_CURRENT,
8056       dst.d_size, 0, 0
8057     };
8058
8059   if (gelf_xlatetom (elf, &dst, &src,
8060                       elf_getident (elf, NULL)[EI_DATA]) == NULL)
8061     printf ("gelf_xlatetom: %s", elf_errmsg (-1));
8062
8063   probe_type = uprobe3_type;
8064   const char * provider = data + dst.d_size;
8065
8066   const char *name = (const char*)memchr (provider, '\0', data + len - provider);
8067   if(name++ == NULL)
8068     return;
8069
8070   const char *args = (const char*)memchr (name, '\0', data + len - name);
8071   if (args++ == NULL || memchr (args, '\0', data + len - name) != data + len - 1)
8072     return;
8073
8074   provider_name = provider;
8075   probe_name = name;
8076   arg_string = args;
8077
8078   dw.mod_info->marks.insert(make_pair(provider, name));
8079
8080   // Did we find a matching probe?
8081   if (! (dw.function_name_matches_pattern (probe_name, pp_mark)
8082          && ((pp_provider == "")
8083              || dw.function_name_matches_pattern (provider_name, pp_provider))))
8084     return;
8085
8086   // PR13934: Assembly probes are not forced to use the N@OP form.
8087   // If we have '@' then great, else count based on space-delimiters.
8088   arg_count = count(arg_string.begin(), arg_string.end(), '@');
8089   if (!arg_count && !arg_string.empty())
8090     arg_count = 1 + count(arg_string.begin(), arg_string.end(), ' ');
8091
8092   GElf_Addr base_ref;
8093   if (gelf_getclass (elf) == ELFCLASS32)
8094     {
8095       pc = buf.a32[0];
8096       base_ref = buf.a32[1];
8097       semaphore = buf.a32[2];
8098     }
8099   else
8100     {
8101       pc = buf.a64[0];
8102       base_ref = buf.a64[1];
8103       semaphore = buf.a64[2];
8104     }
8105
8106   semaphore += base - base_ref;
8107   pc += base - base_ref;
8108
8109   // The semaphore also needs the ELF bias added now, so
8110   // record_semaphore can properly relocate it later.
8111   semaphore += bias;
8112
8113   if (sess.verbose > 4)
8114     clog << _F(" saw .note.stapsdt %s%s ", probe_name.c_str(), (provider_name != "" ? _(" (provider ")+provider_name+") " : "").c_str()) << "@0x" << hex << pc << dec << endl;
8115
8116   handle_probe_entry();
8117 }
8118
8119
8120 void
8121 sdt_query::iterate_over_probe_entries()
8122 {
8123   // probes are in the .probe section
8124   while (probe_scn_offset < pdata->d_size)
8125     {
8126       stap_sdt_probe_entry_v1 *pbe_v1 = (stap_sdt_probe_entry_v1 *) ((char*)pdata->d_buf + probe_scn_offset);
8127       stap_sdt_probe_entry_v2 *pbe_v2 = (stap_sdt_probe_entry_v2 *) ((char*)pdata->d_buf + probe_scn_offset);
8128       probe_type = (stap_sdt_probe_type)(pbe_v1->type_a);
8129       if (! have_uprobe())
8130         {
8131           // Unless this is a mangled .probes section, this happens
8132           // because the name of the probe comes first, followed by
8133           // the sentinel.
8134           if (sess.verbose > 5)
8135             clog << _F("got unknown probe_type : 0x%x", probe_type) << endl;
8136           probe_scn_offset += sizeof(__uint32_t);
8137           continue;
8138         }
8139       if ((long)pbe_v1 % sizeof(__uint64_t)) // we have stap_sdt_probe_entry_v1.type_b
8140         {
8141           pbe_v1 = (stap_sdt_probe_entry_v1*)((char*)pbe_v1 - sizeof(__uint32_t));
8142           if (pbe_v1->type_b != uprobe1_type)
8143             continue;
8144         }
8145
8146       if (probe_type == uprobe1_type)
8147         {
8148           if (pbe_v1->name == 0) // No name possibly means we have a .so with a relocation
8149             return;
8150           semaphore = 0;
8151           probe_name = (char*)((char*)pdata->d_buf + pbe_v1->name - (char*)probe_scn_addr);
8152           provider_name = ""; // unknown
8153           pc = pbe_v1->arg;
8154           arg_count = 0;
8155           probe_scn_offset += sizeof (stap_sdt_probe_entry_v1);
8156         }
8157       else if (probe_type == uprobe2_type)
8158         {
8159           if (pbe_v2->name == 0) // No name possibly means we have a .so with a relocation
8160             return;
8161           semaphore = pbe_v2->semaphore;
8162           probe_name = (char*)((char*)pdata->d_buf + pbe_v2->name - (char*)probe_scn_addr);
8163           provider_name = (char*)((char*)pdata->d_buf + pbe_v2->provider - (char*)probe_scn_addr);
8164           arg_count = pbe_v2->arg_count;
8165           pc = pbe_v2->pc;
8166           if (pbe_v2->arg_string)
8167             arg_string = (char*)((char*)pdata->d_buf + pbe_v2->arg_string - (char*)probe_scn_addr);
8168           // skip over pbe_v2, probe_name text and provider text
8169           probe_scn_offset = ((long)(pbe_v2->name) - (long)(probe_scn_addr)) + probe_name.length();
8170           probe_scn_offset += sizeof (__uint32_t) - probe_scn_offset % sizeof (__uint32_t);
8171         }
8172       if (sess.verbose > 4)
8173         clog << _("saw .probes ") << probe_name << (provider_name != "" ? _(" (provider ")+provider_name+") " : "")
8174              << "@0x" << hex << pc << dec << endl;
8175
8176       dw.mod_info->marks.insert(make_pair(provider_name, probe_name));
8177
8178       if (dw.function_name_matches_pattern (probe_name, pp_mark)
8179           && ((pp_provider == "") || dw.function_name_matches_pattern (provider_name, pp_provider)))
8180         handle_probe_entry ();
8181     }
8182 }
8183
8184
8185 void
8186 sdt_query::record_semaphore (vector<derived_probe *> & results, unsigned start)
8187 {
8188   for (unsigned i=0; i<2; i++) {
8189     // prefer with-provider symbol; look without provider prefix for backward compatibility only
8190     string semaphore = (i==0 ? (provider_name+"_") : "") + probe_name + "_semaphore";
8191     // XXX: multiple addresses?
8192     if (sess.verbose > 2)
8193       clog << _F("looking for semaphore symbol %s ", semaphore.c_str());
8194
8195     Dwarf_Addr addr;
8196     if (this->semaphore)
8197       addr = this->semaphore;
8198     else
8199       addr  = lookup_symbol_address(dw.module, semaphore.c_str());
8200     if (addr)
8201       {
8202         if (dwfl_module_relocations (dw.module) > 0)
8203           dwfl_module_relocate_address (dw.module, &addr);
8204         // XXX: relocation basis?
8205
8206         // Dyninst needs the *file*-based offset for semaphores,
8207         // so subtract the difference in load addresses between .text and .probes
8208         if (dw.sess.runtime_usermode_p())
8209           addr -= semaphore_load_offset;
8210
8211         for (unsigned i = start; i < results.size(); ++i)
8212           results[i]->sdt_semaphore_addr = addr;
8213         if (sess.verbose > 2)
8214           clog << _(", found at 0x") << hex << addr << dec << endl;
8215         return;
8216       }
8217     else
8218       if (sess.verbose > 2)
8219         clog << _(", not found") << endl;
8220   }
8221 }
8222
8223
8224 probe*
8225 sdt_query::convert_location ()
8226 {
8227   interned_string module = dw.module_name;
8228   if (has_process)
8229     module = path_remove_sysroot(sess, module);
8230   if (build_id_val != "")
8231     module = build_id_val; // prefer this one
8232
8233   probe_point* specific_loc = new probe_point(*base_loc);
8234   specific_loc->well_formed = true;
8235
8236   vector<probe_point::component*> derived_comps;
8237
8238   for (auto it = specific_loc->components.begin();
8239        it != specific_loc->components.end(); ++it)
8240     if ((*it)->functor == TOK_PROCESS)
8241       {
8242         // replace the possibly incomplete path to process
8243         *it = new probe_point::component(TOK_PROCESS,
8244                 new literal_string(has_library ? path : module));
8245
8246         // copy the process name
8247         derived_comps.push_back(*it);
8248       }
8249     else if ((*it)->functor == TOK_LIBRARY)
8250       {
8251         // copy the library name for process probes
8252         derived_comps.push_back(*it);
8253       }
8254     else if ((*it)->functor == TOK_PROVIDER)
8255       {
8256         // replace the possibly wildcarded arg with the specific provider name
8257         *it = new probe_point::component(TOK_PROVIDER,
8258                                          new literal_string(provider_name));
8259       }
8260     else if ((*it)->functor == TOK_MARK)
8261       {
8262         // replace the possibly wildcarded arg with the specific marker name
8263         *it = new probe_point::component(TOK_MARK,
8264                                          new literal_string(probe_name));
8265
8266         if (sess.verbose > 3)
8267           switch (probe_type)
8268             {
8269             case uprobe1_type:
8270               clog << _("probe_type == uprobe1, use statement addr: 0x")
8271                    << hex << pc << dec << endl;
8272               break;
8273             case uprobe2_type:
8274               clog << _("probe_type == uprobe2, use statement addr: 0x")
8275                    << hex << pc << dec << endl;
8276             break;
8277             case uprobe3_type:
8278               clog << _("probe_type == uprobe3, use statement addr: 0x")
8279                    << hex << pc << dec << endl;
8280               break;
8281             default:
8282               clog << _F("probe_type == use_uprobe_no_dwarf, use label name: _stapprobe1_%s",
8283                          pp_mark.to_string().c_str()) << endl;
8284           }
8285
8286         switch (probe_type)
8287           {
8288           case uprobe1_type:
8289           case uprobe2_type:
8290           case uprobe3_type:
8291             // process("executable").statement(probe_arg)
8292             derived_comps.push_back
8293               (new probe_point::component(TOK_STATEMENT,
8294                                           new literal_number(pc, true)));
8295             break;
8296
8297           default: // deprecated
8298             // process("executable").function("*").label("_stapprobe1_MARK_NAME")
8299             derived_comps.push_back
8300               (new probe_point::component(TOK_FUNCTION,
8301                                           new literal_string(string("*"))));
8302             derived_comps.push_back
8303               (new probe_point::component(TOK_LABEL,
8304                                           new literal_string(string("_stapprobe1_") + (string)pp_mark)));
8305             break;
8306           }
8307       }
8308
8309   probe_point* derived_loc = new probe_point(*specific_loc);
8310   derived_loc->components = derived_comps;
8311   return new probe (new probe (base_probe, specific_loc), derived_loc);
8312 }
8313
8314
8315 void
8316 sdt_query::query_library (const char *library)
8317 {
8318   visited_libraries.insert(library);
8319   if (query_one_library (library, dw, user_lib, base_probe, base_loc, results))
8320     resolved_library = true;
8321 }
8322
8323 string
8324 suggest_marks(systemtap_session& sess,
8325               const set<string>& modules,
8326               const string& mark,
8327               const string& provider)
8328 {
8329   if (mark.empty() || modules.empty() || sess.module_cache == NULL || sess.suppress_costly_diagnostics)
8330     return "";
8331
8332   // PR18577: There isn't any point in generating a suggestion list if
8333   // we're not going to display it.
8334   if ((sess.dump_mode == systemtap_session::dump_matched_probes
8335        || sess.dump_mode == systemtap_session::dump_matched_probes_vars)
8336       && sess.verbose < 2)
8337     return "";
8338
8339   set<string> marks;
8340   const auto &cache = sess.module_cache->cache;
8341   bool dash_suggestions = (mark.find("-") != string::npos);
8342
8343   for (auto itmod = modules.begin();
8344        itmod != modules.end(); ++itmod)
8345     {
8346       auto itcache = cache.find(*itmod);
8347       if (itcache != cache.end())
8348         {
8349           for (auto itmarks = itcache->second->marks.cbegin();
8350                itmarks != itcache->second->marks.cend(); ++itmarks)
8351             {
8352               if (provider.empty()
8353                   // simulating dw.function_name_matches_pattern()
8354                   || (fnmatch(provider.c_str(), itmarks->first.c_str(), 0) == 0))
8355                 {
8356                   string marksug = itmarks->second;
8357                   if (dash_suggestions)
8358                     {
8359                       size_t pos = 0;
8360                       while (1) // there may be more than one
8361                         {
8362                           size_t i = marksug.find("__", pos);
8363                           if (i == string::npos) break;
8364                           marksug.replace (i, 2, "-");
8365                           pos = i+1; // resume searching after the inserted -
8366                         }
8367                     }
8368                   marks.insert(marksug);
8369                 }
8370             }
8371         }
8372     }
8373
8374   if (sess.verbose > 2)
8375     {
8376       clog << "suggesting " << marks.size() << " marks "
8377            << "from modules:" << endl;
8378       for (auto itmod = modules.begin();
8379            itmod != modules.end(); ++itmod)
8380         clog << *itmod << endl;
8381     }
8382
8383   if (marks.empty())
8384     return "";
8385
8386   return levenshtein_suggest(mark, marks, 5); // print top 5 marks only
8387 }
8388
8389 string
8390 suggest_plt_functions(systemtap_session& sess,
8391                       const set<string>& modules,
8392                       const string& func)
8393 {
8394   if (func.empty() || modules.empty() || sess.module_cache == NULL || sess.suppress_costly_diagnostics)
8395     return "";
8396
8397   // PR18577: There isn't any point in generating a suggestion list if
8398   // we're not going to display it.
8399   if ((sess.dump_mode == systemtap_session::dump_matched_probes
8400        || sess.dump_mode == systemtap_session::dump_matched_probes_vars)
8401       && sess.verbose < 2)
8402     return "";
8403
8404   set<interned_string> funcs;
8405   const auto &cache = sess.module_cache->cache;
8406
8407   for (auto itmod = modules.begin();
8408        itmod != modules.end(); ++itmod)
8409     {
8410       auto itcache = cache.find(*itmod);
8411       if (itcache != cache.end())
8412         funcs.insert(itcache->second->plt_funcs.begin(),
8413                      itcache->second->plt_funcs.end());
8414     }
8415
8416   if (sess.verbose > 2)
8417     {
8418       clog << "suggesting " << funcs.size() << " plt functions "
8419            << "from modules:" << endl;
8420       for (auto itmod = modules.begin();
8421            itmod != modules.end(); ++itmod)
8422         clog << *itmod << endl;
8423     }
8424
8425   if (funcs.empty())
8426     return "";
8427
8428   return levenshtein_suggest(func, funcs, 5); // print top 5 funcs only
8429 }
8430
8431 string
8432 suggest_dwarf_functions(systemtap_session& sess,
8433                         const set<string>& modules,
8434                         string func)
8435 {
8436   // Trim any @ component
8437   size_t pos = func.find('@');
8438   if (pos != string::npos)
8439     func.erase(pos);
8440
8441   if (func.empty() || modules.empty() || sess.module_cache == NULL || sess.suppress_costly_diagnostics)
8442     return "";
8443
8444   // PR18577: There isn't any point in generating a suggestion list if
8445   // we're not going to display it.
8446   if ((sess.dump_mode == systemtap_session::dump_matched_probes
8447        || sess.dump_mode == systemtap_session::dump_matched_probes_vars)
8448       && sess.verbose < 2)
8449     return "";
8450
8451   // We must first aggregate all the functions from the cache
8452   set<interned_string> funcs;
8453   const auto &cache = sess.module_cache->cache;
8454
8455   for (auto itmod = modules.begin();
8456        itmod != modules.end(); ++itmod)
8457     {
8458       module_info *module;
8459
8460       // retrieve module_info from cache
8461       auto itcache = cache.find(*itmod);
8462       if (itcache != cache.end())
8463         module = itcache->second;
8464       else // module not found
8465         continue;
8466
8467       // add inlines
8468       funcs.insert(module->inlined_funcs.begin(),
8469                    module->inlined_funcs.end());
8470
8471       // add all function symbols in cache
8472       if (module->symtab_status != info_present || module->sym_table == NULL)
8473         continue;
8474       const auto& modfuncs = module->sym_table->map_by_name;
8475       for (auto itfuncs = modfuncs.begin();
8476            itfuncs != modfuncs.end(); ++itfuncs)
8477         funcs.insert(itfuncs->first);
8478     }
8479
8480   if (sess.verbose > 2)
8481     {
8482       clog << "suggesting " << funcs.size() << " dwarf functions "
8483            << "from modules:" << endl;
8484       for (auto itmod = modules.begin();
8485            itmod != modules.end(); ++itmod)
8486         clog << *itmod << endl;
8487     }
8488
8489   if (funcs.empty())
8490     return "";
8491
8492   return levenshtein_suggest(func, funcs, 5); // print top 5 funcs only
8493 }
8494
8495
8496 // Use a glob pattern to find executables or shared libraries
8497 static set<string>
8498 glob_executable(const string& pattern)
8499 {
8500   glob_t the_blob;
8501   set<string> globs;
8502
8503   int rc = glob (pattern.c_str(), 0, NULL, & the_blob);
8504   if (rc)
8505     throw SEMANTIC_ERROR (_F("glob %s error (%d)", pattern.c_str(), rc));
8506
8507   for (unsigned i = 0; i < the_blob.gl_pathc; ++i)
8508     {
8509       const char* globbed = the_blob.gl_pathv[i];
8510       struct stat st;
8511
8512       if (stat (globbed, &st) == 0
8513           && S_ISREG (st.st_mode)) // see find_executable()
8514         {
8515           // Need to call resolve_path here, in order to path-expand
8516           // patterns like process("stap*").  Otherwise it may go through
8517           // to the next round of expansion as ("stap"), leading to a $PATH
8518           // search that's not consistent with the glob search already done.
8519           string canononicalized = resolve_path (globbed);
8520
8521           // The canonical names can result in duplication, for example
8522           // having followed symlinks that are common with shared libraries,
8523           // so we use a set for unique results.
8524           globs.insert(canononicalized);
8525         }
8526     }
8527
8528   globfree (& the_blob);
8529   return globs;
8530 }
8531
8532 static bool
8533 resolve_library_by_path(base_query & q,
8534                         set<string> const & visited_libraries,
8535                         probe * base,
8536                         probe_point * location,
8537                         literal_map_t const & parameters,
8538                         vector<derived_probe *> & finished_results)
8539 {
8540   size_t results_pre = finished_results.size();
8541   systemtap_session & sess = q.sess;
8542   dwflpp & dw = q.dw;
8543
8544   interned_string lib;
8545   if (!location->from_globby_comp(TOK_LIBRARY) && q.has_library
8546       && !visited_libraries.empty()
8547       && q.get_string_param(parameters, TOK_LIBRARY, lib))
8548     {
8549       // The library didn't fit any DT_NEEDED libraries. As a last effort,
8550       // let's try to look for the library directly.
8551
8552       if (contains_glob_chars (lib))
8553         {
8554           // Evaluate glob here, and call derive_probes recursively with each match.
8555           const auto& globs = glob_executable (lib);
8556           for (auto it = globs.begin(); it != globs.end(); ++it)
8557             {
8558               assert_no_interrupts();
8559
8560               const string& globbed = *it;
8561               if (sess.verbose > 1)
8562                 clog << _F("Expanded library(\"%s\") to library(\"%s\")",
8563                            lib.to_string().c_str(), globbed.c_str()) << endl;
8564
8565               probe *new_base = build_library_probe(dw, globbed,
8566                                                     base, location);
8567
8568               // We override "optional = true" here, as if the
8569               // wildcarded probe point was given a "?" suffix.
8570
8571               // This is because wildcard probes will be expected
8572               // by users to apply only to some subset of the
8573               // matching binaries, in the sense of "any", rather
8574               // than "all", sort of similarly how
8575               // module("*").function("...") patterns work.
8576
8577               derive_probes (sess, new_base, finished_results,
8578                              true /* NB: not location->optional */ );
8579             }
8580         }
8581       else
8582         {
8583           string resolved_lib = find_executable(lib, sess.sysroot, sess.sysenv,
8584                                                 "LD_LIBRARY_PATH");
8585           if (resolved_lib.find('/') != string::npos)
8586             {
8587               probe *new_base = build_library_probe(dw, resolved_lib,
8588                                                     base, location);
8589               derive_probes(sess, new_base, finished_results);
8590               if (lib.find('/') == string::npos)
8591                 sess.print_warning(_F("'%s' is not a needed library of '%s'. "
8592                                       "Specify the full path to squelch this warning.",
8593                                       resolved_lib.c_str(), dw.module_name.c_str()));
8594             }
8595           else
8596             {
8597               // Otherwise, let's suggest from the DT_NEEDED libraries
8598               string sugs = levenshtein_suggest(lib, visited_libraries, 5);
8599               if (!sugs.empty())
8600                 throw SEMANTIC_ERROR (_NF("no match (similar library: %s)",
8601                                           "no match (similar libraries: %s)",
8602                                           sugs.find(',') == string::npos,
8603                                           sugs.c_str()));
8604             }
8605         }
8606     }
8607
8608   return results_pre != finished_results.size();
8609 }
8610
8611 static void
8612 handle_module_token(systemtap_session &sess, interned_string &module_token_val)
8613 {
8614   // Do we have a fully resolved path to the module?
8615   if (!is_fully_resolved(module_token_val, sess.sysroot, sess.sysenv))
8616     {
8617       // If the path isn't fully resolved, it might be a in-tree
8618       // module name or a relative path. If it is a relative path,
8619       // convert it to a full path.
8620       if (module_token_val.find('/') != string::npos)
8621         {
8622           string module_token_val2 = find_executable(module_token_val,
8623                                                      sess.sysroot,
8624                                                      sess.sysenv);
8625           module_token_val = module_token_val2;
8626         }
8627       // If we're here, then it's an in-tree module. Replace any
8628       // dashes with underscores.
8629       else
8630         {
8631           size_t dash_pos = 0;
8632           // copy out for replace operations
8633           string module_token_val2 = module_token_val;
8634           while ((dash_pos = module_token_val2.find('-')) != string::npos)
8635               module_token_val2.replace(int(dash_pos), 1, "_");
8636           module_token_val = module_token_val2;
8637         }
8638     }
8639 }
8640
8641 void
8642 dwarf_builder::build(systemtap_session & sess,
8643                      probe * base,
8644                      probe_point * location,
8645                      literal_map_t const & parameters,
8646                      vector<derived_probe *> & finished_results)
8647 {
8648   // NB: the kernel/user dwlfpp objects are long-lived.
8649   // XXX: but they should be per-session, as this builder object
8650   // may be reused if we try to cross-instrument multiple targets.
8651
8652   dwflpp* dw = 0;
8653   literal_map_t filled_parameters = parameters;
8654
8655   interned_string module_name;
8656   int64_t proc_pid;
8657   if (has_null_param (parameters, TOK_KERNEL))
8658     {
8659       bool debuginfo_needed = true;
8660
8661       /* PR26660 kernel.statement(HEX).absolute does not require kernel
8662        * debuginfo */
8663       bool has_statement_num = has_param (parameters, TOK_STATEMENT);
8664       if (has_statement_num)
8665         {
8666           if (has_param (parameters, TOK_ABSOLUTE))
8667             debuginfo_needed = false;
8668         }
8669
8670       //cerr << "debuginfo needed? " << debuginfo_needed << endl;
8671       dw = get_kern_dw(sess, "kernel", debuginfo_needed);
8672     }
8673   else if (get_param (parameters, TOK_MODULE, module_name))
8674     {
8675       handle_module_token(sess, module_name);
8676       filled_parameters[TOK_MODULE] = new literal_string(module_name);
8677
8678       // NB: glob patterns get expanded later, during the offline
8679       // elfutils module listing.
8680       dw = get_kern_dw(sess, module_name);
8681     }
8682   else if (has_param(filled_parameters, TOK_PROCESS))
8683       {
8684         // NB: module_name is not yet set!
8685
8686       if(has_null_param(filled_parameters, TOK_PROCESS))
8687         {
8688           if (!location->auto_path.empty())
8689             {
8690               if (location->components[0]->functor == TOK_PROCESS &&
8691                   location->components[0]->arg == 0)
8692                 {
8693                   // PATH expansion of process component without argument.
8694                   // The filename without the .stp extension is used.
8695                   string full_path = location->auto_path;
8696                   string::size_type start = full_path.find("PATH/") + 4;
8697                   string::size_type end = full_path.rfind(".stp");
8698                   module_name = full_path.substr(start, end - start);
8699                   location->components[0]->arg = new literal_string(module_name);
8700                   filled_parameters[TOK_PROCESS] = new literal_string(module_name);
8701                 }
8702             }
8703           else
8704             {
8705               string file;
8706               try
8707                 {
8708                   file = sess.cmd_file();
8709                 }
8710               catch (const semantic_error& e)
8711                 {
8712                   if(sess.target_pid)
8713                     throw SEMANTIC_ERROR(_("invalid -x pid for unspecified process"
8714                                            " probe [man stapprobes]"), NULL, NULL, &e);
8715                   else
8716                     throw SEMANTIC_ERROR(_("invalid -c command for unspecified process"
8717                                            " probe [man stapprobes]"), NULL, NULL, &e);
8718                 }
8719               if(file.empty())
8720                 throw SEMANTIC_ERROR(_("unspecified process probe is invalid without"
8721                                        " a -c COMMAND or -x PID [man stapprobes]"));
8722               module_name = sess.sysroot + file;
8723               filled_parameters[TOK_PROCESS] = new literal_string(module_name);// this needs to be used in place of the blank map
8724               // in the case of TOK_MARK we need to modify locations as well   // XXX why?
8725               if(location->components[0]->functor==TOK_PROCESS &&
8726                  location->components[0]->arg == 0)
8727                 {
8728                   if (sess.target_pid)
8729                     location->components[0]->arg = new literal_number(sess.target_pid);
8730                   else
8731                     location->components[0]->arg = new literal_string(module_name);
8732                 }
8733             }
8734         }
8735
8736       // NB: must specifically handle the classical ("string") form here, to make sure
8737       // we get the module_name out.
8738       else if (get_param (parameters, TOK_PROCESS, module_name))
8739         {
8740           if (!location->auto_path.empty())
8741             {
8742               if (!module_name.empty() && module_name[0] != '/')
8743                 {
8744                   // prefix argument with file location from PATH directory
8745                   string full_path = location->auto_path;
8746                   string::size_type start = full_path.find("PATH/") + 4;
8747                   string::size_type end = full_path.rfind("/");
8748                   string arg = module_name;
8749                   module_name = full_path.substr(start, end-start+1) + arg;
8750                   location->components[0]->arg = new literal_string(module_name);
8751                   filled_parameters[TOK_PROCESS] = new literal_string(module_name);
8752                 }
8753             }
8754           else
8755             {
8756               filled_parameters[TOK_PROCESS] = new literal_string(module_name);
8757             }
8758         }
8759
8760       else if (get_param (parameters, TOK_PROCESS, proc_pid))
8761         {
8762           // check that the pid given corresponds to a running process
8763           string pid_err_msg;
8764           if (!is_valid_pid(proc_pid, pid_err_msg))
8765             throw SEMANTIC_ERROR(pid_err_msg);
8766
8767           string pid_path = string("/proc/") + lex_cast(proc_pid) + "/exe";
8768           module_name = sess.sysroot + pid_path;
8769
8770           // in the case of TOK_MARK we need to modify locations as well  // XXX why?
8771           if(location->components[0]->functor==TOK_PROCESS &&
8772              location->components[0]->arg == 0)
8773             location->components[0]->arg = new literal_number(sess.target_pid);
8774         }
8775
8776       // PR6456  process("/bin/*")  glob handling
8777       if (contains_glob_chars (module_name))
8778         {
8779           // Expand glob via rewriting the probe-point process("....")
8780           // parameter, asserted to be the first one.
8781
8782           assert (location->components.size() > 0);
8783           assert (location->components[0]->functor == TOK_PROCESS);
8784           assert (location->components[0]->arg);
8785           literal_string* lit = dynamic_cast<literal_string*>(location->components[0]->arg);
8786           assert (lit);
8787
8788           // Evaluate glob here, and call derive_probes recursively with each match.
8789           const auto& globs = glob_executable (sess.sysroot
8790                                                + string(module_name));
8791           unsigned results_pre = finished_results.size();
8792           for (auto it = globs.begin(); it != globs.end(); ++it)
8793             {
8794               assert_no_interrupts();
8795
8796               const string& globbed = *it;
8797
8798               // synthesize a new probe_point, with the glob-expanded string
8799               probe_point *pp = new probe_point (*location);
8800
8801               // PR13338: quote results to prevent recursion
8802               string eglobbed = escape_glob_chars (globbed);
8803
8804               if (sess.verbose > 1)
8805                 clog << _F("Expanded process(\"%s\") to process(\"%s\")",
8806                            module_name.to_string().c_str(), eglobbed.c_str()) << endl;
8807               string eglobbed_tgt = path_remove_sysroot(sess, eglobbed);
8808
8809               probe_point::component* ppc
8810                 = new probe_point::component (TOK_PROCESS,
8811                                               new literal_string (eglobbed_tgt),
8812                                               true /* from_glob */ );
8813               ppc->tok = location->components[0]->tok; // overwrite [0] slot, pattern matched above
8814               pp->components[0] = ppc;
8815
8816               probe* new_probe = new probe (base, pp);
8817
8818               // We override "optional = true" here, as if the
8819               // wildcarded probe point was given a "?" suffix.
8820
8821               // This is because wildcard probes will be expected
8822               // by users to apply only to some subset of the
8823               // matching binaries, in the sense of "any", rather
8824               // than "all", sort of similarly how
8825               // module("*").function("...") patterns work.
8826
8827               derive_probes (sess, new_probe, finished_results,
8828                              true /* NB: not location->optional */ );
8829             }
8830
8831           unsigned results_post = finished_results.size();
8832
8833           // Did we fail to find a function/plt/mark by name? Let's suggest
8834           // something!
8835           interned_string func;
8836           if (results_pre == results_post
8837               && get_param(filled_parameters, TOK_FUNCTION, func)
8838               && !func.empty())
8839             {
8840               string sugs = suggest_dwarf_functions(sess, modules_seen, func);
8841               modules_seen.clear();
8842               if (!sugs.empty())
8843                 throw SEMANTIC_ERROR (_NF("no match (similar function: %s)",
8844                                           "no match (similar functions: %s)",
8845                                           sugs.find(',') == string::npos,
8846                                           sugs.c_str()));
8847             }
8848           else if (results_pre == results_post
8849                    && get_param(filled_parameters, TOK_PLT, func)
8850                    && !func.empty())
8851             {
8852               string sugs = suggest_plt_functions(sess, modules_seen, func);
8853               modules_seen.clear();
8854               if (!sugs.empty())
8855                 throw SEMANTIC_ERROR (_NF("no match (similar function: %s)",
8856                                           "no match (similar functions: %s)",
8857                                           sugs.find(',') == string::npos,
8858                                           sugs.c_str()));
8859             }
8860           else if (results_pre == results_post
8861                    && get_param(filled_parameters, TOK_MARK, func)
8862                    && !func.empty())
8863             {
8864               interned_string provider;
8865               get_param(filled_parameters, TOK_PROVIDER, provider);
8866
8867               string sugs = suggest_marks(sess, modules_seen, func, provider);
8868               modules_seen.clear();
8869               if (!sugs.empty())
8870                 throw SEMANTIC_ERROR (_NF("no match (similar mark: %s)",
8871                                           "no match (similar marks: %s)",
8872                                           sugs.find(',') == string::npos,
8873                                           sugs.c_str()));
8874             }
8875
8876           return; // avoid falling through
8877         }
8878
8879       // PR13338: unquote glob results
8880       module_name = unescape_glob_chars (module_name);
8881       user_path = find_executable (module_name, sess.sysroot, sess.sysenv); // canonicalize it
8882       // Note we don't need to pass the sysroot to
8883       // is_fully_resolved(), since we just passed it to
8884       // find_executable().
8885       if (!is_fully_resolved(user_path, "", sess.sysenv))
8886         throw SEMANTIC_ERROR(_F("cannot find executable '%s'",
8887                                 user_path.to_string().c_str()));
8888
8889       // if the executable starts with "#!", we look for the interpreter of the script
8890       {
8891          ifstream script_file (user_path.to_string().c_str());
8892
8893          if (script_file.good ())
8894          {
8895            string line;
8896
8897            getline (script_file, line);
8898
8899            if (line.compare (0, 2, "#!") == 0)
8900            {
8901               string path = line.substr(2);
8902
8903               // trim white space
8904               trim(path);
8905
8906               if (! path.empty())
8907               {
8908                 // handle "#!/usr/bin/env" redirect
8909                 size_t offset = 0;
8910                 if (path.compare(0, sizeof("/bin/env")-1, "/bin/env") == 0)
8911                 {
8912                   offset = sizeof("/bin/env")-1;
8913                 }
8914                 else if (path.compare(0, sizeof("/usr/bin/env")-1, "/usr/bin/env") == 0)
8915                 {
8916                   offset = sizeof("/usr/bin/env")-1;
8917                 }
8918
8919                 if (offset != 0)
8920                 {
8921                     size_t p3 = path.find_first_not_of(" \t", offset);
8922
8923                     if (p3 != string::npos)
8924                     {
8925                        string env_path = path.substr(p3);
8926                        user_path = find_executable (env_path, sess.sysroot,
8927                                                     sess.sysenv);
8928                     }
8929                 }
8930                 else
8931                 {
8932                   user_path = find_executable (path, sess.sysroot, sess.sysenv);
8933                 }
8934
8935                 struct stat st;
8936
8937                 const string& new_path = user_path;
8938                 if (access (new_path.c_str(), X_OK) == 0
8939                   && stat (new_path.c_str(), &st) == 0
8940                   && S_ISREG (st.st_mode)) // see find_executable()
8941                 {
8942                   if (sess.verbose > 1)
8943                     clog << _F("Expanded process(\"%s\") to process(\"%s\")",
8944                                module_name.to_string().c_str(), new_path.c_str()) << endl;
8945
8946                   assert (location->components.size() > 0);
8947                   assert (location->components[0]->functor == TOK_PROCESS);
8948                   assert (location->components[0]->arg);
8949                   literal_string* lit = dynamic_cast<literal_string*>(location->components[0]->arg);
8950                   assert (lit);
8951
8952                   // synthesize a new probe_point, with the expanded string
8953                   probe_point *pp = new probe_point (*location);
8954                   string user_path_tgt = path_remove_sysroot(sess, new_path);
8955                   probe_point::component* ppc = new probe_point::component (TOK_PROCESS,
8956                                                                             new literal_string (user_path_tgt));
8957                   ppc->tok = location->components[0]->tok; // overwrite [0] slot, pattern matched above
8958                   pp->components[0] = ppc;
8959
8960                   probe* new_probe = new probe (base, pp);
8961
8962                   derive_probes (sess, new_probe, finished_results);
8963
8964                   script_file.close();
8965                   return;
8966                 }
8967               }
8968            }
8969          }
8970          script_file.close();
8971       }
8972
8973       // If this is a library probe, then target the library module instead. We
8974       // do this only if the library path is already fully resolved (such as
8975       // what query_one_library() would have done for us). Otherwise, we resort
8976       // to iterate_over_libraries.
8977       if (get_param (parameters, TOK_LIBRARY, user_lib) && !user_lib.empty())
8978         {
8979           string library = find_executable (user_lib, sess.sysroot,
8980                                             sess.sysenv, "LD_LIBRARY_PATH");
8981           if (is_fully_resolved(library, "", sess.sysenv, "LD_LIBRARY_PATH"))
8982             module_name = library;
8983           else
8984             module_name = user_path; // canonicalize it
8985         }
8986       else
8987         module_name = user_path; // canonicalize it
8988
8989       // uretprobes aren't available everywhere
8990       if (has_null_param(parameters, TOK_RETURN) && !sess.runtime_usermode_p())
8991         {
8992           if (kernel_supports_inode_uprobes(sess) &&
8993               !kernel_supports_inode_uretprobes(sess))
8994             throw SEMANTIC_ERROR
8995               (_("process return probes not available [man error::inode-uprobes]"));
8996         }
8997
8998       // There is a similar check in pass 4 (buildrun), but it is
8999       // needed here too to make sure alternatives for optional
9000       // (? or !) process probes are disposed and/or alternatives
9001       // are selected.
9002       if (!sess.runtime_usermode_p())
9003         check_process_probe_kernel_support(sess);
9004
9005       // user-space target; we use one dwflpp instance per module name
9006       // (= program or shared library)
9007       dw = get_user_dw(sess, module_name);
9008     }
9009
9010   assert(dw);
9011
9012   unsigned results_pre = finished_results.size();
9013
9014   if (sess.verbose > 3)
9015     clog << _F("dwarf_builder::build for %s",
9016                module_name.to_string().c_str()) << endl;
9017
9018   interned_string dummy_mark_name; // NB: PR10245: dummy value, need not substitute - => __
9019   if (get_param(parameters, TOK_MARK, dummy_mark_name))
9020     {
9021       sdt_query sdtq(base, location, *dw, filled_parameters, finished_results, user_lib);
9022       dw->iterate_over_modules<base_query>(&query_module, &sdtq);
9023
9024       // We need to update modules_seen with the modules we've visited
9025       modules_seen.insert(sdtq.visited_modules.begin(),
9026                           sdtq.visited_modules.end());
9027
9028       if (results_pre == finished_results.size()
9029           && sdtq.has_library && !sdtq.resolved_library
9030           && resolve_library_by_path (sdtq, sdtq.visited_libraries,
9031                                       base, location, filled_parameters,
9032                                       finished_results))
9033         return;
9034
9035       // Did we fail to find a mark?
9036       if (results_pre == finished_results.size()
9037           && !location->from_globby_comp(TOK_MARK))
9038         {
9039           interned_string provider;
9040           (void) get_param(filled_parameters, TOK_PROVIDER, provider);
9041
9042           string sugs = suggest_marks(sess, modules_seen, dummy_mark_name, provider);
9043           modules_seen.clear();
9044           if (!sugs.empty())
9045             throw SEMANTIC_ERROR (_NF("no match (similar mark: %s)",
9046                                       "no match (similar marks: %s)",
9047                                       sugs.find(',') == string::npos,
9048                                       sugs.c_str()));
9049         }
9050
9051       return;
9052     }
9053
9054   dwarf_query q(base, location, *dw, filled_parameters, finished_results, user_path, user_lib);
9055
9056   // XXX: kernel.statement.absolute is a special case that requires no
9057   // dwfl processing.  This code should be in a separate builder.
9058   if (q.has_kernel && q.has_absolute)
9059     {
9060       // assert guru mode for absolute probes
9061       if (! q.base_probe->privileged)
9062         {
9063           throw SEMANTIC_ERROR (_("absolute statement probe in unprivileged script; need stap -g"),
9064                                 q.base_probe->tok);
9065         }
9066
9067       // For kernel.statement(NUM).absolute probe points, we bypass
9068       // all the debuginfo stuff: We just wire up a
9069       // dwarf_derived_probe right here and now.
9070       dwarf_derived_probe* p =
9071         new dwarf_derived_probe ("", "", 0, "kernel", "",
9072                                  q.statement_num_val, q.statement_num_val,
9073                                  q, 0);
9074       finished_results.push_back (p);
9075       sess.unwindsym_modules.insert ("kernel");
9076       return;
9077     }
9078
9079   dw->iterate_over_modules<base_query>(&query_module, &q);
9080
9081   // We need to update modules_seen with the modules we've visited
9082   modules_seen.insert(q.visited_modules.begin(),
9083                       q.visited_modules.end());
9084
9085   // PR11553 special processing: .return probes requested, but
9086   // some inlined function instances matched.
9087   unsigned i_n_r = q.inlined_non_returnable.size();
9088   unsigned results_post = finished_results.size();
9089   if (i_n_r > 0)
9090     {
9091       if ((results_pre == results_post) && (! sess.suppress_warnings)) // no matches; issue warning
9092         {
9093           string quicklist;
9094           for (auto it = q.inlined_non_returnable.begin();
9095                it != q.inlined_non_returnable.end();
9096                it++)
9097             {
9098               quicklist += " " + (string)(*it);
9099               if (quicklist.size() > 80) // heuristic, don't make an overlong report line
9100                 {
9101                   quicklist += " ...";
9102                   break;
9103                 }
9104             }
9105
9106           sess.print_warning (_NF("cannot probe .return of %u inlined function %s",
9107                                           "cannot probe .return of %u inlined functions %s",
9108                                            quicklist.size(), i_n_r, quicklist.c_str()));
9109           // There will be also a "no matches" semantic error generated.
9110         }
9111       if (sess.verbose > 1)
9112         clog << _NF("skipped .return probe of %u inlined function",
9113                             "skipped .return probe of %u inlined functions", i_n_r, i_n_r) << endl;
9114       if ((sess.verbose > 3) || (sess.verbose > 2 && results_pre == results_post)) // issue details with high verbosity
9115         {
9116           for (auto it = q.inlined_non_returnable.begin();
9117                it != q.inlined_non_returnable.end();
9118                it++)
9119             clog << (*it) << " ";
9120           clog << endl;
9121         }
9122     } // i_n_r > 0
9123
9124   if (results_pre == finished_results.size()
9125       && q.has_library && !q.resolved_library
9126       && resolve_library_by_path (q, q.visited_libraries,
9127                                   base, location, filled_parameters,
9128                                   finished_results))
9129     return;
9130
9131   // If we just failed to resolve a function/plt by name, we can suggest
9132   // something. We only suggest things for probe points that were not
9133   // synthesized from a glob, i.e. only for 'real' probes. This is also
9134   // required because modules_seen needs to accumulate across recursive
9135   // calls for process(glob)[.library(glob)] probes.
9136   interned_string func;
9137   if (results_pre == results_post && !location->from_globby_comp(TOK_FUNCTION)
9138       && get_param(filled_parameters, TOK_FUNCTION, func)
9139       && !func.empty())
9140     {
9141       string sugs = suggest_dwarf_functions(sess, modules_seen, func);
9142       modules_seen.clear();
9143       if (!sugs.empty())
9144         throw SEMANTIC_ERROR (_NF("no match (similar function: %s)",
9145                                   "no match (similar functions: %s)",
9146                                   sugs.find(',') == string::npos,
9147                                   sugs.c_str()));
9148     }
9149   else if (results_pre == results_post && !location->from_globby_comp(TOK_PLT)
9150            && get_param(filled_parameters, TOK_PLT, func)
9151            && !func.empty())
9152     {
9153       string sugs = suggest_plt_functions(sess, modules_seen, func);
9154       modules_seen.clear();
9155       if (!sugs.empty())
9156         throw SEMANTIC_ERROR (_NF("no match (similar function: %s)",
9157                                   "no match (similar functions: %s)",
9158                                   sugs.find(',') == string::npos,
9159                                   sugs.c_str()));
9160     }
9161   else if (results_pre != results_post)
9162     // Something was derived so we won't need to suggest something
9163     modules_seen.clear();
9164 }
9165
9166 symbol_table::~symbol_table()
9167 {
9168   delete_map(map_by_addr);
9169 }
9170
9171 void
9172 symbol_table::add_symbol(interned_string name, bool weak, bool descriptor,
9173                          Dwarf_Addr addr, Dwarf_Addr entrypc)
9174 {
9175   /* Does the target architecture have function descriptors?
9176      Then we want to filter them out. When seeing a symbol with a name
9177      starting with '.' we assume it is a regular function pointer and
9178      not a pointer to a function descriptor. Note that this might create
9179      duplicates if we also found the function descriptor symbol itself.
9180      dwfl_module_getsym_info () will have resolved the actual function
9181      address for us. But we won't know if we see either or both.  */
9182   if (opd_section != SHN_UNDEF)
9183     {
9184       // Map ".sys_foo" to "sys_foo".
9185       if (name[0] == '.')
9186         name.remove_prefix(1);
9187
9188       // Make sure we don't create duplicate func_info's
9189       auto er = map_by_addr.equal_range(addr);
9190       for (auto it = er.first; it != er.second; ++it)
9191         if (it->second->name == name)
9192           return;
9193     }
9194
9195   func_info *fi = new func_info();
9196   fi->entrypc = entrypc;
9197   fi->addr = addr;
9198   fi->name = name;
9199   fi->weak = weak;
9200   fi->descriptor = descriptor;
9201
9202   map_by_name.insert(make_pair(fi->name, fi));
9203   map_by_addr.insert(make_pair(addr, fi));
9204 }
9205
9206 void
9207 symbol_table::prepare_section_rejection(Dwfl_Module *mod __attribute__ ((unused)))
9208 {
9209   Dwarf_Addr bias;
9210   Elf* elf = (dwarf_getelf (dwfl_module_getdwarf (mod, &bias))
9211               ?: dwfl_module_getelf (mod, &bias));
9212
9213   GElf_Ehdr ehdr_mem;
9214   GElf_Ehdr* em = gelf_getehdr (elf, &ehdr_mem);
9215   if (em == NULL) throw SEMANTIC_ERROR (_("Couldn't get elf header"));
9216
9217   /* Only old ELFv1 PPC64 ABI have function descriptors.  */
9218   opd_section = SHN_UNDEF;
9219   if (em->e_machine != EM_PPC64 || (em->e_flags & EF_PPC64_ABI) == 2)
9220     return;
9221
9222   /*
9223    * The .opd section contains function descriptors that can look
9224    * just like function entry points.  For example, there's a function
9225    * descriptor called "do_exit" that links to the entry point ".do_exit".
9226    * Reject all symbols in .opd.
9227    */
9228   Elf_Scn* scn = 0;
9229   size_t shstrndx;
9230
9231   if (!elf)
9232     return;
9233   if (elf_getshdrstrndx (elf, &shstrndx) != 0)
9234     return;
9235   while ((scn = elf_nextscn(elf, scn)) != NULL)
9236     {
9237       GElf_Shdr shdr_mem;
9238       GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem);
9239       if (!shdr)
9240         continue;
9241       const char *name = elf_strptr(elf, shstrndx, shdr->sh_name);
9242       if (!strcmp(name, ".opd"))
9243         {
9244           opd_section = elf_ndxscn(scn);
9245           return;
9246         }
9247     }
9248 }
9249
9250 bool
9251 symbol_table::reject_section(GElf_Word section)
9252 {
9253   if (section == SHN_UNDEF || section == opd_section)
9254     return true;
9255   return false;
9256 }
9257
9258 enum info_status
9259 symbol_table::get_from_elf()
9260 {
9261   Dwfl_Module *mod = mod_info->mod;
9262   int syments = dwfl_module_getsymtab(mod);
9263   assert(syments);
9264   prepare_section_rejection(mod);
9265
9266   for (int i = 1; i < syments; ++i)
9267     {
9268       GElf_Sym sym;
9269       GElf_Word section;
9270       GElf_Addr addr;
9271       bool reject;
9272
9273 /* Note that dwfl_module_getsym does adjust the sym.st_value but doesn't
9274    try to resolve it to a function address.  dwfl_module_getsym_info leaves
9275    the st_value in tact (no adjustment applied) and returns the fully
9276    resolved address separately. In that case we can simply reject the
9277    symbol if it is SHN_UNDEF and don't need to call reject_section which
9278    does extra checks to see whether the address fall in an architecture
9279    specific descriptor table (which will never be the case when using the
9280    new dwfl_module_getsym_info).  dwfl_module_getsym will only provide us
9281    with the (adjusted) st_value of the symbol, which might point into a
9282    function descriptor table. So in that case we still have to call
9283    reject_section. */
9284 #if _ELFUTILS_PREREQ (0, 158)
9285       const char* n = dwfl_module_getsym_info (mod, i, &sym, &addr, &section,
9286                                       NULL, NULL);
9287       reject = section == SHN_UNDEF;
9288 #else
9289       const char* n = dwfl_module_getsym (mod, i, &sym, &section);
9290       addr = sym.st_value;
9291       reject = reject_section(section);
9292 #endif
9293       if (! n)
9294         continue;
9295       interned_string name = n;
9296
9297       Dwarf_Addr entrypc = addr;
9298       if (GELF_ST_TYPE(sym.st_info) == STT_FUNC)
9299         add_symbol(name, (GELF_ST_BIND(sym.st_info) == STB_WEAK),
9300                    reject, addr, entrypc);
9301       if (GELF_ST_TYPE(sym.st_info) == STT_OBJECT
9302           && GELF_ST_BIND(sym.st_info) == STB_GLOBAL)
9303         globals[name] = addr;
9304       if (GELF_ST_TYPE(sym.st_info) == STT_OBJECT
9305           && GELF_ST_BIND(sym.st_info) == STB_LOCAL)
9306         locals[name] = addr;
9307     }
9308   return info_present;
9309 }
9310
9311 func_info *
9312 symbol_table::get_func_containing_address(Dwarf_Addr addr)
9313 {
9314   auto iter = map_by_addr.upper_bound(addr);
9315   if (iter == map_by_addr.begin())
9316     return NULL;
9317   else
9318     return (--iter)->second;
9319 }
9320
9321 func_info *
9322 symbol_table::get_first_func()
9323 {
9324   auto iter = map_by_addr.begin();
9325   return (iter)->second;
9326 }
9327
9328 /* Note this function filters out any symbols that are "rejected" because
9329    they are "descriptor" function symbols or SHN_UNDEF symbols. */
9330 set <func_info*>
9331 symbol_table::lookup_symbol(interned_string name)
9332 {
9333   set<func_info*> fis;
9334   auto ret = map_by_name.equal_range(name);
9335   for (auto it = ret.first; it != ret.second; ++it)
9336     if (! it->second->descriptor)
9337       fis.insert(it->second);
9338   return fis;
9339 }
9340
9341 /* Filters out the same "descriptor" or SHN_UNDEF symbols as
9342    symbol_table::lookup_symbol.  */
9343 set <Dwarf_Addr>
9344 symbol_table::lookup_symbol_address(interned_string name)
9345 {
9346   set <Dwarf_Addr> addrs;
9347   set <func_info*> fis = lookup_symbol(name);
9348
9349   for (auto it=fis.begin(); it!=fis.end(); ++it)
9350     addrs.insert((*it)->addr);
9351
9352   return addrs;
9353 }
9354
9355 // This is the kernel symbol table.  The kernel macro cond_syscall creates
9356 // a weak symbol for each system call and maps it to sys_ni_syscall.
9357 // For system calls not implemented elsewhere, this weak symbol shows up
9358 // in the kernel symbol table.  Following the precedent of dwarfful stap,
9359 // we refuse to consider such symbols.  Here we delete them from our
9360 // symbol table.
9361 // TODO: Consider generalizing this and/or making it part of blocklist
9362 // processing.
9363 void
9364 symbol_table::purge_syscall_stubs()
9365 {
9366   set<Dwarf_Addr> addrs = lookup_symbol_address("sys_ni_syscall");
9367   if (addrs.empty())
9368     return;
9369
9370   /* Highly unlikely that multiple symbols named "sys_ni_syscall" may exist */
9371   if (addrs.size() > 1)
9372     cerr << _("Multiple 'sys_ni_syscall' symbols found.\n");
9373   Dwarf_Addr stub_addr = * addrs.begin();
9374
9375   auto purge_range = map_by_addr.equal_range(stub_addr);
9376   for (auto iter = purge_range.first;
9377        iter != purge_range.second;
9378        )
9379     {
9380       func_info *fi = iter->second;
9381       if (fi->weak && fi->name != "sys_ni_syscall")
9382         {
9383           map_by_name.erase(fi->name);
9384           map_by_addr.erase(iter++);
9385           delete fi;
9386         }
9387       else
9388         iter++;
9389     }
9390 }
9391
9392 void
9393 module_info::get_symtab()
9394 {
9395   if (symtab_status != info_unknown)
9396     return;
9397
9398   sym_table = new symbol_table(this);
9399   if (!elf_path.empty())
9400     {
9401       symtab_status = sym_table->get_from_elf();
9402     }
9403   else
9404     {
9405       assert(name == TOK_KERNEL);
9406       symtab_status = info_absent;
9407       cerr << _("Error: Cannot find vmlinux.") << endl;;
9408     }
9409   if (symtab_status == info_absent)
9410     {
9411       delete sym_table;
9412       sym_table = NULL;
9413       return;
9414     }
9415
9416   if (name == TOK_KERNEL)
9417     sym_table->purge_syscall_stubs();
9418 }
9419
9420 // update_symtab reconciles data between the elf symbol table and the dwarf
9421 // function enumeration.  It updates the symbol table entries with the dwarf
9422 // die that describes the function, which also signals to query_module_symtab
9423 // that a statement probe isn't needed.  In return, it also adds aliases to the
9424 // function table for names that share the same addr/die.
9425 void
9426 module_info::update_symtab(cu_function_cache_t *funcs)
9427 {
9428   if (!sym_table)
9429     return;
9430
9431   cu_function_cache_t new_funcs;
9432
9433   for (auto func = funcs->begin();
9434        func != funcs->end(); func++)
9435     {
9436       // optimization: inlines will never be in the symbol table
9437       if (dwarf_func_inline(&func->second) != 0)
9438         {
9439           inlined_funcs.insert(func->first);
9440           continue;
9441         }
9442
9443       // We need to make additional efforts to match mangled elf names to dwarf
9444       // too.  DW_AT_linkage_name (or w/ MIPS) can help, but that's sometimes
9445       // missing, so we may also need to try matching by address.  See also the
9446       // notes about _Z in dwflpp::iterate_over_functions().
9447       interned_string name = dwarf_linkage_name(&func->second) ?: func->first;
9448
9449       set<func_info*> fis = sym_table->lookup_symbol(name);
9450       if (fis.empty())
9451         continue;
9452
9453       for (auto fi = fis.begin(); fi!=fis.end(); ++fi)
9454         {
9455           // iterate over all functions at the same address
9456           auto er = sym_table->map_by_addr.equal_range((*fi)->addr);
9457           for (auto it = er.first; it != er.second; ++it)
9458             {
9459               // update this function with the dwarf die
9460               it->second->die = func->second;
9461
9462               // if this function is a new alias, then
9463               // save it to merge into the function cache
9464               if (it->second != *fi)
9465                 new_funcs.insert(make_pair(it->second->name, it->second->die));
9466             }
9467         }
9468     }
9469
9470   // add all discovered aliases back into the function cache
9471   // NB: this won't replace any names that dwarf may have already found
9472   funcs->insert(new_funcs.begin(), new_funcs.end());
9473 }
9474
9475 module_info::~module_info()
9476 {
9477   if (sym_table)
9478     delete sym_table;
9479 }
9480
9481 // ------------------------------------------------------------------------
9482 // user-space probes
9483 // ------------------------------------------------------------------------
9484
9485
9486 struct uprobe_derived_probe_group: public generic_dpg<uprobe_derived_probe>
9487 {
9488 private:
9489   string make_pbm_key (uprobe_derived_probe* p) {
9490     return (string)p->path + "|" + (string)p->module + "|" + (string)p->section + "|" + (string)lex_cast(p->pid);
9491   }
9492
9493   void emit_module_maxuprobes (systemtap_session& s);
9494
9495   // Using our own utrace-based uprobes
9496   void emit_module_utrace_decls (systemtap_session& s);
9497   void emit_module_utrace_init (systemtap_session& s);
9498   void emit_module_utrace_exit (systemtap_session& s);
9499
9500   // Using the upstream inode-based uprobes
9501   void emit_module_inode_decls (systemtap_session& s);
9502   void emit_module_inode_init (systemtap_session& s);
9503   void emit_module_inode_refresh (systemtap_session& s);
9504   void emit_module_inode_exit (systemtap_session& s);
9505
9506   // Using the dyninst backend (via stapdyn)
9507   void emit_module_dyninst_decls (systemtap_session& s);
9508   void emit_module_dyninst_init (systemtap_session& s);
9509   void emit_module_dyninst_exit (systemtap_session& s);
9510
9511   // Perf support
9512   unsigned max_perf_counters;
9513   void emit_module_perf_read_handlers (systemtap_session& s);
9514
9515 public:
9516   uprobe_derived_probe_group(): max_perf_counters(0) {}
9517
9518   void emit_module_decls (systemtap_session& s);
9519   void emit_module_init (systemtap_session& s);
9520   void emit_module_refresh (systemtap_session& s);
9521   void emit_module_exit (systemtap_session& s);
9522
9523   // on-the-fly only supported for inode-uprobes
9524   bool otf_supported (systemtap_session& s)
9525     { return !s.runtime_usermode_p()
9526              && kernel_supports_inode_uprobes(s); }
9527
9528   // workqueue manipulation is safe in uprobes
9529   bool otf_safe_context (systemtap_session& s)
9530     { return otf_supported(s); }
9531
9532   friend bool sort_for_bpf(systemtap_session& s,
9533                            uprobe_derived_probe_group *upg,
9534                            sort_for_bpf_probe_arg_vector &v);
9535 };
9536
9537
9538 void
9539 uprobe_derived_probe::join_group (systemtap_session& s)
9540 {
9541   if (! s.uprobe_derived_probes)
9542     s.uprobe_derived_probes = new uprobe_derived_probe_group ();
9543   s.uprobe_derived_probes->enroll (this);
9544   this->group = s.uprobe_derived_probes;
9545
9546   if (s.runtime_usermode_p())
9547     enable_dynprobes(s);
9548   else
9549     enable_task_finder(s);
9550
9551   // Ask buildrun.cxx to build extra module if needed, and
9552   // signal staprun to load that module.  If we're using the builtin
9553   // inode-uprobes, we still need to know that it is required.
9554   s.need_uprobes = true;
9555 }
9556
9557
9558 void
9559 uprobe_derived_probe::getargs(std::list<std::string> &arg_set) const
9560 {
9561   dwarf_derived_probe::getargs(arg_set);
9562   arg_set.insert(arg_set.end(), args.begin(), args.end());
9563 }
9564
9565
9566 void
9567 uprobe_derived_probe::saveargs(int nargs)
9568 {
9569   for (int i = 1; i <= nargs; i++)
9570     args.push_back("$arg" + lex_cast (i) + ":long");
9571 }
9572
9573
9574 void
9575 uprobe_derived_probe::emit_privilege_assertion (translator_output* o)
9576 {
9577   // These probes are allowed for unprivileged users, but only in the
9578   // context of processes which they own.
9579   emit_process_owner_assertion (o);
9580 }
9581
9582
9583 void
9584 uprobe_derived_probe::emit_perf_read_handler (systemtap_session &s,
9585                                               unsigned idx)
9586 {
9587   if (perf_counter_refs.size())
9588     {
9589       unsigned ref_idx = 0;
9590       s.op->newline() << "static void stap_perf_read_handler_" << idx
9591                       << "(long *values) {";
9592       s.op->indent(1);
9593
9594       for (auto pcii = perf_counter_refs.begin();
9595            pcii != perf_counter_refs.end();
9596            pcii++)
9597         {
9598           // Find the associated perf.counter probe
9599           unsigned i = 0;
9600           for (auto it=s.perf_counters.begin() ;
9601                it != s.perf_counters.end();
9602                it++, i++)
9603             {
9604               if ((*it).first == (*pcii))
9605                 {
9606                   s.op->newline() << "values[" << ref_idx
9607                                   << "] = _stp_perf_read(smp_processor_id(),"
9608                                   << i << ");";
9609                   ref_idx++;
9610                   break;
9611                 }
9612             }
9613         }
9614       s.op->newline() << "return;";
9615       s.op->newline(-1) << "}";
9616     }
9617 }
9618
9619 struct uprobe_builder: public derived_probe_builder
9620 {
9621   uprobe_builder() {}
9622   virtual void build(systemtap_session & sess,
9623                      probe * base,
9624                      probe_point * location,
9625                      literal_map_t const & parameters,
9626                      vector<derived_probe *> & finished_results)
9627   {
9628     int64_t process, address;
9629
9630     if (kernel_supports_inode_uprobes(sess))
9631       throw SEMANTIC_ERROR (_("absolute process probes not available [man error::inode-uprobes]"));
9632
9633     bool b1 = get_param (parameters, TOK_PROCESS, process);
9634     (void) b1;
9635     bool b2 = get_param (parameters, TOK_STATEMENT, address);
9636     (void) b2;
9637     bool rr = has_null_param (parameters, TOK_RETURN);
9638     assert (b1 && b2); // by pattern_root construction
9639
9640     finished_results.push_back(new uprobe_derived_probe(base, location, process, address, rr));
9641   }
9642
9643   virtual string name() { return "uprobe builder"; }
9644 };
9645
9646
9647 void
9648 uprobe_derived_probe_group::emit_module_maxuprobes (systemtap_session& s)
9649 {
9650   // We'll probably need at least this many:
9651   unsigned minuprobes = probes.size();
9652   // .. but we don't want so many that .bss is inflated (PR10507):
9653   unsigned uprobesize = 64;
9654   unsigned maxuprobesmem = 10*1024*1024; // 10 MB
9655   unsigned maxuprobes = maxuprobesmem / uprobesize;
9656
9657   // Let's choose a value on the geometric middle.  This should end up
9658   // between minuprobes and maxuprobes.  It's OK if this number turns
9659   // out to be < minuprobes or > maxuprobes.  At worst, we get a
9660   // run-time error of one kind (too few: missed uprobe registrations)
9661   // or another (too many: vmalloc errors at module load time).
9662   unsigned default_maxuprobes = (unsigned)sqrt((double)minuprobes * (double)maxuprobes);
9663
9664   s.op->newline() << "#ifndef MAXUPROBES";
9665   s.op->newline() << "#define MAXUPROBES " << default_maxuprobes;
9666   s.op->newline() << "#endif";
9667 }
9668
9669
9670 void
9671 uprobe_derived_probe_group::emit_module_perf_read_handlers (systemtap_session& s)
9672 {
9673   // If we're using perf counters, output the handler function(s)
9674   // before the actual uprobe probe handler function.
9675   for (unsigned i=0; i<probes.size(); i++)
9676     {
9677       uprobe_derived_probe *p = probes[i];
9678       p->emit_perf_read_handler(s, i);
9679     }
9680 }
9681
9682
9683 void
9684 udpg_entryfn_prologue_declaration_callback (systemtap_session& s, void* data)
9685 {
9686   unsigned nvalues = (unsigned)(unsigned long)data;
9687   if (nvalues > 0)
9688     {
9689       // Note that only gurus can exceed the maximum number of perf
9690       // values used in 1 probe. Since we store the perf values on
9691       // the stack, we can't have too many.
9692       if (!s.guru_mode && nvalues > 16)
9693         throw SEMANTIC_ERROR(_F("Too many simultaneous uses of perf values (%d is greater than 16)",
9694                                 nvalues));
9695       s.op->newline() << "long perf_read_values[" << nvalues << "];";
9696     }
9697 }
9698
9699
9700 void
9701 udpg_entryfn_prologue_pre_context_callback (systemtap_session& s, void* data)
9702 {
9703   unsigned nvalues = (unsigned)(unsigned long)data;
9704   if (nvalues == 0 || s.runtime_usermode_p())
9705     return;
9706
9707   if (kernel_supports_inode_uprobes (s))
9708     {
9709       s.op->newline() << "if (sup->perf_read_handler)";
9710       s.op->newline(1) << "sup->perf_read_handler(perf_read_values);";
9711       s.op->indent(-1);
9712     }
9713   else
9714     {
9715       s.op->newline() << "if (sups->perf_read_handler)";
9716       s.op->newline(1) << "sups->perf_read_handler(perf_read_values);";
9717       s.op->indent(-1);
9718     }
9719 }
9720
9721
9722 void
9723 uprobe_derived_probe_group::emit_module_utrace_decls (systemtap_session& s)
9724 {
9725   if (probes.empty()) return;
9726   s.op->newline() << "/* ---- utrace uprobes ---- */";
9727   // If uprobes isn't in the kernel, pull it in from the runtime.
9728
9729   s.op->newline() << "#if defined(CONFIG_UPROBES) || defined(CONFIG_UPROBES_MODULE)";
9730   s.op->newline() << "#include <linux/uprobes.h>";
9731   s.op->newline() << "#else";
9732   s.op->newline() << "#include \"linux/uprobes/uprobes.h\"";
9733   s.op->newline() << "#endif";
9734   s.op->newline() << "#ifndef UPROBES_API_VERSION";
9735   s.op->newline() << "#define UPROBES_API_VERSION 1";
9736   s.op->newline() << "#endif";
9737
9738   emit_module_maxuprobes (s);
9739   emit_module_perf_read_handlers(s);
9740
9741   // Forward decls
9742   s.op->newline() << "#include \"linux/uprobes-common.h\"";
9743
9744   // In .bss, the shared pool of uprobe/uretprobe structs.  These are
9745   // too big to embed in the initialized .data stap_uprobe_spec array.
9746   // XXX: consider a slab cache or somesuch for stap_uprobes
9747   s.op->newline() << "static struct stap_uprobe stap_uprobes [MAXUPROBES];";
9748   s.op->newline() << "static DEFINE_MUTEX(stap_uprobes_lock);"; // protects against concurrent registration/unregistration
9749
9750   s.op->assert_0_indent();
9751
9752   // Assign task-finder numbers as we build up the stap_uprobe_tf table.
9753   // This means we process probes[] in two passes.
9754   map <string,unsigned> module_index;
9755   unsigned module_index_ctr = 0;
9756
9757   // not const since embedded task_finder_target struct changes
9758   s.op->newline() << "static struct stap_uprobe_tf stap_uprobe_finders[] = {";
9759   s.op->indent(1);
9760   for (unsigned i=0; i<probes.size(); i++)
9761     {
9762       uprobe_derived_probe *p = probes[i];
9763       string pbmkey = make_pbm_key (p);
9764       if (module_index.find (pbmkey) == module_index.end())
9765         {
9766           module_index[pbmkey] = module_index_ctr++;
9767
9768           s.op->newline() << "{";
9769           // NB: it's essential that make_pbm_key() use all of and
9770           // only the same fields as we're about to emit.
9771           s.op->line() << " .finder={";
9772           s.op->line() << "  .purpose=\"uprobes\",";
9773
9774           if (p->pid != 0)
9775             s.op->line() << " .pid=" << p->pid << ",";
9776
9777           if (p->section == "") // .statement(addr).absolute
9778             s.op->line() << " .callback=&stap_uprobe_process_found,";
9779           else if (p->section == ".absolute") // proxy for ET_EXEC -> exec()'d program
9780             {
9781               s.op->line() << " .procname=" << lex_cast_qstring(p->module) << ",";
9782               s.op->line() << " .callback=&stap_uprobe_process_found,";
9783             }
9784           else if (p->section != ".absolute") // ET_DYN
9785             {
9786               // XXX: process("buildid").library("buildid") not supported?
9787               if (p->has_library)
9788                 s.op->line() << " .procname=\"" << p->path << "\", ";
9789               s.op->line() << " .mmap_callback=&stap_uprobe_mmap_found, ";
9790               s.op->line() << " .munmap_callback=&stap_uprobe_munmap_found, ";
9791               s.op->line() << " .callback=&stap_uprobe_process_munmap,";
9792             }
9793           s.op->line() << " },";
9794           if (p->module != "")
9795             s.op->line() << " .pathname=" << lex_cast_qstring(p->module) << ", ";
9796           s.op->line() << " },";
9797         }
9798       else
9799         { } // skip it in this pass, already have a suitable stap_uprobe_tf slot for it.
9800     }
9801   s.op->newline(-1) << "};";
9802
9803   s.op->assert_0_indent();
9804
9805   unsigned pci;
9806   for (pci=0; pci<probes.size(); pci++)
9807     {
9808       // List of perf counters used by each probe
9809       // This list is an index into struct stap_perf_probe,
9810       uprobe_derived_probe *p = probes[pci];
9811       s.op->newline() << "long perf_counters_" + lex_cast(pci) + "[] = {";
9812       for (auto pcii = p->perf_counter_refs.begin();
9813            pcii != p->perf_counter_refs.end(); pcii++)
9814         {
9815           unsigned i = 0;
9816           // Find the associated perf.counter probe
9817           for (auto it = s.perf_counters.begin();
9818                it != s.perf_counters.end(); it++, i++)
9819             if ((*it).first == (*pcii))
9820               break;
9821           s.op->line() << lex_cast(i) << ", ";
9822         }
9823       s.op->newline() << "};";
9824     }
9825
9826    // NB: read-only structure
9827   s.op->newline() << "static const struct stap_uprobe_spec stap_uprobe_specs [] = {";
9828   s.op->indent(1);
9829   for (unsigned i =0; i<probes.size(); i++)
9830     {
9831       uprobe_derived_probe* p = probes[i];
9832       s.op->newline() << "{";
9833       string key = make_pbm_key (p);
9834       unsigned value = module_index[key];
9835       if (value != 0)
9836         s.op->line() << " .tfi=" << value << ",";
9837       s.op->line() << " .address=(unsigned long)0x" << hex << p->addr << dec << "ULL,";
9838       s.op->line() << " .probe=" << common_probe_init (p) << ",";
9839
9840       if (p->sdt_semaphore_addr != 0)
9841         s.op->line() << " .sdt_sem_offset=(unsigned long)0x"
9842                      << hex << p->sdt_semaphore_addr << dec << "ULL,";
9843
9844       // Don't bother emit if array is empty.
9845       if (p->perf_counter_refs.size())
9846         {
9847           s.op->line() << " .perf_counters_dim=ARRAY_SIZE(perf_counters_" << lex_cast(i) << "),";
9848           // List of perf counters used by a probe from above
9849           s.op->line() << " .perf_counters=perf_counters_" + lex_cast(i) + ",";
9850           s.op->line() << " .perf_read_handler=&stap_perf_read_handler_"
9851               + lex_cast(i) + ",";
9852         }
9853       if (p->has_return)
9854         s.op->line() << " .return_p=1,";
9855       s.op->line() << " },";
9856     }
9857   s.op->newline(-1) << "};";
9858
9859   s.op->assert_0_indent();
9860
9861   s.op->newline() << "static void enter_uprobe_probe (struct uprobe *inst, struct pt_regs *regs) {";
9862   s.op->newline(1) << "struct stap_uprobe *sup = container_of(inst, struct stap_uprobe, up);";
9863   s.op->newline() << "const struct stap_uprobe_spec *sups = &stap_uprobe_specs [sup->spec_index];";
9864   common_probe_entryfn_prologue (s, "STAP_SESSION_RUNNING", "", "sups->probe",
9865                                  "stp_probe_type_uprobe", true,
9866                                  udpg_entryfn_prologue_declaration_callback,
9867                                  udpg_entryfn_prologue_pre_context_callback,
9868                                  (void *)(unsigned long)max_perf_counters);
9869   s.op->newline() << "if (sup->spec_index < 0 || "
9870                   << "sup->spec_index >= " << probes.size() << ") {";
9871   s.op->newline(1) << "_stp_error (\"bad spec_index %d (max " << probes.size()
9872                    << "): %s\", sup->spec_index, c->probe_point);";
9873   s.op->newline() << "goto probe_epilogue;";
9874   s.op->newline(-1) << "}";
9875   s.op->newline() << "c->uregs = regs;";
9876   s.op->newline() << "c->user_mode_p = 1;";
9877
9878   // assign values to something in context
9879   if (s.perf_counters.size())
9880     s.op->newline() << "c->perf_read_values = perf_read_values;";
9881
9882   // Make it look like the IP is set as it would in the actual user
9883   // task when calling real probe handler. Reset IP regs on return, so
9884   // we don't confuse uprobes. PR10458
9885   s.op->newline() << "{";
9886   s.op->indent(1);
9887   s.op->newline() << "unsigned long uprobes_ip = REG_IP(c->uregs);";
9888   s.op->newline() << "SET_REG_IP(regs, inst->vaddr);";
9889   s.op->newline() << "(*sups->probe->ph) (c);";
9890   s.op->newline() << "SET_REG_IP(regs, uprobes_ip);";
9891   s.op->newline(-1) << "}";
9892
9893   common_probe_entryfn_epilogue (s, true, otf_safe_context(s));
9894   s.op->newline(-1) << "}";
9895
9896   s.op->newline() << "static void enter_uretprobe_probe (struct uretprobe_instance *inst, struct pt_regs *regs) {";
9897   s.op->newline(1) << "struct stap_uprobe *sup = container_of(inst->rp, struct stap_uprobe, urp);";
9898   s.op->newline() << "const struct stap_uprobe_spec *sups = &stap_uprobe_specs [sup->spec_index];";
9899   common_probe_entryfn_prologue (s, "STAP_SESSION_RUNNING", "", "sups->probe",
9900                                  "stp_probe_type_uretprobe", true,
9901                                  udpg_entryfn_prologue_declaration_callback,
9902                                  udpg_entryfn_prologue_pre_context_callback,
9903                                  (void *)(unsigned long)max_perf_counters);
9904   s.op->newline() << "c->ips.ri = inst;";
9905   s.op->newline() << "if (sup->spec_index < 0 || "
9906                   << "sup->spec_index >= " << probes.size() << ") {";
9907   s.op->newline(1) << "_stp_error (\"bad spec_index %d (max " << probes.size()
9908                    << "): %s\", sup->spec_index, c->probe_point);";
9909   s.op->newline() << "goto probe_epilogue;";
9910   s.op->newline(-1) << "}";
9911
9912   s.op->newline() << "c->uregs = regs;";
9913   s.op->newline() << "c->user_mode_p = 1;";
9914
9915   // assign values to something in context
9916   if (s.perf_counters.size())
9917     s.op->newline() << "c->perf_read_values = perf_read_values;";
9918
9919   // Make it look like the IP is set as it would in the actual user
9920   // task when calling real probe handler. Reset IP regs on return, so
9921   // we don't confuse uprobes. PR10458
9922   s.op->newline() << "{";
9923   s.op->indent(1);
9924   s.op->newline() << "unsigned long uprobes_ip = REG_IP(c->uregs);";
9925   s.op->newline() << "SET_REG_IP(regs, inst->ret_addr);";
9926   s.op->newline() << "(*sups->probe->ph) (c);";
9927   s.op->newline() << "SET_REG_IP(regs, uprobes_ip);";
9928   s.op->newline(-1) << "}";
9929
9930   common_probe_entryfn_epilogue (s, true, otf_safe_context(s));
9931   s.op->newline(-1) << "}";
9932
9933   s.op->newline();
9934   s.op->newline() << "#include \"linux/uprobes-common.c\"";
9935   s.op->newline();
9936 }
9937
9938
9939 void
9940 uprobe_derived_probe_group::emit_module_utrace_init (systemtap_session& s)
9941 {
9942   if (probes.empty()) return;
9943
9944   s.op->newline() << "/* ---- utrace uprobes ---- */";
9945
9946   s.op->newline() << "for (j=0; j<MAXUPROBES; j++) {";
9947   s.op->newline(1) << "struct stap_uprobe *sup = & stap_uprobes[j];";
9948   s.op->newline() << "sup->spec_index = -1;"; // free slot
9949   // NB: we assume the rest of the struct (specificaly, sup->up) is
9950   // initialized to zero.  This is so that we can use
9951   // sup->up->kdata = NULL for "really free!"  PR 6829.
9952   s.op->newline(-1) << "}";
9953   s.op->newline() << "mutex_init (& stap_uprobes_lock);";
9954
9955   // Set up the task_finders
9956   s.op->newline() << "for (i=0; i<sizeof(stap_uprobe_finders)/sizeof(stap_uprobe_finders[0]); i++) {";
9957   s.op->newline(1) << "struct stap_uprobe_tf *stf = & stap_uprobe_finders[i];";
9958   s.op->newline() << "probe_point = stf->pathname;"; // for error messages; XXX: would prefer pp() or something better
9959   s.op->newline() << "rc = stap_register_task_finder_target (& stf->finder);";
9960
9961   // NB: if (rc), there is no need (XXX: nor any way) to clean up any
9962   // finders already registered, since mere registration does not
9963   // cause any utrace or memory allocation actions.  That happens only
9964   // later, once the task finder engine starts running.  So, for a
9965   // partial initialization requiring unwind, we need do nothing.
9966   s.op->newline() << "if (rc) break;";
9967
9968   s.op->newline(-1) << "}";
9969 }
9970
9971
9972 void
9973 uprobe_derived_probe_group::emit_module_utrace_exit (systemtap_session& s)
9974 {
9975   if (probes.empty()) return;
9976   s.op->newline() << "/* ---- utrace uprobes ---- */";
9977
9978   // NB: there is no stap_unregister_task_finder_target call;
9979   // important stuff like utrace cleanups are done by
9980   // __stp_task_finder_cleanup() via stap_stop_task_finder().
9981   //
9982   // This function blocks until all callbacks are completed, so there
9983   // is supposed to be no possibility of any registration-related code starting
9984   // to run in parallel with our shutdown here.  So we don't need to protect the
9985   // stap_uprobes[] array with the mutex.
9986
9987   s.op->newline() << "for (j=0; j<MAXUPROBES; j++) {";
9988   s.op->newline(1) << "struct stap_uprobe *sup = & stap_uprobes[j];";
9989   s.op->newline() << "const struct stap_uprobe_spec *sups = &stap_uprobe_specs [sup->spec_index];";
9990   s.op->newline() << "if (sup->spec_index < 0) continue;"; // free slot
9991
9992   // PR10655: decrement that ENABLED semaphore
9993   s.op->newline() << "if (sup->sdt_sem_address) {";
9994   s.op->newline(1) << "unsigned short sdt_semaphore;"; // NB: fixed size
9995   s.op->newline() << "pid_t pid = (sups->return_p ? sup->urp.u.pid : sup->up.pid);";
9996   s.op->newline() << "struct task_struct *tsk;";
9997   s.op->newline() << "rcu_read_lock();";
9998
9999   // Do a pid->task_struct* lookup.  For 2.6.24+, this code assumes
10000   // that the pid is always in the global namespace, not in any
10001   // private namespace.
10002   // We'd like to call find_task_by_pid_ns() here, but it isn't
10003   // exported.  So, we call what it calls...
10004   s.op->newline() << "  tsk = pid_task(find_pid_ns(pid, &init_pid_ns), PIDTYPE_PID);";
10005
10006   s.op->newline() << "if (tsk) {"; // just in case the thing exited while we weren't watching
10007   s.op->newline(1) << "if (__access_process_vm_noflush(tsk, sup->sdt_sem_address, &sdt_semaphore, sizeof(sdt_semaphore), 0)) {";
10008   s.op->newline(1) << "sdt_semaphore --;";
10009   s.op->newline() << "#ifdef DEBUG_UPROBES";
10010   s.op->newline() << "_stp_dbug (__FUNCTION__,__LINE__, \"-semaphore %#x @ %#lx\\n\", sdt_semaphore, sup->sdt_sem_address);";
10011   s.op->newline() << "#endif";
10012   s.op->newline() << "__access_process_vm_noflush(tsk, sup->sdt_sem_address, &sdt_semaphore, sizeof(sdt_semaphore), 1);";
10013   s.op->newline(-1) << "}";
10014   // XXX: need to analyze possibility of race condition
10015   s.op->newline(-1) << "}";
10016   s.op->newline() << "rcu_read_unlock();";
10017   s.op->newline(-1) << "}";
10018
10019   s.op->newline() << "if (sups->return_p) {";
10020   s.op->newline(1) << "#ifdef DEBUG_UPROBES";
10021   s.op->newline() << "_stp_dbug (__FUNCTION__,__LINE__, \"-uretprobe spec %d index %d pid %d addr %p\\n\", sup->spec_index, j, sup->up.pid, (void*) sup->up.vaddr);";
10022   s.op->newline() << "#endif";
10023   // NB: PR6829 does not change that we still need to unregister at
10024   // *this* time -- when the script as a whole exits.
10025   s.op->newline() << "unregister_uretprobe (& sup->urp);";
10026   s.op->newline(-1) << "} else {";
10027   s.op->newline(1) << "#ifdef DEBUG_UPROBES";
10028   s.op->newline() << "_stp_dbug (__FUNCTION__,__LINE__, \"-uprobe spec %d index %d pid %d addr %p\\n\", sup->spec_index, j, sup->up.pid, (void*) sup->up.vaddr);";
10029   s.op->newline() << "#endif";
10030   s.op->newline() << "unregister_uprobe (& sup->up);";
10031   s.op->newline(-1) << "}";
10032
10033   s.op->newline() << "sup->spec_index = -1;";
10034
10035   // XXX: uprobe missed counts?
10036
10037   s.op->newline(-1) << "}";
10038
10039   s.op->newline() << "mutex_destroy (& stap_uprobes_lock);";
10040 }
10041
10042
10043 void
10044 uprobe_derived_probe_group::emit_module_inode_decls (systemtap_session& s)
10045 {
10046   if (probes.empty()) return;
10047   s.op->newline() << "/* ---- inode uprobes ---- */";
10048   emit_module_maxuprobes (s);
10049   s.op->newline() << "#include \"linux/uprobes-inode.c\"";
10050   emit_module_perf_read_handlers(s);
10051
10052   // Write the probe handler.
10053   s.op->newline() << "static int stapiu_probe_handler "
10054                   << "(struct stapiu_consumer *sup, struct pt_regs *regs) {";
10055   s.op->newline(1);
10056
10057   // Since we're sharing the entry function, we have to dynamically choose the probe_type
10058   string probe_type = "(sup->return_p ? stp_probe_type_uretprobe : stp_probe_type_uprobe)";
10059   common_probe_entryfn_prologue (s, "STAP_SESSION_RUNNING", "", "sup->probe",
10060                                  probe_type, true,
10061                                  udpg_entryfn_prologue_declaration_callback,
10062                                  udpg_entryfn_prologue_pre_context_callback,
10063                                  (void *)(unsigned long)max_perf_counters);
10064
10065   s.op->newline() << "c->uregs = regs;";
10066   s.op->newline() << "c->user_mode_p = 1;";
10067
10068   // assign values to something in context
10069   if (s.perf_counters.size())
10070     s.op->newline() << "c->perf_read_values = perf_read_values;";
10071
10072   // NB: IP is already set by stapiu_probe_prehandler in uprobes-inode.c
10073   s.op->newline() << "(*sup->probe->ph) (c);";
10074
10075   common_probe_entryfn_epilogue (s, true, otf_safe_context(s));
10076   s.op->newline() << "return 0;";
10077   s.op->newline(-1) << "}";
10078   s.op->assert_0_indent();
10079
10080   // Declare the actual probes.
10081   unsigned pci;
10082   for (pci=0; pci<probes.size(); pci++)
10083     {
10084       // List of perf counters used by each probe
10085       // This list is an index into struct stap_perf_probe,
10086       uprobe_derived_probe *p = probes[pci];
10087       if (p->perf_counter_refs.size() == 0)
10088         continue;
10089
10090       s.op->newline() << "long perf_counters_" + lex_cast(pci) + "[] = {";
10091       for (auto pcii = p->perf_counter_refs.begin();
10092            pcii != p->perf_counter_refs.end(); pcii++)
10093         {
10094           unsigned i = 0;
10095           // Find the associated perf.counter probe
10096           for (auto it = s.perf_counters.begin();
10097                it != s.perf_counters.end(); it++, i++)
10098             if ((*it).first == (*pcii))
10099               break;
10100           s.op->line() << lex_cast(i) << ", ";
10101         }
10102       s.op->newline() << "};";
10103     }
10104
10105   s.op->newline() << "static struct stapiu_consumer "
10106                   << "stap_inode_uprobe_consumers[] = {";
10107   s.op->indent(1);
10108   for (unsigned i=0; i<probes.size(); i++)
10109     {
10110       uprobe_derived_probe *p = probes[i];
10111
10112       s.op->newline() << "{";
10113       if (p->has_return)
10114         s.op->line() << " .return_p=1,";
10115
10116       // emit the task_finder info for this uprobe
10117       // This will be duplicated amongst multiple uprobes for the same file,
10118       // so there will be some iteration within task-finder.
10119       s.op->line() << " .finder={";
10120       s.op->line() << "  .purpose=\"inode-uprobes\",";
10121
10122       if (p->pid != 0)
10123         s.op->line() << " .pid=" << p->pid << ",";
10124
10125       if (p->section == "" ||         // .statement(addr).absolute  XXX?
10126           p->section == ".absolute")  // ET_EXEC
10127         {
10128           s.op->line() << " .callback=&stapiu_process_found,";
10129           if (!p->build_id_val.empty())
10130             {
10131               s.op->line() << " .build_id=\"" << p->build_id_val << "\",";
10132               s.op->line() << " .build_id_len=" << p->build_id_val.size() << ",";
10133               s.op->line() << " .build_id_vaddr=" << p->build_id_vaddr << "ULL,";
10134             }
10135           else
10136             {
10137               s.op->line() << " .build_id_len=0,";
10138               s.op->line() << " .procname=" << lex_cast_qstring(p->module) << ",";
10139             }
10140         }
10141       else if (p->section != ".absolute") // ET_DYN
10142         {
10143           // XXX: process("buildid1").library("buildid2") probably not quite right yet
10144
10145           s.op->line() << " .mmap_callback=&stapiu_mmap_found, ";
10146           s.op->line() << " .munmap_callback=&stapiu_munmap_found, ";
10147           s.op->line() << " .callback=&stapiu_process_munmap,";
10148         }
10149       s.op->line() << " },"; // finished with the task-finder object
10150
10151       // for shared library probing, we need to configure the stapiu_consumer
10152       // rather than (just) the stapiu_consumer.finder (which deals with
10153       // tasks only).
10154       if (p->section != "" && p->section != ".absolute") // shared library or similar
10155         {
10156           if (p->build_id_val.empty())
10157             s.op->line() << " .solib_pathname=" << lex_cast_qstring(p->module) << ",";
10158           else
10159             {
10160               s.op->line() << " .solib_build_id=\"" << p->build_id_val << "\",";
10161               s.op->line() << " .solib_build_id_len=" << p->build_id_val.size() << ",";
10162               s.op->line() << " .solib_build_id_vaddr=" << p->build_id_vaddr << ",";
10163             }
10164         }
10165
10166       // add the _stp_modules[].name key
10167       s.op->line() << " .module_name=" << lex_cast_qstring(p->module) << ",";
10168
10169       // add the per-uprobe addresses
10170       s.op->line() << " .offset=(loff_t)0x" << hex << p->addr << dec << "ULL,";
10171       if (p->sdt_semaphore_addr)
10172         s.op->line() << " .sdt_sem_offset=(loff_t)0x"
10173                      << hex << p->sdt_semaphore_addr << dec << "ULL,";
10174
10175       // Don't bother emit if array is empty.
10176       if (p->perf_counter_refs.size())
10177         {
10178           s.op->line() << " .perf_counters_dim=ARRAY_SIZE(perf_counters_" << lex_cast(i) << "),";
10179           // List of perf counters used by a probe from above
10180           s.op->line() << " .perf_counters=perf_counters_" + lex_cast(i) + ",";
10181           s.op->line() << " .perf_read_handler=&stap_perf_read_handler_"
10182               + lex_cast(i) + ",";
10183         }
10184
10185       s.op->line() << " .probe=" << common_probe_init (p) << ",";
10186       s.op->line() << " },";
10187     }
10188   s.op->newline(-1) << "};";
10189   s.op->assert_0_indent();
10190 }
10191
10192
10193 void
10194 uprobe_derived_probe_group::emit_module_inode_init (systemtap_session& s)
10195 {
10196   if (probes.empty()) return;
10197   s.op->newline() << "/* ---- inode uprobes ---- */";
10198   // Let stapiu_init() handle reporting errors by setting probe_point
10199   // to NULL.
10200   s.op->newline() << "probe_point = NULL;";
10201   s.op->newline() << "rc = stapiu_init ("
10202                   << "stap_inode_uprobe_consumers, "
10203                   << "ARRAY_SIZE(stap_inode_uprobe_consumers));";
10204 }
10205
10206
10207 void
10208 uprobe_derived_probe_group::emit_module_inode_refresh (systemtap_session& s)
10209 {
10210   if (probes.empty()) return;
10211   s.op->newline() << "/* ---- inode uprobes ---- */";
10212   s.op->newline() << "stapiu_refresh ("
10213                   << "stap_inode_uprobe_consumers, "
10214                   << "ARRAY_SIZE(stap_inode_uprobe_consumers));";
10215 }
10216
10217
10218 void
10219 uprobe_derived_probe_group::emit_module_inode_exit (systemtap_session& s)
10220 {
10221   if (probes.empty()) return;
10222   s.op->newline() << "/* ---- inode uprobes ---- */";
10223   s.op->newline() << "stapiu_exit ("
10224                   << "stap_inode_uprobe_consumers, "
10225                   << "ARRAY_SIZE(stap_inode_uprobe_consumers));";
10226 }
10227
10228
10229 void
10230 uprobe_derived_probe_group::emit_module_dyninst_decls (systemtap_session& s)
10231 {
10232   if (probes.empty()) return;
10233   s.op->newline() << "/* ---- dyninst uprobes ---- */";
10234   emit_module_maxuprobes (s);
10235   s.op->newline() << "#include \"dyninst/uprobes.h\"";
10236
10237   // Let the dynprobe_derived_probe_group handle outputting targets
10238   // and probes. This allows us to merge different types of probes.
10239   s.op->newline() << "static struct stapdu_probe stapdu_probes[];";
10240   for (unsigned i = 0; i < probes.size(); i++)
10241     {
10242       uprobe_derived_probe *p = probes[i];
10243
10244       dynprobe_add_uprobe(s, p->module, p->addr, p->sdt_semaphore_addr,
10245                           (p->has_return ? "STAPDYN_PROBE_FLAG_RETURN" : "0"),
10246                           common_probe_init(p));
10247     }
10248   // loc2c-generated code assumes pt_regs are available, so use this to make
10249   // sure we always have *something* for it to dereference...
10250   s.op->newline() << "static struct pt_regs stapdu_dummy_uregs;";
10251
10252   // Write the probe handler.
10253   // NB: not static, so dyninst can find it
10254   s.op->newline() << "int enter_dyninst_uprobe "
10255                   << "(uint64_t index, struct pt_regs *regs) {";
10256   s.op->newline(1) << "struct stapdu_probe *sup = &stapdu_probes[index];";
10257
10258   // Since we're sharing the entry function, we have to dynamically choose the probe_type
10259   string probe_type = "((sup->flags & STAPDYN_PROBE_FLAG_RETURN) ?"
10260                       " stp_probe_type_uretprobe : stp_probe_type_uprobe)";
10261   common_probe_entryfn_prologue (s, "STAP_SESSION_RUNNING", "", "sup->probe",
10262                                  probe_type);
10263
10264   s.op->newline() << "c->uregs = regs ?: &stapdu_dummy_uregs;";
10265   s.op->newline() << "c->user_mode_p = 1;";
10266   // XXX: once we have regs, check how dyninst sets the IP
10267   // XXX: the way that dyninst rewrites stuff is probably going to be
10268   // ...  very confusing to our backtracer (at least if we stay in process)
10269   s.op->newline() << "(*sup->probe->ph) (c);";
10270   common_probe_entryfn_epilogue (s, true, otf_safe_context(s));
10271   s.op->newline() << "return 0;";
10272   s.op->newline(-1) << "}";
10273   s.op->newline() << "#include \"dyninst/uprobes-regs.c\"";
10274   s.op->assert_0_indent();
10275 }
10276
10277
10278 void
10279 uprobe_derived_probe_group::emit_module_dyninst_init (systemtap_session& s)
10280 {
10281   if (probes.empty()) return;
10282
10283   /* stapdyn handles the dirty work via dyninst */
10284   s.op->newline() << "/* ---- dyninst uprobes ---- */";
10285   s.op->newline() << "/* this section left intentionally blank */";
10286 }
10287
10288
10289 void
10290 uprobe_derived_probe_group::emit_module_dyninst_exit (systemtap_session& s)
10291 {
10292   if (probes.empty()) return;
10293
10294   /* stapdyn handles the dirty work via dyninst */
10295   s.op->newline() << "/* ---- dyninst uprobes ---- */";
10296   s.op->newline() << "/* this section left intentionally blank */";
10297 }
10298
10299
10300 void
10301 uprobe_derived_probe_group::emit_module_decls (systemtap_session& s)
10302 {
10303    // Here we need to figure out the max number of perf counters used
10304    // per probe.
10305   for (unsigned i=0; i<probes.size(); i++)
10306     {
10307       uprobe_derived_probe *p = probes[i];
10308       if (max_perf_counters < p->perf_counter_refs.size())
10309         max_perf_counters = p->perf_counter_refs.size();
10310     }
10311
10312   if (s.runtime_usermode_p())
10313     emit_module_dyninst_decls (s);
10314   else if (kernel_supports_inode_uprobes (s))
10315     emit_module_inode_decls (s);
10316   else
10317     emit_module_utrace_decls (s);
10318 }
10319
10320
10321 void
10322 uprobe_derived_probe_group::emit_module_init (systemtap_session& s)
10323 {
10324   if (s.runtime_usermode_p())
10325     emit_module_dyninst_init (s);
10326   else if (kernel_supports_inode_uprobes (s))
10327     emit_module_inode_init (s);
10328   else
10329     emit_module_utrace_init (s);
10330 }
10331
10332
10333 void
10334 uprobe_derived_probe_group::emit_module_refresh (systemtap_session& s)
10335 {
10336   if (!s.runtime_usermode_p() && kernel_supports_inode_uprobes (s))
10337     emit_module_inode_refresh (s);
10338 }
10339
10340
10341 void
10342 uprobe_derived_probe_group::emit_module_exit (systemtap_session& s)
10343 {
10344   if (s.runtime_usermode_p())
10345     emit_module_dyninst_exit (s);
10346   else if (kernel_supports_inode_uprobes (s))
10347     emit_module_inode_exit (s);
10348   else
10349     emit_module_utrace_exit (s);
10350 }
10351
10352 bool
10353 sort_for_bpf(systemtap_session& s  __attribute__ ((unused)),
10354              uprobe_derived_probe_group *upg, sort_for_bpf_probe_arg_vector &v)
10355 {
10356   if (!upg)
10357     return false;
10358
10359   for (auto i = upg->probes.begin(); i != upg->probes.end(); ++i)
10360     {
10361       uprobe_derived_probe *p = *i;
10362
10363       if (p->module.empty())
10364         throw SEMANTIC_ERROR(_("binary path required for BPF runtime"), p->tok);
10365
10366       if (p->has_library)
10367         throw SEMANTIC_ERROR(_("probe not compatible with BPF runtime"), p->tok);
10368
10369       std::stringstream o;
10370
10371       // format of section name: uprobe/<type>/<pid>/<offset><binary path>
10372       o << "uprobe/"
10373         << (p->has_return ? "r" : "p") << "/"
10374         << p->pid << "/"
10375         << p->addr
10376         << p->module;
10377
10378       v.push_back(std::pair<derived_probe *, std::string>(p, o.str()));
10379     }
10380
10381   return true;
10382 }
10383
10384 // ------------------------------------------------------------------------
10385 // Dwarfless kprobe derived probes
10386 // ------------------------------------------------------------------------
10387
10388 static const string TOK_KPROBE("kprobe");
10389
10390 struct kprobe_derived_probe: public generic_kprobe_derived_probe
10391 {
10392   kprobe_derived_probe (systemtap_session& sess,
10393                         vector<derived_probe *> & results,
10394                         probe *base,
10395                         probe_point *location,
10396                         interned_string module,
10397                         interned_string name,
10398                         int64_t stmt_addr,
10399                         bool has_call,
10400                         bool has_return,
10401                         bool has_statement,
10402                         bool has_maxactive,
10403                         bool has_path,
10404                         bool has_library,
10405                         int64_t maxactive_val,
10406                         const string& path,
10407                         const string& library
10408                         );
10409   bool has_call;
10410   bool has_statement;
10411   bool has_path;
10412   bool has_library;
10413   string path;
10414   string library;
10415   bool access_var;
10416   void printsig (std::ostream &o) const;
10417   void join_group (systemtap_session& s);
10418 };
10419
10420 struct kprobe_var_expanding_visitor: public var_expanding_visitor
10421 {
10422   block *add_block;
10423   block *add_call_probe; // synthesized from .return probes with saved $vars
10424   bool add_block_tid, add_call_probe_tid;
10425   bool has_return;
10426
10427   kprobe_var_expanding_visitor(systemtap_session& sess, bool has_return):
10428     var_expanding_visitor(sess), add_block(NULL), add_call_probe(NULL),
10429     add_block_tid(false), add_call_probe_tid(false),
10430     has_return(has_return) {}
10431
10432   void visit_entry_op (entry_op* e);
10433 };
10434
10435
10436 kprobe_derived_probe::kprobe_derived_probe (systemtap_session& sess,
10437                                             vector<derived_probe *> & results,
10438                                             probe *base,
10439                                             probe_point *location,
10440                                             interned_string module,
10441                                             interned_string name,
10442                                             int64_t stmt_addr,
10443                                             bool has_call,
10444                                             bool has_return,
10445                                             bool has_statement,
10446                                             bool has_maxactive,
10447                                             bool has_path,
10448                                             bool has_library,
10449                                             int64_t maxactive_val,
10450                                             const string& path,
10451                                             const string& library
10452                                             ):
10453   generic_kprobe_derived_probe (base, location,
10454                                 module, "" /* FIXME: * section */,
10455                                 stmt_addr, has_return,
10456                                 has_maxactive, maxactive_val,
10457                                 name),
10458   has_call (has_call), has_statement (has_statement),
10459   has_path (has_path), has_library (has_library),
10460   path (path), library (library)
10461 {
10462   this->tok = base->tok;
10463   this->access_var = false;
10464
10465 #ifndef USHRT_MAX
10466 #define USHRT_MAX 32767
10467 #endif
10468
10469   // Expansion of $target variables in the probe body produces an error during
10470   // translate phase, since we're not using debuginfo
10471
10472   vector<probe_point::component*> comps;
10473   comps.push_back (new probe_point::component(TOK_KPROBE));
10474
10475   if (has_statement)
10476     {
10477       comps.push_back (new probe_point::component(TOK_STATEMENT,
10478                                                   new literal_number(addr, true)));
10479       comps.push_back (new probe_point::component(TOK_ABSOLUTE));
10480     }
10481   else
10482     {
10483       size_t pos = name.find(':');
10484       if (pos != string::npos)
10485         {
10486           interned_string module = name.substr(0, pos);
10487           interned_string function = name.substr(pos + 1);
10488           comps.push_back (new probe_point::component(TOK_MODULE, new literal_string(module)));
10489           comps.push_back (new probe_point::component(TOK_FUNCTION, new literal_string(function)));
10490         }
10491       else
10492         comps.push_back (new probe_point::component(TOK_FUNCTION, new literal_string(name)));
10493     }
10494
10495   if (has_call)
10496     comps.push_back (new probe_point::component(TOK_CALL));
10497   if (has_return)
10498     comps.push_back (new probe_point::component(TOK_RETURN));
10499   if (has_maxactive)
10500     comps.push_back (new probe_point::component(TOK_MAXACTIVE, new literal_number(maxactive_val)));
10501
10502   kprobe_var_expanding_visitor v (sess, has_return);
10503   // PR25841: no need for this as kprobe.* probes don't support $context vars at all
10504   // if (sess.symbol_resolver)
10505   //   sess.symbol_resolver->current_probe = this;
10506   var_expand_const_fold_loop (sess, this->body, v);
10507
10508   // If during target-variable-expanding the probe, we added a new block
10509   // of code, add it to the start of the probe.
10510   if (v.add_block)
10511     this->body = new block(v.add_block, this->body);
10512
10513   // If when target-variable-expanding the probe, we need to
10514   // synthesize a sibling function-entry probe.  We don't go through
10515   // the whole probe derivation business (PR10642) that could lead to
10516   // wildcard/alias resolution, or for that dwarf-induced duplication.
10517   //
10518   // XXX: The dwarf_kprobe_derived_probe class has a different method
10519   // to handle these synthesized probes. It might be possible to use
10520   // the same method.
10521   if (v.add_call_probe)
10522     {
10523       assert (has_return);
10524
10525       // We temporarily replace base.
10526       statement* old_body = base->body;
10527       base->body = v.add_call_probe;
10528
10529       derived_probe *entry_handler
10530         = new kprobe_derived_probe (sess, results, base, location,
10531                                     module, name, 0, true /* has_call */,
10532                                     false /* has_return */,
10533                                     has_statement, has_maxactive, has_path,
10534                                     has_library, maxactive_val, path, library);
10535
10536       entry_handler->synthetic = true;
10537       results.push_back (entry_handler);
10538
10539       base->body = old_body;
10540     }
10541
10542   this->sole_location()->components = comps;
10543 }
10544
10545 void kprobe_derived_probe::printsig (ostream& o) const
10546 {
10547   sole_location()->print (o);
10548   o << " /* " << " name = " << symbol_name << "*/";
10549   printsig_nested (o);
10550 }
10551
10552 void kprobe_derived_probe::join_group (systemtap_session& s)
10553 {
10554   if (! s.generic_kprobe_derived_probes)
10555     s.generic_kprobe_derived_probes = new generic_kprobe_derived_probe_group ();
10556   s.generic_kprobe_derived_probes->enroll (this);
10557   this->group = s.generic_kprobe_derived_probes;
10558 }
10559
10560 struct kprobe_builder: public derived_probe_builder
10561 {
10562 public:
10563   kprobe_builder() {}
10564
10565   void build_no_more (systemtap_session &) {}
10566
10567   virtual void build(systemtap_session & sess,
10568                      probe * base,
10569                      probe_point * location,
10570                      literal_map_t const & parameters,
10571                      vector<derived_probe *> & finished_results);
10572   virtual string name() { return "kprobe builder"; }
10573 };
10574
10575
10576 string
10577 suggest_kernel_functions(const systemtap_session& session, interned_string function)
10578 {
10579   const set<interned_string>& kernel_functions = session.kernel_functions;
10580   if (function.empty() || kernel_functions.empty() || session.suppress_costly_diagnostics)
10581     return "";
10582
10583   // PR18577: There isn't any point in generating a suggestion list if
10584   // we're not going to display it.
10585   if ((session.dump_mode == systemtap_session::dump_matched_probes
10586        || session.dump_mode == systemtap_session::dump_matched_probes_vars)
10587       && session.verbose < 2)
10588     return "";
10589
10590   if (session.verbose > 2)
10591     clog << "suggesting " << kernel_functions.size() << " kernel functions" << endl;
10592
10593   return levenshtein_suggest(function, kernel_functions, 5); // print top 5 only
10594 }
10595
10596 void
10597 kprobe_builder::build(systemtap_session & sess,
10598                       probe * base,
10599                       probe_point * location,
10600                       literal_map_t const & parameters,
10601                       vector<derived_probe *> & finished_results)
10602 {
10603   interned_string function_string_val, module_string_val;
10604   interned_string path, library, path_tgt, library_tgt;
10605   int64_t statement_num_val = 0, maxactive_val = 0;
10606   bool has_function_str, has_module_str, has_statement_num;
10607   bool has_absolute, has_call, has_return, has_maxactive;
10608   bool has_path, has_library;
10609
10610   has_function_str = get_param(parameters, TOK_FUNCTION, function_string_val);
10611   has_module_str = get_param(parameters, TOK_MODULE, module_string_val);
10612   has_call = has_null_param (parameters, TOK_CALL);
10613   has_return = has_null_param (parameters, TOK_RETURN);
10614   has_maxactive = get_param(parameters, TOK_MAXACTIVE, maxactive_val);
10615   has_statement_num = get_param(parameters, TOK_STATEMENT, statement_num_val);
10616   has_absolute = has_null_param (parameters, TOK_ABSOLUTE);
10617   has_path = get_param (parameters, TOK_PROCESS, path);
10618   has_library = get_param (parameters, TOK_LIBRARY, library);
10619
10620   if (has_module_str)
10621     {
10622       // The TOK_MODULE value can be a module name, relative path to a
10623       // module filename, or an absolute path to a module
10624       // filename. Handle all those details.
10625       handle_module_token(sess, module_string_val);
10626
10627       // If we've got a fullpath to the kernel module, then get the
10628       // simple name.
10629       if (module_string_val[0] == '/')
10630         module_string_val = modname_from_path(module_string_val);
10631     }
10632   if (has_path)
10633     {
10634       path = find_executable (path, sess.sysroot, sess.sysenv);
10635       path_tgt = path_remove_sysroot(sess, path);
10636     }
10637   if (has_library)
10638     {
10639       library = find_executable (library, sess.sysroot, sess.sysenv,
10640                                  "LD_LIBRARY_PATH");
10641       library_tgt = path_remove_sysroot(sess, library);
10642     }
10643
10644   if (has_function_str)
10645     {
10646       if (has_module_str)
10647         {
10648           function_string_val = (string)module_string_val + ":" + (string)function_string_val;
10649           derived_probe *dp
10650             = new kprobe_derived_probe (sess, finished_results, base,
10651                                         location, module_string_val,
10652                                         function_string_val,
10653                                         0, has_call, has_return,
10654                                         has_statement_num, has_maxactive,
10655                                         has_path, has_library, maxactive_val,
10656                                         path_tgt, library_tgt);
10657           finished_results.push_back (dp);
10658         }
10659       else
10660         {
10661           vector<interned_string> matches;
10662
10663           // Simple names can be found directly
10664           if (function_string_val.find_first_of("*?[{") == string::npos)
10665             {
10666               if (sess.kernel_functions.count(function_string_val))
10667                 matches.push_back(function_string_val);
10668             }
10669           else // Search function name list for matching names
10670             {
10671               const string& val = csh_to_ksh(function_string_val);
10672               for (auto it = sess.kernel_functions.cbegin();
10673                    it != sess.kernel_functions.cend(); it++)
10674                 {
10675                   // fnmatch returns zero for matching.
10676                   if (fnmatch(val.c_str(), it->to_string().c_str(), FNM_EXTMATCH) == 0)
10677                     matches.push_back(*it);
10678                 }
10679             }
10680
10681           if (matches.empty())
10682             {
10683               string sugs = suggest_kernel_functions(sess, function_string_val);
10684               if (!sugs.empty())
10685                 throw SEMANTIC_ERROR (_NF("no match (similar function: %s)",
10686                                           "no match (similar functions: %s)",
10687                                           sugs.find(',') == string::npos,
10688                                           sugs.c_str()));
10689             }
10690
10691           for (auto it = matches.cbegin(); it != matches.cend(); it++)
10692             {
10693               derived_probe *dp
10694                 = new kprobe_derived_probe (sess, finished_results, base,
10695                                             location, "", *it, 0, has_call,
10696                                             has_return, has_statement_num,
10697                                             has_maxactive, has_path,
10698                                             has_library, maxactive_val,
10699                                             path_tgt, library_tgt);
10700               finished_results.push_back (dp);
10701             }
10702         }
10703     }
10704   else
10705     {
10706       // assert guru mode for absolute probes
10707       if ( has_statement_num && has_absolute && !base->privileged )
10708         throw SEMANTIC_ERROR (_("absolute statement probe in unprivileged script; need stap -g"), base->tok);
10709
10710       finished_results.push_back (new kprobe_derived_probe (sess,
10711                                                             finished_results,
10712                                                             base,
10713                                                             location,
10714                                                             module_string_val,
10715                                                             "",
10716                                                             statement_num_val,
10717                                                             has_call,
10718                                                             has_return,
10719                                                             has_statement_num,
10720                                                             has_maxactive,
10721                                                             has_path,
10722                                                             has_library,
10723                                                             maxactive_val,
10724                                                             path_tgt,
10725                                                             library_tgt));
10726     }
10727 }
10728
10729
10730 void
10731 kprobe_var_expanding_visitor::visit_entry_op (entry_op *e)
10732 {
10733   expression *repl = e;
10734
10735   if (has_return)
10736     {
10737       // see also PR20416
10738       // XXX it would be nice to use gen_kretprobe_saved_return when
10739       // available, but it requires knowing the types already, which is
10740       // problematic for arbitrary expressons.
10741       repl = gen_mapped_saved_return (sess, e->operand, "entry",
10742                                       add_block, add_block_tid,
10743                                       add_call_probe, add_call_probe_tid);
10744     }
10745   provide (repl);
10746 }
10747
10748
10749 // ------------------------------------------------------------------------
10750 //  Hardware breakpoint based probes.
10751 // ------------------------------------------------------------------------
10752
// Probe point component names used by hardware breakpoint probes.
static const string TOK_HWBKPT = "data";
static const string TOK_HWBKPT_WRITE = "write";
static const string TOK_HWBKPT_RW = "rw";
static const string TOK_LENGTH = "length";

// Access kinds stored in hwbkpt_derived_probe::hwbkpt_access.
#define HWBKPT_READ 0
#define HWBKPT_WRITE 1
#define HWBKPT_RW 2
10761 struct hwbkpt_derived_probe: public derived_probe
10762 {
10763   hwbkpt_derived_probe (probe *base,
10764                         probe_point *location,
10765                         uint64_t addr,
10766                         string symname,
10767                         unsigned int len,
10768                         bool has_only_read_access,
10769                         bool has_only_write_access,
10770                         bool has_rw_access,
10771                         bool is_kernel
10772                         );
10773   Dwarf_Addr hwbkpt_addr;
10774   string symbol_name;
10775   unsigned int hwbkpt_access,hwbkpt_len;
10776   bool kernel_p;
10777
10778   void printsig (std::ostream &o) const;
10779   void join_group (systemtap_session& s);
10780 };
10781
10782 struct hwbkpt_derived_probe_group: public derived_probe_group
10783 {
10784 private:
10785   vector<hwbkpt_derived_probe*> hwbkpt_probes;
10786
10787 public:
10788   void enroll (hwbkpt_derived_probe* probe, systemtap_session& s);
10789   void emit_module_decls (systemtap_session& s);
10790   void emit_module_init (systemtap_session& s);
10791   void emit_module_exit (systemtap_session& s);
10792
10793   friend void warn_for_bpf(systemtap_session& s,
10794                            hwbkpt_derived_probe_group *dpg,
10795                            const std::string& kind);
10796 };
10797
10798 hwbkpt_derived_probe::hwbkpt_derived_probe (probe *base,
10799                                             probe_point *location,
10800                                             uint64_t addr,
10801                                             string symname,
10802                                             unsigned int len,
10803                                             bool has_only_read_access,
10804                                             bool has_only_write_access,
10805                                             bool,
10806                                             bool is_kernel):
10807   derived_probe (base, location, true /* .components soon rewritten */ ),
10808   hwbkpt_addr (addr),
10809   symbol_name (symname),
10810   hwbkpt_len (len),
10811   kernel_p(is_kernel)
10812 {
10813   this->tok = base->tok;
10814
10815   vector<probe_point::component*> comps;
10816   comps.push_back (new probe_point::component(TOK_KERNEL));
10817
10818   if (hwbkpt_addr)
10819     comps.push_back (new probe_point::component (TOK_HWBKPT,
10820                                                  new literal_number(hwbkpt_addr, true)));
10821   else if (symbol_name.size())
10822     comps.push_back (new probe_point::component (TOK_HWBKPT, new literal_string(symbol_name)));
10823
10824   comps.push_back (new probe_point::component (TOK_LENGTH, new literal_number(hwbkpt_len)));
10825
10826   if (has_only_read_access)
10827     this->hwbkpt_access = HWBKPT_READ ;
10828 //TODO add code for comps.push_back for read, since this flag is not for x86
10829
10830   else
10831     {
10832       if (has_only_write_access)
10833         {
10834           this->hwbkpt_access = HWBKPT_WRITE ;
10835           comps.push_back (new probe_point::component(TOK_HWBKPT_WRITE));
10836         }
10837       else
10838         {
10839           this->hwbkpt_access = HWBKPT_RW ;
10840           comps.push_back (new probe_point::component(TOK_HWBKPT_RW));
10841         }
10842     }
10843
10844   this->sole_location()->components = comps;
10845 }
10846
10847 void hwbkpt_derived_probe::printsig (ostream& o) const
10848 {
10849   sole_location()->print (o);
10850   printsig_nested (o);
10851 }
10852
10853 void hwbkpt_derived_probe::join_group (systemtap_session& s)
10854 {
10855   if (! s.hwbkpt_derived_probes)
10856     s.hwbkpt_derived_probes = new hwbkpt_derived_probe_group ();
10857   s.hwbkpt_derived_probes->enroll (this, s);
10858   this->group = s.hwbkpt_derived_probes;
10859 }
10860
10861 void hwbkpt_derived_probe_group::enroll (hwbkpt_derived_probe* p, systemtap_session&)
10862 {
10863   hwbkpt_probes.push_back (p);
10864 }
10865
10866 void
10867 hwbkpt_derived_probe_group::emit_module_decls (systemtap_session& s)
10868 {
10869   if (hwbkpt_probes.empty()) return;
10870
10871   s.op->newline() << "/* ---- hwbkpt-based probes ---- */";
10872
10873   s.op->newline() << "#include <linux/perf_event.h>";
10874   s.op->newline() << "#include <linux/hw_breakpoint.h>";
10875   s.op->newline() << "#include <linux/stap-hw-breakpoint.h>";
10876   s.op->newline();
10877
10878   // Forward declare the main entry functions
10879   s.op->newline() << "#ifdef STAPCONF_PERF_HANDLER_NMI";
10880   s.op->newline() << "static void enter_hwbkpt_probe (struct perf_event *bp,";
10881   s.op->line() << " int nmi,";
10882   s.op->line() << " struct perf_sample_data *data,";
10883   s.op->line() << " struct pt_regs *regs);";
10884   s.op->newline() << "#else";
10885   s.op->newline() << "static void enter_hwbkpt_probe (struct perf_event *bp,";
10886   s.op->line() << " struct perf_sample_data *data,";
10887   s.op->line() << " struct pt_regs *regs);";
10888   s.op->newline() << "#endif";
10889
10890   // Emit the actual probe list.
10891
10892   s.op->newline() << "static struct perf_event_attr ";
10893   s.op->newline() << "stap_hwbkpt_probe_array[" << hwbkpt_probes.size() << "];";
10894
10895   s.op->newline() << "static void *";
10896   s.op->newline() << "stap_hwbkpt_ret_array[" << hwbkpt_probes.size() << "];";
10897   s.op->newline() << "static struct stap_hwbkpt_probe stap_hwbkpt_probes[] = {";
10898   s.op->indent(1);
10899
10900   for (unsigned int it = 0; it < hwbkpt_probes.size(); it++)
10901     {
10902       hwbkpt_derived_probe* p = hwbkpt_probes.at(it);
10903       s.op->newline() << "{";
10904       if (p->kernel_p)
10905         s.op->line() << " .kernel_p=1" << ",";
10906       if (p->symbol_name.size())
10907       s.op->line() << " .address=(unsigned long)0x0" << "ULL,";
10908       else
10909       s.op->line() << " .address=(unsigned long)0x" << hex << p->hwbkpt_addr << dec << "ULL,";
10910       switch(p->hwbkpt_access){
10911       case HWBKPT_READ:
10912                 s.op->line() << " .atype=HW_BREAKPOINT_R ,";
10913                 break;
10914       case HWBKPT_WRITE:
10915                 s.op->line() << " .atype=HW_BREAKPOINT_W ,";
10916                 break;
10917       case HWBKPT_RW:
10918                 s.op->line() << " .atype=HW_BREAKPOINT_R|HW_BREAKPOINT_W ,";
10919                 break;
10920         };
10921       s.op->line() << " .len=" << p->hwbkpt_len << ",";
10922       s.op->line() << " .probe=" << common_probe_init (p) << ",";
10923       s.op->line() << " .symbol=\"" << p->symbol_name << "\",";
10924       s.op->line() << " },";
10925     }
10926   s.op->newline(-1) << "};";
10927
10928   // Emit the hwbkpt callback function
10929   s.op->newline() ;
10930   s.op->newline() << "#ifdef STAPCONF_PERF_HANDLER_NMI";
10931   s.op->newline() << "static void enter_hwbkpt_probe (struct perf_event *bp,";
10932   s.op->line() << " int nmi,";
10933   s.op->line() << " struct perf_sample_data *data,";
10934   s.op->line() << " struct pt_regs *regs) {";
10935   s.op->newline() << "#else";
10936   s.op->newline() << "static void enter_hwbkpt_probe (struct perf_event *bp,";
10937   s.op->line() << " struct perf_sample_data *data,";
10938   s.op->line() << " struct pt_regs *regs) {";
10939   s.op->newline() << "#endif";
10940   s.op->newline(1) << "unsigned int i;";
10941   s.op->newline() << "if (bp->attr.type != PERF_TYPE_BREAKPOINT) return;";
10942   s.op->newline() << "for (i=0; i<" << hwbkpt_probes.size() << "; i++) {";
10943   s.op->newline(1) << "struct perf_event_attr *hp = & stap_hwbkpt_probe_array[i];";
10944   // XXX: why not match stap_hwbkpt_ret_array[i] against bp instead?
10945   s.op->newline() << "if (bp->attr.bp_addr==hp->bp_addr && bp->attr.bp_type==hp->bp_type && bp->attr.bp_len==hp->bp_len) {";
10946   s.op->newline(1) << "struct stap_hwbkpt_probe *skp = &stap_hwbkpt_probes[i];";
10947   common_probe_entryfn_prologue (s, "STAP_SESSION_RUNNING", "", "skp->probe",
10948                                  "stp_probe_type_hwbkpt");
10949   s.op->newline() << "if (user_mode(regs)) {";
10950   s.op->newline(1)<< "c->user_mode_p = 1;";
10951   s.op->newline() << "c->uregs = regs;";
10952   s.op->newline(-1) << "} else {";
10953   s.op->newline(1) << "c->kregs = regs;";
10954   s.op->newline(-1) << "}";
10955   s.op->newline() << "(*skp->probe->ph) (c);";
10956   common_probe_entryfn_epilogue (s, true, otf_safe_context(s));
10957   s.op->newline(-1) << "}";
10958   s.op->newline(-1) << "}";
10959   s.op->newline() << "return;";
10960   s.op->newline(-1) << "}";
10961 }
10962
10963 void
10964 hwbkpt_derived_probe_group::emit_module_init (systemtap_session& s)
10965 {
10966   s.op->newline() << "rc = stap_hwbkpt_init(&enter_hwbkpt_probe, stap_hwbkpt_probes, "
10967     << hwbkpt_probes.size() << ", stap_hwbkpt_probe_array, "
10968     << "stap_hwbkpt_ret_array, &probe_point);";
10969 }
10970
10971 void
10972 hwbkpt_derived_probe_group::emit_module_exit (systemtap_session& s)
10973 {
10974   //Unregister hwbkpt probes.
10975   s.op->newline() << "stap_hwbkpt_exit(stap_hwbkpt_probes, "
10976     << hwbkpt_probes.size() << ", stap_hwbkpt_ret_array);";
10977 }
10978
10979
10980 // PR26234: Not supported by stapbpf.
10981 void
10982 warn_for_bpf(systemtap_session& s, hwbkpt_derived_probe_group *hpg,
10983              const std::string& kind)
10984 {
10985   for (unsigned int i = 0; i < hpg->hwbkpt_probes.size(); i++)
10986     {
10987       s.print_warning(_F("%s will be ignored by bpf backend",
10988                          kind.c_str()),
10989                       hpg->hwbkpt_probes[i]->tok);
10990     }
10991 }
10992
10993
10994 struct hwbkpt_builder: public derived_probe_builder
10995 {
10996   bool kernel_p;
10997
10998   hwbkpt_builder(bool is_kernel): kernel_p(is_kernel) {}
10999   virtual void build(systemtap_session & sess,
11000                      probe * base,
11001                      probe_point * location,
11002                      literal_map_t const & parameters,
11003                      vector<derived_probe *> & finished_results);
11004
11005   virtual string name() { return "hwbkpt builder"; }
11006 };
11007
11008 void
11009 hwbkpt_builder::build(systemtap_session & sess,
11010                       probe * base,
11011                       probe_point * location,
11012                       literal_map_t const & parameters,
11013                       vector<derived_probe *> & finished_results)
11014 {
11015   interned_string symbol_str_val;
11016   int64_t hwbkpt_address, len;
11017   bool has_addr, has_symbol_str, has_write, has_rw, has_len;
11018
11019   if (! (sess.kernel_config["CONFIG_PERF_EVENTS"] == string("y")))
11020       throw SEMANTIC_ERROR (_("CONFIG_PERF_EVENTS not available on this kernel"),
11021                             location->components[0]->tok);
11022   if (! (sess.kernel_config["CONFIG_HAVE_HW_BREAKPOINT"] == string("y")))
11023       throw SEMANTIC_ERROR (_("CONFIG_HAVE_HW_BREAKPOINT not available on this kernel"),
11024                             location->components[0]->tok);
11025
11026   // See BZ1431263 (on aarch64, running the hw_watch_addr.stp
11027   // systemtap examples cause a stuck CPU).
11028   if (sess.architecture == string("arm64"))
11029       throw SEMANTIC_ERROR (_F("%s.data probes are not supported on arm64 kernels",
11030                                kernel_p ? "kernel" : "process"),
11031                             location->components[0]->tok);
11032
11033   has_addr = get_param (parameters, TOK_HWBKPT, hwbkpt_address);
11034   has_symbol_str = get_param (parameters, TOK_HWBKPT, symbol_str_val);
11035   has_len = get_param (parameters, TOK_LENGTH, len);
11036   has_write = (parameters.find(TOK_HWBKPT_WRITE) != parameters.end());
11037   has_rw = (parameters.find(TOK_HWBKPT_RW) != parameters.end());
11038
11039   // Make an intermediate pp that is well-formed. It's pretty much the same as
11040   // the user-provided one, except that the addr literal is well-typed.
11041   probe_point* well_formed_loc = new probe_point(*location);
11042   well_formed_loc->well_formed = true;
11043
11044   vector<probe_point::component*> well_formed_comps;
11045   for (auto it = location->components.begin();
11046       it != location->components.end(); ++it)
11047     if ((*it)->functor == TOK_HWBKPT && has_addr)
11048       well_formed_comps.push_back(new probe_point::component(TOK_HWBKPT,
11049           new literal_number(hwbkpt_address, true /* hex */ )));
11050     else
11051       well_formed_comps.push_back(*it);
11052   well_formed_loc->components = well_formed_comps;
11053   probe *new_base = new probe (base, well_formed_loc);
11054
11055   if (!has_len)
11056         len = 1;
11057
11058   if (has_addr)
11059       finished_results.push_back (new hwbkpt_derived_probe (new_base,
11060                                                             location,
11061                                                             hwbkpt_address,
11062                                                             "",len,0,
11063                                                             has_write,
11064                                                             has_rw,
11065                                                             kernel_p));
11066   else if (has_symbol_str)
11067       finished_results.push_back (new hwbkpt_derived_probe (new_base,
11068                                                             location,
11069                                                             0,
11070                                                             symbol_str_val,len,0,
11071                                                             has_write,
11072                                                             has_rw,
11073                                                             kernel_p));
11074   else
11075     assert (0);
11076 }
11077
11078 // ------------------------------------------------------------------------
11079 // statically inserted kernel-tracepoint derived probes
11080 // ------------------------------------------------------------------------
11081
11082 struct tracepoint_arg
11083 {
11084   string name, c_type, c_decl, typecast;
11085   bool usable, used, isptr;
11086   Dwarf_Die type_die;
11087   tracepoint_arg(const string& tracepoint_name, Dwarf_Die *arg);
11088
11089   // used with --runtime=bpf
11090   int size;
11091   int offset;
11092   bool is_signed;
11093 };
11094
11095 struct tracepoint_derived_probe: public derived_probe
11096 {
11097   tracepoint_derived_probe (systemtap_session& s,
11098                             dwflpp& dw, Dwarf_Die& func_die,
11099                             const string& tracepoint_system,
11100                             const string& tracepoint_name,
11101                             probe* base_probe, probe_point* location);
11102
11103   systemtap_session& sess;
11104   string tracepoint_system, tracepoint_name, header;
11105   vector <struct tracepoint_arg> args;
11106
11107   void build_args(dwflpp& dw, Dwarf_Die& func_die);
11108   void build_args_for_bpf(dwflpp& dw, Dwarf_Die& struct_die);
11109   void getargs (std::list<std::string> &arg_set) const;
11110   void join_group (systemtap_session& s);
11111   void print_dupe_stamp(ostream& o);
11112 };
11113
11114
11115 struct tracepoint_derived_probe_group: public generic_dpg<tracepoint_derived_probe>
11116 {
11117   friend bool sort_for_bpf(systemtap_session& s,
11118                            tracepoint_derived_probe_group *t,
11119                            sort_for_bpf_probe_arg_vector &v);
11120
11121   void emit_module_decls (systemtap_session& s);
11122   void emit_module_init (systemtap_session& s);
11123   void emit_module_exit (systemtap_session& s);
11124 };
11125
11126
11127 struct tracepoint_var_expanding_visitor: public var_expanding_visitor
11128 {
11129   tracepoint_var_expanding_visitor(dwflpp& dw,
11130                                    vector <struct tracepoint_arg>& args):
11131     var_expanding_visitor (dw.sess),
11132     dw (dw), args (args) {}
11133   dwflpp& dw;
11134   vector <struct tracepoint_arg>& args;
11135
11136   void visit_target_symbol (target_symbol* e);
11137   void visit_target_symbol_arg (target_symbol* e);
11138   void visit_target_symbol_context (target_symbol* e);
11139 };
11140
11141
// Expand a reference to one tracepoint argument.
//
// A bare "$arg" becomes a synthetic symbol naming the probe local
// "__tracepoint_arg_<arg>".  A reference with components ($arg->field, ...)
// becomes either a pretty-print expansion or a call to a synthesized
// dereferencing function.
//
// Throws semantic_error when no usable argument of that name exists, when a
// write is attempted without guru mode (or to the bare argument itself), or
// when the address of a bare argument is taken.  assert_no_components() is
// invoked for arguments that cannot be dereferenced.
void
tracepoint_var_expanding_visitor::visit_target_symbol_arg (target_symbol* e)
{
  string argname = e->sym_name();
  string en = e->name;

  // search for a tracepoint parameter matching this name
  tracepoint_arg *arg = NULL;
  for (unsigned i = 0; i < args.size(); ++i)
    if (args[i].usable && args[i].name == argname)
      {
        arg = &args[i];
        // remember the reference so the constructor declares a local for it
        arg->used = true;
        break;
      }

  if (arg == NULL)
    {
      // Offer every argument name (usable or not) plus some of the context
      // variables as spelling suggestions.
      set<string> vars;
      for (unsigned i = 0; i < args.size(); ++i)
        vars.insert("$" + args[i].name);
      vars.insert("$$name");
      vars.insert("$$parms");
      vars.insert("$$vars");
      string sugs = levenshtein_suggest(en, vars); // no need to limit, there's not that many

      // We hope that this value ends up not being referenced after all, so it
      // can be optimized out quietly.
      throw SEMANTIC_ERROR(_F("unable to find tracepoint variable '%s'%s",
                              en.c_str(), sugs.empty() ? "" :
                              (_(" (alternatives: ") + sugs + ")").c_str()), e->tok);
      // NB: we say 'alternatives' because the suggestion list above is not
      // limited to a few close matches.
      // NB: we can have multiple errors, since a target variable
      // may be expanded in several different contexts:
      //     trace ("*") { $foo->bar }
    }

  // make sure we're not dereferencing base types or void
  bool deref_p = arg->isptr && !null_die(&arg->type_die);
  if (!deref_p)
    e->assert_no_components("tracepoint", true);

  // we can only write to dereferenced fields, and only if guru mode is on
  bool lvalue = is_active_lvalue(e);
  if (lvalue && (!dw.sess.guru_mode || e->components.empty()))
    throw SEMANTIC_ERROR(_F("write to tracepoint variable '%s' not permitted; need stap -g", en.c_str()), e->tok);

  // XXX: if a struct/union arg is passed by value, then writing to its fields
  // is also meaningless until you dereference past a pointer member.  It's
  // harder to detect and prevent that though...

  if (e->components.empty())
    {
      if (e->addressof)
        throw SEMANTIC_ERROR(_("cannot take address of tracepoint variable"), e->tok);

      // Just grab the value from the probe locals
      symbol* sym = new symbol;
      sym->tok = e->tok;
      sym->name = "__tracepoint_arg_" + arg->name;
      sym->type_details = make_shared<exp_type_dwarf>(&dw, &arg->type_die, false, false);

      // In bpf mode the symbol is bound right away to a context variable
      // declaration carrying the argument's size, offset and signedness.
      if (sess.runtime_mode == systemtap_session::bpf_runtime)
        {
          bpf_context_vardecl *v = new bpf_context_vardecl;

          v->size = arg->size;
          v->offset = arg->offset;
          v->is_signed = arg->is_signed;
          sym->referent = v;
        }

      provide (sym);
    }
  else
    {
      // make a copy of the original as a bare target symbol for the tracepoint
      // value, which will be passed into the dwarf dereferencing code
      target_symbol* e2 = deep_copy_visitor::deep_copy(e);
      e2->components.clear();

      if (e->check_pretty_print (lvalue))
        {
          // The pretty-print expansion is itself visited by this visitor, so
          // any target symbols it contains get expanded in turn.
          dwarf_pretty_print dpp(dw, &arg->type_die, e2, deref_p, false,
                                 *e, lvalue);
          dpp.expand()->visit (this);
          return;
        }

      // tracepoint arguments are always treated as kernel-space here
      bool userspace_p = false;
      location_context ctx(e, e2);
      ctx.userspace_p = userspace_p;

      if (dw.sess.runtime_mode == systemtap_session::bpf_runtime)
        ctx.adapt_pointer_to_bpf(arg->size, arg->offset, arg->is_signed);

      Dwarf_Die endtype;
      dw.literal_stmt_for_pointer (ctx, &arg->type_die, ctx.e, lvalue, &endtype);

      // The running 'tick' counter is appended to the synthesized function
      // name, giving each expansion its own name.
      string fname = (string(lvalue ? "_tracepoint_tvar_set"
                             : "_tracepoint_tvar_get")
                      + "_" + e->sym_name()
                      + "_" + lex_cast(tick++));

      functioncall* n = synthetic_embedded_deref_call(dw, ctx, fname, &endtype,
                                                      userspace_p, lvalue, e2);

      if (lvalue)
        provide_lvalue_call (n);

      provide(n); // allow recursion to $var1[$var2] subexpressions
    }
}
11255
11256
11257 void
11258 tracepoint_var_expanding_visitor::visit_target_symbol_context (target_symbol* e)
11259 {
11260   if (e->addressof)
11261     throw SEMANTIC_ERROR(_("cannot take address of context variable"), e->tok);
11262
11263   if (is_active_lvalue (e))
11264     throw SEMANTIC_ERROR(_F("write to tracepoint '%s' not permitted",
11265                             e->name.to_string().c_str()), e->tok);
11266
11267   if (e->name == "$$name" || e->name == "$$system")
11268     {
11269       e->assert_no_components("tracepoint");
11270
11271       string member = (e->name == "$$name") ? "c->ips.tp.tracepoint_name"
11272                                             : "c->ips.tp.tracepoint_system";
11273
11274       // Synthesize an embedded expression.
11275       embedded_expr *expr = new embedded_expr;
11276       expr->tok = e->tok;
11277       expr->code = string("/* string */ /* pure */ " +
11278                           member + " ? " + member + " : \"\"");
11279       provide (expr);
11280     }
11281   else if (e->name == "$$vars" || e->name == "$$parms")
11282     {
11283       e->assert_no_components("tracepoint", true);
11284
11285       print_format* pf = print_format::create(e->tok, "sprintf");
11286
11287       for (unsigned i = 0; i < args.size(); ++i)
11288         {
11289           if (!args[i].usable)
11290             continue;
11291           if (i > 0)
11292             pf->raw_components += " ";
11293           pf->raw_components += args[i].name;
11294           target_symbol *tsym = new target_symbol;
11295           tsym->tok = e->tok;
11296           tsym->name = "$" + args[i].name;
11297           tsym->components = e->components;
11298
11299           // every variable should always be accessible!
11300           tsym->saved_conversion_error = 0;
11301           expression *texp = require<expression> (tsym); // NB: throws nothing ...
11302           if (tsym->saved_conversion_error) // ... but this is how we know it happened.
11303             {
11304               if (dw.sess.verbose>2)
11305                 for (const semantic_error *c = tsym->saved_conversion_error;
11306                      c != 0; c = c->get_chain())
11307                   clog << _("variable location problem [man error::dwarf]: ") << c->what() << endl;
11308               pf->raw_components += "=?";
11309               continue;
11310             }
11311
11312           if (e->check_pretty_print ())
11313             pf->raw_components += "=%s";
11314           else
11315             pf->raw_components += args[i].isptr ? "=%p" : "=%#x";
11316           pf->args.push_back(texp);
11317         }
11318
11319       pf->components = print_format::string_to_components(pf->raw_components);
11320       provide (pf);
11321     }
11322   else
11323     assert(0); // shouldn't get here
11324 }
11325
11326 void
11327 tracepoint_var_expanding_visitor::visit_target_symbol (target_symbol* e)
11328 {
11329   try
11330     {
11331       assert(e->name.size() > 0 && e->name[0] == '$');
11332
11333       if (e->name == "$$name" || e->name == "$$system"
11334           || e->name == "$$parms" || e->name == "$$vars")
11335         visit_target_symbol_context (e);
11336       else
11337         visit_target_symbol_arg (e);
11338     }
11339   catch (const semantic_error &er)
11340     {
11341       if (sess.verbose > 3)
11342         clog << "chaining to " << *e->tok << endl
11343              << sess.build_error_msg(er) << endl;
11344       e->chain (er);
11345       provide (e);
11346     }
11347 }
11348
11349
11350 tracepoint_derived_probe::tracepoint_derived_probe (systemtap_session& s,
11351                                                     dwflpp& dw, Dwarf_Die& func_die,
11352                                                     const string& tracepoint_system,
11353                                                     const string& tracepoint_name,
11354                                                     probe* base, probe_point* loc):
11355   derived_probe (base, loc, true /* .components soon rewritten */), sess (s),
11356   tracepoint_system (tracepoint_system), tracepoint_name (tracepoint_name)
11357 {
11358   // create synthetic probe point name; preserve condition
11359   vector<probe_point::component*> comps;
11360   comps.push_back (new probe_point::component (TOK_KERNEL));
11361
11362   // tag on system to the final name unless we're in compatibility mode so that
11363   // e.g. pn() returns just the name as before
11364   string final_name = tracepoint_name;
11365   if (!tracepoint_system.empty()
11366       && strverscmp(s.compatible.c_str(), "2.6") > 0)
11367     final_name = tracepoint_system + ":" + final_name;
11368
11369   comps.push_back (new probe_point::component (TOK_TRACE,
11370                                                new literal_string(final_name)));
11371   this->sole_location()->components = comps;
11372
11373   // fill out the available arguments in this tracepoint
11374   if (s.runtime_mode == systemtap_session::bpf_runtime)
11375     build_args_for_bpf(dw, func_die);
11376   else
11377     build_args(dw, func_die);
11378
11379   // determine which header defined this tracepoint
11380   string decl_file = dwarf_decl_file(&func_die);
11381   header = decl_file;
11382
11383   // tracepoints from FOO_event_types.h should really be included from FOO.h
11384   // XXX can dwarf tell us the include hierarchy?  it would be better to
11385   // ... walk up to see which one was directly included by tracequery.c
11386   // XXX: see also PR9993.
11387   size_t header_pos = header.find("_event_types");
11388   if (header_pos != string::npos)
11389     header.erase(header_pos, 12);
11390
11391   // Now expand the local variables in the probe body
11392   tracepoint_var_expanding_visitor v (dw, args);
11393   // PR25841 -- not yet, need to put tracepoint parameters somewhere else, so
11394   // function context code can access it.
11395   // if (sess.symbol_resolver)
11396   //  sess.symbol_resolver->current_probe = this;
11397   var_expand_const_fold_loop (sess, this->body, v);
11398
11399   for (unsigned i = 0; i < args.size(); i++)
11400     {
11401       if (!args[i].used)
11402         continue;
11403
11404       if (s.runtime_mode == systemtap_session::bpf_runtime)
11405         {
11406           bpf_context_vardecl* v = new bpf_context_vardecl;
11407           v->name = "__tracepoint_arg_" + args[i].name;
11408           v->tok = this->tok;
11409           v->set_arity(0, this->tok);
11410           v->type = pe_long;
11411           v->synthetic = true;
11412           v->size = args[i].size;
11413           v->offset = args[i].offset;
11414
11415           this->locals.push_back(v);
11416         }
11417       else
11418         {
11419           vardecl* v = new vardecl;
11420           v->name = v->unmangled_name = "__tracepoint_arg_" + args[i].name;
11421           v->tok = this->tok;
11422           v->set_arity(0, this->tok);
11423           v->type = pe_long;
11424           v->synthetic = true;
11425
11426           this->locals.push_back (v);
11427         }
11428     }
11429
11430   if (sess.verbose > 2)
11431     clog << "tracepoint-based " << name() << " tracepoint='" << tracepoint_name << "'" << endl;
11432 }
11433
11434
11435 static bool
11436 resolve_pointer_type(Dwarf_Die& die, bool& isptr)
11437 {
11438   if (null_die(&die))
11439     {
11440       isptr = true;
11441       return true;
11442     }
11443
11444   Dwarf_Die type;
11445   switch (dwarf_tag(&die))
11446     {
11447     case DW_TAG_typedef:
11448     case DW_TAG_const_type:
11449     case DW_TAG_volatile_type:
11450     case DW_TAG_restrict_type:
11451       // iterate on the referent type
11452       return (dwarf_attr_die(&die, DW_AT_type, &die)
11453               && resolve_pointer_type(die, isptr));
11454
11455     case DW_TAG_base_type:
11456     case DW_TAG_enumeration_type:
11457     case DW_TAG_structure_type:
11458     case DW_TAG_union_type:
11459       // base types will simply be treated as script longs
11460       // structs/unions must be referenced by pointer elsewhere
11461       isptr = false;
11462       return true;
11463
11464     case DW_TAG_array_type:
11465     case DW_TAG_pointer_type:
11466     case DW_TAG_reference_type:
11467     case DW_TAG_rvalue_reference_type:
11468       // pointer-like types can be treated as script longs,
11469       // and if we know their type, they can also be dereferenced
11470       isptr = true;
11471       type = die;
11472       while (dwarf_attr_die(&type, DW_AT_type, &type))
11473         {
11474           // It still might be a non-type, e.g. const void,
11475           // so we need to strip away all qualifiers.
11476           int tag = dwarf_tag(&type);
11477           if (tag != DW_TAG_typedef &&
11478               tag != DW_TAG_const_type &&
11479               tag != DW_TAG_volatile_type &&
11480               tag != DW_TAG_restrict_type)
11481             {
11482               die = type;
11483               return true;
11484             }
11485         }
11486       // otherwise use a null_die to indicate void
11487       std::memset(&die, 0, sizeof(die));
11488       return true;
11489
11490     default:
11491       // should we consider other types too?
11492       return false;
11493     }
11494 }
11495
11496 static bool
11497 is_signed_type(Dwarf_Die *die)
11498 {
11499   switch (dwarf_tag(die))
11500     {
11501     case DW_TAG_base_type:
11502       {
11503         Dwarf_Attribute attr;
11504         Dwarf_Word encoding = (Dwarf_Word) -1;
11505         dwarf_formudata (dwarf_attr_integrate (die, DW_AT_encoding, &attr),
11506                          &encoding);
11507         return encoding == DW_ATE_signed || encoding == DW_ATE_signed_char;
11508       }
11509     case DW_TAG_typedef:
11510     case DW_TAG_const_type:
11511     case DW_TAG_volatile_type:
11512     case DW_TAG_restrict_type:
11513       // iterate on the referent type
11514       return (dwarf_attr_die(die, DW_AT_type, die)
11515               && is_signed_type(die));
11516
11517     default:
11518       // should we consider other types too?
11519       return false;
11520     }
11521 }
11522
11523 static int
11524 get_byte_size(Dwarf_Die *die, const char *probe_name)
11525 {
11526   Dwarf_Attribute attr;
11527   Dwarf_Word size;
11528
11529   if (dwarf_attr(die, DW_AT_byte_size, &attr) == NULL)
11530     {
11531       Dwarf_Word count = 1;
11532       Dwarf_Die type;
11533       Dwarf_Die child;
11534
11535       if (dwarf_tag(die) == DW_TAG_array_type)
11536         {
11537           count = 0;
11538
11539           if (dwarf_child(die, &child) != 0)
11540             throw SEMANTIC_ERROR(_F("cannot resolve size of array %s for probe %s",
11541                                     dwarf_diename(die), probe_name));
11542
11543           do
11544             if (dwarf_tag(&child) == DW_TAG_subrange_type)
11545               {
11546                 if (dwarf_attr(&child, DW_AT_upper_bound, &attr) != NULL)
11547                   {
11548                      dwarf_formudata(&attr, &count);
11549                      count++;
11550                   }
11551                 else if (dwarf_attr(&child, DW_AT_count, &attr) != NULL)
11552                   dwarf_formudata(&attr, &count);
11553                 else
11554                   SEMANTIC_ERROR(_F("array %s for probe %s has unknown size",
11555                                     dwarf_diename(die), probe_name));
11556               }
11557           while (dwarf_siblingof(&child, &child) == 0);
11558         }
11559       // Do any other types require special handling?
11560
11561       if (dwarf_attr_die(die, DW_AT_type, &type) == NULL)
11562         throw (SEMANTIC_ERROR(
11563                _F("cannot get byte size of type '%s' for tracepoint '%s'",
11564                   dwarf_diename(die), probe_name)));
11565
11566       return count * get_byte_size(&type, probe_name);
11567     }
11568
11569   dwarf_formudata(&attr, &size);
11570   return size;
11571
11572 }
11573
11574 static bool
11575 resolve_tracepoint_arg_type(tracepoint_arg& arg)
11576 {
11577   if (!resolve_pointer_type(arg.type_die, arg.isptr))
11578     return false;
11579
11580   if (arg.isptr)
11581     arg.typecast = "(intptr_t)";
11582   else if (dwarf_tag(&arg.type_die) == DW_TAG_structure_type ||
11583            dwarf_tag(&arg.type_die) == DW_TAG_union_type)
11584     {
11585       // for structs/unions which are passed by value, we turn it into
11586       // a pointer that can be dereferenced.
11587       arg.isptr = true;
11588       arg.typecast = "(intptr_t)&";
11589     }
11590   return true;
11591 }
11592
11593
11594 tracepoint_arg::tracepoint_arg(const string& tracepoint_name, Dwarf_Die *arg)
11595 : usable(false), used(false), isptr(false), type_die(), size(-1),
11596   offset(-1), is_signed(false)
11597 {
11598   name = dwarf_diename(arg) ?: "";
11599
11600   // read the type of this parameter
11601   if (!dwarf_attr_die (arg, DW_AT_type, &type_die)
11602       || !dwarf_type_name(&type_die, c_type))
11603     throw SEMANTIC_ERROR (_F("cannot get type of parameter '%s' of tracepoint '%s'",
11604                              name.c_str(), tracepoint_name.c_str()));
11605
11606   // build the C declaration
11607   if (!dwarf_type_decl(&type_die, "__tracepoint_arg_" + name, c_decl))
11608     throw SEMANTIC_ERROR (_F("cannot get declaration of parameter '%s' of tracepoint '%s'",
11609                              name.c_str(), tracepoint_name.c_str()));
11610
11611   usable = resolve_tracepoint_arg_type(*this);
11612 }
11613
11614
11615
11616 void
11617 tracepoint_derived_probe::build_args(dwflpp&, Dwarf_Die& func_die)
11618 {
11619   Dwarf_Die arg;
11620   if (dwarf_child(&func_die, &arg) == 0)
11621     do
11622       if (dwarf_tag(&arg) == DW_TAG_formal_parameter)
11623         {
11624           // build a tracepoint_arg for this parameter
11625           args.emplace_back(tracepoint_name, &arg);
11626           if (sess.verbose > 4)
11627             {
11628               auto& tparg = args.back();
11629               clog << _F("found parameter for tracepoint '%s': type:'%s' name:'%s' decl:'%s' %s",
11630                          tracepoint_name.c_str(), tparg.c_type.c_str(), tparg.name.c_str(),
11631                          tparg.c_decl.c_str(), tparg.usable ? "ok" : "unavailable") << endl;
11632             }
11633         }
11634     while (dwarf_siblingof(&arg, &arg) == 0);
11635 }
11636
11637 void
11638 tracepoint_derived_probe::build_args_for_bpf(dwflpp&, Dwarf_Die& struct_die)
11639 {
11640   Dwarf_Die member;
11641   int data_start = 0;
11642   bool struct_found = false, more_members = true;
11643
11644   if (dwarf_child(&struct_die, &member) != 0) return;
11645
11646   // find the member struct inside the struct that actually has the information about the bpf arguments
11647   while (!struct_found && more_members)
11648   {
11649           Dwarf_Die type;
11650           Dwarf_Attribute attr;
11651           Dwarf_Word off;
11652
11653           dwarf_attr_die(&member, DW_AT_type, &type);
11654           if ((dwarf_tag(&type) == DW_TAG_structure_type)) {
11655                   if (dwarf_attr(&member, DW_AT_data_member_location, &attr) == NULL
11656                       || dwarf_formudata(&attr, &off) != 0)
11657                           throw (SEMANTIC_ERROR
11658                                  (_F("cannot get offset attribute for variable '%s' of tracepoint '%s'",
11659                                      dwarf_diename(&member), tracepoint_name.c_str())));
11660                   data_start = off;
11661                   member = type;
11662                   struct_found = true;
11663           } else {
11664                   more_members = (dwarf_siblingof(&member, &member) == 0);
11665           }
11666   }
11667
11668   if (dwarf_child(&member, &member) == 0)
11669     do
11670       if (dwarf_tag(&member) == DW_TAG_member)
11671         {
11672           Dwarf_Die type;
11673           Dwarf_Attribute attr;
11674           Dwarf_Word off;
11675           tracepoint_arg arg(dwarf_diename(&member), &member);
11676
11677           if (dwarf_attr(&member, DW_AT_data_member_location, &attr) == NULL
11678               || dwarf_formudata(&attr, &off) != 0)
11679             throw (SEMANTIC_ERROR
11680                    (_F("cannot get offset attribute for variable '%s' of tracepoint '%s'",
11681                        dwarf_diename(&member), tracepoint_name.c_str())));
11682
11683           dwarf_attr_die(&member, DW_AT_type, &type);
11684           arg.is_signed = is_signed_type(&type);
11685           arg.size = get_byte_size(&type, tracepoint_name.c_str());
11686           arg.offset = off + data_start;
11687
11688           args.push_back(arg);
11689         }
11690     while (dwarf_siblingof(&member, &member) == 0);
11691 }
11692
11693 void
11694 tracepoint_derived_probe::getargs(std::list<std::string> &arg_set) const
11695 {
11696   for (unsigned i = 0; i < args.size(); ++i)
11697     if (args[i].usable)
11698       arg_set.push_back("$"+args[i].name+":"+args[i].c_type);
11699 }
11700
11701 void
11702 tracepoint_derived_probe::join_group (systemtap_session& s)
11703 {
11704   if (! s.tracepoint_derived_probes)
11705     s.tracepoint_derived_probes = new tracepoint_derived_probe_group ();
11706   s.tracepoint_derived_probes->enroll (this);
11707   this->group = s.tracepoint_derived_probes;
11708 }
11709
11710
11711 void
11712 tracepoint_derived_probe::print_dupe_stamp(ostream& o)
11713 {
11714   for (unsigned i = 0; i < args.size(); i++)
11715     if (args[i].used)
11716       o << "__tracepoint_arg_" << args[i].name << endl;
11717 }
11718
11719
11720 // Look for a particular header file in the build directory and the
11721 // source directory (if it exists). Return true if the header file was
11722 // found.
11723 static bool header_exists(systemtap_session& s, const string& header)
11724 {
11725   if (file_exists(s.kernel_build_tree + header)
11726       || (!s.kernel_source_tree.empty()
11727           && file_exists(s.kernel_source_tree + header)))
11728     return true;
11729   return false;
11730 }
11731
11732
11733 static vector<string> tracepoint_extra_decls (systemtap_session& s,
11734                                               const string& header,
11735                                               const bool tracequery)
11736 {
11737   vector<string> they_live;
11738
  // Several headers end up including events/irq.h, events/kmem.h, and
  // events/module.h on RHEL6 (since they include headers that include
  // those headers). This causes stap to think the tracepoints from
  // those files belong in multiple tracepoint subsystems. To get
  // around this, we'll define the header guard macros for those
  // tracepoint headers, include the troublesome header file, then
  // undefine the macros. Then, later when a header includes
  // linux/interrupt.h (for example), the events/irq.h file doesn't get
  // included because of the header guard macro on linux/interrupt.h.
11748   //
11749   // Note that we only do this when building a tracequery module (to
11750   // find all the tracepoints).
11751   if (tracequery)
11752     {
11753       they_live.push_back ("#define _TRACE_KMEM_H");
11754       they_live.push_back ("#define _TRACE_IRQ_H");
11755       they_live.push_back ("#include <linux/interrupt.h>");
11756       they_live.push_back ("#undef _TRACE_IRQ_H");
11757       they_live.push_back ("#undef _TRACE_KMEM_H");
11758
11759       they_live.push_back ("#define _TRACE_MODULE_H");
11760       they_live.push_back ("#include <linux/module.h>");
11761       they_live.push_back ("#undef _TRACE_MODULE_H");
11762     }
11763
11764   // PR 9993
11765   // XXX: may need this to be configurable
11766   they_live.push_back ("#include <linux/skbuff.h>");
11767
11768   // PR11649: conditional extra header
11769   // for kvm tracepoints in 2.6.33ish
11770   if (s.kernel_config["CONFIG_KVM"] != string("")) {
11771     they_live.push_back ("#include <linux/kvm_host.h>");
11772   }
11773
11774   if (header.find("xfs") != string::npos
11775       && s.kernel_config["CONFIG_XFS_FS"] != string("")) {
11776     they_live.push_back ("#define XFS_BIG_BLKNOS 1");
11777
11778     // The xfs_types.h include file got moved from fs/xfs/xfs_types.h
11779     // to fs/xfs/libxfs/xfs_types.h in upstream kernel 4.4, but that
11780     // patch has gotten backported to RHEL7's 3.10, so we can't really
11781     // depend on kernel version to know where that file is. We could
11782     // add lots of typedefs here to get things to compile (like for
11783     // xfs_agblock_t, xfs_agino_t, etc.), but the upstream kernel
11784     // could change the types being mapped and we'd get a compile
11785     // error when the types don't match. So, we'll try to find the
11786     // xfs_types.h file in the kernel source tree.
11787     if (header_exists(s, "/fs/xfs/xfs_linux.h"))
11788       they_live.push_back ("#include \"fs/xfs/xfs_linux.h\"");
11789     if (header_exists(s, "/fs/xfs/libxfs/xfs_types.h"))
11790       they_live.push_back ("#include \"fs/xfs/libxfs/xfs_types.h\"");
11791     else if (header_exists(s, "/fs/xfs/xfs_types.h"))
11792       they_live.push_back ("#include \"fs/xfs/xfs_types.h\"");
11793
11794     // Kernel 4.7 needs xfs_format.h.
11795     if (header_exists(s, "/fs/xfs/libxfs/xfs_format.h"))
11796       they_live.push_back ("#include \"fs/xfs/libxfs/xfs_format.h\"");
11797
11798     // Kernel 4.10 needs several headers.
11799     if (header_exists(s, "/fs/xfs/libxfs/xfs_trans_resv.h"))
11800       they_live.push_back ("#include \"fs/xfs/libxfs/xfs_trans_resv.h\"");
11801     if (header_exists(s, "/fs/xfs/xfs_mount.h"))
11802       they_live.push_back ("#include \"fs/xfs/xfs_mount.h\"");
11803     if (header_exists(s, "/fs/xfs/libxfs/xfs_log_format.h"))
11804       they_live.push_back ("#include \"fs/xfs/libxfs/xfs_log_format.h\"");
11805
11806     // Sigh. xfs_types.h (no matter where it is), also needs
11807     // xfs_linux.h. But, on newer kernels, xfs_linux.h includes
11808     // xfs_types.h, but really needs a '-I' command to do so. So,
11809     // we'll have to add a custom '-I' command.
11810     if (file_exists(s.kernel_build_tree + "/fs/xfs/libxfs"))
11811       s.kernel_extra_cflags.push_back ("-I" + s.kernel_build_tree
11812                                        + "/fs/xfs/libxfs");
11813     else if (!s.kernel_source_tree.empty()
11814              && file_exists(s.kernel_source_tree + "/fs/xfs/libxfs"))
11815       s.kernel_extra_cflags.push_back ("-I" + s.kernel_source_tree
11816                                        + "/fs/xfs/libxfs");
11817
11818     they_live.push_back ("struct xfs_mount;");
11819     they_live.push_back ("struct xfs_inode;");
11820     they_live.push_back ("struct xfs_buf;");
11821     they_live.push_back ("struct xfs_bmbt_irec;");
11822     they_live.push_back ("struct xfs_trans;");
11823     they_live.push_back ("struct xfs_name;");
11824     they_live.push_back ("struct xfs_icreate_log;");
11825   }
11826
11827   if (header.find("nfs") != string::npos
11828       && s.kernel_config["CONFIG_NFSD"] != string("")) {
11829     they_live.push_back ("struct rpc_task;");
11830     they_live.push_back ("struct nfs_open_context;");
11831     they_live.push_back ("struct nfs_client;");
11832     they_live.push_back ("struct nfs_fattr;");
11833     they_live.push_back ("struct nfs_fh;");
11834     they_live.push_back ("struct nfs_server;");
11835     they_live.push_back ("struct nfs_pgio_header;");
11836     they_live.push_back ("struct nfs_commit_data;");
11837     they_live.push_back ("struct nfs_closeres;");
11838     they_live.push_back ("struct nfs_closeargs;");
11839     they_live.push_back ("struct nfs_unlinkdata;");
11840     they_live.push_back ("struct nfs_writeverf;");
11841     they_live.push_back ("struct nfs4_sequence_args;");
11842     they_live.push_back ("struct nfs4_sequence_res;");
11843     they_live.push_back ("struct nfs4_session;");
11844     they_live.push_back ("struct nfs4_state;");
11845     they_live.push_back ("struct nfs4_delegreturnres;");
11846     they_live.push_back ("struct nfs4_delegreturnargs;");
11847     they_live.push_back ("struct pnfs_layout_hdr;");
11848     they_live.push_back ("struct pnfs_layout_range;");
11849     they_live.push_back ("struct pnfs_layout_segment;");
11850
11851     // We need a definition of a 'stateid_t', which is a typedef of an
11852     // anonymous struct. So, we'll have to include the right kernel
11853     // header file.
11854     if (header_exists(s, "/fs/nfsd/state.h"))
11855       they_live.push_back ("#include \"fs/nfsd/state.h\"");
11856
11857     // We need a definition of the pnfs_update_layout_reason enum, so
11858     // we'll need the right kernel header file.
11859     if (s.kernel_config["CONFIG_NFS_V4"] != string("")
11860         && header_exists(s, "/include/linux/nfs4.h"))
11861       they_live.push_back ("#include \"linux/nfs4.h\"");
11862   }
11863
11864   // RHEL6.3
11865   if (header.find("rpc") != string::npos && s.kernel_config["CONFIG_NFSD"] != string("")) {
11866     they_live.push_back ("struct rpc_clnt;");
11867     they_live.push_back ("struct rpc_wait_queue;");
11868   }
11869
11870   if (header.find("timer") != string::npos)
11871     {
11872       // Before including asm/cputime.h, we need to make sure it
11873       // exists, which is tricky since we need the arch specific
11874       // include directory.
11875       string karch = s.architecture;
11876       if (karch == "i386" || karch == "x86_64")
11877         karch = "x86";
11878       if (file_exists(s.kernel_build_tree + "/arch/" + karch
11879                       + "/include/asm/cputime.h"))
11880         they_live.push_back ("#include <asm/cputime.h>");
11881       else if (!s.kernel_source_tree.empty()
11882                && file_exists(s.kernel_source_tree + "/arch/" + karch
11883                               + "/include/asm/cputime.h"))
11884         they_live.push_back ("#include <asm/cputime.h>");
11885     }
11886
11887   // linux 3.0
11888   they_live.push_back ("struct cpu_workqueue_struct;");
11889
11890   if (header.find("clk") != string::npos)
11891       they_live.push_back ("struct clk_duty;");
11892
11893   if (header.find("fsi") != string::npos)
11894       they_live.push_back ("struct fsi_master_acf;");
11895
11896   if (header.find("ib_") != string::npos) {
11897       they_live.push_back ("struct ib_mad_hdr;");
11898       they_live.push_back ("struct ib_user_mad_hdr;");
11899       they_live.push_back ("struct ib_umad_file;");
11900       if (header_exists(s, "/include/rdma/id_mad.h"))
11901         they_live.push_back ("#include \"rdma/id_mad.h\"");
11902   }
11903
11904   if (header.find("ext4") != string::npos
11905       && s.kernel_config["CONFIG_EXT4_FS"] != string("")
11906       && header_exists(s, "/fs/ext4/ext4.h"))
11907     they_live.push_back ("#include \"fs/ext4/ext4.h\"");
11908
11909   if (header.find("ext3") != string::npos)
11910   {
11911       they_live.push_back ("struct ext3_reserve_window_node;");
11912       they_live.push_back ("struct super_block;");
11913       they_live.push_back ("struct dentry;");
11914   }
11915
11916   if (header.find("workqueue") != string::npos)
11917     {
11918       they_live.push_back ("struct pool_workqueue;");
11919       they_live.push_back ("struct work_struct;");
11920     }
11921
11922   // Here we need the header file, since we need the snd_soc_dapm_path
11923   // struct declared and the snd_soc_dapm_direction enum.
11924   if (header.find("asoc") != string::npos)
11925     {
11926       if (header_exists(s, "/include/sound/soc.h"))
11927         they_live.push_back ("#include \"sound/soc.h\"");
11928     }
11929
11930   if (header.find("9p") != string::npos)
11931     {
11932       they_live.push_back ("struct p9_client;");
11933       they_live.push_back ("struct p9_fcall;");
11934     }
11935
11936   if (header.find("bcache") != string::npos)
11937     {
11938       they_live.push_back ("struct bkey;");
11939       they_live.push_back ("struct btree;");
11940       they_live.push_back ("struct cache_set;");
11941       they_live.push_back ("struct cache;");
11942       they_live.push_back ("struct bcache_device;");
11943     }
11944
11945   if (header.find("f2fs") != string::npos)
11946     {
11947       // cannot get fs/f2fs/f2fs.h #included
11948       they_live.push_back ("typedef u32 block_t;");
11949       they_live.push_back ("typedef u32 nid_t;");
11950       they_live.push_back ("struct f2fs_io_info;");
11951       they_live.push_back ("struct f2fs_sb_info;");
11952       they_live.push_back ("struct extent_info;");
11953       they_live.push_back ("struct extent_node;");
11954       they_live.push_back ("struct super_block;");
11955       they_live.push_back ("struct buffer_head;");
11956       they_live.push_back ("struct bio;");
11957     }
11958
11959   if (header.find("radeon") != string::npos)
11960     {
11961       they_live.push_back ("struct radeon_bo;");
11962       they_live.push_back ("struct radeon_bo_va;");
11963       they_live.push_back ("struct radeon_cs_parser;");
11964       they_live.push_back ("struct radeon_semaphore;");
11965     }
11966
11967   // Argh, 3.11, i915_trace.h -> i915_drv.h -> i915_reg.h without
11968   // -I. So, we have to add a custom -I flag.
11969   if (header.find("i915_trace") != string::npos)
11970     {
11971       if (file_exists(s.kernel_build_tree + "/drivers/gpu/drm/i915"))
11972         s.kernel_extra_cflags.push_back ("-I" + s.kernel_build_tree
11973                                          + "/drivers/gpu/drm/i915");
11974       else if (!s.kernel_source_tree.empty()
11975                && file_exists(s.kernel_source_tree + "/drivers/gpu/drm/i915"))
11976         s.kernel_extra_cflags.push_back ("-I" + s.kernel_source_tree
11977                                          + "/drivers/gpu/drm/i915");
11978
11979       if (file_exists(s.kernel_build_tree + "/drivers/gpu/drm/i915/gt"))
11980         s.kernel_extra_cflags.push_back ("-I" + s.kernel_build_tree
11981                                          + "/drivers/gpu/drm/i915/gt");
11982       else if (!s.kernel_source_tree.empty()
11983                && file_exists(s.kernel_source_tree + "/drivers/gpu/drm/i915/gt"))
11984         s.kernel_extra_cflags.push_back ("-I" + s.kernel_source_tree
11985                                          + "/drivers/gpu/drm/i915/gt");
11986     }
11987
11988   if (header.find("/ath/") != string::npos)
11989     they_live.push_back ("struct ath5k_hw;");
11990
11991   if (header.find("nilfs2") != string::npos)
11992     they_live.push_back ("struct nilfs_transaction_info;");
11993
11994   if (header.find("spi") != string::npos)
11995     {
11996       they_live.push_back ("struct spi_master;");
11997       they_live.push_back ("struct spi_message;");
11998       they_live.push_back ("struct spi_transfer;");
11999       they_live.push_back ("struct spi_controller;");
12000     }
12001
12002   if (header.find("thermal_power_allocator") != string::npos)
12003     they_live.push_back ("struct thermal_zone_device;");
12004
12005   if (header.find("brcms_trace_brcmsmac") != string::npos)
12006     they_live.push_back ("struct brcms_timer;");
12007
12008   if (header.find("hda_intel_trace") != string::npos)
12009     they_live.push_back ("struct azx;");
12010
12011   if (header.find("v4l2") != string::npos)
12012     they_live.push_back ("struct v4l2_buffer;");
12013
12014   if (header.find("pcm_trace") != string::npos
12015       || header.find("pcm_param_trace") != string::npos)
12016     {
12017       they_live.push_back ("struct snd_pcm_substream;");
12018       they_live.push_back ("#include <sound/asound.h>");
12019     }
12020
12021   // Here we need the header file, since we need the migrate_mode enum.
12022   if (header.find("migrate") != string::npos
12023       || header.find("compaction") != string::npos)
12024     {
12025       if (header_exists(s, "/include/linux/migrate_mode.h"))
12026         they_live.push_back ("#include <linux/migrate_mode.h>");
12027     }
12028
12029   // include/trace/events/module.h is odd. If CREATE_TRACE_POINTS
12030   // isn't defined, it doesn't define TRACE_SYSTEM, which means we
12031   // we'll find the module tracepoints (like 'module_load'), but not
12032   // realize they belong in the module subsystem (like
12033   // 'module:module_load'). We'd like to define CREATE_TRACE_POINTS,
12034   // but that causes compilation errors. So, we'll just define
12035   // TRACE_SYSTEM ourselves.
12036   if (header.find("events/module.h") != string::npos)
12037     they_live.push_back ("#define TRACE_SYSTEM module");
12038
12039   if (header.find("events/net.h") != string::npos)
12040     they_live.push_back ("struct ndmsg;");
12041
12042   if (header.find("iwl") != string::npos)
12043     {
12044       they_live.push_back ("struct iwl_cmd_header_wide;");
12045       they_live.push_back ("struct iwl_host_cmd;");
12046       they_live.push_back ("struct iwl_trans;");
12047       they_live.push_back ("struct iwl_rx_packet;");
12048     }
12049
12050   if (header.find("mdio") != string::npos)
12051     {
12052       if (header_exists(s, "/include/linux/phy.h"))
12053         they_live.push_back ("#include <linux/phy.h>");
12054     }
12055
12056   if (header.find("intel_iommu") != string::npos && s.architecture != "x86_64" && s.architecture != "i386")
12057     {
12058       // need asm/cacheflush.h for clflush_cache_range() used in that header,
12059       // but this function does not exist on e.g. ppc
12060       they_live.push_back ("#error nope");
12061     }
12062
12063   if (header.find("wbt") != string::npos)
12064     {
12065       // blk-wbt.h gets included as "../../../block/blk-wbt.h", so we
12066       // need an include path that is 3 levels deep. Note we can't use
12067       // "include/linux/events", since its headers conflict with ours.
12068       if (file_exists(s.kernel_build_tree + "/block/blk-wbt.h")
12069           && file_exists(s.kernel_build_tree + "/fs/xfs/libxfs"))
12070         s.kernel_extra_cflags.push_back ("-I" + s.kernel_build_tree
12071                                          + "/fs/xfs/libxfs");
12072       else if (!s.kernel_source_tree.empty()
12073                && file_exists(s.kernel_source_tree + "/block/blk-wbt.h")
12074                && file_exists(s.kernel_source_tree + "/fs/xfs/libxfs"))
12075         s.kernel_extra_cflags.push_back ("-I" + s.kernel_source_tree
12076                                          + "/fs/xfs/libxfs");
12077
12078       if (header_exists(s, "/include/linux/blk_types.h"))
12079         they_live.push_back ("#include <linux/blk_types.h>");
12080       if (header_exists(s, "/include/linux/blkdev.h"))
12081         they_live.push_back ("#include <linux/blkdev.h>");
12082     }
12083
12084   if (header.find("swiotlb") != string::npos)
12085     {
12086       if (header_exists(s, "/include/linux/swiotlb.h"))
12087         they_live.push_back ("#include <linux/swiotlb.h>");
12088     }
12089
12090
12091   if (header.find("afs") != string::npos)
12092     {
12093       if (header_exists (s, "/fs/afs/internal.h"))
12094         they_live.push_back ("#include \"fs/afs/internal.h\"");
12095
12096       they_live.push_back ("struct afs_call;");
12097     }
12098
12099   if (header.find("rxrpc") != string::npos)
12100     {
12101       they_live.push_back ("struct rxrpc_call;");
12102       they_live.push_back ("struct rxrpc_connection;");
12103       they_live.push_back ("struct rxrpc_seq_t;");
12104       they_live.push_back ("struct rxrpc_serial_t;");
12105       they_live.push_back ("struct rxrpc_skb_priv;");
12106
12107       // We need a definition of a 'rxrpc_seq_t', which is a typedef.
12108       // So, we'll have to include the right kernel header file.
12109       if (header_exists(s, "/net/rxrpc/protocol.h"))
12110         they_live.push_back ("#include \"net/rxrpc/protocol.h\"");
12111
12112       if (header_exists (s, "/net/rxrpc/ar-internal.h"))
12113         they_live.push_back ("#include \"net/rxrpc/ar-internal.h\"");
12114     }
12115
12116   if (header.find("xdp") != string::npos)
12117     {
12118       they_live.push_back ("struct bpf_map;");
12119     }
12120
12121   if (header.find("bridge") != string::npos)
12122     {
12123       // br_private.h gets included as
12124       // "../../../net/bridge/br_private.h", so we need an include
12125       // path that is 3 levels deep.
12126       if (file_exists(s.kernel_build_tree + "/net/bridge/br_private.h")
12127           && file_exists(s.kernel_build_tree + "/fs/xfs/libxfs"))
12128         s.kernel_extra_cflags.push_back ("-I" + s.kernel_build_tree
12129                                          + "/fs/xfs/libxfs");
12130       else if (!s.kernel_source_tree.empty()
12131                && file_exists(s.kernel_source_tree + "/net/bridge/br_private.h")
12132                && file_exists(s.kernel_source_tree + "/fs/xfs/libxfs"))
12133         s.kernel_extra_cflags.push_back ("-I" + s.kernel_source_tree
12134                                          + "/fs/xfs/libxfs");
12135     }
12136
12137   if (header.find("fsi") != string::npos)
12138     {
12139       they_live.push_back ("struct fsi_master;");
12140       they_live.push_back ("struct fsi_master_gpio;");
12141     }
12142
12143   if (header.find("drm") != string::npos)
12144     {
12145       they_live.push_back ("struct drm_file;");
12146     }
12147
12148   if (header.find("cachefiles") != string::npos ||
12149       header.find("fscache") != string::npos)
12150     {
12151       they_live.push_back ("#include <linux/fscache.h>");
12152       they_live.push_back ("#include <linux/fscache-cache.h>");
12153       they_live.push_back ("struct cachefiles_object;"); // fs/cachefiles/internal.h
12154     }
12155
12156   #if 0
12157   /* This doesn't work as of 4.17ish, because it interferes with subsequent tracepoints
12158      coming in from other trace headers. e.g. module:module_put vs mei:module_put. */
12159   if (header_exists(s, "/drivers/misc/mei/mei-trace.h"))
12160     they_live.push_back ("#include \"drivers/misc/mei/mei-trace.h\"");
12161   #endif
12162
12163   if (header.find("gpu_scheduler") != string::npos)
12164     {
12165       they_live.push_back("#include <drm/gpu_scheduler.h>");
12166     }
12167
12168   if (header.find("siox.h") != string::npos)
12169     {
12170       they_live.push_back ("struct siox_device;"); // #include "drivers/siox/siox.h"
12171       they_live.push_back ("struct siox_master;"); // #include "drivers/siox/siox.h"
12172       they_live.push_back ("struct rxrpc_local;"); // #include "drivers/siox/siox.h"
12173     }
12174
12175   return they_live;
12176 }
12177
12178
12179 void
12180 tracepoint_derived_probe_group::emit_module_decls (systemtap_session& s)
12181 {
12182   if (probes.empty())
12183     return;
12184
12185   s.op->newline() << "/* ---- tracepoint probes ---- */";
12186   s.op->newline() << "#include <linux/stp_tracepoint.h>" << endl;
12187   s.op->newline();
12188
12189
12190   // We create a MODULE_aux_N.c file for each tracepoint header, to allow them
12191   // to be separately compiled.  That's because kernel tracepoint headers sometimes
12192   // conflict.  PR13155.
12193
12194   map<string,translator_output*> per_header_aux;
12195   // GC NB: the translator_output* structs are owned/retained by the systemtap_session.
12196
12197   for (unsigned i = 0; i < probes.size(); ++i)
12198     {
12199       tracepoint_derived_probe *p = probes[i];
12200       string header = p->header;
12201
12202       // We cache the auxiliary output files on a per-header basis.  We don't
12203       // need one aux file per tracepoint, only one per tracepoint-header.
12204       translator_output *tpop = per_header_aux[header];
12205       if (tpop == 0)
12206         {
12207           tpop = s.op_create_auxiliary();
12208           per_header_aux[header] = tpop;
12209
12210           // PR9993: Add extra headers to work around undeclared types in individual
12211           // include/trace/foo.h files
12212           const vector<string>& extra_decls = tracepoint_extra_decls (s, header,
12213                                                                       false);
12214           for (unsigned z=0; z<extra_decls.size(); z++)
12215             tpop->newline() << extra_decls[z] << "\n";
12216
12217           // strip include/ substring, the same way as done in get_tracequery_module()
12218           size_t root_pos = header.rfind("include/");
12219           header = ((root_pos != string::npos) ? header.substr(root_pos + 8) : header);
12220
12221           tpop->newline() << "#include <linux/stp_tracepoint.h>" << endl;
12222           tpop->newline() << "#include <" << header << ">";
12223         }
12224
12225       // collect the args that are actually in use
12226       vector<const tracepoint_arg*> used_args;
12227       for (unsigned j = 0; j < p->args.size(); ++j)
12228         if (p->args[j].used)
12229           used_args.push_back(&p->args[j]);
12230
12231       // forward-declare the generated-side tracepoint callback, and define the
12232       // generated-side tracepoint callback in the main translator-output
12233       string enter_real_fn = "enter_real_tracepoint_probe_" + lex_cast(i);
12234       if (used_args.empty())
12235         {
12236           tpop->newline() << "STP_TRACE_ENTER_REAL_NOARGS(" << enter_real_fn << ");";
12237           s.op->newline() << "STP_TRACE_ENTER_REAL_NOARGS(" << enter_real_fn << ");";
12238           s.op->newline() << "STP_TRACE_ENTER_REAL_NOARGS(" << enter_real_fn << ")";
12239         }
12240       else
12241         {
12242           tpop->newline() << "STP_TRACE_ENTER_REAL(" << enter_real_fn;
12243           s.op->newline() << "STP_TRACE_ENTER_REAL(" << enter_real_fn;
12244           s.op->indent(2);
12245           for (unsigned j = 0; j < used_args.size(); ++j)
12246             {
12247               tpop->line() << ", int64_t";
12248               s.op->newline() << ", int64_t __tracepoint_arg_" << used_args[j]->name;
12249             }
12250           tpop->line() << ");";
12251           s.op->newline() << ");";
12252           s.op->indent(-2);
12253           s.op->newline() << "STP_TRACE_ENTER_REAL(" << enter_real_fn;
12254           s.op->indent(2);
12255           for (unsigned j = 0; j < used_args.size(); ++j)
12256             {
12257               s.op->newline() << ", int64_t __tracepoint_arg_" << used_args[j]->name;
12258             }
12259           s.op->newline() << ")";
12260           s.op->indent(-2);
12261         }
12262       s.op->newline() << "{";
12263       s.op->newline(1) << "const struct stap_probe * const probe = "
12264                        << common_probe_init (p) << ";";
12265       common_probe_entryfn_prologue (s, "STAP_SESSION_RUNNING", "", "probe",
12266                                      "stp_probe_type_tracepoint");
12267       s.op->newline() << "c->ips.tp.tracepoint_system = "
12268                       << lex_cast_qstring (p->tracepoint_system)
12269                       << ";";
12270       s.op->newline() << "c->ips.tp.tracepoint_name = "
12271                       << lex_cast_qstring (p->tracepoint_name)
12272                       << ";";
12273       for (unsigned j = 0; j < used_args.size(); ++j)
12274         {
12275           s.op->newline() << "c->probe_locals." << p->name()
12276                           << "." + s.up->c_localname("__tracepoint_arg_" + used_args[j]->name)
12277                           << " = __tracepoint_arg_" << used_args[j]->name << ";";
12278         }
12279       s.op->newline() << "(*probe->ph) (c);";
12280       common_probe_entryfn_epilogue (s, true, otf_safe_context(s));
12281       s.op->newline(-1) << "}";
12282
12283       // define the real tracepoint callback function
12284       string enter_fn = "enter_tracepoint_probe_" + lex_cast(i);
12285       if (p->args.empty())
12286         tpop->newline() << "static STP_TRACE_ENTER_NOARGS(" << enter_fn << ")";
12287       else
12288         {
12289           tpop->newline() << "static STP_TRACE_ENTER(" << enter_fn;
12290           s.op->indent(2);
12291           for (unsigned j = 0; j < p->args.size(); ++j)
12292             tpop->newline() << ", " << p->args[j].c_decl;
12293           tpop->newline() << ")";
12294           s.op->indent(-2);
12295         }
12296       tpop->newline() << "{";
12297       tpop->newline(1) << enter_real_fn << "(";
12298       tpop->indent(2);
12299       for (unsigned j = 0; j < used_args.size(); ++j)
12300         {
12301           if (j > 0)
12302             tpop->line() << ", ";
12303           tpop->newline() << "(int64_t)" << used_args[j]->typecast
12304                           << "__tracepoint_arg_" << used_args[j]->name;
12305         }
12306       tpop->newline() << ");";
12307       tpop->newline(-3) << "}";
12308
12309
12310       // emit normalized registration functions
12311       s.op->newline() << "int register_tracepoint_probe_" << i << "(void);";
12312       tpop->newline() << "int register_tracepoint_probe_" << i << "(void);" << endl;
12313       tpop->newline() << "int register_tracepoint_probe_" << i << "(void) {";
12314       tpop->newline(1) << "return STP_TRACE_REGISTER(" << p->tracepoint_name
12315                        << ", " << enter_fn << ");";
12316       tpop->newline(-1) << "}";
12317
12318       // NB: we're not prepared to deal with unreg failures.  However, failures
12319       // can only occur if the tracepoint doesn't exist (yet?), or if we
12320       // weren't even registered.  The former should be OKed by the initial
12321       // registration call, and the latter is safe to ignore.
12322
12323       // declare normalized registration functions
12324       s.op->newline() << "void unregister_tracepoint_probe_" << i << "(void);";
12325       tpop->newline() << "void unregister_tracepoint_probe_" << i << "(void);" << endl;
12326       tpop->newline() << "void unregister_tracepoint_probe_" << i << "(void) {";
12327       tpop->newline(1) << "(void) STP_TRACE_UNREGISTER(" << p->tracepoint_name
12328                        << ", " << enter_fn << ");";
12329       tpop->newline(-1) << "}";
12330       tpop->newline();
12331
12332       tpop->assert_0_indent();
12333     }
12334
12335   // emit an array of registration functions for easy init/shutdown
12336   s.op->newline() << "static struct stap_tracepoint_probe {";
12337   s.op->newline(1) << "int (*reg)(void);";
12338   s.op->newline(0) << "void (*unreg)(void);";
12339   s.op->newline(-1) << "} stap_tracepoint_probes[] = {";
12340   s.op->indent(1);
12341   for (unsigned i = 0; i < probes.size(); ++i)
12342     {
12343       s.op->newline () << "{";
12344       s.op->line() << " .reg=&register_tracepoint_probe_" << i << ",";
12345       s.op->line() << " .unreg=&unregister_tracepoint_probe_" << i;
12346       s.op->line() << " },";
12347     }
12348   s.op->newline(-1) << "};";
12349   s.op->newline();
12350 }
12351
12352
12353 void
12354 tracepoint_derived_probe_group::emit_module_init (systemtap_session &s)
12355 {
12356   if (probes.size () == 0)
12357     return;
12358
12359   s.op->newline() << "/* init tracepoint probes */";
12360   s.op->newline() << "for (i=0; i<" << probes.size() << "; i++) {";
12361   s.op->newline(1) << "rc = stap_tracepoint_probes[i].reg();";
12362   s.op->newline() << "if (rc) {";
12363   s.op->newline(1) << "for (j=i-1; j>=0; j--)"; // partial rollback
12364   s.op->newline(1) << "stap_tracepoint_probes[j].unreg();";
12365   s.op->newline(-1) << "break;"; // don't attempt to register any more probes
12366   s.op->newline(-1) << "}";
12367   s.op->newline(-1) << "}";
12368
12369   // Modern kernels' tracepoint implementation makes use of SRCU and
12370   // their tracepoint_synchronize_unregister() function calls
12371   // synchronize_srcu(&tracepoint_srcu) right before calling synchronize_rcu().
12372   // So it's safer to always call tracepoint_synchronize_unregister() to avoid
12373   // any risks.
12374
12375   s.op->newline() << "if (rc)";
12376   s.op->newline(1) << "tracepoint_synchronize_unregister();";
12377   s.op->indent(-1);
12378 }
12379
12380
12381 void
12382 tracepoint_derived_probe_group::emit_module_exit (systemtap_session& s)
12383 {
12384   if (probes.empty())
12385     return;
12386
12387   s.op->newline() << "/* deregister tracepoint probes */";
12388   s.op->newline() << "for (i=0; i<" << probes.size() << "; i++)";
12389   s.op->newline(1) << "stap_tracepoint_probes[i].unreg();";
12390   s.op->indent(-1);
12391
12392   // This is necessary: see above.
12393   s.op->newline() << "tracepoint_synchronize_unregister();";
12394 }
12395
12396
12397 struct tracepoint_query : public base_query
12398 {
12399   probe * base_probe;
12400   probe_point * base_loc;
12401   vector<derived_probe *> & results;
12402   set<string> probed_names;
12403
12404   void handle_query_module();
12405   int handle_query_cu(Dwarf_Die * cudie);
12406   int handle_query_func(Dwarf_Die * func);
12407   int handle_query_type(Dwarf_Die * type);
12408   int handle_query_type_syscall_events(Dwarf_Die * cudie);
12409   void query_library (const char *) {}
12410   void query_plt (const char *, size_t) {}
12411
12412   static int tracepoint_query_cu (Dwarf_Die * cudie, tracepoint_query * q);
12413   static int tracepoint_query_func (Dwarf_Die * func, tracepoint_query * q);
12414   static int tracepoint_query_type (Dwarf_Die * type,
12415                                     bool has_inner_types,
12416                                     const std::string& prefix,
12417                                     tracepoint_query * q);
12418
12419   tracepoint_query(dwflpp & dw, const string & tracepoint,
12420                    probe * base_probe, probe_point * base_loc,
12421                    vector<derived_probe *> & results):
12422     base_query(dw, "*"), base_probe(base_probe),
12423     base_loc(base_loc), results(results)
12424   {
12425     // The user may have specified the system to probe, e.g. all of the
12426     // following are possible:
12427     //
12428     //   sched_switch --> tracepoint named sched_switch
12429     //   sched:sched_switch --> tracepoint named sched_switch in the sched system
12430     //   sch*:sched_* --> system starts with sch and tracepoint starts with sched_
12431     //   sched:* --> all tracepoints in system sched
12432     //   *:sched_switch --> same as just sched_switch
12433
12434     size_t sys_pos = tracepoint.find(':');
12435     if (sys_pos == string::npos)
12436       {
12437         this->system = "";
12438         this->tracepoint = tracepoint;
12439       }
12440     else
12441       {
12442         if (strverscmp(sess.compatible.c_str(), "2.6") <= 0)
12443           throw SEMANTIC_ERROR (_("SYSTEM:TRACEPOINT syntax not supported "
12444                                   "with --compatible <= 2.6"));
12445
12446         this->system = tracepoint.substr(0, sys_pos);
12447         this->tracepoint = tracepoint.substr(sys_pos+1);
12448       }
12449
12450     // make sure we have something to query (filters out e.g. "" and ":")
12451     if (this->tracepoint.empty())
12452       throw SEMANTIC_ERROR (_("invalid tracepoint string provided"));
12453   }
12454
12455 private:
12456   string system; // target subsystem(s) to query
12457   string tracepoint; // target tracepoint(s) to query
12458   string current_system; // subsystem of module currently being visited
12459
12460   string retrieve_trace_system();
12461 };
12462
12463 // name of section where TRACE_SYSTEM is stored
12464 // (see tracepoint_builder::get_tracequery_modules())
12465 #define STAP_TRACE_SYSTEM ".stap_trace_system"
12466
12467 string
12468 tracepoint_query::retrieve_trace_system()
12469 {
12470   Dwarf_Addr bias;
12471   Elf* elf = dwfl_module_getelf(dw.module, &bias);
12472   if (!elf)
12473     return "";
12474
12475   size_t shstrndx;
12476   if (elf_getshdrstrndx(elf, &shstrndx))
12477     return "";
12478
12479   Elf_Scn *scn = NULL;
12480   GElf_Shdr shdr_mem;
12481
12482   while ((scn = elf_nextscn(elf, scn)))
12483     {
12484       if (gelf_getshdr(scn, &shdr_mem) == NULL)
12485         return "";
12486
12487       const char *name = elf_strptr(elf, shstrndx, shdr_mem.sh_name);
12488       if (name == NULL)
12489         return "";
12490
12491       if (strcmp(name, STAP_TRACE_SYSTEM) == 0)
12492         break;
12493     }
12494
12495   if (scn == NULL)
12496     return "";
12497
12498   // Extract saved TRACE_SYSTEM in section
12499   Elf_Data *data = elf_getdata(scn, NULL);
12500   if (!data)
12501     return "";
12502
12503   return string((char*)data->d_buf);
12504 }
12505
12506
12507 void
12508 tracepoint_query::handle_query_module()
12509 {
12510   // Get the TRACE_SYSTEM for this module, if any. It will be found in the
12511   // STAP_TRACE_SYSTEM section if it exists.
12512   current_system = retrieve_trace_system();
12513
12514   // check if user wants a specific system
12515   if (!system.empty())
12516     {
12517       // don't need to go further if module has no system or doesn't
12518       // match the one we want
12519       if (current_system.empty()
12520           || !dw.function_name_matches_pattern(current_system, system))
12521         return;
12522     }
12523
12524   // look for the tracepoints in each CU
12525   dw.iterate_over_cus(tracepoint_query_cu, this, false);
12526 }
12527
12528
12529 int
12530 tracepoint_query::handle_query_cu(Dwarf_Die * cudie)
12531 {
12532   dw.focus_on_cu (cudie);
12533   dw.mod_info->get_symtab();
12534
12535   // look at each type to see if it's a tracepoint
12536   if (dw.sess.runtime_mode == dw.sess.systemtap_session::bpf_runtime)
12537     {
12538       if (0 && current_system == "raw_syscalls")
12539         // In BPF / trace_events world, syscalls are abstracted from
12540         // the TRACE_EVENT_FN() (pure callbacks), via
12541         // kernel/trace/trace_syscalls.stp into a family of trace
12542         // events (demultiplexed by syscall id#).  There is a
12543         // standardized event-field structure that does -not- show up
12544         // in these header files, nor in the vmlinux file, but are
12545         // synthesized/registered at kernel boot time.
12546         return handle_query_type_syscall_events (cudie);
12547       else
12548         return dwflpp::iterate_over_globals (cudie, tracepoint_query_type, this);
12549     }
12550
12551   // look at each function to see if it's a tracepoint
12552   string function = "stapprobe_" + tracepoint;
12553   return dw.iterate_over_functions (tracepoint_query_func, this, function);
12554 }
12555
12556
12557 int
12558 tracepoint_query::handle_query_func(Dwarf_Die * func)
12559 {
12560   dw.focus_on_function (func);
12561
12562   assert(startswith(dw.function_name, "stapprobe_"));
12563   string tracepoint_instance = dw.function_name.substr(10);
12564
12565   // check for duplicates -- sometimes tracepoint headers may be indirectly
12566   // included in more than one of our tracequery modules.
12567   if (!probed_names.insert(tracepoint_instance).second)
12568     return DWARF_CB_OK;
12569
12570   // PR17126: blocklist
12571   if (!sess.guru_mode)
12572     {
12573       if ((sess.architecture.substr(0,3) == "ppc" ||
12574            sess.architecture.substr(0,7) == "powerpc") &&
12575           (tracepoint_instance == "hcall_entry" ||
12576            tracepoint_instance == "hcall_exit" ||
12577            tracepoint_instance == "hash_fault"))
12578         {
12579           sess.print_warning(_F("tracepoint %s is blocklisted on architecture %s",
12580                                 tracepoint_instance.c_str(), sess.architecture.c_str()));
12581           return DWARF_CB_OK;
12582         }
12583   }
12584
12585   derived_probe *dp = new tracepoint_derived_probe (dw.sess, dw, *func,
12586                                                     current_system,
12587                                                     tracepoint_instance,
12588                                                     base_probe, base_loc);
12589   results.push_back (dp);
12590   return DWARF_CB_OK;
12591 }
12592
12593 int
12594 tracepoint_query::handle_query_type(Dwarf_Die * type)
12595 {
12596   Dwarf_Die struct_die = *type;
12597
12598   if (!dwarf_hasattr(type, DW_AT_name))
12599     return DWARF_CB_OK;
12600
12601   std::string name(dwarf_diename(type) ?: "<unknown type>");
12602
12603   if (!dw.function_name_matches_pattern(name, "stapprobe_" + tracepoint)
12604       || startswith(name, "stapprobe_template_"))
12605     return DWARF_CB_OK;
12606
12607   name = name.substr(10);
12608
12609   // get the corresponding structure die
12610   while (dwarf_tag(&struct_die) == DW_TAG_typedef)
12611     {
12612       if (dwarf_attr_die(&struct_die, DW_AT_type, &struct_die) == NULL)
12613         throw SEMANTIC_ERROR(_F("Unable to resolve base type of %s for probe %s\n",
12614                                 name.c_str(), tracepoint.c_str()));
12615     }
12616
12617   assert(dwarf_tag(&struct_die) == DW_TAG_structure_type);
12618
12619   // check for duplicates -- sometimes tracepoint headers may be indirectly
12620   // included in more than one of our tracequery modules.
12621   if (!probed_names.insert(name).second)
12622     return DWARF_CB_OK;
12623
12624   derived_probe *dp = new tracepoint_derived_probe(dw.sess, dw, struct_die,
12625                                                    current_system, name,
12626                                                    base_probe, base_loc);
12627   results.push_back(dp);
12628   return DWARF_CB_OK;
12629 }
12630
12631
12632 int
12633 tracepoint_query::handle_query_type_syscall_events(Dwarf_Die * cudie)
12634 {
12635   (void) cudie;
12636
12637   return DWARF_CB_OK;
12638 }
12639
12640
12641
12642 int
12643 tracepoint_query::tracepoint_query_cu (Dwarf_Die * cudie, tracepoint_query * q)
12644 {
12645   if (pending_interrupts) return DWARF_CB_ABORT;
12646   return q->handle_query_cu(cudie);
12647 }
12648
12649
12650 int
12651 tracepoint_query::tracepoint_query_func (Dwarf_Die * func, tracepoint_query * q)
12652 {
12653   if (pending_interrupts) return DWARF_CB_ABORT;
12654   return q->handle_query_func(func);
12655 }
12656
12657 int
12658 tracepoint_query::tracepoint_query_type (Dwarf_Die *type, bool has_inner_types,
12659                                          const std::string& prefix, tracepoint_query *q)
12660 {
12661   // needed to match signature of dwflpp::iterate_over_globals callback
12662   (void) has_inner_types;
12663   (void) prefix;
12664
12665   if (pending_interrupts) return DWARF_CB_ABORT;
12666   return q->handle_query_type(type);
12667 }
12668
12669
12670 struct tracepoint_builder: public derived_probe_builder
12671 {
12672 private:
12673   dwflpp *dw;
12674   bool init_dw(systemtap_session& s);
12675   void get_tracequery_modules(systemtap_session& s,
12676                               const vector<string>& headers,
12677                               vector<string>& modules);
12678
12679 public:
12680
12681   tracepoint_builder(): dw(0) {}
12682   ~tracepoint_builder() { delete dw; }
12683
12684   void build_no_more (systemtap_session& s)
12685   {
12686     if (dw && s.verbose > 3)
12687       clog << _("tracepoint_builder releasing dwflpp") << endl;
12688     delete dw;
12689     dw = NULL;
12690
12691     delete_session_module_cache (s);
12692   }
12693
12694   void build(systemtap_session& s,
12695              probe *base, probe_point *location,
12696              literal_map_t const& parameters,
12697              vector<derived_probe*>& finished_results);
12698
12699   virtual string name() { return "tracepoint builder"; }
12700 };
12701
12702
12703
12704 // Create (or cache) one or more tracequery .o modules, based upon the
12705 // tracepoint-related header files given.  Return the generated or cached
12706 // modules[].
12707
12708 void
12709 tracepoint_builder::get_tracequery_modules(systemtap_session& s,
12710                                            const vector<string>& headers,
12711                                            vector<string>& modules)
12712 {
12713   if (s.verbose > 2)
12714     {
12715       clog << _F("Pass 2: getting a tracepoint query for %zu headers: ", headers.size()) << endl;
12716       for (size_t i = 0; i < headers.size(); ++i)
12717         clog << "  " << headers[i] << endl;
12718     }
12719
12720   map<string,string> headers_cache_obj;  // header name -> cache/.../tracequery_hash.o file name
12721   // Map the headers to cache .o names.  Note that this has side-effects of
12722   // creating the $SYSTEMTAP_DIR/.cache/XX/... directory and the hash-log file,
12723   // so we prefer not to repeat this.
12724   vector<string> uncached_headers;
12725   for (size_t i=0; i<headers.size(); i++)
12726     headers_cache_obj[headers[i]] = find_tracequery_hash(s, headers[i]);
12727
12728   // They may be in the cache already.
12729   if (s.use_cache && !s.poison_cache)
12730     for (size_t i=0; i<headers.size(); i++)
12731       {
12732         // see if the cached module exists
12733         const string& tracequery_path = headers_cache_obj[headers[i]];
12734         if (!tracequery_path.empty() && file_exists(tracequery_path))
12735           {
12736             if (s.verbose > 2)
12737               clog << _F("Pass 2: using cached %s", tracequery_path.c_str()) << endl;
12738
12739             // an empty file is a cached failure
12740             if (get_file_size(tracequery_path) > 0)
12741               modules.push_back (tracequery_path);
12742           }
12743         else
12744           uncached_headers.push_back(headers[i]);
12745       }
12746   else
12747     uncached_headers = headers;
12748
12749   // If we have nothing left to search for, quit
12750   if (uncached_headers.empty()) return;
12751
12752   map<string,string> headers_tracequery_src; // header -> C-source code mapping
12753
12754   // We could query several subsets of headers[] to make this go
12755   // faster, but let's KISS and do one at a time.
12756   for (size_t i=0; i<uncached_headers.size(); i++)
12757     {
12758       const string& header = uncached_headers[i];
12759
12760       // create a tracequery source file
12761       ostringstream osrc;
12762
12763       // PR9993: Add extra headers to work around undeclared types in individual
12764       // include/trace/foo.h files
12765       vector<string> short_decls = tracepoint_extra_decls(s, header, true);
12766
12767       // add each requested tracepoint header
12768       size_t root_pos = header.rfind("include/");
12769       short_decls.push_back(string("#include <") +
12770                             ((root_pos != string::npos) ? header.substr(root_pos + 8) : header) +
12771                             string(">"));
12772
12773       osrc << "#ifdef CONFIG_TRACEPOINTS" << endl;
12774       osrc << "#include <linux/tracepoint.h>" << endl;
12775
12776       // BPF raw tracepoint macros for creating the multiple fields
12777       // of the data struct that describes the raw tracepoint.
12778       // These macros counts up to 12. Any more, it will return 13th argument.
12779       // These macros will likely have issues with raw tracepoints with more than 12 arguments.
12780       osrc << "#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n" << endl;
12781       osrc << "#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)" << endl;
12782       osrc << "#define __CONCAT(a, b) a ## b" << endl;
12783       osrc << "#define CONCATENATE(a, b) __CONCAT(a, b)" << endl;
12784       osrc << "#define __FIELD_ENTRY(x) x __attribute__ ((aligned (8)))" << endl;
12785       osrc << "#define __FIELD1(a,...) __FIELD_ENTRY(a);" << endl;
12786       osrc << "#define __FIELD2(a,...) __FIELD_ENTRY(a); __FIELD1(__VA_ARGS__)" << endl;
12787       osrc << "#define __FIELD3(a,...) __FIELD_ENTRY(a); __FIELD2(__VA_ARGS__)" << endl;
12788       osrc << "#define __FIELD4(a,...) __FIELD_ENTRY(a); __FIELD3(__VA_ARGS__)" << endl;
12789       osrc << "#define __FIELD5(a,...) __FIELD_ENTRY(a); __FIELD4(__VA_ARGS__)" << endl;
12790       osrc << "#define __FIELD6(a,...) __FIELD_ENTRY(a); __FIELD5(__VA_ARGS__)" << endl;
12791       osrc << "#define __FIELD7(a,...) __FIELD_ENTRY(a); __FIELD6(__VA_ARGS__)" << endl;
12792       osrc << "#define __FIELD8(a,...) __FIELD_ENTRY(a); __FIELD7(__VA_ARGS__)" << endl;
12793       osrc << "#define __FIELD9(a,...) __FIELD_ENTRY(a); __FIELD8(__VA_ARGS__)" << endl;
12794       osrc << "#define __FIELD10(a,...) __FIELD_ENTRY(a); __FIELD9(__VA_ARGS__)" << endl;
12795       osrc << "#define __FIELD11(a,...) __FIELD_ENTRY(a); __FIELD10(__VA_ARGS__)" << endl;
12796       osrc << "#define __FIELD12(a,...) __FIELD_ENTRY(a); __FIELD11(__VA_ARGS__)" << endl;
12797       osrc << "#define FIELDS(...) CONCATENATE(__FIELD, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__)" << endl;
12798
12799       // The following PARAMS and DECLARE_TRACE_* macros are used
12800       // by both linux kernel module and bpf raw tracepoints.
12801
12802       // The kernel has changed this naming a few times, previously TPPROTO,
12803       // TP_PROTO, TPARGS, TP_ARGS, etc.  so let's just dupe the latest.
12804       osrc << "#ifndef PARAMS" << endl;
12805       osrc << "#define PARAMS(args...) args" << endl;
12806       osrc << "#endif" << endl;
12807
12808       // 2.6.35 added the NOARGS variant, but it's the same for us
12809       osrc << "#undef DECLARE_TRACE_NOARGS" << endl;
12810       osrc << "#define DECLARE_TRACE_NOARGS(name) \\" << endl;
12811       osrc << "  DECLARE_TRACE(name, void, )" << endl;
12812
12813       // 2.6.38 added the CONDITION variant, which can also just redirect
12814       osrc << "#undef DECLARE_TRACE_CONDITION" << endl;
12815       osrc << "#define DECLARE_TRACE_CONDITION(name, proto, args, cond) \\" << endl;
12816       osrc << "  DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))" << endl;
12817
12818       // older tracepoints used DEFINE_TRACE, so redirect that too
12819       osrc << "#undef DEFINE_TRACE" << endl;
12820       osrc << "#define DEFINE_TRACE(name, proto, args) \\" << endl;
12821       osrc << "  DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))" << endl;
12822
12823       // Macros to help build the struct describing the older cooked bpf tracepoints
12824       osrc << "#undef __field" << endl;
12825       osrc << "#define __field(type, item) type item;" << endl;
12826
12827       osrc << "#undef __field_desc" << endl;
12828       osrc << "#define __field_desc(type, container, item) type item;" << endl;
12829
12830       osrc << "#undef __array" << endl;
12831       osrc << "#define __array(type, item, size) type item[size];" << endl;
12832
12833       osrc << "#undef __array_desc" << endl;
12834       osrc << "#define __array_desc(type, container, item, size) type item[size];" << endl;
12835
12836       osrc << "#undef __dynamic_array" << endl;
12837       osrc << "#define __dynamic_array(type, item, len) u32 item;" << endl;
12838
12839       osrc << "#undef __string" << endl;
12840       osrc << "#define __string(item, src) __dynamic_array(char, item, -1)" << endl;
12841
12842       osrc << "#undef __bitmask" << endl;
12843       osrc << "#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)" << endl;
12844
12845       osrc << "#undef TP_STRUCT__entry" << endl;
12846       osrc << "#define TP_STRUCT__entry(args...) args" << endl;
12847
12848       if (s.runtime_mode != systemtap_session::bpf_runtime) {
12849           // override DECLARE_TRACE to synthesize probe functions for us
12850           osrc << "#undef DECLARE_TRACE" << endl;
12851           osrc << "#define DECLARE_TRACE(name, proto, args) \\" << endl;
12852           osrc << "  void stapprobe_##name(proto); \\" << endl;
12853           osrc << "  void stapprobe_##name(proto) {}" << endl;
12854       } else {
12855           if (s.use_bpf_raw_tracepoint) {
12856               // override DECLARE_TRACE to synthesize struct for the bpf raw tracepoint
12857               osrc << "#undef DECLARE_TRACE" << endl;
12858               osrc << "#define DECLARE_TRACE(name, proto, args) \\" << endl;
12859               osrc << "  struct stapprobe_##name { struct { FIELDS(proto) } data; } stapprobe_##name;" << endl;
12860           } else {
12861               // Macros to create structure for older cooked bpf tracepoints
12862               // Similar to above, but instantiates structs instead of functions.
12863               // The members will become tracepoint args.
12864               osrc << "#undef DECLARE_EVENT_CLASS" << endl;
12865               osrc << "#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \\" << endl;
12866               osrc << "  struct stapprobe_template_##name { unsigned long long pad; struct { tstruct } data; };" << endl;
12867
12868               // typedef helps us access template's debuginfo when given name's debuginfo
12869               osrc << "#undef DEFINE_EVENT" << endl;
12870               osrc << "#define DEFINE_EVENT(template, name, proto, args) \\" << endl;
12871               osrc << "  typedef struct stapprobe_template_##template stapprobe_##name; \\" << endl;
12872               osrc << "  stapprobe_##name stapprobe_inst_##name;" << endl;
12873
12874               osrc << "#undef TRACE_EVENT" << endl;
12875               osrc << "#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \\" << endl;
12876               osrc << "  struct stapprobe_##name { unsigned long long pad; struct { tstruct } data; } stapprobe_##name;" << endl;
12877
12878               osrc << "#undef TRACE_EVENT_FN" << endl;
12879               osrc << "#define TRACE_EVENT_FN(name, proto, args, tstruct, assign, print, reg, unreg) \\" << endl;
12880               osrc << "  struct stapprobe_##name { unsigned long long pad; struct { tstruct } data; } stapprobe_##name;" << endl;
12881
12882               osrc << "#undef TRACE_EVENT_CONDITION" << endl;
12883               osrc << "#define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \\" << endl;
12884               osrc << " struct stapprobe_##name { unsigned long long pad; struct { tstruct } data; } stapprobe_##name;" << endl;
12885           }
12886       }
12887
12888       // add the specified decls/#includes
12889       for (unsigned z=0; z<short_decls.size(); z++)
12890         osrc << "#undef TRACE_INCLUDE_FILE\n"
12891              << "#undef TRACE_INCLUDE_PATH\n"
12892              << short_decls[z] << "\n";
12893
12894       // create a section that will hold the TRACE_SYSTEM for this header
12895       osrc << "#ifdef TRACE_SYSTEM" << endl;
12896       osrc << "const char stap_trace_system[]" << endl;
12897       osrc << "  __attribute__((section(\"" STAP_TRACE_SYSTEM "\")))" << endl;
12898       osrc << "    = __stringify(TRACE_SYSTEM);" << endl;
12899       osrc << "#endif" << endl;
12900
12901       // finish up the module source
12902       osrc << "#endif /* CONFIG_TRACEPOINTS */" << endl;
12903
12904       // save the source file away
12905       headers_tracequery_src[header] = osrc.str();
12906     }
12907
12908   // now build them all together
12909   map<string,string> tracequery_objs = make_tracequeries(s, headers_tracequery_src);
12910
12911   // now extend the modules list, and maybe plop them into the cache
12912   for (size_t i=0; i<uncached_headers.size(); i++)
12913     {
12914       const string& header = uncached_headers[i];
12915       const string& tracequery_obj = tracequery_objs[header];
12916       const string& tracequery_path = headers_cache_obj[header];
12917       if (tracequery_obj !="" && file_exists(tracequery_obj))
12918         {
12919           modules.push_back (tracequery_obj);
12920           if (s.use_cache)
12921             copy_file(tracequery_obj, tracequery_path, s.verbose > 2);
12922         }
12923       else if (s.use_cache)
12924         // cache an empty file for failures
12925         copy_file("/dev/null", tracequery_path, s.verbose > 2);
12926     }
12927 }
12928
12929
12930
12931 bool
12932 tracepoint_builder::init_dw(systemtap_session& s)
12933 {
12934   if (dw != NULL)
12935     return true;
12936
12937   vector<string> tracequery_modules;
12938   vector<string> system_headers;
12939
12940   glob_t trace_glob;
12941
12942   // find kernel_source_tree from DW_AT_comp_dir
12943   if (s.kernel_source_tree == "")
12944     {
12945       unsigned found;
12946       Dwfl *dwfl = setup_dwfl_kernel ("kernel", &found, s);
12947       if (found)
12948         {
12949           Dwarf_Die *cudie = 0;
12950           Dwarf_Addr bias;
12951           while ((cudie = dwfl_nextcu (dwfl, cudie, &bias)) != NULL)
12952             {
12953               assert_no_interrupts();
12954               Dwarf_Attribute attr;
12955               const char* name = dwarf_formstring (dwarf_attr (cudie, DW_AT_comp_dir, &attr));
12956               if (name)
12957                 {
12958                   // Before we try to use it, check that the path actually
12959                   // exists locally and is distinct from the build tree.
12960                   if (!file_exists(name))
12961                     {
12962                       if (s.verbose > 2)
12963                         clog << _F("Ignoring inaccessible kernel source tree (DW_AT_comp_dir) at '%s'", name) << endl;
12964                     }
12965                   else if (resolve_path(name) == resolve_path(s.kernel_build_tree))
12966                     {
12967                       if (s.verbose > 2)
12968                         clog << _F("Ignoring duplicate kernel source tree (DW_AT_comp_dir) at '%s'", name) << endl;
12969                     }
12970                   else
12971                     {
12972                       if (s.verbose > 2)
12973                         clog << _F("Located kernel source tree (DW_AT_comp_dir) at '%s'", name) << endl;
12974                       s.kernel_source_tree = name;
12975                     }
12976
12977                   break; // skip others; modern Kbuild uses same comp_dir for them all
12978                 }
12979             }
12980         }
12981       dwfl_end (dwfl);
12982     }
12983
12984   // find kernel_source_tree from a source link, when different from build
12985   if (s.kernel_source_tree == "")
12986     {
12987       vector<string> source_trees;
12988
12989       // vendor kernel (e.g. Fedora): the source link is in the same dir
12990       // as the build tree
12991       if (endswith(s.kernel_build_tree, "/build"))
12992         {
12993           string source_tree = s.kernel_build_tree;
12994           source_tree.replace(source_tree.length() - 5, 5, "source");
12995           source_trees.push_back(source_tree);
12996         }
12997
12998       // vanilla kernel: the source link is in the build tree
12999       source_trees.push_back(s.kernel_build_tree + "/source");
13000
13001       for (unsigned i = 0; i < source_trees.size(); i++)
13002         {
13003           string source_tree = source_trees[i];
13004
13005           if (dir_exists(source_tree) &&
13006               resolve_path(source_tree) != resolve_path(s.kernel_build_tree))
13007             {
13008               if (s.verbose > 2)
13009                 clog << _F("Located kernel source tree at '%s'", source_tree.c_str()) << endl;
13010               s.kernel_source_tree = source_tree;
13011               break;
13012             }
13013         }
13014     }
13015
13016   // prefixes
13017   vector<string> glob_prefixes;
13018   glob_prefixes.push_back (s.kernel_build_tree);
13019   if (s.kernel_source_tree != "")
13020     glob_prefixes.push_back (s.kernel_source_tree);
13021
13022   // suffixes
13023   vector<string> glob_suffixes;
13024   glob_suffixes.push_back("include/trace/events/*.h");
13025   glob_suffixes.push_back("include/trace/*.h");
13026   glob_suffixes.push_back("include/ras/*_event.h");
13027   glob_suffixes.push_back("arch/x86/entry/vsyscall/*trace.h");
13028   glob_suffixes.push_back("arch/x86/kernel/*trace.h");
13029   glob_suffixes.push_back("arch/*/include/asm/*trace*.h");
13030   glob_suffixes.push_back("arch/*/include/asm/trace/*.h");
13031   glob_suffixes.push_back("arch/*/kvm/*trace.h");
13032   glob_suffixes.push_back("fs/xfs/linux-*/xfs_tr*.h");
13033   glob_suffixes.push_back("fs/*/*trace*.h");
13034   glob_suffixes.push_back("net/*/*trace*.h");
13035   glob_suffixes.push_back("sound/core/*_trace.h");
13036   glob_suffixes.push_back("sound/hda/*trace*.h");
13037   glob_suffixes.push_back("sound/pci/hda/*_trace.h");
13038   glob_suffixes.push_back("drivers/base/regmap/*trace*.h");
13039   glob_suffixes.push_back("drivers/gpu/drm/*_trace.h");
13040   glob_suffixes.push_back("drivers/gpu/drm/*/*_trace.h");
13041   glob_suffixes.push_back("drivers/net/wireless/*/*/*trace*.h");
13042   glob_suffixes.push_back("drivers/usb/host/*trace*.h");
13043   glob_suffixes.push_back("virt/kvm/*/*trace*.h");
13044
13045   // see also tracepoint_extra_decls above
13046
13047   // compute cartesian product
13048   vector<string> globs;
13049   for (unsigned i=0; i<glob_prefixes.size(); i++)
13050     for (unsigned j=0; j<glob_suffixes.size(); j++)
13051       globs.push_back (glob_prefixes[i]+string("/")+glob_suffixes[j]);
13052
13053   set<string> duped_headers;
13054   for (unsigned z = 0; z < globs.size(); z++)
13055     {
13056       string glob_str = globs[z];
13057       if (s.verbose > 3)
13058         clog << _("Checking tracepoint glob ") << glob_str << endl;
13059
13060       int r = glob(glob_str.c_str(), 0, NULL, &trace_glob);
13061       if (r == GLOB_NOSPACE || r == GLOB_ABORTED)
13062         throw runtime_error("Error globbing tracepoint");
13063
13064       for (unsigned i = 0; i < trace_glob.gl_pathc; ++i)
13065         {
13066           string header(trace_glob.gl_pathv[i]);
13067
13068           // filter out a few known "internal-only" headers
13069           if (endswith(header, "/define_trace.h") ||
13070               endswith(header, "/ftrace.h")       ||
13071               endswith(header, "/trace_events.h") ||
13072               endswith(header, "/perf.h") ||
13073               endswith(header, "_event_types.h"))
13074             continue;
13075
13076           // Skip identical headers from the build and source trees.
13077           // NB: For the moment these are only compared by reduced path, since
13078           // get_tracequery_modules and emit_module_decls also reduce the path
13079           // like this for their #includes.  If we want to get fancier, like
13080           // comparing file contents, then those functions will also have to be
13081           // more precise in how they #include.
13082           size_t root_pos = header.rfind("include/");
13083           if (root_pos != string::npos &&
13084               !duped_headers.insert(header.substr(root_pos + 8)).second)
13085             continue;
13086
13087           system_headers.push_back(header);
13088         }
13089       globfree(&trace_glob);
13090     }
13091
13092   // Build tracequery modules
13093   get_tracequery_modules(s, system_headers, tracequery_modules);
13094
13095   // TODO: consider other sources of tracepoint headers too, like from
13096   // a command-line parameter or some environment or .systemtaprc
13097
13098   dw = new dwflpp(s, tracequery_modules, true);
13099   return true;
13100 }
13101
13102 void
13103 tracepoint_builder::build(systemtap_session& s,
13104                           probe *base, probe_point *location,
13105                           literal_map_t const& parameters,
13106                           vector<derived_probe*>& finished_results)
13107 {
13108   if (s.runtime_mode == systemtap_session::bpf_runtime &&
13109        strverscmp(s.compatible.c_str(), "4.2") >= 0) {
13110          s.use_bpf_raw_tracepoint =
13111            (s.kernel_functions.count("bpf_raw_tracepoint_release") > 0) ||
13112            (s.kernel_functions.count("bpf_raw_tp_link_release") > 0);
13113          if (!s.use_bpf_raw_tracepoint)
13114           throw SEMANTIC_ERROR (_("SYSTEM: new BPF TRACEPOINT behavior not supported "
13115                                   "by target kernel (or use --compatible=4.1 option)"));
13116   }
13117
13118   if (!init_dw(s))
13119     return;
13120
13121   interned_string tracepoint;
13122   assert(get_param (parameters, TOK_TRACE, tracepoint));
13123
13124   tracepoint_query q(*dw, tracepoint, base, location, finished_results);
13125   unsigned results_pre = finished_results.size();
13126   dw->iterate_over_modules<base_query>(&query_module, &q);
13127   unsigned results_post = finished_results.size();
13128
13129   // Did we fail to find a match? Let's suggest something!
13130   if (results_pre == results_post)
13131     {
13132       size_t pos;
13133       string sugs = suggest_dwarf_functions(s, q.visited_modules, tracepoint);
13134       while ((pos = sugs.find("stapprobe_")) != string::npos)
13135         sugs.erase(pos, string("stapprobe_").size());
13136       if (!sugs.empty())
13137         throw SEMANTIC_ERROR (_NF("no match (similar tracepoint: %s)",
13138                                   "no match (similar tracepoints: %s)",
13139                                   sugs.find(',') == string::npos,
13140                                   sugs.c_str()));
13141     }
13142 }
13143
13144 bool
13145 sort_for_bpf(systemtap_session& s,
13146              tracepoint_derived_probe_group *t,
13147              sort_for_bpf_probe_arg_vector &v)
13148 {
13149   string tracepoint_flavor = (s.runtime_mode == systemtap_session::bpf_runtime && s.use_bpf_raw_tracepoint) ? "raw_trace/" : "trace/";
13150   if (!t)
13151     return false;
13152
13153   for (auto i = t->probes.begin(); i != t->probes.end(); ++i)
13154     {
13155       tracepoint_derived_probe *p = *i;
13156       v.push_back(std::pair<derived_probe *, std::string>
13157                   (p, tracepoint_flavor + p->tracepoint_system + "/" + p->tracepoint_name));
13158     }
13159
13160   return true;
13161 }
13162
13163 // ------------------------------------------------------------------------
13164 //  Standard tapset registry.
13165 // ------------------------------------------------------------------------
13166
13167 void
13168 register_standard_tapsets(systemtap_session & s)
13169 {
13170   register_tapset_been(s);
13171   register_tapset_mark(s);
13172   register_tapset_procfs(s);
13173   register_tapset_timers(s);
13174   register_tapset_netfilter(s);
13175   register_tapset_utrace(s);
13176   register_tapset_debuginfod(s);
13177
13178   // dwarf-based kprobe/uprobe parts
13179   dwarf_derived_probe::register_patterns(s);
13180
13181   // XXX: user-space starter set
13182   s.pattern_root->bind_num(TOK_PROCESS)
13183     ->bind_num(TOK_STATEMENT)->bind(TOK_ABSOLUTE)
13184     ->bind_privilege(pr_all)
13185     ->bind(new uprobe_builder ());
13186   s.pattern_root->bind_num(TOK_PROCESS)
13187     ->bind_num(TOK_STATEMENT)->bind(TOK_ABSOLUTE)->bind(TOK_RETURN)
13188     ->bind_privilege(pr_all)
13189     ->bind(new uprobe_builder ());
13190
13191   // kernel tracepoint probes
13192   s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_TRACE)
13193     ->bind(new tracepoint_builder());
13194
13195   // Kprobe based probe
13196   s.pattern_root->bind(TOK_KPROBE)->bind_str(TOK_FUNCTION)
13197      ->bind(new kprobe_builder());
13198   s.pattern_root->bind(TOK_KPROBE)->bind_str(TOK_FUNCTION)->bind(TOK_CALL)
13199      ->bind(new kprobe_builder());
13200   s.pattern_root->bind(TOK_KPROBE)->bind_str(TOK_MODULE)
13201      ->bind_str(TOK_FUNCTION)->bind(new kprobe_builder());
13202   s.pattern_root->bind(TOK_KPROBE)->bind_str(TOK_MODULE)
13203      ->bind_str(TOK_FUNCTION)->bind(TOK_CALL)->bind(new kprobe_builder());
13204   s.pattern_root->bind(TOK_KPROBE)->bind_str(TOK_FUNCTION)->bind(TOK_RETURN)
13205      ->bind(new kprobe_builder());
13206   s.pattern_root->bind(TOK_KPROBE)->bind_str(TOK_FUNCTION)->bind(TOK_RETURN)
13207      ->bind_num(TOK_MAXACTIVE)->bind(new kprobe_builder());
13208   s.pattern_root->bind(TOK_KPROBE)->bind_str(TOK_MODULE)
13209      ->bind_str(TOK_FUNCTION)->bind(TOK_RETURN)->bind(new kprobe_builder());
13210   s.pattern_root->bind(TOK_KPROBE)->bind_str(TOK_MODULE)
13211      ->bind_str(TOK_FUNCTION)->bind(TOK_RETURN)
13212      ->bind_num(TOK_MAXACTIVE)->bind(new kprobe_builder());
13213   s.pattern_root->bind(TOK_KPROBE)->bind_num(TOK_STATEMENT)
13214       ->bind(TOK_ABSOLUTE)->bind(new kprobe_builder());
13215
13216   //Hwbkpt based kernel probe
13217   // NB: we formerly registered the probe point types only if the kernel configuration
13218   // allowed it.  However, we get better error messages if we allow probes to resolve.
13219   s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
13220     ->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder(true));
13221   s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_HWBKPT)
13222     ->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder(true));
13223   s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
13224     ->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder(true));
13225   s.pattern_root->bind(TOK_KERNEL)->bind_str(TOK_HWBKPT)
13226     ->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder(true));
13227   s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
13228     ->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder(true));
13229   s.pattern_root->bind(TOK_KERNEL)->bind_num(TOK_HWBKPT)
13230     ->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder(true));
13231   // length supported with address only, not symbol names
13232
13233   //Hwbkpt based process probe
13234   // NB: we don't support symbol names in the probe spec (yet).
13235   s.pattern_root->bind(TOK_PROCESS)->bind_num(TOK_HWBKPT)
13236     ->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder(false));
13237   s.pattern_root->bind(TOK_PROCESS)->bind_num(TOK_HWBKPT)
13238     ->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder(false));
13239   s.pattern_root->bind(TOK_PROCESS)->bind_num(TOK_HWBKPT)
13240     ->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_WRITE)->bind(new hwbkpt_builder(false));
13241   s.pattern_root->bind(TOK_PROCESS)->bind_num(TOK_HWBKPT)
13242     ->bind_num(TOK_LENGTH)->bind(TOK_HWBKPT_RW)->bind(new hwbkpt_builder(false));
13243
13244   //perf event based probe
13245   register_tapset_perf(s);
13246   register_tapset_java(s);
13247   register_tapset_python(s);
13248 }
13249
13250
13251 vector<derived_probe_group*>
13252 all_session_groups(systemtap_session& s)
13253 {
13254   vector<derived_probe_group*> g;
13255
13256 #define DOONE(x) \
13257   if (s. x##_derived_probes) \
13258     g.push_back ((derived_probe_group*)(s. x##_derived_probes))
13259
13260   // Note that order *is* important here.  We want to make sure we
13261   // register (actually run) begin probes before any other probe type
13262   // is run.  Similarly, when unregistering probes, we want to
13263   // unregister (actually run) end probes after every other probe type
13264   // has be unregistered.  To do the latter,
13265   // c_unparser::emit_module_exit() will run this list backwards.
13266   DOONE(vma_tracker);
13267   DOONE(be);
13268   DOONE(generic_kprobe);
13269   DOONE(uprobe);
13270   DOONE(timer);
13271   DOONE(profile);
13272   DOONE(mark);
13273   DOONE(tracepoint);
13274   DOONE(hwbkpt);
13275   DOONE(perf);
13276   DOONE(hrtimer);
13277
13278   // Another "order is important" item. Python probes create synthetic
13279   // procfs probes and the python probes' emit_module_decls() needs to
13280   // be called first.
13281   DOONE(python);
13282   DOONE(procfs);
13283
13284   DOONE(netfilter);
13285
13286   // Another "order is important" item.  We want to make sure we
13287   // "register" the dummy task_finder probe group after all probe
13288   // groups that use the task_finder.
13289   DOONE(utrace);
13290   DOONE(itrace);
13291   DOONE(dynprobe);
13292   DOONE(task_finder);
13293 #undef DOONE
13294   return g;
13295 }
13296
13297 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */