/* -*- linux-c -*-
 * uprobe Functions
 * Copyright (C) 2014 Red Hat Inc.
 *
 * This file is part of systemtap, and is free software. You can
 * redistribute it and/or modify it under the terms of the GNU General
 * Public License (GPL); either version 2, or (at your option) any
 * later version.
 */

#ifndef _UPROBE_COMMON_C_
#define _UPROBE_COMMON_C_

/* NB: Because these utrace callbacks only occur before / after
   userspace instructions run, there is no concurrency control issue
   between active uprobe callbacks and these registration /
   unregistration pieces.

   We protect the stap_uprobe->spec_index value (which also serves as a
   free/busy flag) with the outer stap_uprobes_lock mutex, guarding it
   against concurrent registration / unregistration.
*/
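
/* Slot-state convention for stap_uprobes[].spec_index, as used below:
   >= 0 : busy, indexing the matching stap_uprobe_specs[] entry;
   -1   : free (with UPROBES_API_VERSION < 2, a just-unmapped uprobe slot
          additionally waits for up.kdata to become NULL before reuse);
   -2   : with UPROBES_API_VERSION < 2 only, a uretprobe slot waiting for
          urp.u.kdata to become NULL before it can be reused. */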

static int stap_uprobe_change_plus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf, unsigned long offset, unsigned long vm_flags) {
  int tfi = (stf - stap_uprobe_finders);
  int spec_index;
  /* iterate over stap_uprobe_spec[] that use this same stap_uprobe_tf */
  for (spec_index=0; spec_index<sizeof(stap_uprobe_specs)/sizeof(stap_uprobe_specs[0]); spec_index++) {
    int handled_p = 0;
    int slotted_p = 0;
    const struct stap_uprobe_spec *sups = &stap_uprobe_specs [spec_index];
    struct stap_uprobe *sup;
    pid_t sdt_sem_pid;
    int rc = 0;
    int i;
    int pci;

    if (likely(sups->tfi != tfi)) continue;
    /* skip probes with an address beyond this map event; should not
       happen unless a shlib/exec got mmapped in weirdly piecemeal */
    if (likely((vm_flags & VM_EXEC) && sups->address >= length)) continue;

    /* Found a uprobe_spec for this stap_uprobe_tf. Need to lock the
       stap_uprobes[] array to allocate a free spot, but then we can
       unlock and do the register_*probe subsequently. */

    might_sleep();
    mutex_lock (& stap_uprobes_lock);
    for (i=0; i<MAXUPROBES; i++) { /* XXX: slow linear search */
      sup = & stap_uprobes[i];

      /* register new uprobe
         We make two passes for semaphores;
         see stap_uprobe_change_semaphore_plus */

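      /* Claim this slot either because it is free (spec_index < 0), or
         because this spec has an SDT semaphore and already owns the slot
         from an earlier executable mapping: the later VM_WRITE mapping
         revisits it so the semaphore address can be computed below. */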
      if (sup->spec_index < 0 || (sups->sdt_sem_offset && vm_flags & VM_WRITE && sup->spec_index == spec_index)) {
        #if (UPROBES_API_VERSION < 2)
        /* See PR6829 comment. */
        if (sup->spec_index == -1 && sup->up.kdata != NULL) continue;
        else if (sup->spec_index == -2 && sup->urp.u.kdata != NULL) continue;
        #endif
        sup->spec_index = spec_index;
        slotted_p = 1;
        break;
      }
    }
    mutex_unlock (& stap_uprobes_lock);
    #ifdef DEBUG_UPROBES
    _stp_dbug(__FUNCTION__,__LINE__, "+uprobe spec %d idx %d process %s[%d] addr %p pp %s\n", spec_index, (slotted_p ? i : -1), tsk->comm, tsk->tgid, (void*)(relocation+sups->address), sups->probe->pp);
    #endif

    /* NB: check for user-module build-id only if we have a pathname
       at all; for a process(PID#).* probe, we may not.  If at some
       point we map process(PID#) to process("/proc/PID#/exe"), we'll
       get a pathname. */
    if (stf->pathname)
      if ((rc = _stp_usermodule_check(tsk, stf->pathname, relocation)))
        return rc;

    /* Here, slotted_p implies that `i' points to the single
       stap_uprobes[] element that has been slotted in for registration
       or unregistration processing.  !slotted_p implies that the table
       was full (registration; MAXUPROBES) or that no matching entry was
       found (unregistration; should not happen). */

    sdt_sem_pid = (sups->return_p ? sup->urp.u.pid : sup->up.pid);
    if (sups->sdt_sem_offset && (sdt_sem_pid != tsk->tgid || sup->sdt_sem_address == 0)) {
      /* If the probe is in an ET_EXEC binary, then the sdt_sem_offset already
       * is a real address.  But stap_uprobe_process_found calls us in this
       * case with relocation=offset=0, so we don't have to worry about it. */
      sup->sdt_sem_address = (relocation - offset) + sups->sdt_sem_offset;
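      /* The arithmetic, with illustrative (made-up) values: if the writable
         segment is mapped at relocation 0x7f0898846000 from file offset
         0x33c000, a link-time sdt_sem_offset of 0x33c010 gives
         0x7f0898846000 - 0x33c000 + 0x33c010 = 0x7f0898846010. */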
    } /* sdt_sem_offset */

    for (pci=0; pci < sups->perf_counters_dim; pci++) {
      if ((sups->perf_counters)[pci] > -1)
        _stp_perf_read_init ((sups->perf_counters)[pci], tsk);
    }

    if (slotted_p) {
      struct stap_uprobe *sup = & stap_uprobes[i];
      if (sups->return_p) {
        sup->urp.u.pid = tsk->tgid;
        sup->urp.u.vaddr = relocation + sups->address;
        sup->urp.handler = &enter_uretprobe_probe;
        rc = register_uretprobe (& sup->urp);
      } else {
        sup->up.pid = tsk->tgid;
        sup->up.vaddr = relocation + sups->address;
        sup->up.handler = &enter_uprobe_probe;
        rc = register_uprobe (& sup->up);
      }

      /* The u*probe failed to register.  However, if we got EEXIST,
       * that means that the u*probe is already there, so just ignore
       * the error.  This could happen if CLONE_THREAD or CLONE_VM was
       * used. */
      if (rc != 0 && rc != -EEXIST) {
        _stp_warn ("u*probe failed %s[%d] '%s' addr %p rc %d\n", tsk->comm, tsk->tgid, sups->probe->pp, (void*)(relocation + sups->address), rc);
        /* NB: we need to release this slot,
           so we need to borrow the mutex temporarily. */
        might_sleep();
        mutex_lock (& stap_uprobes_lock);
        sup->spec_index = -1;
        sup->sdt_sem_address = 0;
        mutex_unlock (& stap_uprobes_lock);
      } else {
        handled_p = 1;
      }
    }
    /* NB: handled_p implies slotted_p */
    if (unlikely (! handled_p)) {
      #ifdef STP_TIMING
      atomic_inc (skipped_count_uprobe_reg());
      #endif
      /* NB: duplicates common_entryfn_epilogue,
         but then this is not a probe entry fn epilogue. */
#ifndef STAP_SUPPRESS_HANDLER_ERRORS
      if (unlikely (atomic_inc_return (skipped_count()) > MAXSKIPPED)) {
        if (unlikely (pseudo_atomic_cmpxchg(session_state(), STAP_SESSION_RUNNING, STAP_SESSION_ERROR) == STAP_SESSION_RUNNING))
          _stp_error ("Skipped too many probes, check MAXSKIPPED or try again with stap -t for more details.");
      }
#endif
    }
  } /* close iteration over stap_uprobe_spec[] */
  return 0; /* XXX: or rc? */
}

static int stap_uprobe_change_semaphore_plus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf) {
  int tfi = (stf - stap_uprobe_finders);
  int spec_index;
  int rc = 0;
  struct stap_uprobe *sup;
  int i;

  /* We make two passes for semaphores.
     The first pass, stap_uprobe_change_plus, calculates the address of the
     semaphore.  If the probe is in a .so, we calculate the
     address when the initial mmap maps the entire solib, e.g.
       7f089885a000-7f089885b000  rw-p  libtcl.so
     A subsequent mmap maps in the writable segment where the
     semaphore control variable lives, e.g.
       7f089850d000-7f0898647000  r-xp  libtcl.so
       7f0898647000-7f0898846000  ---p  libtcl.so
       7f0898846000-7f089885b000  rw-p  libtcl.so
     The second pass, stap_uprobe_change_semaphore_plus, sets the semaphore.
     If the probe is in a .so this will be when the writable segment of the .so
     is mapped in.  If the task changes, then recalculate the address.
  */

  for (i=0; i<MAXUPROBES; i++) { /* XXX: slow linear search */
    sup = & stap_uprobes[i];
    if (sup->spec_index == -1) continue;
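    /* Skip entries whose semaphore address is already known but belongs to
       another process or lies outside the range just mapped; only semaphores
       within [relocation, relocation+length) of this task are updated. */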
    if (sup->sdt_sem_address != 0 && !(sup->up.pid == tsk->tgid && sup->sdt_sem_address >= relocation && sup->sdt_sem_address < relocation+length)) continue;
    if (sup->sdt_sem_address) {
      unsigned short sdt_semaphore = 0; /* NB: fixed size */
      if ((rc = get_user (sdt_semaphore, (unsigned short __user*) sup->sdt_sem_address)) == 0) {
        sdt_semaphore ++;
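        /* The <sys/sdt.h>-style semaphore is a 16-bit counter in the target
           process's writable data; the probed program typically prepares
           probe arguments only while the counter is nonzero, so incrementing
           it here enables the marker for this consumer. */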
        #ifdef DEBUG_UPROBES
        {
          const struct stap_uprobe_spec *sups = &stap_uprobe_specs [sup->spec_index];
          _stp_dbug(__FUNCTION__,__LINE__, "+semaphore %#x @ %#lx spec %d idx %d task %d\n", sdt_semaphore, sup->sdt_sem_address, sup->spec_index, i, tsk->tgid);
        }
        #endif
        rc = put_user (sdt_semaphore, (unsigned short __user*) sup->sdt_sem_address);
        /* XXX: need to analyze possibility of race condition */
      }
    }
  }
  return rc;
}

/* Removing/unmapping a uprobe is simpler than adding one (in the
   _plus function above).  We need not care about stap_uprobe_finders
   or anything, we just scan through stap_uprobes[] for a live probe
   within the given address range, and kill it. */
static int stap_uprobe_change_minus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf) {
  int i;

  /* NB: it's not an error for us not to find a live uprobe within the
     given range.  We might have received a callback for a part of a
     shlib that was unmapped and unprobed. */

  for (i=0; i<MAXUPROBES; i++) { /* XXX: slow linear search */
    struct stap_uprobe *sup = & stap_uprobes[i];
    struct stap_uprobe_spec *sups;
    if (sup->spec_index < 0) continue; /* skip free uprobes slot */
    sups = (struct stap_uprobe_spec*) & stap_uprobe_specs[sup->spec_index];
    might_sleep();
    mutex_lock (& stap_uprobes_lock);

    /* PR6829, PR9940:
       Here we're unregistering for one of two reasons:
       1. the process image is going away (or gone) due to exit or exec; or
       2. the vma containing the probepoint has been unmapped.
       In case 1, it's sort of a nop, because uprobes will notice the event
       and dispose of the probes eventually, if it hasn't already.  But by
       calling unmap_u[ret]probe() ourselves, we free up sup right away.

       In both cases, we must use unmap_u[ret]probe instead of
       unregister_u[ret]probe, so uprobes knows not to try to restore the
       original opcode.
    */

    /* URETPROBE */
    if (sups->return_p && sup->urp.u.pid == tsk->tgid && sup->urp.u.vaddr >= relocation && sup->urp.u.vaddr < relocation+length) { /* in range */

      #ifdef DEBUG_UPROBES
      _stp_dbug (__FUNCTION__,__LINE__, "-uretprobe spec %d idx %d process %s[%d] addr %p pp %s\n", sup->spec_index, i, tsk->comm, tsk->tgid, (void*) sup->urp.u.vaddr, sups->probe->pp);
      #endif
      #if (UPROBES_API_VERSION >= 2)
      unmap_uretprobe (& sup->urp);
      sup->spec_index = -1;
      sup->sdt_sem_address = 0;
      #else
      /* Uprobes lacks unmap_uretprobe.  Before reusing sup, we must wait
         until uprobes turns loose of the uretprobe on its own, as indicated
         by uretprobe.kdata = NULL. */
      sup->spec_index = -2;
      #endif
    /* UPROBE */
    } else if (!sups->return_p && sup->up.pid == tsk->tgid && sup->up.vaddr >= relocation && sup->up.vaddr < relocation+length) { /* in range */

      #ifdef DEBUG_UPROBES
      _stp_dbug (__FUNCTION__,__LINE__, "-uprobe spec %d idx %d process %s[%d] reloc %p pp %s\n", sup->spec_index, i, tsk->comm, tsk->tgid, (void*) sup->up.vaddr, sups->probe->pp);
      #endif
      #if (UPROBES_API_VERSION >= 2)
      unmap_uprobe (& sup->up);
      sup->spec_index = -1;
      sup->sdt_sem_address = 0;
      #else
      /* Uprobes lacks unmap_uprobe.  Before reusing sup, we must wait
         until uprobes turns loose of the uprobe on its own, as indicated
         by uprobe.kdata = NULL. */
      sup->spec_index = -1;
      sup->sdt_sem_address = 0;
      #endif
      /* PR10655: we don't need to fidget with the ENABLED semaphore either,
         as the process is gone, buh-bye, toodaloo, au revoir, see ya later! */
    }
    mutex_unlock (& stap_uprobes_lock);
  } /* close iteration over stap_uprobes[] */
  return 0; /* XXX: or !handled_p */
}

/* The task_finder_callback we use for ET_EXEC targets.
   We used to perform uprobe insertion/removal here, but not any more.
   (PR10524) */
static int stap_uprobe_process_found (struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p) {
  const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
  if (! process_p) return 0; /* ignore threads */
  dbug_task_vma(1, "%cproc pid %d stf %p %p path %s\n", register_p?'+':'-', tsk->tgid, tgt, stf, stf->pathname);
  /* ET_EXEC events are like shlib events, but with 0 relocation bases */
  if (register_p) {
    int rc = stap_uprobe_change_plus (tsk, 0, TASK_SIZE, stf, 0, 0);
    stap_uprobe_change_semaphore_plus (tsk, 0, TASK_SIZE, stf);
    return rc;
  } else
    return stap_uprobe_change_minus (tsk, 0, TASK_SIZE, stf);
}

/* The task_finder_mmap_callback */
static int
stap_uprobe_mmap_found (struct stap_task_finder_target *tgt,
                        struct task_struct *tsk, char *path,
                        struct dentry *dentry, unsigned long addr,
                        unsigned long length, unsigned long offset,
                        unsigned long vm_flags)
{
  int rc = 0;
  const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
  /* 1 - shared libraries' executable segments load from offset 0
   *   - ld.so convention offset != 0 is now allowed
   *     so stap_uprobe_change_plus can set a semaphore,
   *     i.e. a static extern, in a shared object
   * 2 - the shared library we're interested in
   * 3 - mapping should be executable or writable (for semaphore in .so)
   *     NB: or both, on kernels that lack noexec mapping
   */
  if (path == NULL || strcmp (path, stf->pathname))
    return 0;

  /* Check non-writable, executable sections for probes. */
  if ((vm_flags & VM_EXEC) && !(vm_flags & VM_WRITE)) {
    dbug_task_vma (1,
                   "+mmap X pid %d path %s addr %p length %u offset %p stf %p %p path %s\n",
                   tsk->tgid, path, (void *) addr, (unsigned)length, (void*) offset,
                   tgt, stf, stf->pathname);
    rc = stap_uprobe_change_plus (tsk, addr, length, stf, offset, vm_flags);
  }

  /* Check writable sections for semaphores.
   * NB: They may have also been executable for the check above, if we're
   *     running a kernel that lacks noexec mappings.  So long as there's
   *     no error (rc == 0), we need to look for semaphores too.
   */
  if ((rc == 0) && (vm_flags & VM_WRITE)) {
    dbug_task_vma (1,
                   "+mmap W pid %d path %s addr %p length %u offset %p stf %p %p path %s\n",
                   tsk->tgid, path, (void *) addr, (unsigned)length, (void*) offset,
                   tgt, stf, stf->pathname);
    rc = stap_uprobe_change_semaphore_plus (tsk, addr, length, stf);
  }

  return rc;
}

/* The task_finder_munmap_callback */
static int stap_uprobe_munmap_found (struct stap_task_finder_target *tgt, struct task_struct *tsk, unsigned long addr, unsigned long length) {
  const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
  dbug_task_vma (1, "-mmap pid %d addr %p length %lu stf %p %p path %s\n", tsk->tgid, (void *) addr, length, tgt, stf, stf->pathname);
  return stap_uprobe_change_minus (tsk, addr, length, stf);
}

/* The task_finder_callback we use for ET_DYN targets.
   This just forces an unmap of everything as the process exits.
   (PR11151) */
static int stap_uprobe_process_munmap (struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p) {
  const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
  if (! process_p) return 0; /* ignore threads */
  dbug_task_vma (1, "%cproc pid %d stf %p %p path %s\n", register_p?'+':'-', tsk->tgid, tgt, stf, stf->pathname);
  /* Covering 0->TASK_SIZE means "unmap everything" */
  if (!register_p)
    return stap_uprobe_change_minus (tsk, 0, TASK_SIZE, stf);
  return 0;
}

#endif /* _UPROBE_COMMON_C_ */