This is the mail archive of the elfutils-devel@sourceware.org mailing list for the elfutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [patch v7 3/5] x86* unwinder: libdwfl/


Hi Jan,

Sorry, still skipped some parts. Still need to go through
dwfl_frame_core.c and dwfl_frame_pid.c. But I did review all the rest.

Again mostly nitpicks. The only real issue I would like you to
think/comment on is around the dwfl_thread_next comments. Everything
else looks fine. I did have some comments here and there, but nothing I
think must be changed, just would be good to see if my comments make
sense, even if they are nitpicks that aren't worth it to change right
now. Except for the dwfl_thread_next thing I believe the public
interface is abstract/good enough to make changes to implementation
details later if we wish.

I'll comment on dwfl_frame_core.c and dwfl_frame_pid.c later, but a
quick look didn't immediately show anything obviously bad there either.

On Sun, 2013-10-13 at 12:24 +0200, Jan Kratochvil wrote:
> On Fri, 11 Oct 2013 22:47:14 +0200, Jan Kratochvil wrote:
> libdw/
> 2013-06-23  Jan Kratochvil  <jan.kratochvil@redhat.com>
> 
> 	* cfi.h (struct Dwarf_Frame_s): Make the comment more specific.
> 	* libdw.map (ELFUTILS_0.156): Add dwfl_attach_state, dwfl_pid,
> 	dwfl_thread_dwfl, dwfl_thread_tid, dwfl_frame_thread,
> 	dwfl_thread_state_registers, dwfl_thread_state_register_pc,
> 	dwfl_next_thread, dwfl_thread_getframes and dwfl_frame_pc.

This part looks fine.

> libdwfl/
> 2013-10-13  Jan Kratochvil  <jan.kratochvil@redhat.com>
> 	    Mark Wielaard  <mjw@redhat.com>

That is a bit too much credit :) You wrote 99.9% of this.
I only nitpick.

> 	* Makefile.am (AM_CPPFLAGS): Add ../libasm.

Why?

> 	(libdwfl_a_SOURCES): dwfl_frame.c, dwfl_frame_unwind.c,
> 	dwfl_frame_pc.c, dwfl_frame_pid.c, dwfl_frame_core.c and
> 	dwfl_frame_regs.c.
> 	* core-file.c (dwfl_core_file_report): Call
> 	__libdwfl_attach_state_for_core.
> 	* dwfl_end.c (dwfl_end): Call __libdwfl_process_free.
> 	* dwfl_frame.c: New file.
> 	* dwfl_frame_unwind.c: New file.
> 	* dwfl_frame_pc.c: New file.
> 	* dwfl_frame_pid.c: New file.
> 	* dwfl_frame_core.c: New file.
> 	* dwfl_frame_regs.c: New file.
> 	* libdwfl.h (Dwfl_Thread, Dwfl_Frame): New typedefs.
> 	(dwfl_core_file_report, dwfl_linux_proc_report): Extend comments.
> 	(Dwfl_Thread_Callbacks): New definition.
> 	(struct ebl, dwfl_attach_state, dwfl_pid, dwfl_thread_dwfl)
> 	(dwfl_thread_tid, dwfl_frame_thread, dwfl_thread_state_registers)
> 	(dwfl_thread_state_register_pc, dwfl_next_thread, dwfl_thread_getframes)
> 	(dwfl_frame_pc): New declarations.
> 	* libdwflP.h: Include libeblP.h.
> 	(Dwfl_Process): New typedef.
> 	(LIBEBL_BAD, CORE_MISSING, INVALID_REGISTER, PROCESS_MEMORY_READ)
> 	(PROCESS_NO_ARCH, PARSE_PROC, NO_THREAD, INVALID_DWARF)
> 	(UNSUPPORTED_DWARF, NEXT_THREAD_FAIL, ATTACH_STATE_CONFLICT)
> 	(NO_ATTACH_STATE): New DWFL_ERROR entries.
> 	(struct Dwfl): New entry process.
> 	(struct Dwfl_Process, struct Dwfl_Thread, struct Dwfl_Frame)
> 	(dwfl_frame_reg_get, dwfl_frame_reg_set): New definitions.
> 	(__libdwfl_process_free, __libdwfl_frame_unwind)
> 	(__libdwfl_attach_state_for_pid, __libdwfl_attach_state_for_core)
> 	(__libdwfl_segment_start, __libdwfl_segment_end): New declarations.
> 	(dwfl_attach_state, dwfl_pid, dwfl_thread_dwfl, dwfl_thread_tid)
> 	(dwfl_frame_thread, dwfl_thread_state_registers)
> 	(dwfl_thread_state_register_pc, dwfl_next_thread, dwfl_thread_getframes)
> 	(dwfl_frame_pc): New INTDECL entries.
> 	* linux-proc-maps.c (dwfl_linux_proc_report): Call
> 	__libdwfl_attach_state_for_pid.
> 	* segment.c (segment_start): Rename to ...
> 	(__libdwfl_segment_start): ... here and make it internal_function.
> 	(segment_end): Rename to ...
> 	(__libdwfl_segment_end): ... here and make it internal_function.
> 	(reify_segments, dwfl_report_segment): Rename them at the callers.
> 
> Signed-off-by: Jan Kratochvil <jan.kratochvil@redhat.com>

> diff --git a/libdwfl/core-file.c b/libdwfl/core-file.c
> @@ -521,6 +521,14 @@ dwfl_core_file_report (Dwfl *dwfl, Elf *elf)
>    /* We return the number of modules we found if we found any.
>       If we found none, we return -1 instead of 0 if there was an
>       error rather than just nothing found.  */
> -  return sniffed || listed >= 0 ? listed + sniffed : listed;
> +  int retval = sniffed || listed >= 0 ? listed + sniffed : listed;
> +  if (retval > 0)
> +    {
> +      /* Possible error is ignored, DWFL still may be useful for non-unwinding
> +	 operations.  */
> +      __libdwfl_attach_state_for_core (dwfl, elf);
> +    }
> +
> +  return retval;
>  }

OK.

> diff --git a/libdwfl/dwfl_end.c b/libdwfl/dwfl_end.c
> @@ -34,6 +34,9 @@ dwfl_end (Dwfl *dwfl)
>    if (dwfl == NULL)
>      return;
>  
> +  if (dwfl->process)
> +    __libdwfl_process_free (dwfl->process);

OK.

> +++ b/libdwfl/dwfl_frame.c
>
> +#ifndef MIN
> +# define MIN(a, b) ((a) < (b) ? (a) : (b))
> +#endif

Not used in this file.

> +/* Set STATE->pc_set from STATE->regs according to the backend.  Return true on
> +   success, false on error.  */
> +static bool
> +state_fetch_pc (Dwfl_Frame *state)
> +{
> +  switch (state->pc_state)
> +    {
> +    case DWFL_FRAME_STATE_PC_SET:
> +      return true;
> +    case DWFL_FRAME_STATE_PC_UNDEFINED:
> +      abort ();
> +    case DWFL_FRAME_STATE_ERROR:;

trailing ";"

> +      Ebl *ebl = state->thread->process->ebl;
> +      Dwarf_CIE abi_info;
> +      if (ebl_abi_cfi (ebl, &abi_info) != 0)
> +	{
> +	  __libdwfl_seterrno (DWFL_E_LIBEBL);
> +	  return false;
> +	}
> +      unsigned ra = abi_info.return_address_register;
> +      /* dwarf_frame_state_reg_is_set is not applied here.  */
> +      if (ra >= ebl_frame_nregs (ebl))
> +	{
> +	  __libdwfl_seterrno (DWFL_E_LIBEBL_BAD);
> +	  return false;
> +	}
> +      state->pc = state->regs[ra];
> +      state->pc_state = DWFL_FRAME_STATE_PC_SET;
> +      return true;
> +    }
> +  abort ();
> +}

I find the DWFL_FRAME_STATE_ERROR state somewhat confusing.
Would it make sense to rename that to DWFL_FRAME_STATE_UNSET?
Or have both an UNSET and an ERROR state?

In which cases do frames exist that have an unset/error state?

> +/* Do not call it on your own, to be used by thread_* functions only.  */
> +
> +static void
> +state_free (Dwfl_Frame *state)
> +{
> +  Dwfl_Thread *thread = state->thread;
> +  assert (thread->unwound == state);
> +  thread->unwound = state->unwound;
> +  free (state);
> +}

This is always used as:

  while (thread->unwound)
    state_free (thread->unwound);

So just fold that loop into the function and call it thread_state_free
or something.

> +/* Do not call it on your own, to be used by thread_* functions only.  */
> +
> +static Dwfl_Frame *
> +state_alloc (Dwfl_Thread *thread)
> +{
> +  assert (thread->unwound == NULL);
> +  Ebl *ebl = thread->process->ebl;
> +  size_t nregs = ebl_frame_nregs (ebl);
> +  if (nregs == 0)
> +    return NULL;
> +  assert (nregs < sizeof (((Dwfl_Frame *) NULL)->regs_set) * 8);
> +  Dwfl_Frame *state = malloc (sizeof (*state) + sizeof (*state->regs) * nregs);
> +  if (state == NULL)
> +    return NULL;
> +  state->thread = thread;
> +  state->signal_frame = false;
> +  state->pc_state = DWFL_FRAME_STATE_ERROR;
> +  memset (state->regs_set, 0, sizeof (state->regs_set));
> +  thread->unwound = state;
> +  state->unwound = NULL;
> +  return state;
> +}

Comment might be a bit more specific. Only used at the start of
dwfl_thread_getframes. And maybe rename it thread_state_alloc if you
rename the above to show they should match up.

> +/* Free and unlink THREAD from the internal lists.  PREV_THREAD must be NULL if
> +   THREAD was the first one or PREV_THREAD must be the preceding thread.  */
> +static void
> +thread_free (Dwfl_Thread *thread, Dwfl_Thread *prev_thread)
> +{
> +  Dwfl_Process *process = thread->process;
> +  assert (prev_thread == NULL || prev_thread->process == process);
> +  assert (prev_thread != NULL || process->thread == thread);
> +  assert (prev_thread == NULL || prev_thread->next == thread);
> +  if (thread->thread_detach_needed)
> +    {
> +      assert (thread->tid > 0);
> +      if (process->callbacks->thread_detach)
> +	process->callbacks->thread_detach (thread, thread->callbacks_arg);
> +    }

I think you don't need the thread_detach_needed flag in thread. This
function is only called from __libdwfl_process_free or dwfl_next_thread
at which point the thread must already have been detached. The attached
state is only true within dwfl_thread_getframes after
set_initial_registers has been called, and when needed thread_detach is
already called in that function itself.

> +  while (thread->unwound)
> +    state_free (thread->unwound);
> +  if (prev_thread == NULL)
> +    process->thread = thread->next;
> +  else
> +    prev_thread->next = thread->next;
> +  free (thread);
> +}
> +
> +/* Allocate new Dwfl_Thread and link it to PROCESS.  PREV_THREAD must be NULL
> +   if this is the first thread for PROCESS, otherwise PREV_THREAD must be the
> +   last thread of PROCESS.  */
> +static Dwfl_Thread *
> +thread_alloc (Dwfl_Process *process, Dwfl_Thread *prev_thread)
> +{
> +  assert (prev_thread == NULL || prev_thread->process == process);
> +  assert (prev_thread != NULL || process->thread == NULL);
> +  assert (prev_thread == NULL || prev_thread->next == NULL);
> +  Dwfl_Thread *thread = malloc (sizeof (*thread));
> +  if (thread == NULL)
> +    return NULL;
> +  thread->process = process;
> +  thread->unwound = NULL;
> +  thread->tid = 0;
> +  thread->next = NULL;
> +  if (prev_thread == NULL)
> +    process->thread = thread;
> +  else
> +    prev_thread->next = thread;
> +  return thread;
> +}

OK. So process->thread is always the first thread (or NULL) and
prev_thread when this function is called will be the last(est)_thread.

> +void
> +internal_function
> +__libdwfl_process_free (Dwfl_Process *process)
> +{
> +  Dwfl *dwfl = process->dwfl;
> +  if (process->callbacks->detach != NULL)
> +    process->callbacks->detach (dwfl, process->callbacks_arg);
> +  while (process->thread)
> +    thread_free (process->thread, NULL);
> +  assert (dwfl->process == process);
> +  dwfl->process = NULL;
> +  if (process->ebl_close)
> +    ebl_closebackend (process->ebl);
> +  free (process);
> +}

OK. Had to think for a sec why the while (process->thread) loop worked,
even after having just read thread_free...

> +/* Allocate new Dwfl_Process for DWFL.  */
> +static void
> +process_alloc (Dwfl *dwfl)
> +{
> +  Dwfl_Process *process = malloc (sizeof (*process));
> +  if (process == NULL)
> +    return;
> +  process->dwfl = dwfl;
> +  process->thread = NULL;
> +  dwfl->process = process;
> +}

I think this would be slightly clearer if it returned the Dwfl_Process
pointer (or NULL on failure). But not a big deal.

> +bool
> +dwfl_attach_state (Dwfl *dwfl, int machine, pid_t pid,
> +		   const Dwfl_Thread_Callbacks *thread_callbacks, void *arg)
> +{
> +  if (thread_callbacks == NULL || thread_callbacks->next_thread == NULL
> +      || thread_callbacks->set_initial_registers == NULL)
> +    {
> +      __libdwfl_seterrno (DWFL_E_UNKNOWN_ERROR);
> +      return false;
> +    }

That is not a great error. Maybe we need DWFL_E_INVALID_ARGUMENT?

> +  if (dwfl->process != NULL)
> +    {
> +      __libdwfl_seterrno (DWFL_E_ATTACH_STATE_CONFLICT);
> +      return false;
> +    }
> +  Ebl *ebl;
> +  bool ebl_close;
> +  if (machine != EM_NONE)
> +    {
> +      ebl = ebl_openbackend_machine (machine);
> +      ebl_close = true;
> +    }
> +  else
> +    {
> +      for (Dwfl_Module *mod = dwfl->modulelist; mod != NULL; mod = mod->next)
> +	{
> +	  Dwfl_Error error = __libdwfl_module_getebl (mod);
> +	  if (error != DWFL_E_NOERROR)
> +	    continue;
> +	  ebl = mod->ebl;
> +	  break;
> +	}
> +      ebl_close = false;
> +    }
> +  if (ebl == NULL)
> +    {
> +      /* Not identified EBL from any of the modules.  */
> +      __libdwfl_seterrno (DWFL_E_PROCESS_NO_ARCH);
> +      return false;
> +    }
> +  process_alloc (dwfl);
> +  Dwfl_Process *process = dwfl->process;
> +  if (process == NULL)
> +    {
> +      if (ebl_close)
> +	ebl_closebackend (ebl);
> +      __libdwfl_seterrno (DWFL_E_NOMEM);
> +      return false;
> +    }
> +  process->ebl = ebl;
> +  process->ebl_close = ebl_close;
> +  process->pid = pid;
> +  process->callbacks = thread_callbacks;
> +  process->callbacks_arg = arg;
> +  return true;
> +}
> +INTDEF(dwfl_attach_state)

OK.

> +pid_t
> +dwfl_pid (Dwfl *dwfl)
> +{
> +  if (dwfl->process == NULL)
> +    {
> +      __libdwfl_seterrno (DWFL_E_NO_ATTACH_STATE);
> +      return -1;
> +    }
> +  return dwfl->process->pid;
> +}
> +INTDEF(dwfl_pid)

Setting dwfl_errno seems a little redundant since there is no other
reason this call can fail. Thought it is harmless I guess.

> +Dwfl *
> +dwfl_thread_dwfl (Dwfl_Thread *thread)
> +{
> +  return thread->process->dwfl;
> +}
> +INTDEF(dwfl_thread_dwfl)
> +
> +pid_t
> +dwfl_thread_tid (Dwfl_Thread *thread)
> +{
> +  return thread->tid;
> +}
> +INTDEF(dwfl_thread_tid)
> +
> +Dwfl_Thread *
> +dwfl_frame_thread (Dwfl_Frame *state)
> +{
> +  return state->thread;
> +}
> +INTDEF(dwfl_frame_thread)
> +
> +Dwfl_Thread *
> +dwfl_next_thread (Dwfl *dwfl, Dwfl_Thread *prev_thread)
> +{
> +  assert (prev_thread == NULL || prev_thread->process->dwfl == dwfl);

Instead of an assert maybe another candidate for
DWFL_E_INVALID_ARGUMENT?

> +  if (prev_thread != NULL && prev_thread->next != NULL)
> +    return prev_thread->next;
> +  Dwfl_Process *process = dwfl->process;
> +  if (process == NULL)
> +    {
> +      __libdwfl_seterrno (DWFL_E_NO_ATTACH_STATE);
> +      return NULL;
> +    }

hohum, so someone is using a Dwfl_Thread * after calling dwfl_end?
Or is there any other way this could happen? Here maybe an assert might
be in order instead of returning an error?

> +  Dwfl_Thread *nthread = thread_alloc (process, prev_thread);

Wait, don't we have to check prev_thread == NULL && process->thread !=
NULL first?

> +  if (nthread == NULL)
> +    {
> +      __libdwfl_seterrno (DWFL_E_NOMEM);
> +      return NULL;
> +    }
> +  nthread->tid = process->callbacks->next_thread (dwfl, nthread,
> +						  process->callbacks_arg,
> +						  &nthread->callbacks_arg);
> +  if (nthread->tid < 0)
> +    {
> +      thread_free (nthread, prev_thread);
> +      __libdwfl_seterrno (DWFL_E_NEXT_THREAD_FAIL);
> +      return NULL;
> +    }

Pondering whether the dwfl_errno can't already be set by the next_thread
callback. If so, we are overriding some potentially useful error message
with something fairly generic.

> +  if (nthread->tid == 0)
> +    {
> +      thread_free (nthread, prev_thread);
> +      __libdwfl_seterrno (DWFL_E_NOERROR);
> +      return NULL;
> +    }
> +  return nthread;
> +}
> +INTDEF(dwfl_next_thread)

One issue here seems to be that you cannot easily reread or rediscover
all threads of a process. Once you walked through all current threads
that seems to be it. I think we might have to allow "restarting" the
thread list by providing a NULL prev_thread a second time. That would
"invalidate" all current threads attached and return "fresh new
threads". Does that make sense?

> +int
> +dwfl_thread_getframes (Dwfl_Thread *thread,
> +		       int (*callback) (Dwfl_Frame *state, void *arg),
> +		       void *arg)
> +{
> +  assert (thread->unwound == NULL);

What makes sure this is always true? See below, are we cleaning up on
success or only on error?

> +  if (state_alloc (thread) == NULL)
> +    {
> +      __libdwfl_seterrno (DWFL_E_NOMEM);
> +      return -1;
> +    }
> +  Dwfl_Process *process = thread->process;
> +  if (! process->callbacks->set_initial_registers (thread,
> +						   thread->callbacks_arg))
> +    {
> +      while (thread->unwound)
> +	state_free (thread->unwound);
> +      return -1;
> +    }

Make sure we document that if set_initial_registers returns false, we
don't call thread_detach.

> +  thread->thread_detach_needed = true;

I don't think you need this to keep track of when to call detach_thread,
and it makes the code less readable IMHO (there now is always an assert
that checks it is true). See also the comment above.

> +  if (! state_fetch_pc (thread->unwound))
> +    {
> +      assert (thread->thread_detach_needed);
> +      if (process->callbacks->thread_detach)
> +	process->callbacks->thread_detach (thread, thread->callbacks_arg);
> +      thread->thread_detach_needed = false;
> +      while (thread->unwound)
> +	state_free (thread->unwound);
> +      return -1;
> +    }
> +
> +  Dwfl_Frame *state = thread->unwound;
> +  do
> +    {
> +      int err = callback (state, arg);
> +      if (err != DWARF_CB_OK)
> +	{
> +	  assert (thread->thread_detach_needed);
> +	  if (process->callbacks->thread_detach)
> +	    process->callbacks->thread_detach (thread, thread->callbacks_arg);
> +	  thread->thread_detach_needed = false;
> +	  while (thread->unwound)
> +	    state_free (thread->unwound);
> +	  return err;
> +	}
> +      __libdwfl_frame_unwind (state);
> +      state = state->unwound;
> +    }
> +  while (state && state->pc_state == DWFL_FRAME_STATE_PC_SET);

We cleaned up the frames before returning err, shouldn't we also here?

> +  Dwfl_Error err = dwfl_errno ();
> +  assert (thread->thread_detach_needed);
> +  if (process->callbacks->thread_detach)
> +    process->callbacks->thread_detach (thread, thread->callbacks_arg);
> +  thread->thread_detach_needed = false;
> +  if (state == NULL || state->pc_state == DWFL_FRAME_STATE_ERROR)
> +    {
> +      __libdwfl_seterrno (err);
> +      return -1;
> +    }
> +  assert (state->pc_state == DWFL_FRAME_STATE_PC_UNDEFINED);

Why this assert?

> +  return 0;
> +}
> +INTDEF(dwfl_thread_getframes)

[... dwfl_frame_core.c ...]

> diff --git a/libdwfl/dwfl_frame_pc.c b/libdwfl/dwfl_frame_pc.c
> +bool
> +dwfl_frame_pc (Dwfl_Frame *state, Dwarf_Addr *pc, bool *isactivation)
> +{
> +  assert (state->pc_state == DWFL_FRAME_STATE_PC_SET);
> +  *pc = state->pc;
> +  if (isactivation)
> +    {
> +      /* Bottom frame?  */
> +      if (state == state->thread->unwound)
> +	*isactivation = true;
> +      /* *ISACTIVATION is logical or of current and previous frame state.  */
> +      else if (state->signal_frame)
> +	*isactivation = true;
> +      else
> +	{
> +	  /* Not affected by unsuccessfully unwound frame.  */
> +	  __libdwfl_frame_unwind (state);
> +	  if (state->unwound == NULL
> +	      || state->unwound->pc_state != DWFL_FRAME_STATE_PC_SET)
> +	    *isactivation = false;
> +	  else
> +	    *isactivation = state->unwound->signal_frame;
> +	}

I don't understand this last else block part where isactivation depends
on the unwound state. Could you add a comment explaining or a reference
to some document that defines this part?

> +    }
> +  return true;
> +}
> +INTDEF (dwfl_frame_pc)

[... dwfl_frame_pid.c ...]

> diff --git a/libdwfl/dwfl_frame_regs.c b/libdwfl/dwfl_frame_regs.c
> +bool
> +dwfl_thread_state_registers (Dwfl_Thread *thread, const int firstreg,
> +			     unsigned nregs, const Dwarf_Word *regs)
> +{
> +  Dwfl_Frame *state = thread->unwound;
> +  assert (state && state->unwound == NULL);
> +  for (unsigned regno = firstreg; regno < firstreg + nregs; regno++)
> +    if (! dwfl_frame_reg_set (state, regno, regs[regno - firstreg]))
> +      {
> +	__libdwfl_seterrno (DWFL_E_INVALID_REGISTER);
> +	return false;
> +      }
> +  return true;
> +}
> +INTDEF(dwfl_thread_state_registers)
> +
> +void
> +dwfl_thread_state_register_pc (Dwfl_Thread *thread, Dwarf_Word pc)
> +{
> +  Dwfl_Frame *state = thread->unwound;
> +  assert (state && state->unwound == NULL);
> +  state->pc = pc;
> +  state->pc_state = DWFL_FRAME_STATE_PC_SET;
> +}
> +INTDEF(dwfl_thread_state_register_pc)

OK.

> diff --git a/libdwfl/dwfl_frame_unwind.c b/libdwfl/dwfl_frame_unwind.c

Nitpick. Maybe rename to just frame_unwind.c or libdwfl_frame_unwind.c
to indicate that it doesn't contain any public functions?

> +#ifndef MAX
> +# define MAX(a, b) ((a) > (b) ? (a) : (b))
> +#endif
> +
> +static bool
> +state_get_reg (Dwfl_Frame *state, unsigned regno, Dwarf_Addr *val)
> +{
> +  if (! dwfl_frame_reg_get (state, regno, val))
> +    {
> +      __libdwfl_seterrno (DWFL_E_INVALID_REGISTER);
> +      return false;
> +    }
> +  return true;
> +}

OK, this is because dwfl_frame_reg_get doesn't set dwfl_errno.

> +static int
> +bra_compar (const void *key_voidp, const void *elem_voidp)
> +{
> +  Dwarf_Word offset = (uintptr_t) key_voidp;
> +  const Dwarf_Op *op = elem_voidp;
> +  return (offset > op->offset) - (offset < op->offset);
> +}

Nitpick: Rename bra_compare?

> +/* FIXME: Handle bytecode deadlocks and overflows.  */

In the systemtap runtime unwinder we "cheated" and don't support
backward branches/skips (I doubt those are really ever used in CFI).
Would that help here?

> +static bool
> +expr_eval (Dwfl_Frame *state, Dwarf_Frame *frame, const Dwarf_Op *ops,
> +	   size_t nops, Dwarf_Addr *result)
> +{
> +  Dwfl_Process *process = state->thread->process;
> +  if (nops == 0)
> +    {
> +      __libdwfl_seterrno (DWFL_E_INVALID_DWARF);
> +      return false;
> +    }
> +  Dwarf_Addr *stack = NULL;
> +  size_t stack_used = 0, stack_allocated = 0;

> +  bool
> +  push (Dwarf_Addr val)
> +  {
> +    if (stack_used == stack_allocated)
> +      {
> +	stack_allocated = MAX (stack_allocated * 2, 32);
> +	Dwarf_Addr *stack_new = realloc (stack, stack_allocated * sizeof (*stack));
> +	if (stack_new == NULL)
> +	  {
> +	    __libdwfl_seterrno (DWFL_E_NOMEM);
> +	    return false;
> +	  }
> +	stack = stack_new;
> +      }
> +    stack[stack_used++] = val;
> +    return true;
> +  }

Nitpick. I like functions to have one line of whitespace before/after to
make them stand out.

> +  bool
> +  pop (Dwarf_Addr *val)
> +  {
> +    if (stack_used == 0)
> +      {
> +	__libdwfl_seterrno (DWFL_E_INVALID_DWARF);
> +	return false;
> +      }
> +    *val = stack[--stack_used];
> +    return true;
> +  }

Likewise.

> +  Dwarf_Addr val1, val2;
> +  bool is_location = false;
> +  for (const Dwarf_Op *op = ops; op < ops + nops; op++)
> +    switch (op->atom)
> +    {
> +      case DW_OP_reg0 ... DW_OP_reg31:
> +	if (! state_get_reg (state, op->atom - DW_OP_reg0, &val1)
> +	    || ! push (val1))
> +	  {
> +	    free (stack);
> +	    return false;
> +	  }
> +	break;
> +      case DW_OP_regx:
> +	if (! state_get_reg (state, op->number, &val1) || ! push (val1))
> +	  {
> +	    free (stack);
> +	    return false;
> +	  }
> +	break;
> +      case DW_OP_breg0 ... DW_OP_breg31:
> +	if (! state_get_reg (state, op->atom - DW_OP_breg0, &val1))
> +	  {
> +	    free (stack);
> +	    return false;
> +	  }
> +	val1 += op->number;
> +	if (! push (val1))
> +	  {
> +	    free (stack);
> +	    return false;
> +	  }
> +	break;
> +      case DW_OP_bregx:
> +	if (! state_get_reg (state, op->number, &val1))
> +	  {
> +	    free (stack);
> +	    return false;
> +	  }
> +	val1 += op->number2;
> +	if (! push (val1))
> +	  {
> +	    free (stack);
> +	    return false;
> +	  }
> +	break;
> +      case DW_OP_lit0 ... DW_OP_lit31:
> +	if (! push (op->atom - DW_OP_lit0))
> +	  {
> +	    free (stack);
> +	    return false;
> +	  }
> +	break;
> +      case DW_OP_plus_uconst:
> +	if (! pop (&val1) || ! push (val1 + op->number))
> +	  {
> +	    free (stack);
> +	    return false;
> +	  }
> +	break;
> +      case DW_OP_call_frame_cfa:;
> +	Dwarf_Op *cfa_ops;
> +	size_t cfa_nops;
> +	Dwarf_Addr cfa;
> +	if (dwarf_frame_cfa (frame, &cfa_ops, &cfa_nops) != 0
> +	    || ! expr_eval (state, frame, cfa_ops, cfa_nops, &cfa)

Here when you recurse into expr_eval you could pass NULL for frame (and
check that here too) to indicate no DW_OP_call_frame_cfa is expected.

> +	    || ! push (cfa))
> +	  {
> +	    __libdwfl_seterrno (DWFL_E_LIBDW);
> +	    free (stack);
> +	    return false;
> +	  }
> +	is_location = true;
> +	break;
> +      case DW_OP_stack_value:
> +	is_location = false;
> +	break;
> +      case DW_OP_deref:
> +	if (! pop (&val1)
> +	    || process->callbacks->memory_read == NULL
> +	    || ! process->callbacks->memory_read (process->dwfl, val1, &val1,
> +						  process->callbacks_arg)
> +	    || ! push (val1))
> +	  {
> +	    free (stack);
> +	    return false;
> +	  }

Nitpick. In the case of process->callbacks->memory_read == NULL no
dwfl_errno is set.

> +	break;
> +      case DW_OP_nop:
> +	break;
> +      case DW_OP_dup:
> +	if (! pop (&val1) || ! push (val1) || ! push (val1))
> +	  {
> +	    free (stack);
> +	    return false;
> +	  }
> +	break;
> +      case DW_OP_const1u:
> +      case DW_OP_const1s:
> +      case DW_OP_const2u:
> +      case DW_OP_const2s:
> +      case DW_OP_const4u:
> +      case DW_OP_const4s:
> +      case DW_OP_const8u:
> +      case DW_OP_const8s:
> +      case DW_OP_constu:
> +      case DW_OP_consts:
> +	if (! push (op->number))
> +	  {
> +	    free (stack);
> +	    return false;
> +	  }
> +	break;
> +      case DW_OP_bra:
> +	if (! pop (&val1))
> +	  {
> +	    free (stack);
> +	    return false;
> +	  }
> +	if (val1 == 0)
> +	  break;
> +	/* FALLTHRU */
> +      case DW_OP_skip:;
> +	Dwarf_Word offset = op->offset + 1 + 2 + (int16_t) op->number;
> +	const Dwarf_Op *found = bsearch ((void *) (uintptr_t) offset, ops, nops,
> +					 sizeof (*ops), bra_compar);

Clever. But I wonder if we could do something in dwarf_getlocation to
make this easier? Maybe record the decoded op number there somehow and
store it in number2? Not something to solve/do now though.

> +	if (found == NULL)
> +	  {
> +	    free (stack);
> +	    /* PPC32 vDSO has such invalid operations.  */

How horrible. Old kernels only?

> +	    __libdwfl_seterrno (DWFL_E_INVALID_DWARF);
> +	    return false;
> +	  }
> +	/* Undo the 'for' statement increment.  */
> +	op = found - 1;
> +	break;
> +      case DW_OP_drop:
> +	if (! pop (&val1))
> +	  {
> +	    free (stack);
> +	    return false;
> +	  }
> +	break;
> +#define BINOP(atom, op)							\
> +      case atom:							\
> +	if (! pop (&val2) || ! pop (&val1) || ! push (val1 op val2))	\
> +	  {								\
> +	    free (stack);						\
> +	    return false;						\
> +	  }								\
> +	break;
> +      BINOP (DW_OP_and, &)
> +      BINOP (DW_OP_shl, <<)
> +      BINOP (DW_OP_plus, +)
> +      BINOP (DW_OP_mul, *)
> +#undef BINOP
> +#define BINOP_SIGNED(atom, op)						\
> +      case atom:							\
> +	if (! pop (&val2) || ! pop (&val1)				\
> +	    || ! push ((int64_t) val1 op (int64_t) val2))		\
> +	  {								\
> +	    free (stack);						\
> +	    return false;						\
> +	  }								\
> +	break;
> +      BINOP_SIGNED (DW_OP_le, <=)
> +      BINOP_SIGNED (DW_OP_ge, >=)
> +      BINOP_SIGNED (DW_OP_eq, ==)
> +      BINOP_SIGNED (DW_OP_lt, <)
> +      BINOP_SIGNED (DW_OP_gt, >)
> +      BINOP_SIGNED (DW_OP_ne, !=)
> +#undef BINOP_SIGNED
> +      default:
> +	__libdwfl_seterrno (DWFL_E_UNSUPPORTED_DWARF);
> +	return false;

Which OPs are missing (DW_OP_div and DW_OP_mod it seems, be careful with
divide by zero)? Lets make sure we distinquish between OPs that are
valid in CFI, but we don't handle and OPs that aren't valid in CFI
(INVALID_DWARF).

> +    }
> +  if (! pop (result))
> +    {
> +      free (stack);
> +      return false;
> +    }
> +  free (stack);
> +  if (is_location
> +      && (process->callbacks->memory_read == NULL
> +	  || ! process->callbacks->memory_read (process->dwfl, *result, result,
> +						process->callbacks_arg)))
> +    return false;

Again that pesky process->callbacks->memory_read == NULL doesn't set
dwfl_errno.

> +  return true;
> +}
> +
> +/* The logic is to call __libdwfl_seterrno for any CFI bytecode interpretation
> +   error so one can easily catch the problem with a debugger.  Still there are
> +   archs with invalid CFI for some registers where the registers are never used
> +   later.  Therefore we continue unwinding leaving the registers undefined.
> +
> +   The only exception is PC itself, when there is an error unwinding PC we
> +   return false.  Otherwise we would return successful end of backtrace seeing
> +   an undefined PC register (due to an error unwinding it).  */

The first part of the comment makes sense. The last part doesn't match
the implementation, see also at the end.

> +static void
> +handle_cfi (Dwfl_Frame *state, Dwarf_Addr pc, Dwarf_CFI *cfi)
> +{
> +  Dwarf_Frame *frame;
> +  if (INTUSE(dwarf_cfi_addrframe) (cfi, pc, &frame) != 0)
> +    {
> +      __libdwfl_seterrno (DWFL_E_LIBDW);
> +      return;
> +    }
> +  Dwfl_Thread *thread = state->thread;
> +  Dwfl_Process *process = thread->process;
> +  Ebl *ebl = process->ebl;
> +  size_t nregs = ebl_frame_nregs (ebl);

This could return zero. Is that a problem here (return error)? Or did we
already check earlier (assert > 0)?

> +  Dwfl_Frame *unwound;
> +  unwound = malloc (sizeof (*unwound) + sizeof (*unwound->regs) * nregs);
> +  state->unwound = unwound;
> +  unwound->thread = thread;
> +  unwound->unwound = NULL;
> +  unwound->signal_frame = frame->fde->cie->signal_frame;
> +  unwound->pc_state = DWFL_FRAME_STATE_ERROR;
> +  memset (unwound->regs_set, 0, sizeof (unwound->regs_set));
> +  for (unsigned regno = 0; regno < nregs; regno++)
> +    {
> +      Dwarf_Op reg_ops_mem[3], *reg_ops;
> +      size_t reg_nops;
> +      if (dwarf_frame_register (frame, regno, reg_ops_mem, &reg_ops,
> +				&reg_nops) != 0)
> +	{
> +	  __libdwfl_seterrno (DWFL_E_LIBDW);
> +	  continue;
> +	}
> +      Dwarf_Addr regval;
> +      if (reg_nops == 0)
> +	{
> +	  if (reg_ops == reg_ops_mem)
> +	    {
> +	      /* REGNO is undefined.  */
> +	      unsigned ra = frame->fde->cie->return_address_register;
> +	      if (regno == ra)
> +		unwound->pc_state = DWFL_FRAME_STATE_PC_UNDEFINED;
> +	      continue;
> +	    }
> +	  else if (reg_ops == NULL)
> +	    {
> +	      /* REGNO is same-value.  */
> +	      if (! state_get_reg (state, regno, &regval))
> +		continue;
> +	    }
> +	  else
> +	    {
> +	      __libdwfl_seterrno (DWFL_E_INVALID_DWARF);
> +	      continue;
> +	    }
> +	}
> +      else if (! expr_eval (state, frame, reg_ops, reg_nops, &regval))
> +	{
> +	  /* PPC32 vDSO has various invalid operations, ignore them.  The
> +	     register will look as unset causing an error later, if used.
> +	     But PPC32 does not use such registers.  */
> +	  continue;
> +	}
> +      if (! dwfl_frame_reg_set (unwound, regno, regval))
> +	{
> +	  __libdwfl_seterrno (DWFL_E_INVALID_REGISTER);
> +	  continue;
> +	}
> +    }
> +  if (unwound->pc_state == DWFL_FRAME_STATE_ERROR
> +      && dwfl_frame_reg_get (unwound, frame->fde->cie->return_address_register,
> +			     &unwound->pc))
> +    {
> +      /* PPC32 __libc_start_main properly CFI-unwinds PC as zero.  Currently
> +	 none of the archs supported for unwinding have zero as a valid PC.  */
> +      if (unwound->pc == 0)
> +	unwound->pc_state = DWFL_FRAME_STATE_PC_UNDEFINED;
> +      else
> +	unwound->pc_state = DWFL_FRAME_STATE_PC_SET;
> +    }
> +}

If at the end unwound->pc_state == DWFL_FRAME_STATE_ERROR should we
signal an error by setting state->unwound?

> +void
> +internal_function
> +__libdwfl_frame_unwind (Dwfl_Frame *state)
> +{
> +  if (state->unwound)
> +    return;

I am not sure when this happens.
Is this for when someone calls dwfl_frame_pc and it hits that last
activation check I don't fully understand?

> +  Dwarf_Addr pc;
> +  bool ok = INTUSE(dwfl_frame_pc) (state, &pc, NULL);
> +  assert (ok);
> +  /* Do not ask dwfl_frame_pc for ISACTIVATION, it would try to unwind STATE
> +     which would deadlock us.  */

Move this comment up a bit to where you actually call dwfl_frame_pc.
And the comment here is "check whether this is the initial frame or a
signal frame. Then we need to unwind from the adjusted pc."

> +  if (state != state->thread->unwound && ! state->signal_frame)
> +    pc--;
> +  Dwfl_Module *mod = INTUSE(dwfl_addrmodule) (state->thread->process->dwfl, pc);
> +  if (mod != NULL)
> +    {
> +      Dwarf_Addr bias;
> +      Dwarf_CFI *cfi_eh = INTUSE(dwfl_module_eh_cfi) (mod, &bias);
> +      if (cfi_eh)
> +	{
> +	  handle_cfi (state, pc - bias, cfi_eh);
> +	  if (state->unwound)
> +	    return;
> +	}
> +      Dwarf_CFI *cfi_dwarf = INTUSE(dwfl_module_dwarf_cfi) (mod, &bias);
> +      if (cfi_dwarf)
> +	{
> +	  handle_cfi (state, pc - bias, cfi_dwarf);
> +	  if (state->unwound)
> +	    return;
> +	}
> +    }
> +  __libdwfl_seterrno (DWFL_E_NO_DWARF);
> +}

handle_cfi might set dwflerrno when state->unwound == NULL, in that case
we probably shouldn't override it with the generic DWFL_E_NO_DWARF,
might want to put that in the else branch of (mod != NULL)?

Also a nitpick, but might this be slightly nicer to read if this and
handle_cfi returned stat->unwound to make the caller error checking more
"natural"?

> diff --git a/libdwfl/libdwfl.h b/libdwfl/libdwfl.h
> @@ -41,6 +41,14 @@ typedef struct Dwfl_Module Dwfl_Module;
>  /* Handle describing a line record.  */
>  typedef struct Dwfl_Line Dwfl_Line;
>  
> +/* This holds information common for all the frames of one backtrace for
> +   a partical thread/task/TID.  Several threads belong to one Dwfl.  */
> +typedef struct Dwfl_Thread Dwfl_Thread;
> +
> +/* This holds everything we know about the state of the frame at a particular
> +   PC location described by an FDE belonging to Dwfl_Thread.  */
> +typedef struct Dwfl_Frame Dwfl_Frame;

OK.

> @@ -352,10 +360,14 @@ extern int dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
>     segment to locate its PT_DYNAMIC in the dump.  This might call
>     dwfl_report_elf on file names found in the dump if reading some
>     link_map files is the only way to ascertain those modules' addresses.
> +   dwfl_attach_state is also called for DWFL, dwfl_core_file_report does
> +   not fail if the dwfl_attach_state call has failed.
>     Returns the number of modules reported, or -1 for errors.  */
>  extern int dwfl_core_file_report (Dwfl *dwfl, Elf *elf);
>  
>  /* Call dwfl_report_module for each file mapped into the address space of PID.
> +   dwfl_attach_state is also called for DWFL, dwfl_linux_proc_report does
> +   not fail if the dwfl_attach_state call has failed.
>     Returns zero on success, -1 if dwfl_report_module failed,
>     or an errno code if opening the kernel binary failed.  */
>  extern int dwfl_linux_proc_report (Dwfl *dwfl, pid_t pid);

OK.

> +typedef struct
> +{
> +  /* Called to iterate through threads.  Returns next TID (thread ID) on
> +     success, a negative number on failure and zero if there are no more
> +     threads.  NTHREAD is the new thread being created.  *THREAD_ARGP may be
> +     optionally set by the implementation, THREAD_ARGP is never NULL.
> +     This method must not be NULL.  */ 
> +  pid_t (*next_thread) (Dwfl *dwfl, Dwfl_Thread *nthread, void *dwfl_arg,
> +			void **thread_argp)
> +    __nonnull_attribute__ (1, 2);

Maybe explicitly mention that THREAD_ARG will be passed to other
callbacks that take a Dwfl_Thread argument? 

> +  /* Called during unwinding to access memory (stack) state.  Returns true for
> +     successfully read *RESULT or false and sets dwfl_errno () on failure.
> +     This method may be NULL - in such case dwfl_thread_getframes will return
> +     only the initial frame.  */
> +  bool (*memory_read) (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result,
> +                       void *dwfl_arg)
> +    __nonnull_attribute__ (1, 3);
> +
> +  /* Called on initial unwind to get the initial register state of the first
> +     frame.  Should call dwfl_thread_state_registers, possibly multiple times
> +     for different ranges and possibly also dwfl_thread_state_register_pc, to
> +     fill in initial (DWARF) register values.  After this call, till at least
> +     thread_detach is called, the thread is assumed to be frozen, so that it is
> +     safe to unwind.  Returns true on success or false and sets dwfl_err () on
> +     failure.  This method must not be NULL.  */ 
> +  bool (*set_initial_registers) (Dwfl_Thread *thread, void *thread_arg)
> +    __nonnull_attribute__ (1);
> +
> +  /* Called by dwfl_end.  All thread_detach method calls have been already
> +     done.  This method may be NULL.  */
> +  void (*detach) (Dwfl *dwfl, void *dwfl_arg)
> +    __nonnull_attribute__ (1);
> +
> +  /* Called when unwinding is done.  No callback will be called after
> +     this method has been called.  Iff set_initial_registers was called for
> +     a TID thread_detach will be called before the detach method above.
> +     This method may be NULL.  */
> +  void (*thread_detach) (Dwfl_Thread *thread, void *thread_arg)
> +    __nonnull_attribute__ (1);
> +} Dwfl_Thread_Callbacks;

OK. But as mentioned above lets document that thread_detach will only be
called if set_initial_registers was called first and returned true.

> +/* PID is the process id associated with the DWFL state.  Architecture of DWFL
> +   modules is specified by MACHINE.  Use EM_NONE to detect architecture from
> +   DWFL.  If EBL is NULL the function will detect it from arbitrary Dwfl_Module
> +   of DWFL.  DWFL_ARG is the callback backend state.  DWFL_ARG will be provided
> +   to the callbacks.  *THREAD_CALLBACKS function pointers must remain valid
> +   during lifetime of DWFL.  Function returns true on success,
> +   false otherwise.  */
> +bool dwfl_attach_state (Dwfl *dwfl, int machine, pid_t pid,
> +                        const Dwfl_Thread_Callbacks *thread_callbacks,
> +			void *dwfl_arg)
> +  __nonnull_attribute__ (1, 4);
> +
> +/* Return PID for the process associated with DWFL.  Function returns -1 if
> +   dwfl_attach_state was not called for DWFL.  */
> +pid_t dwfl_pid (Dwfl *dwfl)
> +  __nonnull_attribute__ (1);
> +
> +/* Return DWFL from which THREAD was created using dwfl_next_thread.  */
> +Dwfl *dwfl_thread_dwfl (Dwfl_Thread *thread)
> +  __nonnull_attribute__ (1);
> +
> +/* Return positive TID (thread ID) for THREAD.  This function never fails.  */
> +pid_t dwfl_thread_tid (Dwfl_Thread *thread)
> +  __nonnull_attribute__ (1);
> +
> +/* Return thread for frame STATE.  This function never fails.  */
> +Dwfl_Thread *dwfl_frame_thread (Dwfl_Frame *state)
> +  __nonnull_attribute__ (1);
> +
> +/* Called by Dwfl_Thread_Callbacks.set_initial_registers implementation.
> +   For every known continuous block of registers <FIRSTREG..FIRSTREG+NREGS)
> +   (inclusive..exclusive) set their content to REGS (array of NREGS items).
> +   Function returns false if any of the registers has invalid number.  */
> +bool dwfl_thread_state_registers (Dwfl_Thread *thread, const int firstreg,
> +                                  unsigned nregs, const Dwarf_Word *regs)
> +  __nonnull_attribute__ (1, 4);
> +
> +/* Called by Dwfl_Thread_Callbacks.set_initial_registers implementation.
> +   If PC is not contained among DWARF registers passed by
> +   dwfl_thread_state_registers on the target architecture pass the PC value
> +   here.  */
> +void dwfl_thread_state_register_pc (Dwfl_Thread *thread, Dwarf_Word pc)
> +  __nonnull_attribute__ (1);

OK.

> +/* Gets the next known thread, if any.  To get the initial thread
> +   provide NULL as previous thread PREV_THREAD.  On error function returns NULL
> +   and sets dwfl_errno ().  When no more threads are found function returns
> +   NULL and dwfl_errno () is set to 0 - dwfl_errmsg (0) returns NULL then.  */
> +Dwfl_Thread *dwfl_next_thread (Dwfl *dwfl, Dwfl_Thread *prev_thread)
> +  __nonnull_attribute__ (1);

Does this interface imply you can "rewalk" the threads given some
returned Dwfl_Thread * you once got, that might not be the last one
returned by this function for that particular Dwfl? The implementation
implies so, but do we want to guarantee that? (I think I would like to
require the user to always pass in the last returned Dwfl_Thread. But
what about the life-time of Dwfl_Thread objects in that case?) Also we
should document what happens if you pass NULL as prev_thread a second
time. Does it invalidate all previous Dwfl_Threads returned before? (I
would say yes, and it does a fresh new walk in case thread
appeared/disappeared).

> +/* Iterate through the frames for a thread.  Returns zero if all frames
> +   have been processed by the callback, returns -1 on error, or the
> +   value of the callback when not DWARF_CB_OK.  Keeps calling the
> +   callback with the next frame while the callback returns
> +   DWARF_CB_OK, till there are no more frames.  On start will call the
> +   set_initial_registers callback and on return will call the
> +   detach_thread callback of the Dwfl_Thread.  */
> +int dwfl_thread_getframes (Dwfl_Thread *thread,
> +                           int (*callback) (Dwfl_Frame *state, void *arg),
> +                           void *arg)
> +  __nonnull_attribute__ (1, 2);
> +
> +/* Return *PC (program counter) for thread-specific frame STATE.
> +   Set *ISACTIVATION according to DWARF frame "activation" definition.
> +   Typically you need to substract 1 from *PC if *ACTIVATION is false to safely
> +   find function of the caller.  ACTIVATION may be NULL.  PC must not be NULL.
> +   Function returns false if it failed to find *PC.  */
> +bool dwfl_frame_pc (Dwfl_Frame *state, Dwarf_Addr *pc, bool *isactivation)
> +  __nonnull_attribute__ (1, 2);

OK.

Do we also need a dwfl_detach_state ()?
In case a user wants to reuse a Dwfl with new pid/callbacks?

> +++ b/libdwfl/libdwflP.h
> +#include "libeblP.h"

Do you need this? Can't you use libebl.h?

> +typedef struct Dwfl_Process Dwfl_Process;
>  
>  /* gettext helper macros.  */
>  #define _(Str) dgettext ("elfutils", Str)
> @@ -74,7 +77,19 @@
>    DWFL_ERROR (BADELF, N_("not a valid ELF file"))			      \
>    DWFL_ERROR (WEIRD_TYPE, N_("cannot handle DWARF type description"))	      \
>    DWFL_ERROR (WRONG_ID_ELF, N_("ELF file does not match build ID"))	      \
> -  DWFL_ERROR (BAD_PRELINK, N_("corrupt .gnu.prelink_undo section data"))
> +  DWFL_ERROR (BAD_PRELINK, N_("corrupt .gnu.prelink_undo section data"))      \
> +  DWFL_ERROR (LIBEBL_BAD, N_("Internal error due to ebl"))		      \
> +  DWFL_ERROR (CORE_MISSING, N_("Missing data in core file"))		      \
> +  DWFL_ERROR (INVALID_REGISTER, N_("Invalid register"))			      \
> +  DWFL_ERROR (PROCESS_MEMORY_READ, N_("Error reading process memory"))	      \
> +  DWFL_ERROR (PROCESS_NO_ARCH, N_("Have not found ELF module in a process"))  \

Maybe "Couldn't determine ELF architecture"?

> +  DWFL_ERROR (PARSE_PROC, N_("Error parsing /proc filesystem"))		      \
> +  DWFL_ERROR (NO_THREAD, N_("No thread found"))				      \

This one is never used.

> +  DWFL_ERROR (INVALID_DWARF, N_("Invalid DWARF"))			      \
> +  DWFL_ERROR (UNSUPPORTED_DWARF, N_("Unsupported DWARF"))		      \
> +  DWFL_ERROR (NEXT_THREAD_FAIL, N_("Unable to find more threads"))	      \
> +  DWFL_ERROR (ATTACH_STATE_CONFLICT, N_("Dwfl already has attached state"))   \
> +  DWFL_ERROR (NO_ATTACH_STATE, N_("Dwfl has no attached state"))

> @@ -92,6 +107,8 @@ struct Dwfl
>  
>    Dwfl_Module *modulelist;    /* List in order used by full traversals.  */
>  
> +  Dwfl_Process *process;
> +
>    GElf_Addr offline_next_address;
>  
>    GElf_Addr segment_align;	/* Smallest granularity of segments.  */

OK.

> +/* This holds information common for all the threads/tasks/TIDs of one process
> +   for backtraces.  */
> +
> +struct Dwfl_Process
> +{
> +  struct Dwfl *dwfl;
> +  pid_t pid;
> +  const Dwfl_Thread_Callbacks *callbacks;
> +  void *callbacks_arg;
> +  struct ebl *ebl;
> +  bool ebl_close:1;
> +  Dwfl_Thread *thread;
> +};

thread could use a comment explaining it is the (currently) last thread.
Or maybe rename it to last_thread or cur_thread to make that clear?

> +/* See its typedef in libdwfl.h.  */
> +
> +struct Dwfl_Thread
> +{
> +  Dwfl_Process *process;
> +  Dwfl_Thread *next;
> +  pid_t tid;
> +  bool thread_detach_needed : 1;
> +  /* Bottom frame.  */
> +  Dwfl_Frame *unwound;
> +  void *callbacks_arg;
> +};

I don't think you really need thread_detach_needed. attaching/getting
initial registers and detaching should only happen in
dwfl_thread_getframes.

I found unwound a little confusing, maybe rename it to outer_frame?
But maybe it is just that bottom/outer are slightly confusing names in
general. current_frame maybe? Or I am nitpicking and you should just
ignore me for this one.

> +/* See its typedef in libdwfl.h.  */
> +
> +struct Dwfl_Frame
> +{
> +  Dwfl_Thread *thread;
> +  /* Previous (outer) frame.  */
> +  Dwfl_Frame *unwound;

Do we really need to keep all old frames around like this?
Can't we cleanup up the old frame in dwfl_thread_getframes after having
called the callback for it (and creating the next)? Not that it wastes
huge amounts of space. More curious if it would result in a simpler
datastructure.

> +  bool signal_frame : 1;
> +  enum
> +  {
> +    /* This structure is still being initialized or there was an error
> +       initializing it.  */
> +    DWFL_FRAME_STATE_ERROR,
> +    /* PC field is valid.  */
> +    DWFL_FRAME_STATE_PC_SET,
> +    /* PC field is undefined, this means the next (inner) frame was the
> +       outermost frame.  */
> +    DWFL_FRAME_STATE_PC_UNDEFINED
> +  } pc_state;

See comment above, could/should we split the ERROR and UNSET state?

> +  /* Either initialized from appropriate REGS element or on some archs
> +     initialized separately as the return address has no DWARF register.  */
> +  Dwarf_Addr pc;
> +  /* (1 << X) bitmask where 0 <= X < ebl_frame_nregs.  */
> +  uint64_t regs_set[3];
> +  /* REGS array size is ebl_frame_nregs.  */
> +  Dwarf_Addr regs[];
> +};

bitmask tells which regs are valid.

> +/* Fetch value from Dwfl_Frame->regs indexed by DWARF REGNO.
> +   No error code is set if the function returns FALSE.  */
> +
> +static inline bool
> +dwfl_frame_reg_get (Dwfl_Frame *state, unsigned regno, Dwarf_Addr *val)
> +{
> +  Ebl *ebl = state->thread->process->ebl;
> +  if (regno >= ebl->frame_nregs)
> +    return false;
> +  if ((state->regs_set[regno / sizeof (*state->regs_set) / 8]
> +       & (1U << (regno % (sizeof (*state->regs_set) * 8)))) == 0)
> +    return false;
> +  if (val)
> +    *val = state->regs[regno];
> +  return true;
> +}
>  
> +/* Store value to Dwfl_Frame->regs indexed by DWARF REGNO.
> +   No error code is set if the function returns FALSE.  */
> +
> +static inline bool
> +dwfl_frame_reg_set (Dwfl_Frame *state, unsigned regno, Dwarf_Addr val)
> +{
> +  Ebl *ebl = state->thread->process->ebl;
> +  if (regno >= ebl->frame_nregs)
> +    return false;
> +  /* For example i386 user_regs_struct has signed fields.  */
> +  if (ebl->class == ELFCLASS32)
> +    val &= 0xffffffff;
> +  state->regs_set[regno / sizeof (*state->regs_set) / 8] |=
> +			      (1U << (regno % (sizeof (*state->regs_set) * 8)));
> +  state->regs[regno] = val;
> +  return true;
> +}

OK.

> @@ -415,6 +520,29 @@ extern Dwfl_Module *__libdwfl_report_offline (Dwfl *dwfl, const char *name,
>  								const char *))
>    internal_function;
>  
> +/* Free PROCESS.  Unlink and free also any structures it references.  */
> +extern void __libdwfl_process_free (Dwfl_Process *process)
> +  internal_function;

OK.

> +/* Update STATE->UNWOUND for the unwound frame.
> +   Functions sets dwfl_errno ().  */
> +extern void __libdwfl_frame_unwind (Dwfl_Frame *state)
> +  internal_function;

Document "On error STATE->UNWOUND will be NULL and sets dwfl_errno ()".
(And nitpick maybe just let this return state->unwound to make error
checking simpler.)

> +/* Call dwfl_attach_state for PID, return true if successful.  */
> +extern bool __libdwfl_attach_state_for_pid (Dwfl *dwfl, pid_t pid)
> +  internal_function;
> +
> +/* Call dwfl_attach_state for CORE, return true if successful.  */
> +extern bool __libdwfl_attach_state_for_core (Dwfl *dwfl, Elf *core)
> +  internal_function;

OK.

> +/* Align segment START downwards or END upwards addresses according to DWFL.  */
> +extern GElf_Addr __libdwfl_segment_start (Dwfl *dwfl, GElf_Addr start)
> +  internal_function;
> +extern GElf_Addr __libdwfl_segment_end (Dwfl *dwfl, GElf_Addr end)
> +  internal_function;

OK.

>  /* Decompression wrappers: decompress whole file into memory.  */
>  extern Dwfl_Error __libdw_gunzip  (int fd, off64_t start_offset,
>  				   void *mapped, size_t mapped_size,
> @@ -557,6 +685,16 @@ INTDECL (dwfl_offline_section_address)
>  INTDECL (dwfl_module_relocate_address)
>  INTDECL (dwfl_module_dwarf_cfi)
>  INTDECL (dwfl_module_eh_cfi)
> +INTDECL (dwfl_attach_state)
> +INTDECL (dwfl_pid)
> +INTDECL (dwfl_thread_dwfl)
> +INTDECL (dwfl_thread_tid)
> +INTDECL (dwfl_frame_thread)
> +INTDECL (dwfl_thread_state_registers)
> +INTDECL (dwfl_thread_state_register_pc)
> +INTDECL (dwfl_next_thread)
> +INTDECL (dwfl_thread_getframes)
> +INTDECL (dwfl_frame_pc)

OK.

> diff --git a/libdwfl/linux-proc-maps.c b/libdwfl/linux-proc-maps.c
> [...]
> @@ -300,6 +300,13 @@ dwfl_linux_proc_report (Dwfl *dwfl, pid_t pid)
>  
>    fclose (f);
>  
> +  if (result == 0)
> +    {
> +      /* Possible error is ignored, DWFL still may be useful for non-unwinding
> +	 operations.  */
> +      __libdwfl_attach_state_for_pid (dwfl, pid);
> +    }
> +
>    return result;
>  }

OK.

> diff --git a/libdwfl/segment.c b/libdwfl/segment.c
> [...]
> @@ -28,16 +28,18 @@
>  
>  #include "libdwflP.h"
>  
> -static GElf_Addr
> -segment_start (Dwfl *dwfl, GElf_Addr start)
> +GElf_Addr
> +internal_function
> +__libdwfl_segment_start (Dwfl *dwfl, GElf_Addr start)
>  {
>    if (dwfl->segment_align > 1)
>      start &= -dwfl->segment_align;
>    return start;
>  }
>  
> -static GElf_Addr
> -segment_end (Dwfl *dwfl, GElf_Addr end)
> +GElf_Addr
> +internal_function
> +__libdwfl_segment_end (Dwfl *dwfl, GElf_Addr end)
>  {
>    if (dwfl->segment_align > 1)
>      end = (end + dwfl->segment_align - 1) & -dwfl->segment_align;
> @@ -156,8 +158,8 @@ reify_segments (Dwfl *dwfl)
>    for (Dwfl_Module *mod = dwfl->modulelist; mod != NULL; mod = mod->next)
>      if (! mod->gc)
>        {
> -	const GElf_Addr start = segment_start (dwfl, mod->low_addr);
> -	const GElf_Addr end = segment_end (dwfl, mod->high_addr);
> +	const GElf_Addr start = __libdwfl_segment_start (dwfl, mod->low_addr);
> +	const GElf_Addr end = __libdwfl_segment_end (dwfl, mod->high_addr);
>  	bool resized = false;
>  
>  	int idx = lookup (dwfl, start, hint);
> @@ -296,8 +298,9 @@ dwfl_report_segment (Dwfl *dwfl, int ndx, const GElf_Phdr *phdr, GElf_Addr bias,
>        dwfl->lookup_module = NULL;
>      }
>  
> -  GElf_Addr start = segment_start (dwfl, bias + phdr->p_vaddr);
> -  GElf_Addr end = segment_end (dwfl, bias + phdr->p_vaddr + phdr->p_memsz);
> +  GElf_Addr start = __libdwfl_segment_start (dwfl, bias + phdr->p_vaddr);
> +  GElf_Addr end = __libdwfl_segment_end (dwfl,
> +					 bias + phdr->p_vaddr + phdr->p_memsz);
>  
>    /* Coalesce into the last one if contiguous and matching.  */
>    if (ndx != dwfl->lookup_tail_ndx

OK.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]