[RFC PATCH 4/6] Linux kernel debug infrastructure and target Linux kernel

Philipp Rudo prudo@linux.ibm.com
Thu Jan 31 16:14:00 GMT 2019


Hi Omair,

On Tue, 29 Jan 2019 10:03:17 +0500
Omair Javaid <omair.javaid@linaro.org> wrote:

[...]

> diff --git a/gdb/gdbarch.c b/gdb/gdbarch.c
> index 434ee3b..5e88623 100644
> --- a/gdb/gdbarch.c
> +++ b/gdb/gdbarch.c
> @@ -3,7 +3,7 @@
>  
>  /* Dynamic architecture support for GDB, the GNU debugger.
>  
> -   Copyright (C) 1998-2019 Free Software Foundation, Inc.
> +   Copyright (C) 1998-2018 Free Software Foundation, Inc.

that doesn't look right. Same for gdbarch.h.

[...]

> diff --git a/gdb/lk-bitmap.h b/gdb/lk-bitmap.h
> new file mode 100644
> index 0000000..f0a6413
> --- /dev/null
> +++ b/gdb/lk-bitmap.h
> @@ -0,0 +1,226 @@
> +/* Iterator for bitmaps from the Linux kernel.
> +
> +   Copyright (C) 2017 Free Software Foundation, Inc.

Usually the copyright year should be the year the file is included in GDB.
Please update it to 2019. Same for lk-list.h.

[...]

> +size_t
> +lk_bitmap::hweight () const
> +{
> +  size_t ret = 0;
> +//  for (auto bit : *this)
> +//    ret++;
> +  return ret;
> +}

Is this commented out for a reason?

[...]

> diff --git a/gdb/lk-low.c b/gdb/lk-low.c
> new file mode 100644
> index 0000000..1931964
> --- /dev/null
> +++ b/gdb/lk-low.c

[...]

> +bool
> +linux_kernel_ops::is_kernel_address (CORE_ADDR addr)
> +{
> +  return (addr >= address ("_text")
> +          && addr < address ("high_memory")) ? true : false;
> +}

You can drop the '? true : false'.

[...]

> +/* See lk-low.h.  */
> +
> +bool
> +linux_kernel_ops::update_tasks ()
> +{
> +  lk_task_ptid_list now_running_ptids;
> +  lk_task_ptid_map new_task_struct_ptids;
> +  auto last_cpu_task_struct_addr = cpu_curr_task_struct_addr;
> +  int inf_pid = current_inferior ()->pid;
> +
> +  /* Clear cpu_task_struct_addr and cpu_ptid_pair cache that we created
> +     on last stop.  */
> +  cpu_curr_task_struct_addr.clear();
> +  cpu_ptid_pair.clear();
> +  tid_task_struct.clear();
> +  lk_thread_count = 1;
> +
> +  /* Iterate over all threads and register target beneath threads.  */
> +  for (thread_info *tp : all_threads_safe ())
> +    {
> +      /* Check if this task represents a CPU.  */
> +      if (tp->ptid.tid () == 0)

why not 

if (tp->ptid.tid () != 0)
	continue;

Then you wouldn't need the extra indent on the whole for-loop.

> +        {
> +          //TODO: Can we have a target beneath thread with lwp != cpu ???

Yes, you can. For core files the lwp is whatever the pr_pid field in 
the elf_prstatus is. So in this case it depends on who created the dump.
For s390 kernel dumps the pr_pid is a cpu id. But in theory it could
also be the pid of the task that was running when the dump was created.

> +          unsigned int thread_cpu = tp->ptid.lwp() - 1;
> +          CORE_ADDR task = get_cpu_task_struct_addr (thread_cpu);
> +          int pid = lk_read_int (task + lk_offset ("task_struct->pid"));
> +          long tid = get_task_struct_tid (task);
> +          ptid_t kernel_ptid (tp->ptid.pid (), pid, tid);
> +
> +          /* If cpu is not idle and current cpu task has a sleeping
> +             gdb thread created against it on last stop.  */
> +          CORE_ADDR idle = cpu_idle_task_struct_addr [thread_cpu];
> +          if (idle != task && task_struct_ptids.count (task) > 0)
> +            {
> +              /* If idle task has a gdb thread created against it.  */
> +              long tid = get_task_struct_tid (idle);
> +              ptid_t new_ptid (inf_pid, 0, tid);
> +              if (task_struct_ptids.count (idle) > 0)
> +                {
> +                  thread_change_ptid (task_struct_ptids [idle], new_ptid);
> +                  new_task_struct_ptids [idle] = new_ptid;
> +                  now_running_ptids.push_back(task_struct_ptids [task]);
> +                  task_struct_ptids.erase(task);
> +                }
> +              else
> +                {
> +                  thread_change_ptid (task_struct_ptids [task], new_ptid);
> +                  new_task_struct_ptids [idle] = new_ptid;
> +                  task_struct_ptids.erase(task);
> +                }
> +            }
> +
> +          if (idle == task && task_struct_ptids.count (idle) > 0)
> +            {
> +              now_running_ptids.push_back(task_struct_ptids [idle]);
> +              task_struct_ptids.erase(idle);
> +            }
> +
> +          cpu_ptid_pair [thread_cpu] = std::pair<ptid_t, ptid_t> (kernel_ptid, tp->ptid);
> +          thread_change_ptid (tp->ptid, kernel_ptid);
> +        }
> +    }
> +
> +  /* Create an updated map of Linux kernel task structs mapping to gdb ptid.  */
> +  for (CORE_ADDR task : lk_list ("init_task", "task_struct->tasks"))
> +    {
> +      for (CORE_ADDR thread : lk_list (task, "task_struct->thread_group"))
> +        {
> +          if (is_running_task (thread) != LK_CPU_INVAL)
> +            continue;
> +
> +          if (is_idle_task (thread) != LK_CPU_INVAL)
> +            continue;
> +
> +          int pid = lk_read_int (thread + lk_offset ("task_struct->pid"));
> +          int tid = get_task_struct_tid (thread);
> +          ptid_t ptid (inf_pid, pid, tid);
> +          new_task_struct_ptids [thread] = ptid;
> +
> +          /* Check if we created a gdb thread against
> +             this task struct address on last stop.  */
> +          if (task_struct_ptids.count (thread) > 0)
> +            {
> +              /* Check if ptid needs to be updated.  */
> +              if (task_struct_ptids [thread] != ptid)
> +                thread_change_ptid (task_struct_ptids [thread], ptid);
> +              task_struct_ptids.erase(thread);
> +            }
> +          else
> +            {
> +              /* If this task was running on last stop, try to replace
> +                 it with gdb thread that just started running.  */
> +              bool create_new_thread = true;
> +              for (auto last_cpu_task : last_cpu_task_struct_addr)
> +                if (last_cpu_task.second == thread
> +                    && !now_running_ptids.empty())
> +                  {
> +                    thread_change_ptid (now_running_ptids.back(), ptid);
> +                    last_cpu_task_struct_addr.erase(last_cpu_task.first);
> +                    now_running_ptids.pop_back();
> +                    create_new_thread = false;
> +                    break;
> +                  }
> +
> +              /* Create a new gdb thread against this kernel task,
> +                 if thread was not swapped above.  */
> +              if (create_new_thread)
> +                add_thread_with_info (ptid, NULL);
> +            }
> +        }
> +    }
> +
> +  /* Delete all gdb threads which correspond to exited kernel tasks.  */
> +  for (auto& task : task_struct_ptids)
> +    {
> +      struct thread_info *tp = find_thread_ptid (task.second);
> +      if (tp)
> +        delete_thread (tp);
> +    }
> +
> +  /* Delete all gdb threads which correspond to exited kernel tasks.  */
> +  for (auto& ptid : now_running_ptids)
> +    {
> +      struct thread_info *tp = find_thread_ptid (ptid);
> +      if (tp)
> +        delete_thread (tp);
> +    }
> +
> +  task_struct_ptids = new_task_struct_ptids;
> +  lk_threads_refresh = false;
> +  return true;
> +}

Overall I find this function extremely confusing. Can you explain in more detail
what you are doing here? For example, what is now_running_ptids needed for, and why
don't you add the idle threads when you walk init_task?

Please also see my comment in lk-low.h

[...]

> +/* Definition of linux_kernel_target target_ops derived class.  */
> +
> +class linux_kernel_target final : public target_ops
> +{
> +public:
> +  const target_info &info () const override
> +  { return linux_kernel_target_info; }
> +
> +  strata stratum () const override { return thread_stratum; }
> +
> +  void close () override;
> +  void mourn_inferior () override;
> +  void detach (inferior *, int) override;
> +
> +  void resume (ptid_t, int, enum gdb_signal) override;
> +  ptid_t wait (ptid_t, struct target_waitstatus *, int) override;
> +
> +  bool can_async_p () override;
> +  bool is_async_p () override;

->
bool can_async_p () override { return false; }
bool is_async_p () override { return false; }

That saves you quite some lines.

[...]

> +void
> +linux_kernel_target::mourn_inferior ()
> +{
> +  target_ops *beneath = this->beneath ();
> +
> +  delete (lk_ops);
> +  beneath->mourn_inferior ();

->
beneath ()->mourn_inferior ();

[...]

> +bool
> +linux_kernel_target::can_async_p ()
> +{
> +  return false;
> +}
> +
> +/* Implementation of linux_kernel_target->is_async_p method.  */
> +
> +bool
> +linux_kernel_target::is_async_p ()
> +{
> +  return false;
> +}

See comment above.

[...]

> diff --git a/gdb/lk-low.h b/gdb/lk-low.h
> new file mode 100644
> index 0000000..103d49f
> --- /dev/null
> +++ b/gdb/lk-low.h
> @@ -0,0 +1,354 @@

[...]

> +/* We use the following convention for PTIDs:
> +
> +   ptid->pid = inferiors PID
> +   ptid->lwp = PID from task_stuct
> +   ptid->tid = TID generated by Linux kernel target.
> +
> +   Linux kernel target generates a unique integer TID against each
> +   task_struct. These TIDs map to their corresponding task_struct
> +   addresses stored in a lk_tid_task_map.
> +
> +   We update PTIDs of running tasks to the ones generated by Linux
> +   kernel target in order to map them over their corresponding
> +   task_struct addresses. These PTIDs are reverted on target resume.  */
> +
> +/* A std::vector that can hold a list of PTIDs.  */
> +typedef std::vector <ptid_t> lk_task_ptid_list;
> +
> +/* A std::map that uses task struct address as key and PTID as value.  */
> +typedef std::map <CORE_ADDR, ptid_t> lk_task_ptid_map;
> +
> +/* A std::map that uses task struct address as key and TID as value.  */
> +typedef std::map <long, CORE_ADDR> lk_tid_task_map;
> +
> +/* A std::map that uses cpu number as key and task struct address as value.  */
> +typedef std::map<unsigned int, CORE_ADDR> lk_cpu_task_struct_map;
> +
> +/* A std::map that uses cpu numbers as key and a std::pair of PTIDs as value.  */
> +typedef std::map<unsigned int, std::pair <ptid_t, ptid_t>> lk_task_ptid_pair_map;

I find all these mappings pretty confusing. In particular, I really don't
like the std::pair. It leads to constructs like

> +      return cpu_ptid.second.first;

which doesn't give you any information about what .first or .second is.
Why don't you introduce a helper?

struct lk_task_struct
{
	unsigned int cpu;
	CORE_ADDR addr;
	ptid_t kernel_ptid;
	ptid_t beneath_ptid;
	...
}

With this struct, two maps -- one for the lk-target context (via the unique
TID) and one for the target-beneath context (via the beneath_ptid) -- should
be enough to access all the information you need.

[...]

> +class linux_kernel_ops
> +{
> +public:
> +  linux_kernel_ops ()
> +  {}
> +
> +  /* Non default destructor as we need to clean-up gdb threads
> +     created by this linux_kernel_ops object.  */
> +  virtual ~linux_kernel_ops ();
> +
> +  /* Read registers from the target and supply their content to regcache.  */
> +  virtual void get_registers (CORE_ADDR task, struct regcache *regcache,
> +                              int regnum) = 0;
> +
> +  /* Return the per_cpu_offset of cpu CPU.  Default uses __per_cpu_offset
> +     array to determine the offset.  */
> +  virtual CORE_ADDR percpu_offset (unsigned int cpu);
> +
> +  /* Verify if we are stopped at a direct mapped address in kernel space.  */
> +  virtual bool is_kernel_address (CORE_ADDR addr);
> +
> +  /* Whether the cached Linux thread list needs refreshing */
> +  int lk_threads_refresh = true;

bool

[...]

> +  /* Holds Linux kernel target tids as key and
> +     corresponding task struct address as value.  */
> +  lk_tid_task_map tid_task_struct;
> +
> +  /* Maps cpu number to linux kernel and target beneath ptids.  */
> +  lk_task_ptid_pair_map cpu_ptid_pair;
> +
> +  /* Maps task_struct addresses to ptids.  */
> +  lk_task_ptid_map task_struct_ptids;
> +
> +  /* Holds cpu to current running task struct address mappings.  */
> +  lk_cpu_task_struct_map cpu_curr_task_struct_addr;
> +
> +  /* Holds cpu to current idle task struct address mappings.  */
> +  lk_cpu_task_struct_map cpu_idle_task_struct_addr;

Why are you tracking the idle task separately? Why can't it be treated
like a 'normal' task?

For me it looks like you only need this to handle the idle task
slightly differently in update_tasks. What exactly are you doing differently
with it?

> +
> +  /* Update task_struct_ptids map by walking the task_structs starting from
> +     init_task.  */
> +  bool update_task_struct_ptids ();

There is no definition for this function.

Thanks
Philipp



More information about the Gdb-patches mailing list