[PATCH] aarch64: Check for valid inferior thread/regcache before reading pauth registers
Luis Machado
luis.machado@arm.com
Wed Mar 22 10:26:00 GMT 2023
On 3/20/23 20:06, Andrew Burgess wrote:
> Luis Machado <luis.machado@arm.com> writes:
>
>> On 3/16/23 15:49, Andrew Burgess wrote:
>>> Luis Machado <luis.machado@arm.com> writes:
>>>
>>>> There were reports of gdb throwing internal errors when calling
>>>> inferior_thread ()/get_current_regcache () on a system with
>>>> Pointer Authentication enabled.
>>>>
>>>> In such cases, gdb produces the following backtrace:
>>>>
>>>> ../../../repos/binutils-gdb/gdb/thread.c:86: internal-error: inferior_thread: Assertion `current_thread_ != nullptr' failed.
>>>> A problem internal to GDB has been detected,
>>>> further debugging may prove unreliable.
>>>> ----- Backtrace -----
>>>> 0xaaaae04a571f gdb_internal_backtrace_1
>>>> ../../../repos/binutils-gdb/gdb/bt-utils.c:122
>>>> 0xaaaae04a57f3 _Z22gdb_internal_backtracev
>>>> ../../../repos/binutils-gdb/gdb/bt-utils.c:168
>>>> 0xaaaae0b52ccf internal_vproblem
>>>> ../../../repos/binutils-gdb/gdb/utils.c:401
>>>> 0xaaaae0b5310b _Z15internal_verrorPKciS0_St9__va_list
>>>> ../../../repos/binutils-gdb/gdb/utils.c:481
>>>> 0xaaaae0e24b8f _Z18internal_error_locPKciS0_z
>>>> ../../../repos/binutils-gdb/gdbsupport/errors.cc:58
>>>> 0xaaaae0a88983 _Z15inferior_threadv
>>>> ../../../repos/binutils-gdb/gdb/thread.c:86
>>>> 0xaaaae0956c87 _Z20get_current_regcachev
>>>> ../../../repos/binutils-gdb/gdb/regcache.c:428
>>>> 0xaaaae035223f aarch64_remove_non_address_bits
>>>> ../../../repos/binutils-gdb/gdb/aarch64-tdep.c:3572
>>>> 0xaaaae03e8abb _Z31gdbarch_remove_non_address_bitsP7gdbarchm
>>>> ../../../repos/binutils-gdb/gdb/gdbarch.c:3109
>>>> 0xaaaae0a692d7 memory_xfer_partial
>>>> ../../../repos/binutils-gdb/gdb/target.c:1620
>>>> 0xaaaae0a695e3 _Z19target_xfer_partialP10target_ops13target_objectPKcPhPKhmmPm
>>>> ../../../repos/binutils-gdb/gdb/target.c:1684
>>>> 0xaaaae0a69e9f target_read_partial
>>>> ../../../repos/binutils-gdb/gdb/target.c:1937
>>>> 0xaaaae0a69fdf _Z11target_readP10target_ops13target_objectPKcPhml
>>>> ../../../repos/binutils-gdb/gdb/target.c:1977
>>>> 0xaaaae0a69937 _Z18target_read_memorymPhl
>>>> ../../../repos/binutils-gdb/gdb/target.c:1773
>>>> 0xaaaae08be523 ps_xfer_memory
>>>> ../../../repos/binutils-gdb/gdb/proc-service.c:90
>>>> 0xaaaae08be6db ps_pdread
>>>> ../../../repos/binutils-gdb/gdb/proc-service.c:124
>>>> 0x40001ed7c3b3 _td_fetch_value
>>>> /build/glibc-RIFKjK/glibc-2.31/nptl_db/fetch-value.c:115
>>>> 0x40001ed791ef td_ta_map_lwp2thr
>>>> /build/glibc-RIFKjK/glibc-2.31/nptl_db/td_ta_map_lwp2thr.c:194
>>>> 0xaaaae07f4473 thread_from_lwp
>>>> ../../../repos/binutils-gdb/gdb/linux-thread-db.c:413
>>>> 0xaaaae07f6d6f _ZN16thread_db_target4waitE6ptid_tP17target_waitstatus10enum_flagsI16target_wait_flagE
>>>> ../../../repos/binutils-gdb/gdb/linux-thread-db.c:1420
>>>> 0xaaaae0a6b33b _Z11target_wait6ptid_tP17target_waitstatus10enum_flagsI16target_wait_flagE
>>>> ../../../repos/binutils-gdb/gdb/target.c:2586
>>>> 0xaaaae0789cf7 do_target_wait_1
>>>> ../../../repos/binutils-gdb/gdb/infrun.c:3825
>>>> 0xaaaae0789e6f operator()
>>>> ../../../repos/binutils-gdb/gdb/infrun.c:3884
>>>> 0xaaaae078a167 do_target_wait
>>>> ../../../repos/binutils-gdb/gdb/infrun.c:3903
>>>> 0xaaaae078b0af _Z20fetch_inferior_eventv
>>>> ../../../repos/binutils-gdb/gdb/infrun.c:4314
>>>> 0xaaaae076652f _Z22inferior_event_handler19inferior_event_type
>>>> ../../../repos/binutils-gdb/gdb/inf-loop.c:41
>>>> 0xaaaae07dc68b handle_target_event
>>>> ../../../repos/binutils-gdb/gdb/linux-nat.c:4206
>>>> 0xaaaae0e25fbb handle_file_event
>>>> ../../../repos/binutils-gdb/gdbsupport/event-loop.cc:573
>>>> 0xaaaae0e264f3 gdb_wait_for_event
>>>> ../../../repos/binutils-gdb/gdbsupport/event-loop.cc:694
>>>> 0xaaaae0e24f9b _Z16gdb_do_one_eventi
>>>> ../../../repos/binutils-gdb/gdbsupport/event-loop.cc:217
>>>> 0xaaaae080f033 start_event_loop
>>>> ../../../repos/binutils-gdb/gdb/main.c:411
>>>> 0xaaaae080f1b7 captured_command_loop
>>>> ../../../repos/binutils-gdb/gdb/main.c:475
>>>> 0xaaaae0810b97 captured_main
>>>> ../../../repos/binutils-gdb/gdb/main.c:1318
>>>> 0xaaaae0810c1b _Z8gdb_mainP18captured_main_args
>>>> ../../../repos/binutils-gdb/gdb/main.c:1337
>>>> 0xaaaae0338453 main
>>>> ../../../repos/binutils-gdb/gdb/gdb.c:32
>>>> ---------------------
>>>> ../../../repos/binutils-gdb/gdb/thread.c:86: internal-error: inferior_thread: Assertion `current_thread_ != nullptr' failed.
>>>> A problem internal to GDB has been detected,
>>>> further debugging may prove unreliable.
>>>> Quit this debugging session? (y or n)
>>>>
>>>> This issue started after commit d88cb738e6a7a7179dfaff8af78d69250c852af1, which
>>>> enabled more broad use of pointer authentication masks to remove non-address
>>>> bits of pointers.
>>>>
>>>> The above crash happens because gdb is in the middle of handling an event,
>>>> and do_target_wait_1 calls switch_to_inferior_no_thread, nullifying the
>>>> current thread. This means a call to inferior_thread () will assert, and
>>>> attempting to call get_current_regcache () will also call inferior_thread (),
>>>> resulting in an assertion as well.
>>>>
>>>> There is another crash scenario after we kill a previously active inferior, in
>>>> which case the gdbarch will still say we support pointer authentication but we
>>>> will also have no current thread (inferior_thread () will assert etc).
>>>>
>>>> I thought about this for a bit, and I'm not sure what is the best way
>>>> to address this other than actively checking for a valid current thread
>>>> and valid current register cache.
>>>>
>>>> If the target has support for Pointer Authentication, gdb needs to use
>>>> a couple (or 4, for bare-metal) mask registers to mask off some bits of
>>>> pointers, and for that it needs to access the registers.
>>>>
>>>> At some points, like the one from the backtrace above, there is no active
>>>> thread/current regcache because gdb is in the middle of doing event handling
>>>> and switching between threads.
>>>>
>>>> The following patch improves this situation by checking for an active thread
>>>> and register cache. If those don't exist, gdb falls back to using a default
>>>> mask which should be enough for what it is trying to do at the moment.
>>>>
>>>> I discussed with Pedro the possibility of caching the mask register values
>>>> (which are per-process and can change mid-execution), but there isn't a good
>>>> spot to cache those values. Besides, the mask registers can change constantly
>>>> for bare-metal debugging when switching between exception levels.
>>>>
>>>> In some cases, it is just not possible to get access to these mask registers,
>>>> like the case where threads are running. In those cases, using a default mask
>>>> to remove the non-address bits should be enough.
>>>>
>>>> In addition to the check for a valid current register cache, I also introduced
>>>> a try/catch guard against errors reading registers, in case the current thread
>>>> is running, also falling back to using a default mask to remove non-address
>>>> bits.
>>>>
>>>> This can happen when we let threads run in the background and then we attempt
>>>> to access a memory address (now that gdb is capable of reading memory even
>>>> with threads running). Thus gdb will attempt to remove non-address bits
>>>> of that memory access, will attempt to access registers, running into errors.
>>>>
>>>> Regression-tested on aarch64-linux Ubuntu 20.04.
>>>>
>>>> Thoughts?
>>>> ---
>>>> gdb/aarch64-tdep.c | 38 +++++++++++++++++++++++++++++---------
>>>> gdb/gdbthread.h | 8 ++++++++
>>>> gdb/regcache.c | 10 ++++++++++
>>>> gdb/regcache.h | 8 ++++++++
>>>> gdb/thread.c | 8 ++++++++
>>>
>>> Would it be possible to write a test for this issue? Or maybe some of
>>> the existing tests fail due to this issue? If there are some tests that
>>> already expose this issue then it would be great to mention them in the
>>> commit message.
>>>
>>
>> Existing tests fail due to this. Two example are gdb.base/break.exp and gdb.base/access-mem-running.exp are
>> two examples. I'll add those to the commit message for context.
>>
>> The catch here is that they will only fail on a system that supports pointer authentication natively, and
>> there are only a few of those at the moment (Graviton 3, M2 and system
>> QEMU are 3 examples I know of).
>
> I don't think that's a problem, you should just list a few tests that
> exhibit failures and under what circumstances you expect to see the
> failures, e.g. on a system with pointer authentication.
>
Done now. It will be in v2.
>>
>>>> 5 files changed, 63 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/gdb/aarch64-tdep.c b/gdb/aarch64-tdep.c
>>>> index 5b1b9921f87..8368592a7db 100644
>>>> --- a/gdb/aarch64-tdep.c
>>>> +++ b/gdb/aarch64-tdep.c
>>>> @@ -3562,13 +3562,17 @@ aarch64_remove_non_address_bits (struct gdbarch *gdbarch, CORE_ADDR pointer)
>>>> select bit (55). */
>>>> CORE_ADDR mask = AARCH64_TOP_BITS_MASK;
>>>>
>>>> - if (tdep->has_pauth ())
>>>> + /* Make sure we have a valid active current register cache to fetch pointer
>>>> + authentication masks from. In some situations, there may be no active
>>>> + threads to fetch the data from (no inferiors, gdb in the middle of
>>>> + handling events etc). If we don't have a valid register cache, fallback
>>>> + to using a default mask to remove non-address bits. */
>>>> + if (tdep->has_pauth () && current_regcache_exists ())
>>>
>>> Maybe I'm completely off the mark here, but does target_has_registers
>>> not help at all here?
>>
>> It does, and that was the first thing I considered for guarding this block against a situation where we
>> have no current thread.
>>
>> But...
>>
>>>
>>> For a process_stratum_target, this should end up in this function:
>>>
>>> bool
>>> process_stratum_target::has_registers ()
>>> {
>>> /* Can't read registers from no inferior. */
>>> return inferior_ptid != null_ptid;
>>> }
>>>
>>> And given that switch_to_no_thread () includes the line:
>>>
>>> inferior_ptid = null_ptid;
>>>
>>
>> ... despite the call to switch_to_no_thread in switch_to_inferior_no_thread from do_target_wait_1
>> in the backtrace above, the call to ps_xfer_memory sets inferior_ptid momentarily before reading
>> memory:
>>
>>
>> static ps_err_e
>> ps_xfer_memory (const struct ps_prochandle *ph, psaddr_t addr,
>> gdb_byte *buf, size_t len, int write)
>> {
>> scoped_restore_current_inferior restore_inferior;
>> set_current_inferior (ph->thread->inf);
>>
>> scoped_restore_current_program_space restore_current_progspace;
>> set_current_program_space (ph->thread->inf->pspace);
>>
>> scoped_restore save_inferior_ptid = make_scoped_restore (&inferior_ptid);
>> inferior_ptid = ph->thread->ptid;
>>
>> CORE_ADDR core_addr = ps_addr_to_core_addr (addr);
>>
>> int ret;
>> if (write)
>> ret = target_write_memory (core_addr, buf, len);
>> else
>> ret = target_read_memory (core_addr, buf, len);
>> return (ret == 0 ? PS_OK : PS_ERR);
>> }
>>
>>
>> Maybe this shouldn't happen, or maybe it is just an unfortunate state to be in. But this
>> prevents the use of target_has_registers to guard against the lack of registers, since,
>> although current_thread_ is still nullptr, inferior_ptid is valid and is not null_ptid.
>>
>> So doing a direct check of current_thread_ through a helper function
>> seemed safer.
>
> Personally, I find this sort of information really valuable to include
> in the commit message.
>
> If someone is digging through this code years from now, all they'll have
> is your commit message. They, like me, might think about
> target_has_registers, and then they have to change GDB, test everything,
> and rediscover why target_has_registers isn't going to work.
>
You're right. I'll add that to v2.
>>
>> And just to clarify, this is just one of the issues where we attempt to fetch a current
>> register cache with a null current_thread_, leading to an assertion.
>>
>> The other issue, which is solved with a try/catch block, yields an error message about not
>> being able to access registers (because the thread we want to fetch registers from is still
>> running).
>>
>> Maybe in the future we should prevent calls to register fetch/store functions if the thread we
>> want to fetch data from is still running. Right now this isn't a problem besides the AArch64
>> target though.
>
> I think thread_info::executing() should tell you if the thread is
> running... At least, I think that's the intention. I don't think that
> field is always correct, so keeping the try/catch is probably a good
> idea, at least for now.
>
> I don't object to this patch going in as is then (maybe with an extended
> commit message), but I guess you should figure out with Simon if his
> idea is better or not...
Thanks for the feedback. I'll wait for Simon's take on it.
>
> Thanks for all the details,
> Andrew
>
>
>>
>>> My hope would be that when we have no thread selected,
>>> target_has_registers will return false.
>>>
>>> The other thing I worried about before suggesting the above is this
>>> depends on the current_inferior in order to identify a target stack.
>>> But reading registers already makes that assumption, right.. reading a
>>> register might require us to actually go get the registers, so we are
>>> already assuming that the current_inferior() is sane at this point.
>>>
>>> Thanks,
>>> Andrew
>>>
>>>
>>>
>>>
>>>> {
>>>> /* Fetch the PAC masks. These masks are per-process, so we can just
>>>> - fetch data from whatever thread we have at the moment.
>>>> + fetch data from whatever thread we have at the moment, hence why we
>>>> + use the current register cache. */
>>>>
>>>> - Also, we have both a code mask and a data mask. For now they are the
>>>> - same, but this may change in the future. */
>>>> struct regcache *regs = get_current_regcache ();
>>>> CORE_ADDR cmask, dmask;
>>>> int dmask_regnum = AARCH64_PAUTH_DMASK_REGNUM (tdep->pauth_reg_base);
>>>> @@ -3583,13 +3587,29 @@ aarch64_remove_non_address_bits (struct gdbarch *gdbarch, CORE_ADDR pointer)
>>>> cmask_regnum = AARCH64_PAUTH_CMASK_HIGH_REGNUM (tdep->pauth_reg_base);
>>>> }
>>>>
>>>> - if (regs->cooked_read (dmask_regnum, &dmask) != REG_VALID)
>>>> - dmask = mask;
>>>> + /* GDB supports reading memory from threads that are running, but in
>>>> + those cases GDB cannot access registers. If reading the pointer
>>>> + authentication registers fails, fallback to using a default mask.
>>>> +
>>>> + We could potentially get it wrong if the masks have been updated
>>>> + while the thread is running, but eventually we will see the thread
>>>> + stop and will be able to fetch the correct data. */
>>>> + try
>>>> + {
>>>> + /* We have both a code mask and a data mask. For now they are
>>>> + the same, but this may change in the future. */
>>>> + if (regs->cooked_read (dmask_regnum, &dmask) != REG_VALID)
>>>> + dmask = mask;
>>>>
>>>> - if (regs->cooked_read (cmask_regnum, &cmask) != REG_VALID)
>>>> - cmask = mask;
>>>> + if (regs->cooked_read (cmask_regnum, &cmask) != REG_VALID)
>>>> + cmask = mask;
>>>>
>>>> - mask |= aarch64_mask_from_pac_registers (cmask, dmask);
>>>> + mask |= aarch64_mask_from_pac_registers (cmask, dmask);
>>>> + }
>>>> + catch (const gdb_exception_error &e)
>>>> + {
>>>> + /* We failed to fetch the contents of the masks from registers. */
>>>> + }
>>>> }
>>>>
>>>> return aarch64_remove_top_bits (pointer, mask);
>>>> diff --git a/gdb/gdbthread.h b/gdb/gdbthread.h
>>>> index c0f27a8a66e..ebb8f140770 100644
>>>> --- a/gdb/gdbthread.h
>>>> +++ b/gdb/gdbthread.h
>>>> @@ -875,6 +875,14 @@ class scoped_restore_current_thread
>>>> enum language m_lang;
>>>> };
>>>>
>>>> +/* Return TRUE if an inferior thread exists and is currently selected,
>>>> + meaning it is safe to call inferior_thread () without risking an assertion.
>>>> +
>>>> + Return FALSE otherwise. If so, inferior_thread () will assert if
>>>> + called. */
>>>> +
>>>> +extern bool inferior_thread_exists (void);
>>>> +
>>>> /* Returns a pointer into the thread_info corresponding to
>>>> INFERIOR_PTID. INFERIOR_PTID *must* be in the thread list. */
>>>> extern struct thread_info* inferior_thread (void);
>>>> diff --git a/gdb/regcache.c b/gdb/regcache.c
>>>> index af76fab1a34..4685b01e9ab 100644
>>>> --- a/gdb/regcache.c
>>>> +++ b/gdb/regcache.c
>>>> @@ -422,6 +422,16 @@ get_thread_regcache (thread_info *thread)
>>>> thread->ptid);
>>>> }
>>>>
>>>> +/* See regcache.h. */
>>>> +
>>>> +bool
>>>> +current_regcache_exists (void)
>>>> +{
>>>> + /* If we have an inferior thread selected, we should have a register
>>>> + cache as well. */
>>>> + return inferior_thread_exists ();
>>>> +}
>>>> +
>>>> struct regcache *
>>>> get_current_regcache (void)
>>>> {
>>>> diff --git a/gdb/regcache.h b/gdb/regcache.h
>>>> index b9ffab9950d..adff7e44cb9 100644
>>>> --- a/gdb/regcache.h
>>>> +++ b/gdb/regcache.h
>>>> @@ -30,6 +30,14 @@ struct address_space;
>>>> class thread_info;
>>>> struct process_stratum_target;
>>>>
>>>> +/* Return TRUE if we can access the register cache from the
>>>> + currently-selected thread. If so, we can call get_current_regcache ()
>>>> + safely without risking an assertion due to the lack of an inferior thread.
>>>> +
>>>> + Return FALSE otherwise. In this case, attempting to call
>>>> + get_current_regcache () will run into an assertion. */
>>>> +
>>>> +extern bool current_regcache_exists (void);
>>>> extern struct regcache *get_current_regcache (void);
>>>> extern struct regcache *get_thread_regcache (process_stratum_target *target,
>>>> ptid_t ptid);
>>>> diff --git a/gdb/thread.c b/gdb/thread.c
>>>> index e4d98ce966a..545bbf2a411 100644
>>>> --- a/gdb/thread.c
>>>> +++ b/gdb/thread.c
>>>> @@ -80,6 +80,14 @@ is_current_thread (const thread_info *thr)
>>>> return thr == current_thread_;
>>>> }
>>>>
>>>> +/* See gdbthread.h. */
>>>> +
>>>> +bool
>>>> +inferior_thread_exists ()
>>>> +{
>>>> + return current_thread_ != nullptr;
>>>> +}
>>>> +
>>>> struct thread_info*
>>>> inferior_thread (void)
>>>> {
>>>> --
>>>> 2.25.1
>>>
>
More information about the Gdb-patches
mailing list