[RFC PATCH 1/1] sframe: Represent FP without RA on stack
Indu Bhagat
indu.bhagat@oracle.com
Mon Apr 22 23:58:00 GMT 2024
On 4/22/24 08:58, Jens Remus wrote:
> If an architecture uses both SFrame RA and FP tracking SFrame assumes
> that the RA offset is the 2nd offset and the FP offset is the 3rd offset
> following the SFrame FRE. An architecture does not need to store both on
> the stack. SFrame cannot represent a FP without RA on stack, since it
> cannot distinguish whether the 2nd offset is the RA or FP offset.
>
> Use an invalid SFrame FRE RA offset value of zero as dummy padding to
> represent the FP being saved on the stack when the RA is not saved on
> the stack.
>
> include/
> * sframe.h (SFRAME_FRE_RA_OFFSET_INVALID): New macro defining
> the invalid RA offset value used to represent a dummy padding
> offset.
>
> gas/
> * gen-sframe.c (get_fre_num_offsets): Accommodate for dummy
> padding RA offset if FP without RA on stack.
> (sframe_get_fre_offset_size): Likewise.
> (output_sframe_row_entry): Write a dummy padding RA offset
> if FP without RA needs to be represented.
>
> libsframe/
> * sframe-dump.c (dump_sframe_func_with_fres): Treat invalid RA
> offsets as if they were undefined. Display them as "u*" to
> distinguish them.
>
> Signed-off-by: Jens Remus <jremus@linux.ibm.com>
> ---
>
> Notes (jremus):
> This patch eliminates 497 occurrences of the warning "skipping SFrame
> FDE due to FP without RA on stack" for a build of glibc on s390x. For
> libc.so this increases the number of FDEs by 166 and the number of
> FREs by 861, while adding 337 dummy padding RA offsets. With a total
> of 28157 offsets the dummy padding offsets account for ~1.20 % of the
> offsets.
While this increase seems small, it does look wasteful.
An orthogonal question below...
>
> SFrame statistics without patch:
>
> VALUE TOTAL MIN MAX AVG
> FDEs: 3478 - - -
> FREs/FDE: 14441 1 15 4
> Offsets/FDE: 28157 1 31 8
> 8-bit: 0 0 0 0
> 16-bit: 28157 1 31 8
> 32-bit: 0 0 0 0
> Offsets/FRE: 28157 1 3 1
> 8-bit: - 0 0 0
> 16-bit: - 1 3 1
> 32-bit: - 0 0 0
>
> SFrame statistics with patch applied:
>
> VALUE TOTAL MIN MAX AVG
> FDEs: 3644 - - -
> FREs/FDE: 15302 1 20 4
> Offsets/FDE: 29944 1 38 8
> 8-bit: 0 0 0 0
> 16-bit: 29944 1 38 8
> 32-bit: 0 0 0 0
> Offsets/FRE: 29944 1 3 1
> 8-bit: - 0 0 0
> 16-bit: - 1 3 1
> 32-bit: - 0 0 0
> O_Padd/FDE: 337 - - 0
> 8-bit: 0
> 16-bit: 337
> 32-bit: 0
>
> Note that on s390x the offsets are at minimum 16-bits in size, due to
> the mandatory CFA offset being at least 160.
>
IIUC, all stack layouts supported in the ABI use the offset 160. Is that
right ? I am wondering if adjusting the stored offsets in the SFrame
section (by decrementing 160 from it) will work ?
If yes, we could encode this constant in SFrame aux hdr bytes for s390x.
> gas/gen-sframe.c | 50 +++++++++++++++++++----------------------
> include/sframe.h | 9 ++++++--
> libsframe/sframe-dump.c | 4 ++++
> 3 files changed, 34 insertions(+), 29 deletions(-)
>
> diff --git a/gas/gen-sframe.c b/gas/gen-sframe.c
> index 4cc86eb6c815..990b08d87953 100644
> --- a/gas/gen-sframe.c
> +++ b/gas/gen-sframe.c
> @@ -347,7 +347,9 @@ get_fre_num_offsets (struct sframe_row_entry *sframe_fre)
> fre_num_offsets++;
> #ifdef SFRAME_FRE_RA_TRACKING
> if (sframe_ra_tracking_p ()
> - && sframe_fre->ra_loc == SFRAME_FRE_ELEM_LOC_STACK)
> + && (sframe_fre->ra_loc == SFRAME_FRE_ELEM_LOC_STACK
> + /* Accommodate for padding RA offset if FP without RA on stack. */
> + || sframe_fre->bp_loc == SFRAME_FRE_ELEM_LOC_STACK))
> fre_num_offsets++;
> #endif
> return fre_num_offsets;
> @@ -371,9 +373,14 @@ sframe_get_fre_offset_size (struct sframe_row_entry *sframe_fre)
> if (sframe_fre->bp_loc == SFRAME_FRE_ELEM_LOC_STACK)
> bp_offset_size = get_offset_size_in_bytes (sframe_fre->bp_offset);
> #ifdef SFRAME_FRE_RA_TRACKING
> - if (sframe_ra_tracking_p ()
> - && sframe_fre->ra_loc == SFRAME_FRE_ELEM_LOC_STACK)
> - ra_offset_size = get_offset_size_in_bytes (sframe_fre->ra_offset);
> + if (sframe_ra_tracking_p ())
> + {
> + if (sframe_fre->ra_loc == SFRAME_FRE_ELEM_LOC_STACK)
> + ra_offset_size = get_offset_size_in_bytes (sframe_fre->ra_offset);
> + /* Accommodate for padding RA offset if FP without RA on stack. */
> + else if (sframe_fre->bp_loc == SFRAME_FRE_ELEM_LOC_STACK)
> + ra_offset_size = get_offset_size_in_bytes (SFRAME_FRE_RA_OFFSET_INVALID);
> + }
> #endif
>
> /* Get the maximum size needed to represent the offsets. */
> @@ -537,11 +544,19 @@ output_sframe_row_entry (symbolS *fde_start_addr,
> fre_write_offsets++;
>
> #ifdef SFRAME_FRE_RA_TRACKING
> - if (sframe_ra_tracking_p ()
> - && sframe_fre->ra_loc == SFRAME_FRE_ELEM_LOC_STACK)
> + if (sframe_ra_tracking_p ())
> {
> - fre_offset_func_map[idx].out_func (sframe_fre->ra_offset);
> - fre_write_offsets++;
> + if (sframe_fre->ra_loc == SFRAME_FRE_ELEM_LOC_STACK)
> + {
> + fre_offset_func_map[idx].out_func (sframe_fre->ra_offset);
> + fre_write_offsets++;
> + }
> + /* Write padding RA offset if FP without RA on stack. */
> + else if (sframe_fre->bp_loc == SFRAME_FRE_ELEM_LOC_STACK)
> + {
> + fre_offset_func_map[idx].out_func (SFRAME_FRE_RA_OFFSET_INVALID);
> + fre_write_offsets++;
> + }
> }
> #endif
> if (sframe_fre->bp_loc == SFRAME_FRE_ELEM_LOC_STACK)
> @@ -1497,25 +1512,6 @@ sframe_do_fde (struct sframe_xlate_ctx *xlate_ctx,
> = get_dw_fde_end_addrS (xlate_ctx->dw_fde);
> }
>
> -#ifdef SFRAME_FRE_RA_TRACKING
> - if (sframe_ra_tracking_p ())
> - {
> - struct sframe_row_entry *fre;
> -
> - /* Iterate over the scratchpad FREs and validate them. */
> - for (fre = xlate_ctx->first_fre; fre; fre = fre->next)
> - {
> - /* SFrame format cannot represent FP on stack without RA on stack. */
> - if (fre->ra_loc != SFRAME_FRE_ELEM_LOC_STACK
> - && fre->bp_loc == SFRAME_FRE_ELEM_LOC_STACK)
> - {
> - as_warn (_("skipping SFrame FDE due to FP without RA on stack"));
> - return SFRAME_XLATE_ERR_NOTREPRESENTED;
> - }
> - }
> - }
> -#endif /* SFRAME_FRE_RA_TRACKING */
> -
> return SFRAME_XLATE_OK;
> }
>
> diff --git a/include/sframe.h b/include/sframe.h
> index 90bc92a32f84..d1a26875b3e2 100644
> --- a/include/sframe.h
> +++ b/include/sframe.h
> @@ -237,6 +237,9 @@ typedef struct sframe_func_desc_entry
> may or may not be tracked. */
> #define SFRAME_FRE_FP_OFFSET_IDX 2
>
> +/* Invalid RA offset. Used as padding to represent FP without RA on stack. */
> +#define SFRAME_FRE_RA_OFFSET_INVALID 0
> +
> typedef struct sframe_fre_info
> {
> /* Information about
> @@ -288,9 +291,11 @@ typedef struct sframe_fre_info
> offset1 (interpreted as CFA = BASE_REG + offset1)
>
> if RA is being tracked
> - offset2 (interpreted as RA = CFA + offset2)
> + offset2 (interpreted as RA = CFA + offset2; an offset value of
> + SFRAME_FRE_RA_OFFSET_INVALID indicates a dummy padding RA offset
> + to represent FP without RA saved on stack)
> if FP is being tracked
> - offset3 (intrepreted as FP = CFA + offset2)
> + offset3 (intrepreted as FP = CFA + offset3)
I too noticed this typo recently and have a patch fixing this.
> fi
> else
> if FP is being tracked
> diff --git a/libsframe/sframe-dump.c b/libsframe/sframe-dump.c
> index 40ea531314ba..3ea4bc327efd 100644
> --- a/libsframe/sframe-dump.c
> +++ b/libsframe/sframe-dump.c
> @@ -199,6 +199,10 @@ dump_sframe_func_with_fres (sframe_decoder_ctx *sfd_ctx,
> if (sframe_decoder_get_fixed_ra_offset (sfd_ctx)
> != SFRAME_CFA_FIXED_RA_INVALID)
> strcpy (temp, "f");
> + /* If an ABI does track RA offset, e.g. AArch64 and S390, it can be a
> + dummy as padding to represent FP without RA being saved on stack. */
> + else if (err[2] == 0 && ra_offset == SFRAME_FRE_RA_OFFSET_INVALID)
> + sprintf (temp, "u*");
> else if (err[2] == 0)
> {
> if (is_sframe_abi_arch_s390 (sfd_ctx) && (ra_offset & 1))
More information about the Binutils
mailing list