This is the mail archive of the
libffi-discuss@sourceware.org
mailing list for the libffi project.
[PATCH 09/16] aarch64: Merge prep_args with ffi_call
- From: Richard Henderson <rth at twiddle dot net>
- To: libffi-discuss at sourceware dot org
- Cc: Richard Henderson <rth at redhat dot com>
- Date: Tue, 28 Oct 2014 11:53:06 -0700
- Subject: [PATCH 09/16] aarch64: Merge prep_args with ffi_call
- Authentication-results: sourceware.org; auth=none
- References: <1414522393-19169-1-git-send-email-rth at twiddle dot net>
From: Richard Henderson <rth@redhat.com>
Use the trick to allocate the stack frame for ffi_call_SYSV
within ffi_call itself.
---
src/aarch64/ffi.c | 193 ++++++++++++++++++++++++-----------------------------
src/aarch64/sysv.S | 192 ++++++++++++++++------------------------------------
2 files changed, 144 insertions(+), 241 deletions(-)
diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
index d19384b..a067303 100644
--- a/src/aarch64/ffi.c
+++ b/src/aarch64/ffi.c
@@ -72,14 +72,6 @@ ffi_clear_cache (void *start, void *end)
}
extern void
-ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
- extended_cif *),
- struct call_context *context,
- extended_cif *,
- size_t,
- void (*fn)(void));
-
-extern void
ffi_closure_SYSV (ffi_closure *);
/* Test for an FFI floating point representation. */
@@ -311,12 +303,11 @@ struct arg_state
/* Initialize a procedure call argument marshalling state. */
static void
-arg_init (struct arg_state *state, size_t call_frame_size)
+arg_init (struct arg_state *state)
{
state->ngrn = 0;
state->nsrn = 0;
state->nsaa = 0;
-
#if defined (__APPLE__)
state->allocating_variadic = 0;
#endif
@@ -529,27 +520,88 @@ allocate_int_to_reg_or_stack (struct call_context *context,
return allocate_to_stack (state, stack, size, size);
}
-/* Marshall the arguments from FFI representation to procedure call
- context and stack. */
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+ /* Round the stack up to a multiple of the stack alignment requirement. */
+ cif->bytes = ALIGN(cif->bytes, 16);
-static unsigned
-aarch64_prep_args (struct call_context *context, unsigned char *stack,
- extended_cif *ecif)
+ /* Initialize our flags. We are interested if this CIF will touch a
+ vector register, if so we will enable context save and load to
+ those registers, otherwise not. This is intended to be friendly
+ to lazy float context switching in the kernel. */
+ cif->aarch64_flags = 0;
+
+ if (is_v_register_candidate (cif->rtype))
+ {
+ cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
+ }
+ else
+ {
+ int i;
+ for (i = 0; i < cif->nargs; i++)
+ if (is_v_register_candidate (cif->arg_types[i]))
+ {
+ cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
+ break;
+ }
+ }
+
+#if defined (__APPLE__)
+ cif->aarch64_nfixedargs = 0;
+#endif
+
+ return FFI_OK;
+}
+
+#if defined (__APPLE__)
+
+/* Perform Apple-specific cif processing for variadic calls */
+ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
+ unsigned int nfixedargs,
+ unsigned int ntotalargs)
{
- ffi_cif *cif = ecif->cif;
- void **avalue = ecif->avalue;
- int i, nargs = cif->nargs;
+ ffi_status status;
+
+ status = ffi_prep_cif_machdep (cif);
+
+ cif->aarch64_nfixedargs = nfixedargs;
+
+ return status;
+}
+
+#endif
+
+extern void ffi_call_SYSV (void *stack, void *frame,
+ void (*fn)(void), int flags) FFI_HIDDEN;
+
+/* Call a function with the provided arguments and capture the return
+ value. */
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ struct call_context *context;
+ void *stack, *frame;
struct arg_state state;
+ size_t stack_bytes;
+ int i, nargs = cif->nargs;
+ int h, t;
+ ffi_type *rtype;
- arg_init (&state, cif->bytes);
+ /* Allocate consecutive stack for everything we'll need. */
+ stack_bytes = cif->bytes;
+ stack = alloca (stack_bytes + 32 + sizeof(struct call_context));
+ frame = stack + stack_bytes;
+ context = frame + 32;
+ arg_init (&state);
for (i = 0; i < nargs; i++)
{
ffi_type *ty = cif->arg_types[i];
size_t s = ty->size;
- int h, t = ty->type;
void *a = avalue[i];
+ t = ty->type;
switch (t)
{
case FFI_TYPE_VOID:
@@ -665,83 +717,12 @@ aarch64_prep_args (struct call_context *context, unsigned char *stack,
#endif
}
- return cif->aarch64_flags;
-}
-
-ffi_status
-ffi_prep_cif_machdep (ffi_cif *cif)
-{
- /* Round the stack up to a multiple of the stack alignment requirement. */
- cif->bytes = ALIGN(cif->bytes, 16);
-
- /* Initialize our flags. We are interested if this CIF will touch a
- vector register, if so we will enable context save and load to
- those registers, otherwise not. This is intended to be friendly
- to lazy float context switching in the kernel. */
- cif->aarch64_flags = 0;
-
- if (is_v_register_candidate (cif->rtype))
- {
- cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
- }
- else
- {
- int i;
- for (i = 0; i < cif->nargs; i++)
- if (is_v_register_candidate (cif->arg_types[i]))
- {
- cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
- break;
- }
- }
-
-#if defined (__APPLE__)
- cif->aarch64_nfixedargs = 0;
-#endif
-
- return FFI_OK;
-}
-
-#if defined (__APPLE__)
-
-/* Perform Apple-specific cif processing for variadic calls */
-ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
- unsigned int nfixedargs,
- unsigned int ntotalargs)
-{
- ffi_status status;
-
- status = ffi_prep_cif_machdep (cif);
-
- cif->aarch64_nfixedargs = nfixedargs;
-
- return status;
-}
-
-#endif
-
-/* Call a function with the provided arguments and capture the return
- value. */
-void
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
-{
- extended_cif ecif;
- struct call_context context;
- size_t stack_bytes;
- int h, t;
-
- ecif.cif = cif;
- ecif.avalue = avalue;
- ecif.rvalue = rvalue;
-
- stack_bytes = cif->bytes;
-
- memset (&context, 0, sizeof (context));
- if (is_register_candidate (cif->rtype))
+ rtype = cif->rtype;
+ if (is_register_candidate (rtype))
{
- ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
+ ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
- t = cif->rtype->type;
+ t = rtype->type;
switch (t)
{
case FFI_TYPE_INT:
@@ -754,33 +735,35 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
case FFI_TYPE_POINTER:
case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
- *(ffi_arg *)rvalue = extend_integer_type (&context.x[0], t);
+ *(ffi_arg *)rvalue = extend_integer_type (&context->x[0], t);
break;
case FFI_TYPE_FLOAT:
case FFI_TYPE_DOUBLE:
case FFI_TYPE_LONGDOUBLE:
- compress_hfa_type (rvalue, &context.v[0], 0x100 + t);
+ compress_hfa_type (rvalue, &context->v[0], 0x100 + t);
break;
case FFI_TYPE_STRUCT:
h = is_hfa (cif->rtype);
if (h)
- compress_hfa_type (rvalue, &context.v[0], h);
- else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
- memcpy (rvalue, &context.x[0], cif->rtype->size);
+ compress_hfa_type (rvalue, &context->v[0], h);
else
- abort();
+ {
+ FFI_ASSERT (rtype->size <= 16);
+ memcpy (rvalue, &context->x[0], rtype->size);
+ }
break;
default:
- abort();
+ FFI_ASSERT (0);
+ break;
}
}
else
{
- context.x8 = (uintptr_t)rvalue;
- ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
+ context->x8 = (uintptr_t)rvalue;
+ ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
}
}
@@ -851,7 +834,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
struct arg_state state;
ffi_type *rtype;
- arg_init (&state, ALIGN(cif->bytes, 16));
+ arg_init (&state);
for (i = 0; i < nargs; i++)
{
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
index fa7ff5b..a5f636a 100644
--- a/src/aarch64/sysv.S
+++ b/src/aarch64/sysv.S
@@ -22,6 +22,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
+#include <ffi_cfi.h>
#include "internal.h"
#ifdef HAVE_MACHINE_ASM_H
@@ -38,158 +39,77 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#endif
#endif
-#define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
-#define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
-#define cfi_restore(reg) .cfi_restore reg
-#define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg
+ .text
+ .align 2
- .text
- .align 2
-
- .globl CNAME(ffi_call_SYSV)
+ .globl CNAME(ffi_call_SYSV)
#ifdef __ELF__
- .type CNAME(ffi_call_SYSV), #function
+ .type CNAME(ffi_call_SYSV), #function
+ .hidden CNAME(ffi_call_SYSV)
#endif
-/* ffi_call_SYSV()
-
- Create a stack frame, setup an argument context, call the callee
- and extract the result.
-
- The maximum required argument stack size is provided,
- ffi_call_SYSV() allocates that stack space then calls the
- prepare_fn to populate register context and stack. The
- argument passing registers are loaded from the register
- context and the callee called, on return the register passing
- register are saved back to the context. Our caller will
- extract the return value from the final state of the saved
- register context.
-
- Prototype:
-
- extern unsigned
- ffi_call_SYSV (void (*)(struct call_context *context, unsigned char *,
- extended_cif *),
- struct call_context *context,
- extended_cif *,
- size_t required_stack_size,
- void (*fn)(void));
+/* ffi_call_SYSV
+ extern void ffi_call_SYSV (void *stack, void *frame,
+ void (*fn)(void), int flags);
Therefore on entry we have:
- x0 prepare_fn
- x1 &context
- x2 &ecif
- x3 bytes
- x4 fn
-
- This function uses the following stack frame layout:
+ x0 stack
+ x1 frame
+ x2 fn
+ x3 flags
+*/
- ==
- saved x30(lr)
- x29(fp)-> saved x29(fp)
- saved x24
- saved x23
- saved x22
- sp' -> saved x21
- ...
- sp -> (constructed callee stack arguments)
- ==
-
- Voila! */
-
-#define ffi_call_SYSV_FS (8 * 4)
-
- .cfi_startproc
+ cfi_startproc
CNAME(ffi_call_SYSV):
- stp x29, x30, [sp, #-16]!
- cfi_adjust_cfa_offset (16)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
-
- mov x29, sp
- cfi_def_cfa_register (x29)
- sub sp, sp, #ffi_call_SYSV_FS
-
- stp x21, x22, [sp, #0]
- cfi_rel_offset (x21, 0 - ffi_call_SYSV_FS)
- cfi_rel_offset (x22, 8 - ffi_call_SYSV_FS)
-
- stp x23, x24, [sp, #16]
- cfi_rel_offset (x23, 16 - ffi_call_SYSV_FS)
- cfi_rel_offset (x24, 24 - ffi_call_SYSV_FS)
-
- mov x21, x1
- mov x22, x2
- mov x24, x4
-
- /* Allocate the stack space for the actual arguments, many
- arguments will be passed in registers, but we assume
- worst case and allocate sufficient stack for ALL of
- the arguments. */
- sub sp, sp, x3
-
- /* unsigned (*prepare_fn) (struct call_context *context,
- unsigned char *stack, extended_cif *ecif);
- */
- mov x23, x0
- mov x0, x1
- mov x1, sp
- /* x2 already in place */
- blr x23
-
- /* Preserve the flags returned. */
- mov x23, x0
-
- /* Figure out if we should touch the vector registers. */
- tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f
-
- /* Load the vector argument passing registers. */
- ldp q0, q1, [x21, #0]
- ldp q2, q3, [x21, #32]
- ldp q4, q5, [x21, #64]
- ldp q6, q7, [x21, #96]
+ /* Use a stack frame allocated by our caller. */
+ cfi_def_cfa(x1, 32);
+ stp x29, x30, [x1]
+ mov x29, x1
+ mov sp, x0
+ cfi_def_cfa_register(x29)
+ cfi_rel_offset (x29, 0)
+ cfi_rel_offset (x30, 8)
+
+ str w3, [x29, #16] /* save flags */
+ mov x9, x2 /* save fn */
+
+ /* Load the vector argument passing registers, if necessary. */
+ tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
+ ldp q0, q1, [x29, #32 + 0]
+ ldp q2, q3, [x29, #32 + 32]
+ ldp q4, q5, [x29, #32 + 64]
+ ldp q6, q7, [x29, #32 + 96]
1:
- /* Load the core argument passing registers, including
+ /* Load the core argument passing registers, including
the structure return pointer. */
- ldp x0, x1, [x21, #16*N_V_ARG_REG + 0]
- ldp x2, x3, [x21, #16*N_V_ARG_REG + 16]
- ldp x4, x5, [x21, #16*N_V_ARG_REG + 32]
- ldp x6, x7, [x21, #16*N_V_ARG_REG + 48]
- ldr x8, [x21, #16*N_V_ARG_REG + 64]
-
- blr x24
+ ldp x0, x1, [x29, #32 + 16*N_V_ARG_REG + 0]
+ ldp x2, x3, [x29, #32 + 16*N_V_ARG_REG + 16]
+ ldp x4, x5, [x29, #32 + 16*N_V_ARG_REG + 32]
+ ldp x6, x7, [x29, #32 + 16*N_V_ARG_REG + 48]
+ ldr x8, [x29, #32 + 16*N_V_ARG_REG + 64]
- /* Save the core return registers. */
- stp x0, x1, [x21, #16*N_V_ARG_REG]
+ blr x9 /* call fn */
- /* Figure out if we should touch the vector registers. */
- tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f
+ ldr w3, [x29, #16] /* reload flags */
- /* Save the vector return registers. */
- stp q0, q1, [x21, #0]
- stp q2, q3, [x21, #32]
-1:
- /* All done, unwind our stack frame. */
- ldp x21, x22, [x29, # - ffi_call_SYSV_FS]
- cfi_restore (x21)
- cfi_restore (x22)
-
- ldp x23, x24, [x29, # - ffi_call_SYSV_FS + 16]
- cfi_restore (x23)
- cfi_restore (x24)
-
- mov sp, x29
+ /* Partially deconstruct the stack frame. */
+ mov sp, x29
cfi_def_cfa_register (sp)
+ ldp x29, x30, [x29]
- ldp x29, x30, [sp], #16
- cfi_adjust_cfa_offset (-16)
- cfi_restore (x29)
- cfi_restore (x30)
+ /* Save the core return registers. */
+ stp x0, x1, [sp, #32 + 16*N_V_ARG_REG]
- ret
+ /* Save the vector return registers, if necessary. */
+ tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
+ stp q0, q1, [sp, #32 + 0]
+ stp q2, q3, [sp, #32 + 32]
+1:
+ /* All done. */
+ ret
- .cfi_endproc
+ cfi_endproc
#ifdef __ELF__
.size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
#endif
@@ -237,7 +157,7 @@ CNAME(ffi_call_SYSV):
.align 2
.globl CNAME(ffi_closure_SYSV)
- .cfi_startproc
+ cfi_startproc
CNAME(ffi_closure_SYSV):
stp x29, x30, [sp, #-16]!
cfi_adjust_cfa_offset (16)
@@ -310,7 +230,7 @@ CNAME(ffi_closure_SYSV):
cfi_restore (x30)
ret
- .cfi_endproc
+ cfi_endproc
#ifdef __ELF__
.size CNAME(ffi_closure_SYSV), .-CNAME(ffi_closure_SYSV)
#endif
--
1.9.3