From 1dc17ea8f8492d618a91f0b7b3f1e7fd089889d1 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 9 Feb 2021 17:59:11 +0000 Subject: [PATCH] aarch64: Optimize __libc_mtag_tag_zero_region This is a target hook for memory tagging, the original was a naive implementation. Uses the same algorithm as __libc_mtag_tag_region, but with instructions that also zero the memory. This was not benchmarked on real cpu, but expected to be faster than the naive implementation. --- sysdeps/aarch64/__mtag_tag_zero_region.S | 96 ++++++++++++++++++++---- 1 file changed, 80 insertions(+), 16 deletions(-) diff --git a/sysdeps/aarch64/__mtag_tag_zero_region.S b/sysdeps/aarch64/__mtag_tag_zero_region.S index 74d398bba5..7d955fbd0c 100644 --- a/sysdeps/aarch64/__mtag_tag_zero_region.S +++ b/sysdeps/aarch64/__mtag_tag_zero_region.S @@ -20,30 +20,94 @@ #ifdef USE_MTAG +/* Assumptions: + * + * ARMv8-a, AArch64, MTE, LP64 ABI. + * + * Interface contract: + * Address is 16 byte aligned and size is multiple of 16. + * Returns the passed pointer. + * The memory region may remain untagged if tagging is not enabled. + */ .arch armv8.5-a .arch_extension memtag -/* NB, only supported on variants with 64-bit pointers. */ +#define dstin x0 +#define count x1 +#define dst x2 +#define dstend x3 +#define tmp x4 +#define zva_val x4 -/* FIXME: This is a minimal implementation. We could do much better than - this for large values of COUNT. */ +ENTRY (__libc_mtag_tag_zero_region) + PTR_ARG (0) + SIZE_ARG (1) -#define dstin x0 -#define count x1 -#define dst x2 + add dstend, dstin, count -ENTRY(__libc_mtag_tag_zero_region) + cmp count, 96 + b.hi L(set_long) - mov dst, dstin -L(loop): - stzg dst, [dst], #16 - subs count, count, 16 - bne L(loop) -#if 0 - /* This is not currently needed, since for now we are only called - to tag memory that is taggable. */ - ldg dstin, [dstin] // Recover the tag created (might be untagged). + tbnz count, 6, L(set96) + + /* Set 0, 16, 32, or 48 bytes. */ + lsr tmp, count, 5 + add tmp, dstin, tmp, lsl 4 + cbz count, L(end) + stzg dstin, [dstin] + stzg dstin, [tmp] + stzg dstin, [dstend, -16] +L(end): + ret + + .p2align 4 + /* Set 64..96 bytes. Write 64 bytes from the start and + 32 bytes from the end. */ +L(set96): + stz2g dstin, [dstin] + stz2g dstin, [dstin, 32] + stz2g dstin, [dstend, -32] + ret + + .p2align 4 + /* Size is > 96 bytes. */ +L(set_long): + cmp count, 160 + b.lo L(no_zva) + +#ifndef SKIP_ZVA_CHECK + mrs zva_val, dczid_el0 + and zva_val, zva_val, 31 + cmp zva_val, 4 /* ZVA size is 64 bytes. */ + b.ne L(no_zva) #endif + stz2g dstin, [dstin] + stz2g dstin, [dstin, 32] + bic dst, dstin, 63 + sub count, dstend, dst /* Count is now 64 too large. */ + sub count, count, 128 /* Adjust count and bias for loop. */ + + .p2align 4 +L(zva_loop): + add dst, dst, 64 + dc gzva, dst + subs count, count, 64 + b.hi L(zva_loop) + stz2g dstin, [dstend, -64] + stz2g dstin, [dstend, -32] ret + +L(no_zva): + sub dst, dstin, 32 /* Dst is biased by -32. */ + sub count, count, 64 /* Adjust count for loop. */ +L(no_zva_loop): + stz2g dstin, [dst, 32] + stz2g dstin, [dst, 64]! + subs count, count, 64 + b.hi L(no_zva_loop) + stz2g dstin, [dstend, -64] + stz2g dstin, [dstend, -32] + ret + END (__libc_mtag_tag_zero_region) #endif /* USE_MTAG */ -- 2.43.5