From 3830325502a64c303f9296b7f1e670022da8fa53 Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Tue, 20 Dec 2022 10:03:17 +0100 Subject: [PATCH] Cygwin: x86_64: import new memset.S from NetBSD Signed-off-by: Corinna Vinschen --- winsup/cygwin/x86_64/memset.S | 138 ++++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 57 deletions(-) diff --git a/winsup/cygwin/x86_64/memset.S b/winsup/cygwin/x86_64/memset.S index ac73b6ace..f91d134ef 100644 --- a/winsup/cygwin/x86_64/memset.S +++ b/winsup/cygwin/x86_64/memset.S @@ -1,69 +1,93 @@ -/* These functions are almost verbatim FreeBSD code (even if the header of - one file mentiones NetBSD), just wrapped in the minimum required code to - make them work under the MS AMD64 ABI. - See FreeBSD src/lib/libc/amd64/string/memset.S */ +/* $NetBSD: memset.S,v 1.5 2014/05/22 16:47:31 pooka Exp $ */ -/* - * Written by J.T. Conklin . - * Public domain. - * Adapted for NetBSD/x86_64 by - * Frank van der Linden +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by David Laight. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. */ - .globl memset - .seh_proc memset -memset: - movq %rsi,8(%rsp) - movq %rdi,16(%rsp) - .seh_endprologue - movq %rcx,%rdi - movq %rdx,%rsi - movq %r8,%rdx - - movq %rsi,%rax - andq $0xff,%rax - movq %rdx,%rcx - movq %rdi,%r11 - - cld /* set fill direction forward */ +#include - /* if the string is too short, it's really not worth the - * overhead of aligning to word boundries, etc. So we jump to - * a plain unaligned set. */ - cmpq $0x0f,%rcx - jle L1 +#if defined(LIBC_SCCS) + RCSID("$NetBSD: memset.S,v 1.5 2014/05/22 16:47:31 pooka Exp $") +#endif - movb %al,%ah /* copy char to all bytes in word */ - movl %eax,%edx - sall $16,%eax - orl %edx,%eax +#ifndef _KERNEL +/* bzero, %rdi is buffer, %rsi length; zero fill via memset's shared path at 1: */ - movl %eax,%edx - salq $32,%rax - orq %rdx,%rax +ENTRY2(bzero) + mov %rsi,%rdx /* length */ + xor %eax,%eax /* value to write */ + jmp 1f /* join memset after its fill-value setup */ +END(bzero) +#endif - movq %rdi,%rdx /* compute misalignment */ - negq %rdx - andq $7,%rdx - movq %rcx,%r8 - subq %rdx,%r8 +/* memset, %rdi is buffer, %rsi char to fill, %rdx length; returns buffer in %rax. NOTE(review): the code removed by this patch saved %rsi/%rdi and copied %rcx/%rdx/%r8 into these registers first - confirm the arguments still arrive in %rdi/%rsi/%rdx on this target. */ - movq %rdx,%rcx /* set until word aligned */ - rep - stosb +ENTRY3(memset) + movzbq %sil,%rax /* byte value to fill */ + mov %rdx,%rsi /* copy of length */ + mov $0x0101010101010101,%r9 /* multiplier that replicates the low byte */ + imul %r9,%rax /* fill value in all bytes */ - movq %r8,%rcx - shrq $3,%rcx /* set by words */ - rep - stosq +1: + mov %rdi,%r9 /* Need to return buffer address */ + or %edi,%edx /* address | length */ + mov %rsi,%rcx /* byte count for rep stos */ + cmp $7,%rsi /* fewer than 8 bytes? */ + 
jbe 10f /* jump if short fill */ + test $7,%dl /* check for misaligned fill: low 3 bits of address | length */ + jnz 20f /* jump if misaligned */ - movq %r8,%rcx /* set remainder by bytes */ - andq $7,%rcx -L1: rep - stosb - movq %r11,%rax +/* Target aligned and length multiple of 8 */ +2: + shr $3,%rcx /* byte count -> 8-byte word count */ + rep stosq /* store %rax, 8 bytes at a time */ + mov %r9,%rax /* return buffer address */ + ret - movq 8(%rsp),%rsi - movq 16(%rsp),%rdi +/* + * Short transfer, any faffing here will generate mispredicted branches. + * So we keep it simple. + */ +10: rep stosb /* byte-wise fill of the %rcx (at most 7) bytes */ + mov %r9,%rax /* return buffer address */ ret - .seh_endproc + +/* + * Buffer or length misaligned. + * Write pattern to first and last word of buffer, then fill middle. + * (This writes to some bytes more than once - possibly three times!.) + */ +20: + mov %rax,(%rdi) /* first 8 bytes of buffer */ + movzbq %dil,%rdx /* low address for alignment */ + mov %rax,-8(%rcx,%rdi) /* last 8 bytes of buffer */ + and $7,%dl /* offset in word */ + sub %rdx,%rcx /* adjust length ... */ + add %rdx,%rdi /* ... and target. NOTE(review): adding (address & 7) does not in general leave %rdi 8-byte aligned; the first/last stores above still cover every byte - confirm whether (-address) & 7 was intended. */ + jmp 2b /* fill the middle in 8-byte words */ +END(memset) -- 2.43.5