This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH v2 1/6] wcsmbs: Add wcscpy loop unroll option


This allows an architecture the use the old generic implementation
and also set explicit loop unrolling.

Checked on aarch64-linux-gnu.

	* include/loop_unroll.h: New file.
	* wcsmbs/wcscpy (__wcscpy): Add option to use loop unrolling
	besides generic implementation.
---
 include/loop_unroll.h | 78 +++++++++++++++++++++++++++++++++++++++++++
 wcsmbs/wcscpy.c       | 19 +++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 include/loop_unroll.h

diff --git a/include/loop_unroll.h b/include/loop_unroll.h
new file mode 100644
index 0000000000..f40d375257
--- /dev/null
+++ b/include/loop_unroll.h
@@ -0,0 +1,78 @@
+/* Macro for explicit loop unrolling.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _LOOP_UNROLL_H
+#define _LOOP_UNROLL_H
+
+/* Loop unroll macro to be used for explicity force loop unrolling with a
+   configurable number or iterations.  The idea is to make the loop unrolling
+   independent of whether compiler is able to unrolling through specific
+   optimizations options (-funroll-loops or -funroll-all-loops).
+
+   For instance, to implement strcpy with SRC being the source input and 
+   DEST the destination buffer, it is expected the macro to be used in this
+   way:
+
+     #define ITERATION(index)	\
+       ({ char c = *str++; *dest++ = c; c != '\0' })
+
+     while (1)
+       UNROLL_REPEAT (4, ITERATION)
+
+   The loop will be manually unrolled 4 times.  Another option is to do
+   the index update after the tests:
+
+     #define ITERATION(index)	\
+       ({ char c = *(str + index); *(dest + index) = c; c != '\0' })
+     #define UPDATE(n)		\
+       str += n; dst += n
+
+     while (1)
+       UNROLL_REPEAT_UPDATE (4, ITERATION, UPDATE)
+
+   The loop will be manually unrolled 4 times and the SRC and DEST pointers
+   will be update only after last iteration.
+
+   Currently both macros unrolls the loop 8 times at maximum.  */
+
+#define UNROLL_REPEAT_1(X)    if (!X(0)) break;
+#define UNROLL_REPEAT_2(X)    UNROLL_REPEAT_1(X) if (!X(1)) break;
+#define UNROLL_REPEAT_3(X)    UNROLL_REPEAT_2(X) if (!X(2)) break;
+#define UNROLL_REPEAT_4(X)    UNROLL_REPEAT_3(X) if (!X(3)) break;
+#define UNROLL_REPEAT_5(X)    UNROLL_REPEAT_4(X) if (!X(4)) break;
+#define UNROLL_REPEAT_6(X)    UNROLL_REPEAT_5(X) if (!X(5)) break;
+#define UNROLL_REPEAT_7(X)    UNROLL_REPEAT_6(X) if (!X(6)) break;
+#define UNROLL_REPEAT_8(X)    UNROLL_REPEAT_7(X) if (!X(7)) break;
+
+#define UNROLL_EXPAND(...)    __VA_ARGS__
+
+#define UNROLL_REPEAT__(N, X) UNROLL_EXPAND(UNROLL_REPEAT_ ## N) (X)
+#define UNROLL_REPEAT_(N, X)  UNROLL_REPEAT__ (N, X)
+
+#define UNROLL_REPEAT(N, X)                \
+  (void) ({                                \
+    UNROLL_REPEAT_(UNROLL_EXPAND(N), X);   \
+  })
+
+#define UNROLL_REPEAT_UPDATE(N, X, U)      \
+  (void) ({                                \
+    UNROLL_REPEAT_ (UNROLL_EXPAND(N), X);  \
+    UPDATE (N);                            \
+  })
+
+#endif
diff --git a/wcsmbs/wcscpy.c b/wcsmbs/wcscpy.c
index 6fb2969513..8e8719744e 100644
--- a/wcsmbs/wcscpy.c
+++ b/wcsmbs/wcscpy.c
@@ -17,6 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <wchar.h>
+#include <loop_unroll.h>
 
 
 #ifdef WCSCPY
@@ -27,7 +28,25 @@
 wchar_t *
 __wcscpy (wchar_t *dest, const wchar_t *src)
 {
+#ifndef UNROLL_NTIMES
   return __wmemcpy (dest, src, __wcslen (src) + 1);
+#else
+  /* Some architectures might have costly tail function call (powerpc
+     for instance) where wmemcpy call overhead for smalls sizes might
+     be costly than just unroll the main loop.  */
+  wchar_t *wcp = dest;
+
+#define ITERATION(index)		\
+  ({					\
+     wchar_t c = *src++;		\
+     *wcp++ = c;			\
+     c != L'\0';			\
+  })
+
+  while (1)
+    UNROLL_REPEAT(UNROLL_NTIMES, ITERATION);
+  return dest;
+#endif
 }
 #ifndef WCSCPY
 weak_alias (__wcscpy, wcscpy)
-- 
2.17.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]