This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH 10/11] hppa: Add haszero.h and whichzero.h
On 17/12/2016 04:57, Richard Henderson wrote:
> +static inline unsigned long int
> +haszero(unsigned long int x)
> +{
> +#if __GNUC_PREREQ(4, 5)
> + /* It's more useful to expose a control transfer to the compiler
> + than to expose a proper boolean result. */
> + if (sizeof(x) == 8)
> + asm goto ("uxor,*sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz);
> + else
> + asm goto ("uxor,sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz);
> + return 1;
> + nbz:
> + return 0;
> +#else
Since current glibc requires GCC 4.7 as the minimum compiler, I think we
can get rid of the snippets for older compilers. The same applies to the
other override functions.
> + unsigned long int ret;
> + if (sizeof(x) == 8)
> + asm ("uxor,*sbz %%r0,%1,%%r0\n\tcopy %%r0,%0"
> + : "=r"(ret) : "r"(x), "0"(1));
> + else
> + asm ("uxor,sbz %%r0,%1,%%r0\n\tcopy %%r0,%0"
> + : "=r"(ret) : "r"(x), "0"(1));
> + return ret;
> +#endif
> +}
> +
> +/* Likewise, but for two words simultaneously. */
> +
> +static inline unsigned long int
> +haszero2(unsigned long int x1, unsigned long int x2)
> +{
> +#if __GNUC_PREREQ(4, 5)
> + /* It's more useful to expose a control transfer to the compiler
> + than to expose a proper boolean result. */
> + if (sizeof(x1) == 8)
> + asm goto ("uxor,*sbz %%r0,%0,%%r0\n\t"
> + "uxor,*nbz %%r0,%1,%%r0\n\t"
> + "b,n %l2" : : "r"(x1), "r"(x2) : : sbz);
> + else
> + asm goto ("uxor,sbz %%r0,%0,%%r0\n\t"
> + "uxor,nbz %%r0,%1,%%r0\n\t"
> + "b,n %l2" : : "r"(x1), "r"(x2) : : sbz);
> + return 0;
> + sbz:
> + return 1;
> +#else
> + unsigned long int ret;
> + if (sizeof(x1) == 8)
> + asm ("uxor,*sbz %%r0,%1,%%r0\n\t"
> + "uxor,*nbz %%r0,%2,%%r0\n\t"
> + "ldi 1,%0"
> + : "=r"(ret) : "r"(x1), "r"(x2), "0"(0));
> + else
> + asm ("uxor,sbz %%r0,%1,%%r0\n\t"
> + "uxor,nbz %%r0,%2,%%r0\n\t"
> + "ldi 1,%0"
> + : "=r"(ret) : "r"(x1), "r"(x2), "0"(0));
> + return ret;
> +#endif
> +}
> +
> +#endif /* haszero.h */
> diff --git a/sysdeps/hppa/whichzero.h b/sysdeps/hppa/whichzero.h
> new file mode 100644
> index 0000000..ef18cc7
> --- /dev/null
> +++ b/sysdeps/hppa/whichzero.h
> @@ -0,0 +1,70 @@
> +/* whichzero.h -- functions for zero byte searching. HPPA version.
> + Copyright (C) 2016 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#ifndef HPPA_WHICHZERO_H
> +#define HPPA_WHICHZERO_H 1
> +
> +/* Given a long that is known to contain a zero byte, return the
> + index of the first such within the long in host memory order. */
> +
> +static inline unsigned int
> +whichzero(unsigned long int x)
> +{
> + unsigned int ret;
> +
> + _Static_assert (sizeof(x) == 4, "64-bit not supported");
> +
> + /* Since we have no clz insn, direct tests of the bytes is faster
> + than loading up the constants to do the masking. */
> + asm ("extrw,u,<> %1,23,8,%%r0\n\t"
> + "ldi 2,%0\n\t"
> + "extrw,u,<> %1,15,8,%%r0\n\t"
> + "ldi 1,%0\n\t"
> + "extrw,u,<> %1,7,8,%%r0\n\t"
> + "ldi 0,%0"
> + : "=r"(ret) : "r"(x), "0"(3));
> +
> + return ret;
> +}
> +
> +/* Similarly, but perform the test for two longs simultaneously. */
> +
> +static inline unsigned int
> +whichzero2(unsigned long int x1, unsigned long int x2)
> +{
> + unsigned int ret;
> +
> + _Static_assert (sizeof(x1) == 4, "64-bit not supported");
> +
> + /* Since we have no clz insn, direct tests of the bytes is faster
> + than loading up the constants to do the masking. */
> + asm ("extrw,u,= %1,23,8,%%r0\n\t"
> + "extrw,u,<> %2,23,8,%%r0\n\t"
> + "ldi 2,%0\n\t"
> + "extrw,u,= %1,15,8,%%r0\n\t"
> + "extrw,u,<> %2,15,8,%%r0\n\t"
> + "ldi 1,%0\n\t"
> + "extrw,u,= %1,7,8,%%r0\n\t"
> + "extrw,u,<> %2,7,8,%%r0\n\t"
> + "ldi 0,%0"
> + : "=r"(ret) : "r"(x1), "r"(x2), "0"(3));
> +
> + return ret;
> +}
> +
> +#endif /* whichzero.h */
I am far from an hppa expert, but can't we write the same snippet in C? How
much worse would it be compared to this optimized asm?