[PATCH 05/14] S390: Optimize builtin iconv-modules.
Stefan Liebler
stli@linux.vnet.ibm.com
Thu Apr 21 14:51:00 GMT 2016
Here is an updated patch, where the labels in inline assemblies are
out-dented as suggested by Florian.
On 03/18/2016 01:57 PM, Stefan Liebler wrote:
> Hi,
>
> I've updated the vector loop functions
> internal_ucs2_loop and internal_ucs2reverse_loop.
> The old patch used lhi statements to initialize %[R_TMP], which is
> later used to calculate an address. As lhi only sets the lower 32 bits
> of the register, this patch uses lghi statements instead, so that the
> whole 64-bit register is initialized.
>
> The ChangeLog remains the same.
>
> Bye Stefan
>
> On 02/23/2016 10:21 AM, Stefan Liebler wrote:
>> This patch introduces an s390-specific gconv_simple.c file which provides
>> optimized versions for z13 with vector instructions, which will be
>> chosen at runtime via ifunc.
>> The optimized conversions can convert between internal and ascii,
>> ucs4, ucs4le,
>> ucs2, ucs2le.
>> If the build environment lacks vector support, then iconv/gconv_simple.c
>> is used without any change. Otherwise iconv/gconv_simple.c is used to
>> create conversion loop routines without vector instructions as a
>> fallback, in case vector instructions aren't available at runtime.
>>
>> ChangeLog:
>>
>> * sysdeps/s390/multiarch/gconv_simple.c: New File.
>> * sysdeps/s390/multiarch/Makefile (sysdep_routines): Add
>> gconv_simple.
>> ---
>> sysdeps/s390/multiarch/Makefile | 4 +
>> sysdeps/s390/multiarch/gconv_simple.c | 1266
>> +++++++++++++++++++++++++++++++++
>> 2 files changed, 1270 insertions(+)
>> create mode 100644 sysdeps/s390/multiarch/gconv_simple.c
>>
>> diff --git a/sysdeps/s390/multiarch/Makefile
>> b/sysdeps/s390/multiarch/Makefile
>> index 0805b07..5067b6f 100644
>> --- a/sysdeps/s390/multiarch/Makefile
>> +++ b/sysdeps/s390/multiarch/Makefile
>> @@ -42,3 +42,7 @@ sysdep_routines += wcslen wcslen-vx wcslen-c \
>> wmemset wmemset-vx wmemset-c \
>> wmemcmp wmemcmp-vx wmemcmp-c
>> endif
>> +
>> +ifeq ($(subdir),iconv)
>> +sysdep_routines += gconv_simple
>> +endif
>> diff --git a/sysdeps/s390/multiarch/gconv_simple.c
>> b/sysdeps/s390/multiarch/gconv_simple.c
>> new file mode 100644
>> index 0000000..0e59422
>> --- /dev/null
>> +++ b/sysdeps/s390/multiarch/gconv_simple.c
>> @@ -0,0 +1,1266 @@
>> +/* Simple transformations functions - s390 version.
>> + Copyright (C) 2016 Free Software Foundation, Inc.
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <http://www.gnu.org/licenses/>. */
>> +
>> +#if defined HAVE_S390_VX_ASM_SUPPORT
>> +# include <ifunc-resolve.h>
>> +
>> +# if defined HAVE_S390_VX_GCC_SUPPORT
>> +# define ASM_CLOBBER_VR(NR) , NR
>> +# else
>> +# define ASM_CLOBBER_VR(NR)
>> +# endif
>> +
>> +# define ICONV_C_NAME(NAME) __##NAME##_c
>> +# define ICONV_VX_NAME(NAME) __##NAME##_vx
>> +# define ICONV_VX_IFUNC(FUNC) \
>> + extern __typeof (ICONV_C_NAME (FUNC)) __##FUNC; \
>> + s390_vx_libc_ifunc (__##FUNC) \
>> + int FUNC (struct __gconv_step *step, struct __gconv_step_data
>> *data, \
>> + const unsigned char **inptrp, const unsigned char *inend, \
>> + unsigned char **outbufstart, size_t *irreversible, \
>> + int do_flush, int consume_incomplete) \
>> + { \
>> + return __##FUNC (step, data, inptrp, inend,outbufstart, \
>> + irreversible, do_flush, consume_incomplete); \
>> + }
>> +# define ICONV_VX_SINGLE(NAME) \
>> + static __typeof (NAME##_single) __##NAME##_vx_single
>> __attribute__((alias(#NAME "_single")));
>> +
>> +/* Generate the transformations which are used, if the target machine
>> does not
>> + support vector instructions. */
>> +# define __gconv_transform_ascii_internal \
>> + ICONV_C_NAME (__gconv_transform_ascii_internal)
>> +# define __gconv_transform_internal_ascii \
>> + ICONV_C_NAME (__gconv_transform_internal_ascii)
>> +# define __gconv_transform_internal_ucs4le \
>> + ICONV_C_NAME (__gconv_transform_internal_ucs4le)
>> +# define __gconv_transform_ucs4_internal \
>> + ICONV_C_NAME (__gconv_transform_ucs4_internal)
>> +# define __gconv_transform_ucs4le_internal \
>> + ICONV_C_NAME (__gconv_transform_ucs4le_internal)
>> +# define __gconv_transform_ucs2_internal \
>> + ICONV_C_NAME (__gconv_transform_ucs2_internal)
>> +# define __gconv_transform_ucs2reverse_internal \
>> + ICONV_C_NAME (__gconv_transform_ucs2reverse_internal)
>> +# define __gconv_transform_internal_ucs2 \
>> + ICONV_C_NAME (__gconv_transform_internal_ucs2)
>> +# define __gconv_transform_internal_ucs2reverse \
>> + ICONV_C_NAME (__gconv_transform_internal_ucs2reverse)
>> +
>> +
>> +# include <iconv/gconv_simple.c>
>> +
>> +# undef __gconv_transform_ascii_internal
>> +# undef __gconv_transform_internal_ascii
>> +# undef __gconv_transform_internal_ucs4le
>> +# undef __gconv_transform_ucs4_internal
>> +# undef __gconv_transform_ucs4le_internal
>> +# undef __gconv_transform_ucs2_internal
>> +# undef __gconv_transform_ucs2reverse_internal
>> +# undef __gconv_transform_internal_ucs2
>> +# undef __gconv_transform_internal_ucs2reverse
>> +
>> +/* Now define the functions with vector support. */
>> +# if defined __s390x__
>> +# define CONVERT_32BIT_SIZE_T(REG)
>> +# else
>> +# define CONVERT_32BIT_SIZE_T(REG) "llgfr %" #REG ",%" #REG "\n\t"
>> +# endif
>> +
>> +/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
>> +# define DEFINE_INIT 0
>> +# define DEFINE_FINI 0
>> +# define MIN_NEEDED_FROM 1
>> +# define MIN_NEEDED_TO 4
>> +# define FROM_DIRECTION 1
>> +# define FROM_LOOP ICONV_VX_NAME (ascii_internal_loop)
>> +# define TO_LOOP ICONV_VX_NAME (ascii_internal_loop) /* This
>> is not used. */
>> +# define FUNCTION_NAME ICONV_VX_NAME
>> (__gconv_transform_ascii_internal)
>> +# define ONE_DIRECTION 1
>> +
>> +# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
>> +# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
>> +# define LOOPFCT FROM_LOOP
>> +# define BODY_ORIG_ERROR \
>> + /* The value is too large. We don't try transliteration here
>> since \
>> + this is not an error because of the lack of possibilities to \
>> + represent the result. This is a genuine bug in the input
>> since \
>> + ASCII does not allow such values. */ \
>> + STANDARD_FROM_LOOP_ERR_HANDLER (1);
>> +
>> +# define BODY_ORIG \
>> + { \
>> + if (__glibc_unlikely (*inptr > '\x7f')) \
>> + { \
>> + BODY_ORIG_ERROR \
>> + } \
>> + else \
>> + { \
>> + /* It's an one byte sequence. */ \
>> + *((uint32_t *) outptr) = *inptr++; \
>> + outptr += sizeof (uint32_t); \
>> + } \
>> + }
>> +# define BODY \
>> + { \
>> + size_t len = inend - inptr; \
>> + if (len > (outend - outptr) / 4) \
>> + len = (outend - outptr) / 4; \
>> + size_t loop_count, tmp; \
>> + __asm__ volatile (".machine push\n\t" \
>> + ".machine \"z13\"\n\t" \
>> + ".machinemode \"zarch_nohighgprs\"\n\t" \
>> + CONVERT_32BIT_SIZE_T ([R_LEN]) \
>> + "vrepib %%v30,0x7f\n\t" /* For compare > 0x7f. */ \
>> + "srlg %[R_LI],%[R_LEN],4\n\t" \
>> + "vrepib %%v31,0x20\n\t" \
>> + "clgije %[R_LI],0,1f\n\t" \
>> + "0:\n\t" /* Handle 16-byte blocks. */ \
>> + "vl %%v16,0(%[R_IN])\n\t" \
>> + /* Checking for values > 0x7f. */ \
>> + "vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \
>> + "jno 10f\n\t" \
>> + /* Enlarge to UCS4. */ \
>> + "vuplhb %%v17,%%v16\n\t" \
>> + "vupllb %%v18,%%v16\n\t" \
>> + "vuplhh %%v19,%%v17\n\t" \
>> + "vupllh %%v20,%%v17\n\t" \
>> + "vuplhh %%v21,%%v18\n\t" \
>> + "vupllh %%v22,%%v18\n\t" \
>> + /* Store 64bytes to buf_out. */ \
>> + "vstm %%v19,%%v22,0(%[R_OUT])\n\t" \
>> + "la %[R_IN],16(%[R_IN])\n\t" \
>> + "la %[R_OUT],64(%[R_OUT])\n\t" \
>> + "brctg %[R_LI],0b\n\t" \
>> + "lghi %[R_LI],15\n\t" \
>> + "ngr %[R_LEN],%[R_LI]\n\t" \
>> + "je 20f\n\t" /* Jump away if no remaining bytes. */ \
>> + /* Handle remaining bytes. */ \
>> + "1: aghik %[R_LI],%[R_LEN],-1\n\t" \
>> + "jl 20f\n\t" /* Jump away if no remaining bytes. */ \
>> + "vll %%v16,%[R_LI],0(%[R_IN])\n\t" \
>> + /* Checking for values > 0x7f. */ \
>> + "vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \
>> + "vlgvb %[R_TMP],%%v17,7\n\t" \
>> + "clr %[R_TMP],%[R_LI]\n\t" \
>> + "locrh %[R_TMP],%[R_LEN]\n\t" \
>> + "locghih %[R_LEN],0\n\t" \
>> + "j 12f\n\t" \
>> + "10:\n\t" \
>> + /* Found a value > 0x7f. \
>> + Store the preceding chars. */ \
>> + "vlgvb %[R_TMP],%%v17,7\n\t" \
>> + "12: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
>> + "sllk %[R_TMP],%[R_TMP],2\n\t" \
>> + "ahi %[R_TMP],-1\n\t" \
>> + "jl 20f\n\t" \
>> + "lgr %[R_LI],%[R_TMP]\n\t" \
>> + "vuplhb %%v17,%%v16\n\t" \
>> + "vuplhh %%v19,%%v17\n\t" \
>> + "vstl %%v19,%[R_LI],0(%[R_OUT])\n\t" \
>> + "ahi %[R_LI],-16\n\t" \
>> + "jl 11f\n\t" \
>> + "vupllh %%v20,%%v17\n\t" \
>> + "vstl %%v20,%[R_LI],16(%[R_OUT])\n\t" \
>> + "ahi %[R_LI],-16\n\t" \
>> + "jl 11f\n\t" \
>> + "vupllb %%v18,%%v16\n\t" \
>> + "vuplhh %%v21,%%v18\n\t" \
>> + "vstl %%v21,%[R_LI],32(%[R_OUT])\n\t" \
>> + "ahi %[R_LI],-16\n\t" \
>> + "jl 11f\n\t" \
>> + "vupllh %%v22,%%v18\n\t" \
>> + "vstl %%v22,%[R_LI],48(%[R_OUT])\n\t" \
>> + "11:\n\t" \
>> + "la %[R_OUT],1(%[R_TMP],%[R_OUT])\n\t" \
>> + "20:\n\t" \
>> + ".machine pop" \
>> + : /* outputs */ [R_OUT] "+a" (outptr) \
>> + , [R_IN] "+a" (inptr) \
>> + , [R_LEN] "+d" (len) \
>> + , [R_LI] "=d" (loop_count) \
>> + , [R_TMP] "=a" (tmp) \
>> + : /* inputs */ \
>> + : /* clobber list*/ "memory", "cc" \
>> + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
>> + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
>> + ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
>> + ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v30") \
>> + ASM_CLOBBER_VR ("v31") \
>> + ); \
>> + if (len > 0) \
>> + { \
>> + /* Found an invalid character at the next input byte. */ \
>> + BODY_ORIG_ERROR \
>> + } \
>> + }
>> +
>> +# define LOOP_NEED_FLAGS
>> +# include <iconv/loop.c>
>> +# include <iconv/skeleton.c>
>> +# undef BODY_ORIG
>> +# undef BODY_ORIG_ERROR
>> +ICONV_VX_IFUNC (__gconv_transform_ascii_internal)
>> +
>> +/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */
>> +# define DEFINE_INIT 0
>> +# define DEFINE_FINI 0
>> +# define MIN_NEEDED_FROM 4
>> +# define MIN_NEEDED_TO 1
>> +# define FROM_DIRECTION 1
>> +# define FROM_LOOP ICONV_VX_NAME (internal_ascii_loop)
>> +# define TO_LOOP ICONV_VX_NAME (internal_ascii_loop) /* This
>> is not used. */
>> +# define FUNCTION_NAME ICONV_VX_NAME
>> (__gconv_transform_internal_ascii)
>> +# define ONE_DIRECTION 1
>> +
>> +# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
>> +# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
>> +# define LOOPFCT FROM_LOOP
>> +# define BODY_ORIG_ERROR \
>> + UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \
>> + STANDARD_TO_LOOP_ERR_HANDLER (4);
>> +
>> +# define BODY_ORIG \
>> + { \
>> + if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \
>> + { \
>> + BODY_ORIG_ERROR \
>> + } \
>> + else \
>> + { \
>> + /* It's an one byte sequence. */ \
>> + *outptr++ = *((const uint32_t *) inptr); \
>> + inptr += sizeof (uint32_t); \
>> + } \
>> + }
>> +# define BODY \
>> + { \
>> + size_t len = (inend - inptr) / 4; \
>> + if (len > outend - outptr) \
>> + len = outend - outptr; \
>> + size_t loop_count, tmp, tmp2; \
>> + __asm__ volatile (".machine push\n\t" \
>> + ".machine \"z13\"\n\t" \
>> + ".machinemode \"zarch_nohighgprs\"\n\t" \
>> + CONVERT_32BIT_SIZE_T ([R_LEN]) \
>> + /* Setup to check for ch > 0x7f. */ \
>> + "vzero %%v21\n\t" \
>> + "srlg %[R_LI],%[R_LEN],4\n\t" \
>> + "vleih %%v21,8192,0\n\t" /* element 0: > */ \
>> + "vleih %%v21,-8192,2\n\t" /* element 1: =<> */ \
>> + "vleif %%v20,127,0\n\t" /* element 0: 127 */ \
>> + "lghi %[R_TMP],0\n\t" \
>> + "clgije %[R_LI],0,1f\n\t" \
>> + "0:\n\t" \
>> + "vlm %%v16,%%v19,0(%[R_IN])\n\t" \
>> + /* Shorten to byte values. */ \
>> + "vpkf %%v23,%%v16,%%v17\n\t" \
>> + "vpkf %%v24,%%v18,%%v19\n\t" \
>> + "vpkh %%v23,%%v23,%%v24\n\t" \
>> + /* Checking for values > 0x7f. */ \
>> + "vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \
>> + "jno 10f\n\t" \
>> + "vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \
>> + "jno 11f\n\t" \
>> + "vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" \
>> + "jno 12f\n\t" \
>> + "vstrcfs %%v22,%%v19,%%v20,%%v21\n\t" \
>> + "jno 13f\n\t" \
>> + /* Store 16bytes to outptr. */ \
>> + "vst %%v23,0(%[R_OUT])\n\t" \
>> + "la %[R_IN],64(%[R_IN])\n\t" \
>> + "la %[R_OUT],16(%[R_OUT])\n\t" \
>> + "brctg %[R_LI],0b\n\t" \
>> + "lghi %[R_LI],15\n\t" \
>> + "ngr %[R_LEN],%[R_LI]\n\t" \
>> + "je 20f\n\t" /* Jump away if no remaining bytes. */ \
>> + /* Handle remaining bytes. */ \
>> + "1: sllg %[R_LI],%[R_LEN],2\n\t" \
>> + "aghi %[R_LI],-1\n\t" \
>> + "jl 20f\n\t" /* Jump away if no remaining bytes. */ \
>> + /* Load remaining 1...63 bytes. */ \
>> + "vll %%v16,%[R_LI],0(%[R_IN])\n\t" \
>> + "ahi %[R_LI],-16\n\t" \
>> + "jl 2f\n\t" \
>> + "vll %%v17,%[R_LI],16(%[R_IN])\n\t" \
>> + "ahi %[R_LI],-16\n\t" \
>> + "jl 2f\n\t" \
>> + "vll %%v18,%[R_LI],32(%[R_IN])\n\t" \
>> + "ahi %[R_LI],-16\n\t" \
>> + "jl 2f\n\t" \
>> + "vll %%v19,%[R_LI],48(%[R_IN])\n\t" \
>> + "2:\n\t" \
>> + /* Shorten to byte values. */ \
>> + "vpkf %%v23,%%v16,%%v17\n\t" \
>> + "vpkf %%v24,%%v18,%%v19\n\t" \
>> + "vpkh %%v23,%%v23,%%v24\n\t" \
>> + "sllg %[R_LI],%[R_LEN],2\n\t" \
>> + "aghi %[R_LI],-16\n\t" \
>> + "jl 3f\n\t" /* v16 is not fully loaded. */ \
>> + "vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \
>> + "jno 10f\n\t" \
>> + "aghi %[R_LI],-16\n\t" \
>> + "jl 4f\n\t" /* v17 is not fully loaded. */ \
>> + "vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \
>> + "jno 11f\n\t" \
>> + "aghi %[R_LI],-16\n\t" \
>> + "jl 5f\n\t" /* v18 is not fully loaded. */ \
>> + "vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" \
>> + "jno 12f\n\t" \
>> + "aghi %[R_LI],-16\n\t" \
>> + /* v19 is not fully loaded. */ \
>> + "lghi %[R_TMP],12\n\t" \
>> + "vstrcfs %%v22,%%v19,%%v20,%%v21\n\t" \
>> + "6: vlgvb %[R_I],%%v22,7\n\t" \
>> + "aghi %[R_LI],16\n\t" \
>> + "clrjl %[R_I],%[R_LI],14f\n\t" \
>> + "lgr %[R_I],%[R_LEN]\n\t" \
>> + "lghi %[R_LEN],0\n\t" \
>> + "j 15f\n\t" \
>> + "3: vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \
>> + "j 6b\n\t" \
>> + "4: vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \
>> + "lghi %[R_TMP],4\n\t" \
>> + "j 6b\n\t" \
>> + "5: vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \
>> + "lghi %[R_TMP],8\n\t" \
>> + "j 6b\n\t" \
>> + /* Found a value > 0x7f. */ \
>> + "13: ahi %[R_TMP],4\n\t" \
>> + "12: ahi %[R_TMP],4\n\t" \
>> + "11: ahi %[R_TMP],4\n\t" \
>> + "10: vlgvb %[R_I],%%v22,7\n\t" \
>> + "14: srlg %[R_I],%[R_I],2\n\t" \
>> + "agr %[R_I],%[R_TMP]\n\t" \
>> + "je 20f\n\t" \
>> + /* Store characters before invalid one... */ \
>> + "15: aghi %[R_I],-1\n\t" \
>> + "vstl %%v23,%[R_I],0(%[R_OUT])\n\t" \
>> + /* ... and update pointers. */ \
>> + "la %[R_OUT],1(%[R_I],%[R_OUT])\n\t" \
>> + "sllg %[R_I],%[R_I],2\n\t" \
>> + "la %[R_IN],4(%[R_I],%[R_IN])\n\t" \
>> + "20:\n\t" \
>> + ".machine pop" \
>> + : /* outputs */ [R_OUT] "+a" (outptr) \
>> + , [R_IN] "+a" (inptr) \
>> + , [R_LEN] "+d" (len) \
>> + , [R_LI] "=d" (loop_count) \
>> + , [R_I] "=a" (tmp2) \
>> + , [R_TMP] "=d" (tmp) \
>> + : /* inputs */ \
>> + : /* clobber list*/ "memory", "cc" \
>> + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
>> + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
>> + ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
>> + ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \
>> + ASM_CLOBBER_VR ("v24") \
>> + ); \
>> + if (len > 0) \
>> + { \
>> + /* Found an invalid character > 0x7f at next character. */ \
>> + BODY_ORIG_ERROR \
>> + } \
>> + }
>> +# define LOOP_NEED_FLAGS
>> +# include <iconv/loop.c>
>> +# include <iconv/skeleton.c>
>> +# undef BODY_ORIG
>> +# undef BODY_ORIG_ERROR
>> +ICONV_VX_IFUNC (__gconv_transform_internal_ascii)
>> +
>> +
>> +/* Convert from internal UCS4 to UCS4 little endian form. */
>> +# define DEFINE_INIT 0
>> +# define DEFINE_FINI 0
>> +# define MIN_NEEDED_FROM 4
>> +# define MIN_NEEDED_TO 4
>> +# define FROM_DIRECTION 1
>> +# define FROM_LOOP ICONV_VX_NAME (internal_ucs4le_loop)
>> +# define TO_LOOP ICONV_VX_NAME (internal_ucs4le_loop) /* This
>> is not used. */
>> +# define FUNCTION_NAME ICONV_VX_NAME
>> (__gconv_transform_internal_ucs4le)
>> +# define ONE_DIRECTION 0
>> +
>> +static inline int
>> +__attribute ((always_inline))
>> +ICONV_VX_NAME (internal_ucs4le_loop) (struct __gconv_step *step,
>> + struct __gconv_step_data *step_data,
>> + const unsigned char **inptrp,
>> + const unsigned char *inend,
>> + unsigned char **outptrp,
>> + unsigned char *outend,
>> + size_t *irreversible)
>> +{
>> + const unsigned char *inptr = *inptrp;
>> + unsigned char *outptr = *outptrp;
>> + int result;
>> + size_t len = MIN (inend - inptr, outend - outptr) / 4;
>> + size_t loop_count;
>> + __asm__ volatile (".machine push\n\t"
>> + ".machine \"z13\"\n\t"
>> + ".machinemode \"zarch_nohighgprs\"\n\t"
>> + CONVERT_32BIT_SIZE_T ([R_LEN])
>> + "bras %[R_LI],1f\n\t"
>> + /* Vector permute mask: */
>> + ".long 0x03020100,0x7060504,0x0B0A0908,0x0F0E0D0C\n\t"
>> + "1: vl %%v20,0(%[R_LI])\n\t"
>> + /* Process 64byte (16char) blocks. */
>> + "srlg %[R_LI],%[R_LEN],4\n\t"
>> + "clgije %[R_LI],0,10f\n\t"
>> + "0: vlm %%v16,%%v19,0(%[R_IN])\n\t"
>> + "vperm %%v16,%%v16,%%v16,%%v20\n\t"
>> + "vperm %%v17,%%v17,%%v17,%%v20\n\t"
>> + "vperm %%v18,%%v18,%%v18,%%v20\n\t"
>> + "vperm %%v19,%%v19,%%v19,%%v20\n\t"
>> + "vstm %%v16,%%v19,0(%[R_OUT])\n\t"
>> + "la %[R_IN],64(%[R_IN])\n\t"
>> + "la %[R_OUT],64(%[R_OUT])\n\t"
>> + "brctg %[R_LI],0b\n\t"
>> + "llgfr %[R_LEN],%[R_LEN]\n\t"
>> + "nilf %[R_LEN],15\n\t"
>> + /* Process 16byte (4char) blocks. */
>> + "10: srlg %[R_LI],%[R_LEN],2\n\t"
>> + "clgije %[R_LI],0,20f\n\t"
>> + "11: vl %%v16,0(%[R_IN])\n\t"
>> + "vperm %%v16,%%v16,%%v16,%%v20\n\t"
>> + "vst %%v16,0(%[R_OUT])\n\t"
>> + "la %[R_IN],16(%[R_IN])\n\t"
>> + "la %[R_OUT],16(%[R_OUT])\n\t"
>> + "brctg %[R_LI],11b\n\t"
>> + "nill %[R_LEN],3\n\t"
>> + /* Process <16bytes. */
>> + "20: sll %[R_LEN],2\n\t"
>> + "ahi %[R_LEN],-1\n\t"
>> + "jl 30f\n\t"
>> + "vll %%v16,%[R_LEN],0(%[R_IN])\n\t"
>> + "vperm %%v16,%%v16,%%v16,%%v20\n\t"
>> + "vstl %%v16,%[R_LEN],0(%[R_OUT])\n\t"
>> + "la %[R_IN],1(%[R_LEN],%[R_IN])\n\t"
>> + "la %[R_OUT],1(%[R_LEN],%[R_OUT])\n\t"
>> + "30: \n\t"
>> + ".machine pop"
>> + : /* outputs */ [R_OUT] "+a" (outptr)
>> + , [R_IN] "+a" (inptr)
>> + , [R_LI] "=a" (loop_count)
>> + , [R_LEN] "+a" (len)
>> + : /* inputs */
>> + : /* clobber list*/ "memory", "cc"
>> + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")
>> + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")
>> + ASM_CLOBBER_VR ("v20")
>> + );
>> + *inptrp = inptr;
>> + *outptrp = outptr;
>> +
>> + /* Determine the status. */
>> + if (*inptrp == inend)
>> + result = __GCONV_EMPTY_INPUT;
>> + else if (*outptrp + 4 > outend)
>> + result = __GCONV_FULL_OUTPUT;
>> + else
>> + result = __GCONV_INCOMPLETE_INPUT;
>> +
>> + return result;
>> +}
>> +
>> +ICONV_VX_SINGLE (internal_ucs4le_loop)
>> +# include <iconv/skeleton.c>
>> +ICONV_VX_IFUNC (__gconv_transform_internal_ucs4le)
>> +
>> +
>> +/* Transform from UCS4 to the internal, UCS4-like format. Unlike
>> + for the other direction we have to check for correct values here. */
>> +# define DEFINE_INIT 0
>> +# define DEFINE_FINI 0
>> +# define MIN_NEEDED_FROM 4
>> +# define MIN_NEEDED_TO 4
>> +# define FROM_DIRECTION 1
>> +# define FROM_LOOP ICONV_VX_NAME (ucs4_internal_loop)
>> +# define TO_LOOP ICONV_VX_NAME (ucs4_internal_loop) /* This is
>> not used. */
>> +# define FUNCTION_NAME ICONV_VX_NAME
>> (__gconv_transform_ucs4_internal)
>> +# define ONE_DIRECTION 0
>> +
>> +
>> +static inline int
>> +__attribute ((always_inline))
>> +ICONV_VX_NAME (ucs4_internal_loop) (struct __gconv_step *step,
>> + struct __gconv_step_data *step_data,
>> + const unsigned char **inptrp,
>> + const unsigned char *inend,
>> + unsigned char **outptrp,
>> + unsigned char *outend,
>> + size_t *irreversible)
>> +{
>> + int flags = step_data->__flags;
>> + const unsigned char *inptr = *inptrp;
>> + unsigned char *outptr = *outptrp;
>> + int result;
>> + size_t len, loop_count;
>> + do
>> + {
>> + len = MIN (inend - inptr, outend - outptr) / 4;
>> + __asm__ volatile (".machine push\n\t"
>> + ".machine \"z13\"\n\t"
>> + ".machinemode \"zarch_nohighgprs\"\n\t"
>> + CONVERT_32BIT_SIZE_T ([R_LEN])
>> + /* Setup to check for ch > 0x7fffffff. */
>> + "larl %[R_LI],9f\n\t"
>> + "vlm %%v20,%%v21,0(%[R_LI])\n\t"
>> + "srlg %[R_LI],%[R_LEN],2\n\t"
>> + "clgije %[R_LI],0,1f\n\t"
>> + /* Process 16byte (4char) blocks. */
>> + "0: vl %%v16,0(%[R_IN])\n\t"
>> + "vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"
>> + "jno 10f\n\t"
>> + "vst %%v16,0(%[R_OUT])\n\t"
>> + "la %[R_IN],16(%[R_IN])\n\t"
>> + "la %[R_OUT],16(%[R_OUT])\n\t"
>> + "brctg %[R_LI],0b\n\t"
>> + "llgfr %[R_LEN],%[R_LEN]\n\t"
>> + "nilf %[R_LEN],3\n\t"
>> + /* Process <16bytes. */
>> + "1: sll %[R_LEN],2\n\t"
>> + "ahik %[R_LI],%[R_LEN],-1\n\t"
>> + "jl 20f\n\t" /* No further bytes available. */
>> + "vll %%v16,%[R_LI],0(%[R_IN])\n\t"
>> + "vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"
>> + "vlgvb %[R_LI],%%v22,7\n\t"
>> + "clr %[R_LI],%[R_LEN]\n\t"
>> + "locgrhe %[R_LI],%[R_LEN]\n\t"
>> + "locghihe %[R_LEN],0\n\t"
>> + "j 11f\n\t"
>> + /* v20: Vector string range compare values. */
>> + "9: .long 0x7fffffff,0x0,0x0,0x0\n\t"
>> + /* v21: Vector string range compare control-bits.
>> + element 0: >; element 1: =<> (always true) */
>> + ".long 0x20000000,0xE0000000,0x0,0x0\n\t"
>> + /* Found a value > 0x7fffffff. */
>> + "10: vlgvb %[R_LI],%%v22,7\n\t"
>> + /* Store characters before invalid one. */
>> + "11: aghi %[R_LI],-1\n\t"
>> + "jl 20f\n\t"
>> + "vstl %%v16,%[R_LI],0(%[R_OUT])\n\t"
>> + "la %[R_IN],1(%[R_LI],%[R_IN])\n\t"
>> + "la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t"
>> + "20:\n\t"
>> + ".machine pop"
>> + : /* outputs */ [R_OUT] "+a" (outptr)
>> + , [R_IN] "+a" (inptr)
>> + , [R_LI] "=a" (loop_count)
>> + , [R_LEN] "+d" (len)
>> + : /* inputs */
>> + : /* clobber list*/ "memory", "cc"
>> + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v20")
>> + ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22")
>> + );
>> + if (len > 0)
>> + {
>> + /* The value is too large. We don't try transliteration here
>> since
>> + this is not an error because of the lack of possibilities to
>> + represent the result. This is a genuine bug in the input since
>> + UCS4 does not allow such values. */
>> + if (irreversible == NULL)
>> + /* We are transliterating, don't try to correct anything. */
>> + return __GCONV_ILLEGAL_INPUT;
>> +
>> + if (flags & __GCONV_IGNORE_ERRORS)
>> + {
>> + /* Just ignore this character. */
>> + ++*irreversible;
>> + inptr += 4;
>> + continue;
>> + }
>> +
>> + *inptrp = inptr;
>> + *outptrp = outptr;
>> + return __GCONV_ILLEGAL_INPUT;
>> + }
>> + }
>> + while (len > 0);
>> +
>> + *inptrp = inptr;
>> + *outptrp = outptr;
>> +
>> + /* Determine the status. */
>> + if (*inptrp == inend)
>> + result = __GCONV_EMPTY_INPUT;
>> + else if (*outptrp + 4 > outend)
>> + result = __GCONV_FULL_OUTPUT;
>> + else
>> + result = __GCONV_INCOMPLETE_INPUT;
>> +
>> + return result;
>> +}
>> +
>> +ICONV_VX_SINGLE (ucs4_internal_loop)
>> +# include <iconv/skeleton.c>
>> +ICONV_VX_IFUNC (__gconv_transform_ucs4_internal)
>> +
>> +
>> +/* Transform from UCS4-LE to the internal encoding. */
>> +# define DEFINE_INIT 0
>> +# define DEFINE_FINI 0
>> +# define MIN_NEEDED_FROM 4
>> +# define MIN_NEEDED_TO 4
>> +# define FROM_DIRECTION 1
>> +# define FROM_LOOP ICONV_VX_NAME (ucs4le_internal_loop)
>> +# define TO_LOOP ICONV_VX_NAME (ucs4le_internal_loop) /* This
>> is not used. */
>> +# define FUNCTION_NAME ICONV_VX_NAME
>> (__gconv_transform_ucs4le_internal)
>> +# define ONE_DIRECTION 0
>> +
>> +static inline int
>> +__attribute ((always_inline))
>> +ICONV_VX_NAME (ucs4le_internal_loop) (struct __gconv_step *step,
>> + struct __gconv_step_data *step_data,
>> + const unsigned char **inptrp,
>> + const unsigned char *inend,
>> + unsigned char **outptrp,
>> + unsigned char *outend,
>> + size_t *irreversible)
>> +{
>> + int flags = step_data->__flags;
>> + const unsigned char *inptr = *inptrp;
>> + unsigned char *outptr = *outptrp;
>> + int result;
>> + size_t len, loop_count;
>> + do
>> + {
>> + len = MIN (inend - inptr, outend - outptr) / 4;
>> + __asm__ volatile (".machine push\n\t"
>> + ".machine \"z13\"\n\t"
>> + ".machinemode \"zarch_nohighgprs\"\n\t"
>> + CONVERT_32BIT_SIZE_T ([R_LEN])
>> + /* Setup to check for ch > 0x7fffffff. */
>> + "larl %[R_LI],9f\n\t"
>> + "vlm %%v20,%%v22,0(%[R_LI])\n\t"
>> + "srlg %[R_LI],%[R_LEN],2\n\t"
>> + "clgije %[R_LI],0,1f\n\t"
>> + /* Process 16byte (4char) blocks. */
>> + "0: vl %%v16,0(%[R_IN])\n\t"
>> + "vperm %%v16,%%v16,%%v16,%%v22\n\t"
>> + "vstrcfs %%v23,%%v16,%%v20,%%v21\n\t"
>> + "jno 10f\n\t"
>> + "vst %%v16,0(%[R_OUT])\n\t"
>> + "la %[R_IN],16(%[R_IN])\n\t"
>> + "la %[R_OUT],16(%[R_OUT])\n\t"
>> + "brctg %[R_LI],0b\n\t"
>> + "llgfr %[R_LEN],%[R_LEN]\n\t"
>> + "nilf %[R_LEN],3\n\t"
>> + /* Process <16bytes. */
>> + "1: sll %[R_LEN],2\n\t"
>> + "ahik %[R_LI],%[R_LEN],-1\n\t"
>> + "jl 20f\n\t" /* No further bytes available. */
>> + "vll %%v16,%[R_LI],0(%[R_IN])\n\t"
>> + "vperm %%v16,%%v16,%%v16,%%v22\n\t"
>> + "vstrcfs %%v23,%%v16,%%v20,%%v21\n\t"
>> + "vlgvb %[R_LI],%%v23,7\n\t"
>> + "clr %[R_LI],%[R_LEN]\n\t"
>> + "locgrhe %[R_LI],%[R_LEN]\n\t"
>> + "locghihe %[R_LEN],0\n\t"
>> + "j 11f\n\t"
>> + /* v20: Vector string range compare values. */
>> + "9: .long 0x7fffffff,0x0,0x0,0x0\n\t"
>> + /* v21: Vector string range compare control-bits.
>> + element 0: >; element 1: =<> (always true) */
>> + ".long 0x20000000,0xE0000000,0x0,0x0\n\t"
>> + /* v22: Vector permute mask. */
>> + ".long 0x03020100,0x7060504,0x0B0A0908,0x0F0E0D0C\n\t"
>> + /* Found a value > 0x7fffffff. */
>> + "10: vlgvb %[R_LI],%%v23,7\n\t"
>> + /* Store characters before invalid one. */
>> + "11: aghi %[R_LI],-1\n\t"
>> + "jl 20f\n\t"
>> + "vstl %%v16,%[R_LI],0(%[R_OUT])\n\t"
>> + "la %[R_IN],1(%[R_LI],%[R_IN])\n\t"
>> + "la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t"
>> + "20:\n\t"
>> + ".machine pop"
>> + : /* outputs */ [R_OUT] "+a" (outptr)
>> + , [R_IN] "+a" (inptr)
>> + , [R_LI] "=a" (loop_count)
>> + , [R_LEN] "+d" (len)
>> + : /* inputs */
>> + : /* clobber list*/ "memory", "cc"
>> + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v20")
>> + ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22")
>> + ASM_CLOBBER_VR ("v23")
>> + );
>> + if (len > 0)
>> + {
>> + /* The value is too large. We don't try transliteration here
>> since
>> + this is not an error because of the lack of possibilities to
>> + represent the result. This is a genuine bug in the input since
>> + UCS4 does not allow such values. */
>> + if (irreversible == NULL)
>> + /* We are transliterating, don't try to correct anything. */
>> + return __GCONV_ILLEGAL_INPUT;
>> +
>> + if (flags & __GCONV_IGNORE_ERRORS)
>> + {
>> + /* Just ignore this character. */
>> + ++*irreversible;
>> + inptr += 4;
>> + continue;
>> + }
>> +
>> + *inptrp = inptr;
>> + *outptrp = outptr;
>> + return __GCONV_ILLEGAL_INPUT;
>> + }
>> + }
>> + while (len > 0);
>> +
>> + *inptrp = inptr;
>> + *outptrp = outptr;
>> +
>> + /* Determine the status. */
>> + if (*inptrp == inend)
>> + result = __GCONV_EMPTY_INPUT;
>> + else if (*inptrp + 4 > inend)
>> + result = __GCONV_INCOMPLETE_INPUT;
>> + else
>> + {
>> + assert (*outptrp + 4 > outend);
>> + result = __GCONV_FULL_OUTPUT;
>> + }
>> +
>> + return result;
>> +}
>> +ICONV_VX_SINGLE (ucs4le_internal_loop)
>> +# include <iconv/skeleton.c>
>> +ICONV_VX_IFUNC (__gconv_transform_ucs4le_internal)
>> +
>> +/* Convert from UCS2 to the internal (UCS4-like) format. */
>> +# define DEFINE_INIT 0
>> +# define DEFINE_FINI 0
>> +# define MIN_NEEDED_FROM 2
>> +# define MIN_NEEDED_TO 4
>> +# define FROM_DIRECTION 1
>> +# define FROM_LOOP ICONV_VX_NAME (ucs2_internal_loop)
>> +# define TO_LOOP ICONV_VX_NAME (ucs2_internal_loop) /* This is
>> not used. */
>> +# define FUNCTION_NAME ICONV_VX_NAME
>> (__gconv_transform_ucs2_internal)
>> +# define ONE_DIRECTION 1
>> +
>> +# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
>> +# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
>> +# define LOOPFCT FROM_LOOP
>> +# define BODY_ORIG_ERROR \
>> + /* Surrogate characters in UCS-2 input are not valid. Reject \
>> + them. (Catching this here is not security relevant.) */ \
>> + STANDARD_FROM_LOOP_ERR_HANDLER (2);
>> +# define BODY_ORIG \
>> + { \
>> + uint16_t u1 = get16 (inptr); \
>> + \
>> + if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \
>> + { \
>> + BODY_ORIG_ERROR \
>> + } \
>> + \
>> + *((uint32_t *) outptr) = u1; \
>> + outptr += sizeof (uint32_t); \
>> + inptr += 2; \
>> + }
>> +# define BODY \
>> + { \
>> + size_t len, tmp, tmp2; \
>> + len = MIN ((inend - inptr) / 2, (outend - outptr) / 4); \
>> + __asm__ volatile (".machine push\n\t" \
>> + ".machine \"z13\"\n\t" \
>> + ".machinemode \"zarch_nohighgprs\"\n\t" \
>> + CONVERT_32BIT_SIZE_T ([R_LEN]) \
>> + /* Setup to check for ch >= 0xd800 && ch < 0xe000. */ \
>> + "larl %[R_TMP],9f\n\t" \
>> + "vlm %%v20,%%v21,0(%[R_TMP])\n\t" \
>> + "srlg %[R_TMP],%[R_LEN],3\n\t" \
>> + "clgije %[R_TMP],0,1f\n\t" \
>> + /* Process 16byte (8char) blocks. */ \
>> + "0: vl %%v16,0(%[R_IN])\n\t" \
>> + "vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \
>> + /* Enlarge UCS2 to UCS4. */ \
>> + "vuplhh %%v17,%%v16\n\t" \
>> + "vupllh %%v18,%%v16\n\t" \
>> + "jno 10f\n\t" \
>> + /* Store 32bytes to buf_out. */ \
>> + "vstm %%v17,%%v18,0(%[R_OUT])\n\t" \
>> + "la %[R_IN],16(%[R_IN])\n\t" \
>> + "la %[R_OUT],32(%[R_OUT])\n\t" \
>> + "brctg %[R_TMP],0b\n\t" \
>> + "llgfr %[R_LEN],%[R_LEN]\n\t" \
>> + "nilf %[R_LEN],7\n\t" \
>> + /* Process <16bytes. */ \
>> + "1: sll %[R_LEN],1\n\t" \
>> + "ahik %[R_TMP],%[R_LEN],-1\n\t" \
>> + "jl 20f\n\t" /* No further bytes available. */ \
>> + "vll %%v16,%[R_TMP],0(%[R_IN])\n\t" \
>> + "vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \
>> + /* Enlarge UCS2 to UCS4. */ \
>> + "vuplhh %%v17,%%v16\n\t" \
>> + "vupllh %%v18,%%v16\n\t" \
>> + "vlgvb %[R_TMP],%%v19,7\n\t" \
>> + "clr %[R_TMP],%[R_LEN]\n\t" \
>> + "locgrhe %[R_TMP],%[R_LEN]\n\t" \
>> + "locghihe %[R_LEN],0\n\t" \
>> + "j 11f\n\t" \
>> + /* v20: Vector string range compare values. */ \
>> + "9: .short 0xd800,0xe000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
>> + /* v21: Vector string range compare control-bits. \
>> + element 0: =>; element 1: < */ \
>> + ".short 0xa000,0x4000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
>> + /* Found an element: ch >= 0xd800 && ch < 0xe000 */ \
>> + "10: vlgvb %[R_TMP],%%v19,7\n\t" \
>> + "11: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
>> + "sll %[R_TMP],1\n\t" \
>> + "lgr %[R_TMP2],%[R_TMP]\n\t" \
>> + "ahi %[R_TMP],-1\n\t" \
>> + "jl 20f\n\t" \
>> + "vstl %%v17,%[R_TMP],0(%[R_OUT])\n\t" \
>> + "ahi %[R_TMP],-16\n\t" \
>> + "jl 19f\n\t" \
>> + "vstl %%v18,%[R_TMP],16(%[R_OUT])\n\t" \
>> + "19: la %[R_OUT],0(%[R_TMP2],%[R_OUT])\n\t" \
>> + "20:\n\t" \
>> + ".machine pop" \
>> + : /* outputs */ [R_OUT] "+a" (outptr) \
>> + , [R_IN] "+a" (inptr) \
>> + , [R_TMP] "=a" (tmp) \
>> + , [R_TMP2] "=a" (tmp2) \
>> + , [R_LEN] "+d" (len) \
>> + : /* inputs */ \
>> + : /* clobber list*/ "memory", "cc" \
>> + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
>> + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
>> + ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
>> + ); \
>> + if (len > 0) \
>> + { \
>> + /* Found an invalid character at next input-char. */ \
>> + BODY_ORIG_ERROR \
>> + } \
>> + }
>> +
>> +# define LOOP_NEED_FLAGS
>> +# include <iconv/loop.c>
>> +# include <iconv/skeleton.c>
>> +# undef BODY_ORIG
>> +# undef BODY_ORIG_ERROR
>> +ICONV_VX_IFUNC (__gconv_transform_ucs2_internal)
>> +
>> +/* Convert from UCS2 in other endianness to the internal (UCS4-like)
>> format. */
>> +/* Parameters for the loop/skeleton templates included below: one input
>> + character needs 2 bytes, one output character needs 4 bytes, and only
>> + a single conversion direction is defined (ONE_DIRECTION). */
>> +# define DEFINE_INIT 0
>> +# define DEFINE_FINI 0
>> +# define MIN_NEEDED_FROM 2
>> +# define MIN_NEEDED_TO 4
>> +# define FROM_DIRECTION 1
>> +# define FROM_LOOP ICONV_VX_NAME (ucs2reverse_internal_loop)
>> +# define TO_LOOP ICONV_VX_NAME (ucs2reverse_internal_loop) /*
>> This is not used.*/
>> +# define FUNCTION_NAME ICONV_VX_NAME
>> (__gconv_transform_ucs2reverse_internal)
>> +# define ONE_DIRECTION 1
>> +
>> +/* The loop is generated for the "from" direction only. */
>> +# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
>> +# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
>> +# define LOOPFCT FROM_LOOP
>> +# define BODY_ORIG_ERROR \
>> + /* A surrogate was found in the UCS-2 input, which is not valid. \
>> + (Rejecting it here is not security relevant.) Either skip it and \
>> + count it as irreversible, or stop with an error. */ \
>> + if (ignore_errors_p ()) \
>> + { \
>> + ++*irreversible; \
>> + inptr += 2; \
>> + continue; \
>> + } \
>> + result = __GCONV_ILLEGAL_INPUT; \
>> + break;
>> +
>> +# define BODY_ORIG \
>> + { \
>> + /* Fetch one UCS2 character and byte-swap it. */ \
>> + const uint16_t ch = bswap_16 (get16 (inptr)); \
>> + \
>> + if (__glibc_unlikely (0xd800 <= ch && ch <= 0xdfff)) \
>> + { \
>> + BODY_ORIG_ERROR \
>> + } \
>> + \
>> + /* Consume 2 input bytes and store the value as a 4 byte character. */ \
>> + inptr += 2; \
>> + *((uint32_t *) outptr) = ch; \
>> + outptr += sizeof (uint32_t); \
>> + }
>> +/* Vector variant of the loop body. len is the number of characters that
>> + fit into both the remaining input (2 bytes each) and the remaining
>> + output (4 bytes each). Each 16 byte block is byte-swapped with the
>> + permute mask in v22, checked for 0xd800 <= ch < 0xe000 and widened to
>> + UCS4; a remainder of fewer than 8 characters is handled with
>> + vll/vstl. The asm leaves len > 0 if it stopped in front of such a
>> + character, which BODY_ORIG_ERROR then handles. */
>> +# define BODY \
>> + { \
>> + size_t len, tmp, tmp2; \
>> + len = MIN ((inend - inptr) / 2, (outend - outptr) / 4); \
>> + __asm__ volatile (".machine push\n\t" \
>> + ".machine \"z13\"\n\t" \
>> + ".machinemode \"zarch_nohighgprs\"\n\t" \
>> + CONVERT_32BIT_SIZE_T ([R_LEN]) \
>> + /* Setup to check for ch >= 0xd800 && ch < 0xe000. */ \
>> + "larl %[R_TMP],9f\n\t" \
>> + "vlm %%v20,%%v22,0(%[R_TMP])\n\t" \
>> + /* R_TMP = number of full 8 character blocks. */ \
>> + "srlg %[R_TMP],%[R_LEN],3\n\t" \
>> + "clgije %[R_TMP],0,1f\n\t" \
>> + /* Process 16byte (8char) blocks. */ \
>> + "0: vl %%v16,0(%[R_IN])\n\t" \
>> + "vperm %%v16,%%v16,%%v16,%%v22\n\t" \
>> + "vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \
>> + /* Enlarge UCS2 to UCS4. */ \
>> + "vuplhh %%v17,%%v16\n\t" \
>> + "vupllh %%v18,%%v16\n\t" \
>> + /* Leave the block loop if the range compare found an element. */ \
>> + "jno 10f\n\t" \
>> + /* Store 32bytes to buf_out. */ \
>> + "vstm %%v17,%%v18,0(%[R_OUT])\n\t" \
>> + "la %[R_IN],16(%[R_IN])\n\t" \
>> + "la %[R_OUT],32(%[R_OUT])\n\t" \
>> + "brctg %[R_TMP],0b\n\t" \
>> + /* Keep only the number of characters left after the full blocks. */ \
>> + "llgfr %[R_LEN],%[R_LEN]\n\t" \
>> + "nilf %[R_LEN],7\n\t" \
>> + /* Process <16bytes. */ \
>> + "1: sll %[R_LEN],1\n\t" \
>> + "ahik %[R_TMP],%[R_LEN],-1\n\t" \
>> + "jl 20f\n\t" /* No further bytes available. */ \
>> + "vll %%v16,%[R_TMP],0(%[R_IN])\n\t" \
>> + "vperm %%v16,%%v16,%%v16,%%v22\n\t" \
>> + "vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \
>> + /* Enlarge UCS2 to UCS4. */ \
>> + "vuplhh %%v17,%%v16\n\t" \
>> + "vupllh %%v18,%%v16\n\t" \
>> + /* Clamp the index of the first found element to the remaining \
>> + byte count; R_LEN becomes 0 if nothing was found within it. */ \
>> + "vlgvb %[R_TMP],%%v19,7\n\t" \
>> + "clr %[R_TMP],%[R_LEN]\n\t" \
>> + "locgrhe %[R_TMP],%[R_LEN]\n\t" \
>> + "locghihe %[R_LEN],0\n\t" \
>> + "j 11f\n\t" \
>> + /* v20: Vector string range compare values. */ \
>> + "9: .short 0xd800,0xe000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
>> + /* v21: Vector string range compare control-bits. \
>> + element 0: >=; element 1: < */ \
>> + ".short 0xa000,0x4000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
>> + /* v22: Vector permute mask. */ \
>> + ".short 0x0100,0x0302,0x0504,0x0706\n\t" \
>> + ".short 0x0908,0x0b0a,0x0d0c,0x0f0e\n\t" \
>> + /* Found an element: ch >= 0xd800 && ch < 0xe000 */ \
>> + "10: vlgvb %[R_TMP],%%v19,7\n\t" \
>> + /* R_TMP = number of input bytes to consume; twice as many output \
>> + bytes are stored from v17 and, if more than 16, from v18. */ \
>> + "11: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
>> + "sll %[R_TMP],1\n\t" \
>> + "lgr %[R_TMP2],%[R_TMP]\n\t" \
>> + "ahi %[R_TMP],-1\n\t" \
>> + "jl 20f\n\t" \
>> + "vstl %%v17,%[R_TMP],0(%[R_OUT])\n\t" \
>> + "ahi %[R_TMP],-16\n\t" \
>> + "jl 19f\n\t" \
>> + "vstl %%v18,%[R_TMP],16(%[R_OUT])\n\t" \
>> + "19: la %[R_OUT],0(%[R_TMP2],%[R_OUT])\n\t" \
>> + "20:\n\t" \
>> + ".machine pop" \
>> + : /* outputs */ [R_OUT] "+a" (outptr) \
>> + , [R_IN] "+a" (inptr) \
>> + , [R_TMP] "=a" (tmp) \
>> + , [R_TMP2] "=a" (tmp2) \
>> + , [R_LEN] "+d" (len) \
>> + : /* inputs */ \
>> + : /* clobber list*/ "memory", "cc" \
>> + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
>> + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
>> + ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
>> + ASM_CLOBBER_VR ("v22") \
>> + ); \
>> + if (len > 0) \
>> + { \
>> + /* Found an invalid character at next input-char. */ \
>> + BODY_ORIG_ERROR \
>> + } \
>> + }
>> +/* Include the generic loop and skeleton templates, which consume the
>> + macros defined above (presumably instantiating the loop and the
>> + conversion function - confirm against iconv/loop.c and
>> + iconv/skeleton.c). */
>> +# define LOOP_NEED_FLAGS
>> +# include <iconv/loop.c>
>> +# include <iconv/skeleton.c>
>> +/* Drop the helper macros; the next conversion defines its own BODY_ORIG. */
>> +# undef BODY_ORIG
>> +# undef BODY_ORIG_ERROR
>> +ICONV_VX_IFUNC (__gconv_transform_ucs2reverse_internal)
>> +
>> +/* Convert from the internal (UCS4-like) format to UCS2. */
>> +/* Parameters for the loop/skeleton templates included below: one input
>> + character needs 4 bytes, one output character needs 2 bytes, and only
>> + a single conversion direction is defined (ONE_DIRECTION). */
>> +#define DEFINE_INIT 0
>> +#define DEFINE_FINI 0
>> +#define MIN_NEEDED_FROM 4
>> +#define MIN_NEEDED_TO 2
>> +#define FROM_DIRECTION 1
>> +#define FROM_LOOP ICONV_VX_NAME (internal_ucs2_loop)
>> +#define TO_LOOP ICONV_VX_NAME (internal_ucs2_loop) /* This
>> is not used. */
>> +#define FUNCTION_NAME ICONV_VX_NAME
>> (__gconv_transform_internal_ucs2)
>> +#define ONE_DIRECTION 1
>> +
>> +/* The loop is generated for the "from" direction only. */
>> +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
>> +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
>> +#define LOOPFCT FROM_LOOP
>> +/* Scalar conversion of one internal (UCS4) character to UCS2. Values
>> + >= 0x10000 are passed to UNICODE_TAG_HANDLER and
>> + STANDARD_TO_LOOP_ERR_HANDLER; surrogates end the loop with
>> + __GCONV_ILLEGAL_INPUT unless errors are ignored, in which case they
>> + are skipped and counted in *irreversible; all other values are
>> + stored with put16.
>> + NOTE(review): result is set to __GCONV_ILLEGAL_INPUT here even when
>> + the surrogate is skipped, unlike the ucs2reverse variant below -
>> + confirm that this is intended. */
>> +#define BODY_ORIG \
>> + { \
>> + uint32_t val = *((const uint32_t *) inptr); \
>> + \
>> + if (__glibc_unlikely (val >= 0x10000)) \
>> + { \
>> + UNICODE_TAG_HANDLER (val, 4); \
>> + STANDARD_TO_LOOP_ERR_HANDLER (4); \
>> + } \
>> + else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
>> + { \
>> + /* Surrogate characters in UCS-4 input are not valid. \
>> + We must catch this, because the UCS-2 output might be \
>> + interpreted as UTF-16 by other programs. If we let \
>> + surrogates pass through, attackers could make a security \
>> + hole exploit by synthesizing any desired plane 1-16 \
>> + character. */ \
>> + result = __GCONV_ILLEGAL_INPUT; \
>> + if (! ignore_errors_p ()) \
>> + break; \
>> + inptr += 4; \
>> + ++*irreversible; \
>> + continue; \
>> + } \
>> + else \
>> + { \
>> + /* Valid character: emit 2 bytes, consume 4 bytes. */ \
>> + put16 (outptr, val); \
>> + outptr += sizeof (uint16_t); \
>> + inptr += 4; \
>> + } \
>> + }
>> +/* Vector variant of the conversion loop body. Inputs shorter than 32
>> + bytes (8 UCS4 characters) or output space below 16 bytes are handled by
>> + BODY_ORIG. Otherwise 8 characters per iteration are packed to UCS2
>> + and stored; the asm stops in front of the first character that needs
>> + special handling and leaves loop_count > 0, so that BODY_ORIG converts
>> + or rejects that character. */
>> +# define BODY \
>> + { \
>> + if (__builtin_expect (inend - inptr < 32, 1) \
>> + || outend - outptr < 16) \
>> + /* Convert remaining bytes with c code. */ \
>> + BODY_ORIG \
>> + else \
>> + { \
>> + /* Convert in 32 byte blocks. */ \
>> + size_t loop_count = (inend - inptr) / 32; \
>> + size_t tmp, tmp2; \
>> + if (loop_count > (outend - outptr) / 16) \
>> + loop_count = (outend - outptr) / 16; \
>> + __asm__ volatile (".machine push\n\t" \
>> + ".machine \"z13\"\n\t" \
>> + ".machinemode \"zarch_nohighgprs\"\n\t" \
>> + CONVERT_32BIT_SIZE_T ([R_LI]) \
>> + /* Load the compare values and control-bits into v20-v23. */ \
>> + "larl %[R_I],3f\n\t" \
>> + "vlm %%v20,%%v23,0(%[R_I])\n\t" \
>> + "0:\n\t" \
>> + "vlm %%v16,%%v17,0(%[R_IN])\n\t" \
>> + /* Shorten UCS4 to UCS2. */ \
>> + "vpkf %%v18,%%v16,%%v17\n\t" \
>> + "vstrcfs %%v19,%%v16,%%v20,%%v21\n\t" \
>> + "jno 11f\n\t" \
>> + "1: vstrcfs %%v19,%%v17,%%v20,%%v21\n\t" \
>> + "jno 10f\n\t" \
>> + /* Store 16bytes to buf_out. */ \
>> + "2: vst %%v18,0(%[R_OUT])\n\t" \
>> + "la %[R_IN],32(%[R_IN])\n\t" \
>> + "la %[R_OUT],16(%[R_OUT])\n\t" \
>> + "brctg %[R_LI],0b\n\t" \
>> + "j 20f\n\t" \
>> + /* Setup to check for ch >= 0xd800. (v20, v21) */ \
>> + "3: .long 0xd800,0xd800,0x0,0x0\n\t" \
>> + ".long 0xa0000000,0xa0000000,0x0,0x0\n\t" \
>> + /* Setup to check for ch >= 0xe000 \
>> + && ch < 0x10000. (v22,v23) */ \
>> + ".long 0xe000,0x10000,0x0,0x0\n\t" \
>> + ".long 0xa0000000,0x40000000,0x0,0x0\n\t" \
>> + /* v16 contains only valid chars. Check in v17: \
>> + ch >= 0xe000 && ch <= 0xffff. */ \
>> + "10: vstrcfs %%v19,%%v17,%%v22,%%v23,8\n\t" \
>> + "jo 2b\n\t" /* All ch's in this range, proceed. */ \
>> + /* Byte offset of v17 within the 32 byte block. Use lghi, not \
>> + lhi: R_TMP is added with the 64bit agr below and the sum is \
>> + used as an address index, so all 64 bits must be defined. */ \
>> + "lghi %[R_TMP],16\n\t" \
>> + "j 12f\n\t" \
>> + /* Maybe v16 contains invalid chars. \
>> + Check ch >= 0xe000 && ch <= 0xffff. */ \
>> + "11: vstrcfs %%v19,%%v16,%%v22,%%v23,8\n\t" \
>> + "jo 1b\n\t" /* All ch's in this range, proceed. */ \
>> + /* Byte offset of v16 within the block (64bit, see above). */ \
>> + "lghi %[R_TMP],0\n\t" \
>> + /* R_I = number of input bytes in front of the found character; \
>> + half as many output bytes are stored from v18. */ \
>> + "12: vlgvb %[R_I],%%v19,7\n\t" \
>> + "agr %[R_I],%[R_TMP]\n\t" \
>> + "la %[R_IN],0(%[R_I],%[R_IN])\n\t" \
>> + "srl %[R_I],1\n\t" \
>> + "ahi %[R_I],-1\n\t" \
>> + "jl 20f\n\t" \
>> + "vstl %%v18,%[R_I],0(%[R_OUT])\n\t" \
>> + "la %[R_OUT],1(%[R_I],%[R_OUT])\n\t" \
>> + "20:\n\t" \
>> + ".machine pop" \
>> + : /* outputs */ [R_OUT] "+a" (outptr) \
>> + , [R_IN] "+a" (inptr) \
>> + , [R_LI] "+d" (loop_count) \
>> + , [R_I] "=a" (tmp2) \
>> + , [R_TMP] "=d" (tmp) \
>> + : /* inputs */ \
>> + : /* clobber list*/ "memory", "cc" \
>> + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
>> + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
>> + ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
>> + ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \
>> + ); \
>> + if (loop_count > 0) \
>> + { \
>> + /* Found an invalid character at next character. */ \
>> + BODY_ORIG \
>> + } \
>> + } \
>> + }
>> +/* Include the generic loop and skeleton templates, which consume the
>> + macros defined above (presumably instantiating the loop and the
>> + conversion function - confirm against iconv/loop.c and
>> + iconv/skeleton.c). */
>> +#define LOOP_NEED_FLAGS
>> +#include <iconv/loop.c>
>> +#include <iconv/skeleton.c>
>> +/* Drop the scalar helper macro; the next conversion defines its own. */
>> +# undef BODY_ORIG
>> +ICONV_VX_IFUNC (__gconv_transform_internal_ucs2)
>> +
>> +/* Convert from the internal (UCS4-like) format to UCS2 in other
>> endianness. */
>> +/* Parameters for the loop/skeleton templates included below: one input
>> + character needs 4 bytes, one output character needs 2 bytes, and only
>> + a single conversion direction is defined (ONE_DIRECTION). */
>> +#define DEFINE_INIT 0
>> +#define DEFINE_FINI 0
>> +#define MIN_NEEDED_FROM 4
>> +#define MIN_NEEDED_TO 2
>> +#define FROM_DIRECTION 1
>> +#define FROM_LOOP ICONV_VX_NAME (internal_ucs2reverse_loop)
>> +#define TO_LOOP ICONV_VX_NAME
>> (internal_ucs2reverse_loop)/* This is not used.*/
>> +#define FUNCTION_NAME ICONV_VX_NAME
>> (__gconv_transform_internal_ucs2reverse)
>> +#define ONE_DIRECTION 1
>> +
>> +/* The loop is generated for the "from" direction only. */
>> +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
>> +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
>> +#define LOOPFCT FROM_LOOP
>> +/* Scalar conversion of one internal (UCS4) character to byte-swapped
>> + UCS2. Values >= 0x10000 are passed to UNICODE_TAG_HANDLER and
>> + STANDARD_TO_LOOP_ERR_HANDLER; surrogates end the loop with
>> + __GCONV_ILLEGAL_INPUT unless errors are ignored, in which case they
>> + are skipped and counted in *irreversible; all other values are
>> + byte-swapped with bswap_16 and stored with put16. */
>> +#define BODY_ORIG \
>> + { \
>> + uint32_t val = *((const uint32_t *) inptr); \
>> + if (__glibc_unlikely (val >= 0x10000)) \
>> + { \
>> + UNICODE_TAG_HANDLER (val, 4); \
>> + STANDARD_TO_LOOP_ERR_HANDLER (4); \
>> + } \
>> + else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \
>> + { \
>> + /* Surrogate characters in UCS-4 input are not valid. \
>> + We must catch this, because the UCS-2 output might be \
>> + interpreted as UTF-16 by other programs. If we let \
>> + surrogates pass through, attackers could make a security \
>> + hole exploit by synthesizing any desired plane 1-16 \
>> + character. */ \
>> + if (! ignore_errors_p ()) \
>> + { \
>> + result = __GCONV_ILLEGAL_INPUT; \
>> + break; \
>> + } \
>> + inptr += 4; \
>> + ++*irreversible; \
>> + continue; \
>> + } \
>> + else \
>> + { \
>> + /* Valid character: emit 2 swapped bytes, consume 4 bytes. */ \
>> + put16 (outptr, bswap_16 (val)); \
>> + outptr += sizeof (uint16_t); \
>> + inptr += 4; \
>> + } \
>> + }
>> +/* Vector variant of the conversion loop body. Inputs shorter than 32
>> + bytes (8 UCS4 characters) or output space below 16 bytes are handled by
>> + BODY_ORIG. Otherwise 8 characters per iteration are packed to UCS2,
>> + byte-swapped with the permute mask in v24 and stored; the asm stops in
>> + front of the first character that needs special handling and leaves
>> + loop_count > 0, so that BODY_ORIG converts or rejects that
>> + character. */
>> +# define BODY \
>> + { \
>> + if (__builtin_expect (inend - inptr < 32, 1) \
>> + || outend - outptr < 16) \
>> + /* Convert remaining bytes with c code. */ \
>> + BODY_ORIG \
>> + else \
>> + { \
>> + /* Convert in 32 byte blocks. */ \
>> + size_t loop_count = (inend - inptr) / 32; \
>> + size_t tmp, tmp2; \
>> + if (loop_count > (outend - outptr) / 16) \
>> + loop_count = (outend - outptr) / 16; \
>> + __asm__ volatile (".machine push\n\t" \
>> + ".machine \"z13\"\n\t" \
>> + ".machinemode \"zarch_nohighgprs\"\n\t" \
>> + CONVERT_32BIT_SIZE_T ([R_LI]) \
>> + /* Load the compare values, control-bits and the permute mask \
>> + into v20-v24. */ \
>> + "larl %[R_I],3f\n\t" \
>> + "vlm %%v20,%%v24,0(%[R_I])\n\t" \
>> + "0:\n\t" \
>> + "vlm %%v16,%%v17,0(%[R_IN])\n\t" \
>> + /* Shorten UCS4 to UCS2 and byteswap. */ \
>> + "vpkf %%v18,%%v16,%%v17\n\t" \
>> + "vperm %%v18,%%v18,%%v18,%%v24\n\t" \
>> + "vstrcfs %%v19,%%v16,%%v20,%%v21\n\t" \
>> + "jno 11f\n\t" \
>> + "1: vstrcfs %%v19,%%v17,%%v20,%%v21\n\t" \
>> + "jno 10f\n\t" \
>> + /* Store 16bytes to buf_out. */ \
>> + "2: vst %%v18,0(%[R_OUT])\n\t" \
>> + "la %[R_IN],32(%[R_IN])\n\t" \
>> + "la %[R_OUT],16(%[R_OUT])\n\t" \
>> + "brctg %[R_LI],0b\n\t" \
>> + "j 20f\n\t" \
>> + /* Setup to check for ch >= 0xd800. (v20, v21) */ \
>> + "3: .long 0xd800,0xd800,0x0,0x0\n\t" \
>> + ".long 0xa0000000,0xa0000000,0x0,0x0\n\t" \
>> + /* Setup to check for ch >= 0xe000 \
>> + && ch < 0x10000. (v22,v23) */ \
>> + ".long 0xe000,0x10000,0x0,0x0\n\t" \
>> + ".long 0xa0000000,0x40000000,0x0,0x0\n\t" \
>> + /* Vector permute mask (v24) */ \
>> + ".short 0x0100,0x0302,0x0504,0x0706\n\t" \
>> + ".short 0x0908,0x0b0a,0x0d0c,0x0f0e\n\t" \
>> + /* v16 contains only valid chars. Check in v17: \
>> + ch >= 0xe000 && ch <= 0xffff. */ \
>> + "10: vstrcfs %%v19,%%v17,%%v22,%%v23,8\n\t" \
>> + "jo 2b\n\t" /* All ch's in this range, proceed. */ \
>> + /* Byte offset of v17 within the 32 byte block. Use lghi, not \
>> + lhi: R_TMP is added with the 64bit agr below and the sum is \
>> + used as an address index, so all 64 bits must be defined. */ \
>> + "lghi %[R_TMP],16\n\t" \
>> + "j 12f\n\t" \
>> + /* Maybe v16 contains invalid chars. \
>> + Check ch >= 0xe000 && ch <= 0xffff. */ \
>> + "11: vstrcfs %%v19,%%v16,%%v22,%%v23,8\n\t" \
>> + "jo 1b\n\t" /* All ch's in this range, proceed. */ \
>> + /* Byte offset of v16 within the block (64bit, see above). */ \
>> + "lghi %[R_TMP],0\n\t" \
>> + /* R_I = number of input bytes in front of the found character; \
>> + half as many output bytes are stored from v18. */ \
>> + "12: vlgvb %[R_I],%%v19,7\n\t" \
>> + "agr %[R_I],%[R_TMP]\n\t" \
>> + "la %[R_IN],0(%[R_I],%[R_IN])\n\t" \
>> + "srl %[R_I],1\n\t" \
>> + "ahi %[R_I],-1\n\t" \
>> + "jl 20f\n\t" \
>> + "vstl %%v18,%[R_I],0(%[R_OUT])\n\t" \
>> + "la %[R_OUT],1(%[R_I],%[R_OUT])\n\t" \
>> + "20:\n\t" \
>> + ".machine pop" \
>> + : /* outputs */ [R_OUT] "+a" (outptr) \
>> + , [R_IN] "+a" (inptr) \
>> + , [R_LI] "+d" (loop_count) \
>> + , [R_I] "=a" (tmp2) \
>> + , [R_TMP] "=d" (tmp) \
>> + : /* inputs */ \
>> + : /* clobber list*/ "memory", "cc" \
>> + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
>> + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
>> + ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
>> + ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \
>> + ASM_CLOBBER_VR ("v24") \
>> + ); \
>> + if (loop_count > 0) \
>> + { \
>> + /* Found an invalid character at next character. */ \
>> + BODY_ORIG \
>> + } \
>> + } \
>> + }
>> +/* Include the generic loop and skeleton templates, which consume the
>> + macros defined above (presumably instantiating the loop and the
>> + conversion function - confirm against iconv/loop.c and
>> + iconv/skeleton.c). */
>> +#define LOOP_NEED_FLAGS
>> +#include <iconv/loop.c>
>> +#include <iconv/skeleton.c>
>> +/* Drop the scalar helper macro again. */
>> +# undef BODY_ORIG
>> +ICONV_VX_IFUNC (__gconv_transform_internal_ucs2reverse)
>> +
>> +
>> +#else
>> +/* Generate the internal transformations without ifunc if build
>> environment
>> + lacks vector support. Instead simply include the common version. */
>> +# include <iconv/gconv_simple.c>
>> +#endif /* !defined HAVE_S390_VX_ASM_SUPPORT */
>>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0005-S390-Optimize-builtin-iconv-modules.patch
Type: text/x-patch
Size: 48998 bytes
Desc: not available
URL: <http://sourceware.org/pipermail/libc-alpha/attachments/20160421/952b00df/attachment.bin>
More information about the Libc-alpha
mailing list