This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

UTF-8 decoder simplification



Fewer instructions + fewer memory accesses = more speed.


2000-09-03  Bruno Haible  <haible@clisp.cons.org>

	* iconv/gconv_simple.c (encoding_mask, encoding_byte): Remove.
	(__gconv_transform_internal_utf8) [BODY]: Use simple shifts instead.

*** glibc-20000831/iconv/gconv_simple.c.bak	Mon Aug 28 13:34:25 2000
--- glibc-20000831/iconv/gconv_simple.c	Sat Sep  2 17:47:00 2000
***************
*** 34,52 ****
  #endif
  
  
- /* These are definitions used by some of the functions for handling
-    UTF-8 encoding below.  */
- static const uint32_t encoding_mask[] =
- {
-   ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
- };
- 
- static const unsigned char encoding_byte[] =
- {
-   0xc0, 0xe0, 0xf0, 0xf8, 0xfc
- };
- 
- 
  /* Transform from the internal, UCS4-like format, to UCS4.  The
     difference between the internal ucs4 format and the real UCS4
     format is, if any, the endianess.  The Unicode/ISO 10646 says that
--- 34,39 ----
***************
*** 856,862 ****
  	char *start;							      \
  									      \
  	for (step = 2; step < 6; ++step)				      \
! 	  if ((wc & encoding_mask[step - 2]) == 0)			      \
  	    break;							      \
  									      \
  	if (__builtin_expect (outptr + step > outend, 0))		      \
--- 843,849 ----
  	char *start;							      \
  									      \
  	for (step = 2; step < 6; ++step)				      \
! 	  if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0)		      \
  	    break;							      \
  									      \
  	if (__builtin_expect (outptr + step > outend, 0))		      \
***************
*** 867,873 ****
  	  }								      \
  									      \
  	start = outptr;							      \
! 	*outptr = encoding_byte[step - 2];				      \
  	outptr += step;							      \
  	--step;								      \
  	do								      \
--- 854,860 ----
  	  }								      \
  									      \
  	start = outptr;							      \
! 	*outptr = (unsigned char) (~0xff >> step);			      \
  	outptr += step;							      \
  	--step;								      \
  	do								      \

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]