[PATCH] Set EILSEQ in mbtowc_r (and fix compiler warnings)
Corinna Vinschen
vinschen@redhat.com
Fri Mar 20 00:34:00 GMT 2009
Hi,
I applied the patch below as obvious. While we changed the wctomb_r
function to set errno to EILSEQ in case of an unconvertible or
invalid wide char according to POSIX.1-2008, we neglected to do the same
for mbtowc_r. The applied patch fixes that. It also fixes compiler
warnings in mbtowc_r.c and wctomb_r.c due to a missing declaration
of __locale_charset.
Corinna
* libc/stdlib/mbtowc_r.c (_mbtowc_r): Set errno to EILSEQ and return -1
in case of an invalid character sequence.
* libc/stdlib/mbtowc_r.c (_mbtowc_r): Fix compiler warning due to
missing declaration of __locale_charset.
* libc/stdlib/wctomb_r.c (_wctomb_r): Ditto.
Index: libc/stdlib/mbtowc_r.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdlib/mbtowc_r.c,v
retrieving revision 1.10
diff -u -p -r1.10 mbtowc_r.c
--- libc/stdlib/mbtowc_r.c 3 Mar 2009 09:28:45 -0000 1.10
+++ libc/stdlib/mbtowc_r.c 19 Mar 2009 19:44:50 -0000
@@ -4,8 +4,11 @@
#include "mbctype.h"
#include <wchar.h>
#include <string.h>
+#include <errno.h>
#ifdef _MB_CAPABLE
+extern char *__locale_charset ();
+
typedef enum { ESCAPE, DOLLAR, BRACKET, AT, B, J,
NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;
typedef enum { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
@@ -117,10 +120,16 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
return -2;
ch = t[i++];
if (ch < 0x80 || ch > 0xbf)
- return -1;
+ {
+ r->_errno = EILSEQ;
+ return -1;
+ }
if (state->__value.__wchb[0] < 0xc2)
- /* overlong UTF-8 sequence */
- return -1;
+ {
+ /* overlong UTF-8 sequence */
+ r->_errno = EILSEQ;
+ return -1;
+ }
state->__count = 0;
*pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6)
| (wchar_t)(ch & 0x3f);
@@ -139,24 +148,36 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
return -2;
ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0)
- /* overlong UTF-8 sequence */
- return -1;
+ {
+ /* overlong UTF-8 sequence */
+ r->_errno = EILSEQ;
+ return -1;
+ }
if (ch < 0x80 || ch > 0xbf)
- return -1;
+ {
+ r->_errno = EILSEQ;
+ return -1;
+ }
state->__value.__wchb[1] = ch;
state->__count = 2;
if (n < 3)
return -2;
ch = t[i++];
if (ch < 0x80 || ch > 0xbf)
- return -1;
+ {
+ r->_errno = EILSEQ;
+ return -1;
+ }
state->__count = 0;
tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
| (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
| (wchar_t)(ch & 0x3f);
if (tmp >= 0xd800 && tmp <= 0xdfff)
- return -1;
+ {
+ r->_errno = EILSEQ;
+ return -1;
+ }
*pwc = tmp;
return i;
}
@@ -173,10 +194,16 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
return -2;
ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
if (state->__value.__wchb[0] == 0xf0 && ch < 0x90)
- /* overlong UTF-8 sequence */
- return -1;
+ {
+ /* overlong UTF-8 sequence */
+ r->_errno = EILSEQ;
+ return -1;
+ }
if (ch < 0x80 || ch > 0xbf)
- return -1;
+ {
+ r->_errno = EILSEQ;
+ return -1;
+ }
state->__value.__wchb[1] = ch;
if (state->__count == 1)
state->__count = 2;
@@ -186,14 +213,20 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
return -2;
ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
if (ch < 0x80 || ch > 0xbf)
- return -1;
+ {
+ r->_errno = EILSEQ;
+ return -1;
+ }
state->__value.__wchb[2] = ch;
state->__count = 3;
if (n < 4)
return -2;
ch = t[i++];
if (ch < 0x80 || ch > 0xbf)
- return -1;
+ {
+ r->_errno = EILSEQ;
+ return -1;
+ }
tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18)
| (wint_t)((state->__value.__wchb[1] & 0x3f) << 12)
| (wint_t)((state->__value.__wchb[2] & 0x3f) << 6)
@@ -217,7 +250,10 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
return i;
}
else
- return -1;
+ {
+ r->_errno = EILSEQ;
+ return -1;
+ }
}
else if (!strcmp (__locale_charset (), "SJIS"))
{
@@ -246,7 +282,10 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
return i;
}
else
- return -1;
+ {
+ r->_errno = EILSEQ;
+ return -1;
+ }
}
}
else if (!strcmp (__locale_charset (), "EUCJP"))
@@ -276,7 +315,10 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
return i;
}
else
- return -1;
+ {
+ r->_errno = EILSEQ;
+ return -1;
+ }
}
}
else if (!strcmp (__locale_charset (), "JIS"))
@@ -357,6 +399,7 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
break;
case ERROR:
default:
+ r->_errno = EILSEQ;
return -1;
}
Index: libc/stdlib/wctomb_r.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdlib/wctomb_r.c,v
retrieving revision 1.11
diff -u -p -r1.11 wctomb_r.c
--- libc/stdlib/wctomb_r.c 3 Mar 2009 09:28:45 -0000 1.11
+++ libc/stdlib/wctomb_r.c 19 Mar 2009 19:44:50 -0000
@@ -5,6 +5,8 @@
#include <locale.h>
#include "mbctype.h"
+extern char *__locale_charset ();
+
/* for some conversions, we use the __count field as a place to store a state value */
#define __state __count
--
Corinna Vinschen
Cygwin Project Co-Leader
Red Hat
More information about the Newlib
mailing list