sourceware.org Git - newlib-cygwin.git/commitdiff
* libc/strptime.cc: Implement support for era, alt_digits and POSIX
author Corinna Vinschen <corinna@vinschen.de>
Fri, 26 Feb 2010 17:53:52 +0000 (17:53 +0000)
committer Corinna Vinschen <corinna@vinschen.de>
Fri, 26 Feb 2010 17:53:52 +0000 (17:53 +0000)
padding and width modifiers.
(era_info_t): New type.
(free_era_info): New static function to free era_info_t storage.
(get_era_info): New static function to create era_info_t storage
from LC_TIME era information.
(alt_digits_t): New type.
(get_alt_digits): New static function to create alt_digits_t storage
from LC_TIME alt_digits information.
(free_alt_digits): New static function to free alt_digits_t storage.
(find_alt_digits): New static function to scan input for alternative
digits and return them, if any.  Return NULL otherwise.
(__strptime): New static function taking all code from strptime.
Implement handling for E, O, +, 0, and width modifiers per POSIX-1.2008.
(strptime): Convert into wrapper function to provide era_info and
alt_digits pointers and call __strptime.
(conv_num): Take additional alt_digits_t parameter and if it's not
NULL, call find_alt_digits to convert.

winsup/cygwin/ChangeLog
winsup/cygwin/libc/strptime.cc

index 8d9ab914c31eec0884b6efa1f769c2932be0dce8..af83c3f955a8dcf3b281a3c37b05da1f3d41d9ed 100644 (file)
@@ -1,3 +1,24 @@
+2010-02-26  Corinna Vinschen  <corinna@vinschen.de>
+
+       * libc/strptime.cc: Implement support for era, alt_digits and POSIX
+       padding and width modifiers.
+       (era_info_t): New type.
+       (free_era_info): New static function to free era_info_t storage.
+       (get_era_info): New static function to create era_info_t storage
+       from LC_TIME era information.
+       (alt_digits_t): New type.
+       (get_alt_digits): New static function to create alt_digits_t storage
+       from LC_TIME alt_digits information.
+       (free_alt_digits): New static function to free alt_digits_t storage.
+       (find_alt_digits): New static function to scan input for alternative
+       digits and return them, if any.  Return NULL otherwise.
+       (__strptime): New static function taking all code from strptime.
+       Implement handling for E, O, +, 0, and width modifiers per POSIX-1.2008.
+       (strptime): Convert into wrapper function to provide era_info and
+       alt_digits pointers and call __strptime.
+       (conv_num): Take additional alt_digits_t parameter and if it's not
+       NULL, call find_alt_digits to convert.
+
 2010-02-26  Corinna Vinschen  <corinna@vinschen.de>
 
        * strsig.cc: Fix formatting and copyright.
index e96de5917334d012dde4ac9258983359fef2fb55..08d4c5f564dfafba7fc0a9ed65cd77e3a042f0e5 100644 (file)
@@ -41,6 +41,7 @@ __RCSID("$NetBSD: strptime.c,v 1.28 2008/04/28 20:23:01 martin Exp $");
 #include <sys/localedef.h>
 #endif
 #include <ctype.h>
+#include <stdlib.h>
 #include <locale.h>
 #include <string.h>
 #include <time.h>
@@ -63,24 +64,240 @@ __weak_alias(strptime,_strptime)
 
 static const char gmt[4] = { "GMT" };
 
-static const u_char *conv_num(const unsigned char *, int *, uint, uint);
+typedef struct _era_info_t {
+  size_t num;          /* Element 0 only: number of entries in the array;
+                          every other element holds 1. */
+  int dir;             /* Counting direction: 1 or -1 */
+  long offset;         /* Number of year closest to start_date in the era. */
+  struct tm start;     /* Start date of era.  tm_year holds the full
+                          calendar year (TM_YEAR_BASE is only subtracted
+                          when the final tm_year is computed); INT_MIN
+                          marks an era without a defined beginning. */
+  struct tm end;       /* End date of era, tm_year as in `start';
+                          INT_MAX marks an era that is still open. */
+  CHAR *era_C;         /* malloc'ed era name, matched by %EC */
+  CHAR *era_Y;         /* malloc'ed format string, used as the
+                          replacement format for %EY */
+} era_info_t;
+
+/* Release an array created by get_era_info, together with the era_C and
+   era_Y strings owned by each entry.  ERA_INFO must not be NULL; the
+   number of entries is taken from the `num' member of the first element. */
+static void
+free_era_info (era_info_t *era_info)
+{
+  era_info_t *entry = era_info;
+  era_info_t *const stop = era_info + era_info->num;
+
+  while (entry < stop)
+    {
+      free (entry->era_C);
+      free (entry->era_Y);
+      ++entry;
+    }
+  free (era_info);
+}
+
+static era_info_t *
+get_era_info (const char *era)
+{
+  char *c;
+  era_info_t *ei = NULL;
+  size_t num = 0, cur = 0, len;
+
+  while (*era)
+    {
+      ++num;
+      era_info_t *tmp = (era_info_t *) realloc (ei, num * sizeof (era_info_t));
+      if (!tmp)
+       {
+         ei->num = cur;
+         free_era_info (ei);
+         return NULL;
+       }
+      ei = tmp;
+      ei[cur].num = 1;
+      ei[cur].dir = (*era == '+') ? 1 : -1;
+      era += 2;
+      ei[cur].offset = strtol (era, &c, 10);
+      era = c + 1;
+      ei[cur].start.tm_year = strtol (era, &c, 10);
+      /* Adjust offset for negative gregorian dates. */
+      if (ei[cur].start.tm_year < 0)
+       ++ei[cur].start.tm_year;
+      ei[cur].start.tm_mon = strtol (c + 1, &c, 10);
+      ei[cur].start.tm_mday = strtol (c + 1, &c, 10);
+      ei[cur].start.tm_hour = ei[cur].start.tm_min = ei[cur].start.tm_sec = 0;
+      era = c + 1;
+      if (era[0] == '-' && era[1] == '*')
+       {
+         ei[cur].end = ei[cur].start;
+         ei[cur].start.tm_year = INT_MIN;
+         ei[cur].start.tm_mon = ei[cur].start.tm_mday = ei[cur].start.tm_hour
+         = ei[cur].start.tm_min = ei[cur].start.tm_sec = 0;
+         era += 3;
+       }
+      else if (era[0] == '+' && era[1] == '*')
+       {
+         ei[cur].end.tm_year = INT_MAX;
+         ei[cur].end.tm_mon = 12;
+         ei[cur].end.tm_mday = 31;
+         ei[cur].end.tm_hour = 23;
+         ei[cur].end.tm_min = ei[cur].end.tm_sec = 59;
+         era += 3;
+       }
+      else
+       {
+         ei[cur].end.tm_year = strtol (era, &c, 10);
+         /* Adjust offset for negative gregorian dates. */
+         if (ei[cur].end.tm_year < 0)
+           ++ei[cur].end.tm_year;
+         ei[cur].end.tm_mon = strtol (c + 1, &c, 10);
+         ei[cur].end.tm_mday = strtol (c + 1, &c, 10);
+         ei[cur].end.tm_mday = 31;
+         ei[cur].end.tm_hour = 23;
+         ei[cur].end.tm_min = ei[cur].end.tm_sec = 59;
+         era = c + 1;
+       }
+      /* era_C */
+      c = strchr (era, ':');
+      len = c - era;
+      ei[cur].era_C = (CHAR *) malloc ((len + 1) * sizeof (CHAR));
+      if (!ei[cur].era_C)
+       {
+         ei->num = cur;
+         free_era_info (ei);
+         return NULL;
+       }
+      strncpy (ei[cur].era_C, era, len);
+      era += len;
+      ei[cur].era_C[len] = '\0';
+      /* era_Y */
+      ++era;
+      c = strchr (era, ';');
+      if (!c)
+       c = strchr (era, '\0');
+      len = c - era;
+      ei[cur].era_Y = (CHAR *) malloc ((len + 1) * sizeof (CHAR));
+      if (!ei[cur].era_Y)
+       {
+         free (ei[cur].era_C);
+         ei->num = cur;
+         free_era_info (ei);
+         return NULL;
+       }
+      strncpy (ei[cur].era_Y, era, len);
+      era += len;
+      ei[cur].era_Y[len] = '\0';
+      ++cur;
+      if (*c)
+       era = c + 1;
+    }
+  ei->num = num;
+  return ei;
+}
+
+typedef struct _alt_digits_t {
+  size_t num;          /* Number of entries in `digit' */
+  char **digit;        /* digit[i] points to the string for value i,
+                          located inside `buffer' */
+  char *buffer;        /* Private copy of the alt_digits string, with
+                          every `;' replaced by a NUL */
+} alt_digits_t;
+
+static alt_digits_t *
+get_alt_digits (const char *alt_digits)
+{
+  alt_digits_t *adi;
+  const char *a, *e;
+  char *aa, *ae;
+  size_t len;
+
+  adi = (alt_digits_t *) calloc (1, sizeof (alt_digits_t));
+  if (!adi)
+    return NULL;
+
+  /* Compute number of alt_digits. */
+  adi->num = 1;
+  for (a = alt_digits; (e = strchr (a, ';')) != NULL; a = e + 1)
+      ++adi->num;
+  /* Allocate the `digit' array, which is an array of `num' pointers into
+     `buffer'. */
+  adi->digit = (CHAR **) calloc (adi->num, sizeof (CHAR **));
+  if (!adi->digit)
+    {
+      free (adi);
+      return NULL;
+    }
+  /* Compute memory required for `buffer'. */
+  len = strlen (alt_digits);
+  /* Allocate it. */
+  adi->buffer = (CHAR *) malloc ((len + 1) * sizeof (CHAR));
+  if (!adi->buffer)
+    {
+      free (adi->digit);
+      free (adi);
+      return NULL;
+    }
+  /* Store digits in it. */
+  strcpy (adi->buffer, alt_digits);
+  /* Store the pointers into `buffer' into the appropriate `digit' slot. */
+  for (len = 0, aa = adi->buffer; (ae = strchr (aa, ';')) != NULL;
+       ++len, aa = ae + 1)
+    {
+      *ae = '\0';
+      adi->digit[len] = aa;
+    }
+  adi->digit[len] = aa;
+  return adi;
+}
+
+/* Dispose of a table built by get_alt_digits: the pointer array, the
+   string buffer and the table itself.  ADI must not be NULL. */
+static void
+free_alt_digits (alt_digits_t *adi)
+{
+  char **slots = adi->digit;
+  char *text = adi->buffer;
+
+  free (adi);
+  free (text);
+  free (slots);
+}
+
+static const unsigned char *
+find_alt_digits (const unsigned char *bp, alt_digits_t *adi, uint *pval)
+{
+  /* This is rather error-prone, but the entire idea of alt_digits
+     isn't thought out well.  If you start to look for matches at the
+     start, there's a high probability that you find short matches but
+     the entire translation is wrong.  So we scan the alt_digits array
+     from the highest to the lowest digits instead, hoping that it's
+     more likely to catch digits consisting of multiple characters. */
+  for (int i = (int) adi->num - 1; i >= 0; --i)
+    {
+      size_t len = strlen (adi->digit[i]);
+      if (!strncmp ((const char *) bp, adi->digit[i], len))
+       {
+         *pval = i;
+         return bp + len;
+       }
+    }
+  return NULL;
+}
+
+/* Last argument for conv_num: the alt_digits table if the conversion
+   being processed carries the `O' modifier, NULL otherwise.  Relies on
+   the local names `alt_format' and `alt_digits' of __strptime.  This
+   simplifies the calls to conv_num enormously. */
+#define ALT_DIGITS     ((alt_format & ALT_O) ? *alt_digits : NULL)
+
+static const u_char *conv_num(const unsigned char *, int *, uint, uint,
+                             alt_digits_t *);
 static const u_char *find_string(const u_char *, int *, const char * const *,
        const char * const *, int);
 
-
-char *
-strptime(const char *buf, const char *fmt, struct tm *tm)
+static char *
+__strptime(const char *buf, const char *fmt, struct tm *tm,
+          era_info_t **era_info, alt_digits_t **alt_digits)
 {
        unsigned char c;
        const unsigned char *bp;
        int alt_format, i, split_year = 0;
+       era_info_t *era = NULL;
+       int era_offset, got_eoff = 0;
+       int saw_padding;
+       unsigned long width;
        const char *new_fmt;
+       uint ulim;
 
        bp = (const u_char *)buf;
        struct lc_time_T *_CurrentTimeLocale = __get_current_time_locale ();
 
        while (bp != NULL && (c = *fmt++) != '\0') {
                /* Clear `alternate' modifier prior to new conversion. */
+               saw_padding = 0;
+               width = 0;
                alt_format = 0;
                i = 0;
 
@@ -110,18 +327,43 @@ literal:
                case 'E':       /* "%E?" alternative conversion modifier. */
                        LEGAL_ALT(0);
                        alt_format |= ALT_E;
+                       if (!*era_info && *_CurrentTimeLocale->era)
+                         *era_info = get_era_info (_CurrentTimeLocale->era);
                        goto again;
 
                case 'O':       /* "%O?" alternative conversion modifier. */
                        LEGAL_ALT(0);
                        alt_format |= ALT_O;
+                       if (!*alt_digits && *_CurrentTimeLocale->alt_digits)
+                         *alt_digits =
+                             get_alt_digits (_CurrentTimeLocale->alt_digits);
                        goto again;
-
+               case '0':
+               case '+':
+                       LEGAL_ALT(0);
+                       if (saw_padding)
+                         return NULL;
+                       saw_padding = 1;
+                       goto again;
+               case '1': case '2': case '3': case '4': case '5':
+               case '6': case '7': case '8': case '9':
+                       /* POSIX-1.2008 maximum field width.  Per POSIX,
+                          the width is only defined for the 'C', 'F', and 'Y'
+                          conversion specifiers. */
+                       LEGAL_ALT(0);
+                       {
+                         char *end;
+                         width = strtoul (fmt - 1, &end, 10);
+                         fmt = (const char *) end;
+                         goto again;
+                       }
                /*
                 * "Complex" conversion rules, implemented through recursion.
                 */
                case 'c':       /* Date and time, using the locale's format. */
-                       new_fmt = _ctloc(c_fmt);
+                       new_fmt = (alt_format & ALT_E)
+                                 ? _ctloc (era_d_t_fmt) : _ctloc(c_fmt);
+                       LEGAL_ALT(ALT_E);
                        goto recurse;
 
                case 'D':       /* The date as "%m/%d/%y". */
@@ -130,9 +372,15 @@ literal:
                        goto recurse;
 
                case 'F':       /* The date as "%Y-%m-%d". */
-                       new_fmt = "%Y-%m-%d";
-                       LEGAL_ALT(0);
-                       goto recurse;
+                       {
+                         LEGAL_ALT(0);
+                         char *tmp = __strptime ((const char *) bp, "%Y-%m-%d",
+                                                 tm, era_info, alt_digits);
+                         if (tmp && (uint) (tmp - (char *) bp) > width)
+                           return NULL;
+                         bp = (const unsigned char *) tmp;
+                         continue;
+                       }
 
                case 'R':       /* The time as "%H:%M". */
                        new_fmt = "%H:%M";
@@ -150,15 +398,19 @@ literal:
                        goto recurse;
 
                case 'X':       /* The time, using the locale's format. */
-                       new_fmt =_ctloc(X_fmt);
+                       new_fmt = (alt_format & ALT_E)
+                                 ? _ctloc (era_t_fmt) : _ctloc(X_fmt);
+                       LEGAL_ALT(ALT_E);
                        goto recurse;
 
                case 'x':       /* The date, using the locale's format. */
-                       new_fmt =_ctloc(x_fmt);
-                   recurse:
-                       bp = (const u_char *)strptime((const char *)bp,
-                                                           new_fmt, tm);
+                       new_fmt = (alt_format & ALT_E)
+                                 ? _ctloc (era_d_fmt) : _ctloc(x_fmt);
                        LEGAL_ALT(ALT_E);
+                   recurse:
+                       bp = (const u_char *)__strptime((const char *)bp,
+                                                       new_fmt, tm,
+                                                       era_info, alt_digits);
                        continue;
 
                /*
@@ -180,58 +432,83 @@ literal:
                        continue;
 
                case 'C':       /* The century number. */
+                       LEGAL_ALT(ALT_E);
+                       if ((alt_format & ALT_E) && *era_info)
+                         {
+                           /* With E modifier, an era.  We potentially
+                              don't know the era offset yet, so we have to
+                              store the value in a local variable.
+                              The final computation of tm_year is only done
+                              right before this function returns. */
+                           size_t num = (*era_info)->num;
+                           for (size_t i = 0; i < num; ++i)
+                             if (!strncmp ((const char *) bp,
+                                           (*era_info)[i].era_C,
+                                           strlen ((*era_info)[i].era_C)))
+                               {
+                                 era = (*era_info) + i;
+                                 bp += strlen (era->era_C);
+                                 break;
+                               }
+                           if (!era)
+                             return NULL;
+                           continue;
+                         }
                        i = 20;
-                       bp = conv_num(bp, &i, 0, 99);
+                       for (ulim = 99; width && width < 2; ++width)
+                         ulim /= 10;
+                       bp = conv_num(bp, &i, 0, ulim, NULL);
 
                        i = i * 100 - TM_YEAR_BASE;
                        if (split_year)
                                i += tm->tm_year % 100;
                        split_year = 1;
                        tm->tm_year = i;
-                       LEGAL_ALT(ALT_E);
+                       era = NULL;
+                       got_eoff = 0;
                        continue;
 
                case 'd':       /* The day of month. */
                case 'e':
-                       bp = conv_num(bp, &tm->tm_mday, 1, 31);
                        LEGAL_ALT(ALT_O);
+                       bp = conv_num(bp, &tm->tm_mday, 1, 31, ALT_DIGITS);
                        continue;
 
                case 'k':       /* The hour (24-hour clock representation). */
                        LEGAL_ALT(0);
                        /* FALLTHROUGH */
                case 'H':
-                       bp = conv_num(bp, &tm->tm_hour, 0, 23);
                        LEGAL_ALT(ALT_O);
+                       bp = conv_num(bp, &tm->tm_hour, 0, 23, ALT_DIGITS);
                        continue;
 
                case 'l':       /* The hour (12-hour clock representation). */
                        LEGAL_ALT(0);
                        /* FALLTHROUGH */
                case 'I':
-                       bp = conv_num(bp, &tm->tm_hour, 1, 12);
+                       LEGAL_ALT(ALT_O);
+                       bp = conv_num(bp, &tm->tm_hour, 1, 12, ALT_DIGITS);
                        if (tm->tm_hour == 12)
                                tm->tm_hour = 0;
-                       LEGAL_ALT(ALT_O);
                        continue;
 
                case 'j':       /* The day of year. */
                        i = 1;
-                       bp = conv_num(bp, &i, 1, 366);
+                       bp = conv_num(bp, &i, 1, 366, NULL);
                        tm->tm_yday = i - 1;
                        LEGAL_ALT(0);
                        continue;
 
                case 'M':       /* The minute. */
-                       bp = conv_num(bp, &tm->tm_min, 0, 59);
                        LEGAL_ALT(ALT_O);
+                       bp = conv_num(bp, &tm->tm_min, 0, 59, ALT_DIGITS);
                        continue;
 
                case 'm':       /* The month. */
+                       LEGAL_ALT(ALT_O);
                        i = 1;
-                       bp = conv_num(bp, &i, 1, 12);
+                       bp = conv_num(bp, &i, 1, 12, ALT_DIGITS);
                        tm->tm_mon = i - 1;
-                       LEGAL_ALT(ALT_O);
                        continue;
 
                case 'p':       /* The locale's equivalent of AM/PM. */
@@ -243,8 +520,8 @@ literal:
                        continue;
 
                case 'S':       /* The seconds. */
-                       bp = conv_num(bp, &tm->tm_sec, 0, 61);
                        LEGAL_ALT(ALT_O);
+                       bp = conv_num(bp, &tm->tm_sec, 0, 61, ALT_DIGITS);
                        continue;
 
                case 'U':       /* The week of year, beginning on sunday. */
@@ -255,28 +532,67 @@ literal:
                         * point to calculate a real value, so just check the
                         * range for now.
                         */
-                        bp = conv_num(bp, &i, 0, 53);
                         LEGAL_ALT(ALT_O);
+                        bp = conv_num(bp, &i, 0, 53, ALT_DIGITS);
                         continue;
 
                case 'w':       /* The day of week, beginning on sunday. */
-                       bp = conv_num(bp, &tm->tm_wday, 0, 6);
                        LEGAL_ALT(ALT_O);
+                       bp = conv_num(bp, &tm->tm_wday, 0, 6, ALT_DIGITS);
                        continue;
 
                case 'Y':       /* The year. */
+                       LEGAL_ALT(ALT_E);
+                       if ((alt_format & ALT_E) && *era_info)
+                         {
+                           bool gotit = false;
+                           size_t num = (*era_info)->num;
+                           (*era_info)->num = 1;
+                           for (size_t i = 0; i < num; ++i)
+                             {
+                               era_info_t *tmp_ei = (*era_info) + i;
+                               char *tmp = __strptime ((const char *) bp,
+                                                       tmp_ei->era_Y,
+                                                       tm, &tmp_ei,
+                                                       alt_digits);
+                               if (tmp)
+                                 {
+                                   bp = (const unsigned char *) tmp;
+                                   gotit = true;
+                                   break;
+                                 }
+                             }
+                           (*era_info)->num = num;
+                           if (gotit)
+                             continue;
+                           return NULL;
+                         }
                        i = TM_YEAR_BASE;       /* just for data sanity... */
-                       bp = conv_num(bp, &i, 0, 9999);
+                       for (ulim = 9999; width && width < 4; ++width)
+                         ulim /= 10;
+                       bp = conv_num(bp, &i, 0, ulim, NULL);
                        tm->tm_year = i - TM_YEAR_BASE;
-                       LEGAL_ALT(ALT_E);
+                       era = NULL;
+                       got_eoff = 0;
                        continue;
 
                case 'y':       /* The year within 100 years of the epoch. */
                        /* LEGAL_ALT(ALT_E | ALT_O); */
-                       bp = conv_num(bp, &i, 0, 99);
-
-                       if (split_year)
-                               /* preserve century */
+                       if ((alt_format & ALT_E) && *era_info)
+                         {
+                           /* With E modifier, the offset to the start date
+                              of the era specified with %EC.  We potentially
+                              don't know the era yet, so we have to store the
+                              value in a local variable, just like era itself.
+                              The final computation of tm_year is only done
+                              right before this function returns. */
+                           bp = conv_num(bp, &era_offset, 0, UINT_MAX, NULL);
+                           got_eoff = 1;
+                           continue;
+                         }
+                       bp = conv_num(bp, &i, 0, 99, ALT_DIGITS);
+
+                       if (split_year) /* preserve century */
                                i += (tm->tm_year / 100) * 100;
                        else {
                                split_year = 1;
@@ -286,6 +602,8 @@ literal:
                                        i = i + 1900 - TM_YEAR_BASE;
                        }
                        tm->tm_year = i;
+                       era = NULL;
+                       got_eoff = 0;
                        continue;
 
                case 'Z':
@@ -334,29 +652,66 @@ literal:
                }
        }
 
+       if (bp && (era || got_eoff))
+         {
+           /* Default to current era. */
+           if (!era)
+             era = *era_info;
+           /* Default to first year of era if offset is missing */
+           if (!got_eoff)
+             era_offset = era->offset;
+           tm->tm_year = (era->start.tm_year != INT_MIN
+                          ? era->start.tm_year : era->end.tm_year)
+                          + (era_offset - era->offset) * era->dir;
+           /* Check if year falls into the era.  If not, it's an
+              invalid combination of era and offset. */
+           if (era->start.tm_year > tm->tm_year
+               || era->end.tm_year < tm->tm_year)
+             return NULL;
+           tm->tm_year -= TM_YEAR_BASE;
+         }
+
        return (char *) bp;
 }
 
+/* Public entry point.  The actual parsing is done by __strptime, which
+   creates the era and alt_digits tables on demand and hands them back
+   through the two pointers; whatever was created is released here
+   before returning.  The return value is the one of __strptime. */
+char *
+strptime (const char *buf, const char *fmt, struct tm *tm)
+{
+  era_info_t *eras = NULL;
+  alt_digits_t *digits = NULL;
+  char *end = __strptime (buf, fmt, tm, &eras, &digits);
+
+  if (digits)
+    free_alt_digits (digits);
+  if (eras)
+    free_era_info (eras);
+  return end;
+}
 
 static const u_char *
-conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
+conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim,
+        alt_digits_t *alt_digits)
 {
        uint result = 0;
        unsigned char ch;
 
-       /* The limit also determines the number of valid digits. */
-       uint rulim = ulim;
-
-       ch = *buf;
-       if (ch < '0' || ch > '9')
-               return NULL;
-
-       do {
-               result *= 10;
-               result += ch - '0';
-               rulim /= 10;
-               ch = *++buf;
-       } while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9');
+       if (alt_digits)
+         buf = find_alt_digits (buf, alt_digits, &result);
+       else
+         {
+           /* The limit also determines the number of valid digits. */
+           uint rulim = ulim;
+
+           ch = *buf;
+           if (ch < '0' || ch > '9')
+                   return NULL;
+
+           do {
+                   result *= 10;
+                   result += ch - '0';
+                   rulim /= 10;
+                   ch = *++buf;
+           } while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9');
+         }
 
        if (result < llim || result > ulim)
                return NULL;
This page took 0.059458 seconds and 5 git commands to generate.