This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[ping2][PATCH 1/2 v1.1][BZ #14547] Fix CVE-2012-4412
- From: Siddhesh Poyarekar <siddhesh at redhat dot com>
- To: libc-alpha at sourceware dot org
- Date: Mon, 2 Sep 2013 16:35:11 +0530
- Subject: [ping2][PATCH 1/2 v1.1][BZ #14547] Fix CVE-2012-4412
- Authentication-results: sourceware.org; auth=none
- References: <20130630163955 dot GE2654 at spoyarek dot pnq dot redhat dot com> <20130821150912 dot GA15273 at spoyarek dot pnq dot redhat dot com> <20130826100620 dot GF2011 at spoyarek dot pnq dot redhat dot com>
Ping!
On Mon, Aug 26, 2013 at 03:36:20PM +0530, Siddhesh Poyarekar wrote:
> Ping!
>
> On Wed, Aug 21, 2013 at 08:39:12PM +0530, Siddhesh Poyarekar wrote:
> > Hi,
> >
> > Here's an updated patch with some small changes based on some bugs I
> > found in my patch. Patch 2/2 will have the test case to verify that
> > this does not regress. It runs for 5 minutes and causes a lot of pain
> > to the build system, so I've kept it as an xtests like Andreas Schwab
> > suggested.
> >
> > No regressions in the testsuite (with check and xcheck) as tested on
> > x86_64 and x86.
> >
> > Siddhesh
> >
> > [BZ #14547]
> > * string/strcoll_l.c (coll_seq): New members rule, idx,
> > save_idx and back_us.
> > (get_next_seq_nocache): New function.
> > (do_compare_nocache): New function.
> > (STRCOLL): Use get_next_seq_nocache and do_compare_nocache
> > when malloc fails.
> >
> > diff --git a/string/strcoll_l.c b/string/strcoll_l.c
> > index 50ed84d..eb042ff 100644
> > --- a/string/strcoll_l.c
> > +++ b/string/strcoll_l.c
> > @@ -45,7 +45,7 @@
> > typedef struct
> > {
> > int len; /* Length of the current sequence. */
> > - int val; /* Position of the sequence relative to the
> > + size_t val; /* Position of the sequence relative to the
> > previous non-ignored sequence. */
> > size_t idxnow; /* Current index in sequences. */
> > size_t idxmax; /* Maximum index in sequences. */
> > @@ -55,6 +55,12 @@ typedef struct
> > const USTRING_TYPE *us; /* The string. */
> > int32_t *idxarr; /* Array to cache weight indices. */
> > unsigned char *rulearr; /* Array to cache rules. */
> > + unsigned char rule; /* Saved rule for the first sequence. */
> > + int32_t idx; /* Index to weight of the current sequence. */
> > + int32_t save_idx; /* Save looked up index of a forward
> > + sequence after the last backward
> > + sequence. */
> > + const USTRING_TYPE *back_us; /* Beginning of the backward sequence. */
> > } coll_seq;
> >
> > /* Get next sequence. The weight indices are cached, so we don't need to
> > @@ -64,7 +70,7 @@ get_next_seq_cached (coll_seq *seq, int nrules, int pass,
> > const unsigned char *rulesets,
> > const USTRING_TYPE *weights)
> > {
> > - int val = seq->val = 0;
> > + size_t val = seq->val = 0;
> > int len = seq->len;
> > size_t backw_stop = seq->backw_stop;
> > size_t backw = seq->backw;
> > @@ -146,7 +152,7 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
> > const USTRING_TYPE *extra, const int32_t *indirect)
> > {
> > #include WEIGHT_H
> > - int val = seq->val = 0;
> > + size_t val = seq->val = 0;
> > int len = seq->len;
> > size_t backw_stop = seq->backw_stop;
> > size_t backw = seq->backw;
> > @@ -162,7 +168,7 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
> > ++val;
> > if (backw_stop != ~0ul)
> > {
> > - /* The is something pushed. */
> > + /* There is something pushed. */
> > if (backw == backw_stop)
> > {
> > /* The last pushed character was handled. Continue
> > @@ -227,15 +233,199 @@ get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
> > seq->us = us;
> > }
> >
> > -/* Compare two sequences. */
> > +/* Get next sequence. Traverse the string as required. This function does not
> > + set or use any index or rule cache. */
> > +static void
> > +get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets,
> > + const USTRING_TYPE *weights, const int32_t *table,
> > + const USTRING_TYPE *extra, const int32_t *indirect,
> > + int pass)
> > +{
> > +#include WEIGHT_H
> > + size_t val = seq->val = 0;
> > + int len = seq->len;
> > + size_t backw_stop = seq->backw_stop;
> > + size_t backw = seq->backw;
> > + size_t idxcnt = seq->idxcnt;
> > + size_t idxmax = seq->idxmax;
> > + int32_t idx = seq->idx;
> > + const USTRING_TYPE *us = seq->us;
> > +
> > + while (len == 0)
> > + {
> > + ++val;
> > + if (backw_stop != ~0ul)
> > + {
> > + /* There is something pushed. */
> > + if (backw == backw_stop)
> > + {
> > + /* The last pushed character was handled. Continue
> > + with forward characters. */
> > + if (idxcnt < idxmax)
> > + {
> > + idx = seq->save_idx;
> > + backw_stop = ~0ul;
> > + }
> > + else
> > + {
> > + /* Nothing anymore. The backward sequence ended with
> > + the last sequence in the string. Note that len is
> > + still zero. */
> > + idx = 0;
> > + break;
> > + }
> > + }
> > + else
> > + {
> > + /* XXX Traverse BACKW sequences from the beginning of
> > + BACKW_STOP to get the next sequence. Is ther a quicker way
> > + to do this? */
> > + size_t i = backw_stop;
> > + us = seq->back_us;
> > + while (i < backw)
> > + {
> > + int32_t tmp = findidx (&us, -1);
> > + idx = tmp & 0xffffff;
> > + i++;
> > + }
> > + --backw;
> > + us = seq->us;
> > + }
> > + }
> > + else
> > + {
> > + backw_stop = idxmax;
> > + int32_t prev_idx = idx;
> > +
> > + while (*us != L('\0'))
> > + {
> > + int32_t tmp = findidx (&us, -1);
> > + unsigned char rule = tmp >> 24;
> > + prev_idx = idx;
> > + idx = tmp & 0xffffff;
> > + idxcnt = idxmax++;
> > +
> > + /* Save the rule for the first sequence. */
> > + if (__glibc_unlikely (idxcnt == 0))
> > + seq->rule = rule;
> > +
> > + if ((rulesets[rule * nrules + pass]
> > + & sort_backward) == 0)
> > + /* No more backward characters to push. */
> > + break;
> > + ++idxcnt;
> > + }
> > +
> > + if (backw_stop >= idxcnt)
> > + {
> > + /* No sequence at all or just one. */
> > + if (idxcnt == idxmax || backw_stop > idxcnt)
> > + /* Note that len is still zero. */
> > + break;
> > +
> > + backw_stop = ~0ul;
> > + }
> > + else
> > + {
> > + /* We pushed backward sequences. If the stream ended with the
> > + backward sequence, then we process the last sequence we
> > + found. Otherwise we process the sequence before the last
> > + one since the last one was a forward sequence. */
> > + seq->back_us = seq->us;
> > + seq->us = us;
> > + backw = idxcnt;
> > + if (idxmax > idxcnt)
> > + {
> > + backw--;
> > + seq->save_idx = idx;
> > + idx = prev_idx;
> > + }
> > + if (backw > backw_stop)
> > + backw--;
> > + }
> > + }
> > +
> > + len = weights[idx++];
> > + /* Skip over indices of previous levels. */
> > + for (int i = 0; i < pass; i++)
> > + {
> > + idx += len;
> > + len = weights[idx];
> > + idx++;
> > + }
> > + }
> > +
> > + /* Update the structure. */
> > + seq->val = val;
> > + seq->len = len;
> > + seq->backw_stop = backw_stop;
> > + seq->backw = backw;
> > + seq->idxcnt = idxcnt;
> > + seq->idxmax = idxmax;
> > + seq->us = us;
> > + seq->idx = idx;
> > +}
> > +
> > +/* Compare two sequences. This version does not use the index and rules
> > + cache. */
> > +static int
> > +do_compare_nocache (coll_seq *seq1, coll_seq *seq2, int position,
> > + const USTRING_TYPE *weights)
> > +{
> > + int seq1len = seq1->len;
> > + int seq2len = seq2->len;
> > + size_t val1 = seq1->val;
> > + size_t val2 = seq2->val;
> > + int idx1 = seq1->idx;
> > + int idx2 = seq2->idx;
> > + int result = 0;
> > +
> > + /* Test for position if necessary. */
> > + if (position && val1 != val2)
> > + {
> > + result = val1 > val2 ? 1 : -1;
> > + goto out;
> > + }
> > +
> > + /* Compare the two sequences. */
> > + do
> > + {
> > + if (weights[idx1] != weights[idx2])
> > + {
> > + /* The sequences differ. */
> > + result = weights[idx1] - weights[idx2];
> > + goto out;
> > + }
> > +
> > + /* Increment the offsets. */
> > + ++idx1;
> > + ++idx2;
> > +
> > + --seq1len;
> > + --seq2len;
> > + }
> > + while (seq1len > 0 && seq2len > 0);
> > +
> > + if (position && seq1len != seq2len)
> > + result = seq1len - seq2len;
> > +
> > +out:
> > + seq1->len = seq1len;
> > + seq2->len = seq2len;
> > + seq1->idx = idx1;
> > + seq2->idx = idx2;
> > + return result;
> > +}
> > +
> > +/* Compare two sequences using the index cache. */
> > static int
> > do_compare (coll_seq *seq1, coll_seq *seq2, int position,
> > const USTRING_TYPE *weights)
> > {
> > int seq1len = seq1->len;
> > int seq2len = seq2->len;
> > - int val1 = seq1->val;
> > - int val2 = seq2->val;
> > + size_t val1 = seq1->val;
> > + size_t val2 = seq2->val;
> > int32_t *idx1arr = seq1->idxarr;
> > int32_t *idx2arr = seq2->idxarr;
> > int idx1now = seq1->idxnow;
> > @@ -245,7 +435,7 @@ do_compare (coll_seq *seq1, coll_seq *seq2, int position,
> > /* Test for position if necessary. */
> > if (position && val1 != val2)
> > {
> > - result = val1 - val2;
> > + result = val1 > val2 ? 1 : -1;
> > goto out;
> > }
> >
> > @@ -334,57 +524,62 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
> > memset (&seq1, 0, sizeof (seq1));
> > seq2 = seq1;
> >
> > - /* We need the elements of the strings as unsigned values since they
> > - are used as indices. */
> > - seq1.us = (const USTRING_TYPE *) s1;
> > - seq2.us = (const USTRING_TYPE *) s2;
> > -
> > if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1)))
> > {
> > seq1.idxarr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1));
> > - seq2.idxarr = &seq1.idxarr[s1len];
> > - seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len];
> > - seq2.rulearr = &seq1.rulearr[s1len];
> > -
> > - if (seq1.idxarr == NULL)
> > - /* No memory. Well, go with the stack then.
> > -
> > - XXX Once this implementation is stable we will handle this
> > - differently. Instead of precomputing the indices we will
> > - do this in time. This means, though, that this happens for
> > - every pass again. */
> > - goto try_stack;
> > - use_malloc = true;
> > +
> > + /* If we failed to allocate memory, we leave everything as NULL so that
> > + we use the nocache version of traversal and comparison functions. */
> > + if (seq1.idxarr != NULL)
> > + {
> > + seq2.idxarr = &seq1.idxarr[s1len];
> > + seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len];
> > + seq2.rulearr = &seq1.rulearr[s1len];
> > + use_malloc = true;
> > + }
> > }
> > else
> > {
> > - try_stack:
> > seq1.idxarr = (int32_t *) alloca (s1len * sizeof (int32_t));
> > seq2.idxarr = (int32_t *) alloca (s2len * sizeof (int32_t));
> > seq1.rulearr = (unsigned char *) alloca (s1len);
> > seq2.rulearr = (unsigned char *) alloca (s2len);
> > }
> >
> > - seq1.rulearr[0] = 0;
> > + int rule = 0;
> >
> > /* Cache values in the first pass and if needed, use them in subsequent
> > passes. */
> > for (int pass = 0; pass < nrules; ++pass)
> > {
> > seq1.idxcnt = 0;
> > + seq1.idx = 0;
> > + seq2.idx = 0;
> > seq1.backw_stop = ~0ul;
> > seq1.backw = ~0ul;
> > seq2.idxcnt = 0;
> > seq2.backw_stop = ~0ul;
> > seq2.backw = ~0ul;
> >
> > + /* We need the elements of the strings as unsigned values since they
> > + are used as indices. */
> > + seq1.us = (const USTRING_TYPE *) s1;
> > + seq2.us = (const USTRING_TYPE *) s2;
> > +
> > /* We assume that if a rule has defined `position' in one section
> > this is true for all of them. */
> > - int position = rulesets[seq1.rulearr[0] * nrules + pass] & sort_position;
> > + int position = rulesets[rule * nrules + pass] & sort_position;
> >
> > while (1)
> > {
> > - if (pass == 0)
> > + if (__glibc_unlikely (seq1.idxarr == NULL))
> > + {
> > + get_next_seq_nocache (&seq1, nrules, rulesets, weights, table,
> > + extra, indirect, pass);
> > + get_next_seq_nocache (&seq2, nrules, rulesets, weights, table,
> > + extra, indirect, pass);
> > + }
> > + else if (pass == 0)
> > {
> > get_next_seq (&seq1, nrules, rulesets, weights, table, extra,
> > indirect);
> > @@ -411,10 +606,18 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
> > goto free_and_return;
> > }
> >
> > - result = do_compare (&seq1, &seq2, position, weights);
> > + if (__glibc_unlikely (seq1.idxarr == NULL))
> > + result = do_compare_nocache (&seq1, &seq2, position, weights);
> > + else
> > + result = do_compare (&seq1, &seq2, position, weights);
> > if (result != 0)
> > goto free_and_return;
> > }
> > +
> > + if (__glibc_likely (seq1.rulearr != NULL))
> > + rule = seq1.rulearr[0];
> > + else
> > + rule = seq1.rule;
> > }
> >
> > /* Free the memory if needed. */