This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [ping][PATCH][BZ #14547] Clean up strcoll implementation


On 30/07/13 15:01, Siddhesh Poyarekar wrote:
> Ping!
> 
> On Sun, Jun 30, 2013 at 10:02:15PM +0530, Siddhesh Poyarekar wrote:
>> Hi,
>>
>> This patch is a preliminary cleanup before I fix the actual
>> vulnerability described in pr14547.  The aim of this patch is to split
>> up the one big strcoll function into smaller units to consolidate
>> common bits and make the logic generally easier to follow.
>>
>> The actual fix will follow soon in two parts, first that implements
>> non-caching versions of get_next_seq and do_compare if memory
>> allocation failed and the second that does a check for integer
>> overflow in request sizes for the cache arrays.
>>
>> I have verified that this does not cause regressions in the testsuite.
>> The coverage in the testsuite for strcoll and strxfrm seemed pretty
>> adequate to me.  This series fixes CVE-2012-4412, so I would like them
>> considered for inclusion into 2.18.
>>
>> Siddhesh

This looks fine to me.   Only found a few typos and missed conversions
in the comments.

>> 	[BZ #14547]
>> 	* string/strcoll_l.c (coll_seq): New structure.
>> 	(get_next_seq_cached): New function.
>> 	(get_next_seq): New function.
>> 	(do_compare): New function.
>> 	(STRCOLL): Use GNU style definition.  Simplify implementation
>> 	by using get_next_seq, get_next_seq_cached and do_copare.

Typo: do_compare

>>
>> diff --git a/string/strcoll_l.c b/string/strcoll_l.c
>> index ecda08f..1bb9e23 100644
>> --- a/string/strcoll_l.c
>> +++ b/string/strcoll_l.c
>> @@ -41,11 +41,244 @@
>>  
>>  #include "../locale/localeinfo.h"
>>  
>> +/* Track status while looking for sequences in a string.  */
>> +typedef struct
>> +{
>> +  int len;			/* Length of the current sequence.  */
>> +  int val;			/* Position of the sequence relative to the
>> +				   previous non-ignored sequence.  */
>> +  size_t idxnow;		/* Current index in sequences.  */
>> +  size_t idxmax;		/* Maximum index in sequences.  */
>> +  size_t idxcnt;		/* Current count of indeces.  */

Typo: indices   (repeated multiple times)

>> +  size_t backw;			/* Current Backward sequence index.  */
>> +  size_t backw_stop;		/* Index where the backward sequences stop.  */
>> +  const USTRING_TYPE *us;	/* The string.  */
>> +  int32_t *idxarr;		/* Array to cache weight indeces.  */
>> +  unsigned char *rulearr;	/* Array to cache rules.  */
>> +} coll_seq;
>> +
>> +/* Get next sequence.  The weight indeces are cached, so we don't need to
>> +   traverse the string.  */
>> +static void
>> +get_next_seq_cached (coll_seq *seq, int nrules, int pass,
>> +		     const unsigned char *rulesets,
>> +		     const USTRING_TYPE *weights)
>> +{
>> +  int val = seq->val = 0;
>> +  int len = seq->len;
>> +  size_t backw_stop = seq->backw_stop;
>> +  size_t backw = seq->backw;
>> +  size_t idxcnt = seq->idxcnt;
>> +  size_t idxmax = seq->idxmax;
>> +  size_t idxnow = seq->idxnow;
>> +  unsigned char *rulearr = seq->rulearr;
>> +  int32_t *idxarr = seq->idxarr;
>> +
>> +  while (len == 0)
>> +    {
>> +      ++val;
>> +      if (backw_stop != ~0ul)
>> +	{
>> +	  /* The is something pushed.  */

Typo:  There (?)

>> +	  if (backw == backw_stop)
>> +	    {
>> +	      /* The last pushed character was handled.  Continue
>> +		 with forward characters.  */
>> +	      if (idxcnt < idxmax)
>> +		{
>> +		  idxnow = idxcnt;
>> +		  backw_stop = ~0ul;
>> +		}
>> +	      else
>> +		{
>> +		  /* Nothing anymore.  The backward sequence

Typo:  any more

>> +		     ended with the last sequence in the string.  */
>> +		  idxnow = ~0ul;
>> +		  break;
>> +		}
>> +	    }
>> +	  else
>> +	    idxnow = --backw;
>> +	}
>> +      else
>> +	{
>> +	  backw_stop = idxcnt;
>> +
>> +	  while (idxcnt < idxmax)
>> +	    {
>> +	      if ((rulesets[rulearr[idxcnt] * nrules + pass]
>> +		   & sort_backward) == 0)
>> +		/* No more backward characters to push.  */
>> +		break;
>> +	      ++idxcnt;
>> +	    }
>> +
>> +	  if (backw_stop == idxcnt)
>> +	    {
>> +	      /* No sequence at all or just one.  */
>> +	      if (idxcnt == idxmax)
>> +		/* Note that seq1len is still zero.  */
>> +		break;
>> +
>> +	      backw_stop = ~0ul;
>> +	      idxnow = idxcnt++;
>> +	    }
>> +	  else
>> +	    /* We pushed backward sequences.  */
>> +	    idxnow = backw = idxcnt - 1;
>> +	}
>> +      len = weights[idxarr[idxnow]++];
>> +    }
>> +
>> +  /* Update the structure.  */
>> +  seq->val = val;
>> +  seq->len = len;
>> +  seq->backw_stop = backw_stop;
>> +  seq->backw = backw;
>> +  seq->idxcnt = idxcnt;
>> +  seq->idxnow = idxnow;
>> +}
>> +
>> +/* Get next sequence.  Traverse the string as required.  */
>> +static void
>> +get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
>> +	      const USTRING_TYPE *weights, const int32_t *table,
>> +	      const USTRING_TYPE *extra, const int32_t *indirect)
>> +{
>> +#include WEIGHT_H
>> +  int val = seq->val = 0;
>> +  int len = seq->len;
>> +  size_t backw_stop = seq->backw_stop;
>> +  size_t backw = seq->backw;
>> +  size_t idxcnt = seq->idxcnt;
>> +  size_t idxmax = seq->idxmax;
>> +  size_t idxnow = seq->idxnow;
>> +  unsigned char *rulearr = seq->rulearr;
>> +  int32_t *idxarr = seq->idxarr;
>> +  const USTRING_TYPE *us = seq->us;
>> +
>> +  while (len == 0)
>> +    {
>> +      ++val;
>> +      if (backw_stop != ~0ul)
>> +	{
>> +	  /* The is something pushed.  */
>> +	  if (backw == backw_stop)
>> +	    {
>> +	      /* The last pushed character was handled.  Continue
>> +		 with forward characters.  */
>> +	      if (idxcnt < idxmax)
>> +		{
>> +		  idxnow = idxcnt;
>> +		  backw_stop = ~0ul;
>> +		}
>> +	      else

Are braces needed around this else given it is "multi-line" due to comments?

>> +		/* Nothing anymore.  The backward sequence ended with

Typo: any more

>> +		   the last sequence in the string.  Note that seq2len
>> +		   is still zero.  */

Comment needs updating - seq2len

>> +		break;
>> +	    }
>> +	  else
>> +	    idxnow = --backw;
>> +	}
>> +      else
>> +	{
>> +	  backw_stop = idxmax;
>> +
>> +	  while (*us != L('\0'))
>> +	    {
>> +	      int32_t tmp = findidx (&us, -1);
>> +	      rulearr[idxmax] = tmp >> 24;
>> +	      idxarr[idxmax] = tmp & 0xffffff;
>> +	      idxcnt = idxmax++;
>> +
>> +	      if ((rulesets[rulearr[idxcnt] * nrules]
>> +		   & sort_backward) == 0)
>> +		/* No more backward characters to push.  */
>> +		break;
>> +	      ++idxcnt;
>> +	    }
>> +
>> +	  if (backw_stop >= idxcnt)
>> +	    {
>> +	      /* No sequence at all or just one.  */
>> +	      if (idxcnt == idxmax || backw_stop > idxcnt)
>> +		/* Note that seq1len is still zero.  */

Comment update - seq1len

Allan


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]