This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] gconv: Replace norm_add_slashes with __gconv_norm_add_slashes



On 19/06/2017 13:17, Florian Weimer wrote:
> 2017-06-19  Florian Weimer  <fweimer@redhat.com>

> diff --git a/iconv/norm_add_slashes.c b/iconv/norm_add_slashes.c
> new file mode 100644
> index 0000000..f380524
> --- /dev/null
> +++ b/iconv/norm_add_slashes.c
> @@ -0,0 +1,54 @@
> +/* Normalize the charset name and add a suffix with slashes.
> +   Copyright (C) 1997-2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <gconv_int.h>
> +
> +char *
> +__gconv_norm_add_slashes (const char *name, size_t name_len,
> +                          const char *suffix)
> +{
> +  const char *name_end  = name + name_len;
> +  const char *cp = name;
> +  char *result;
> +  char *tmp;
> +  size_t cnt = 0;
> +  const size_t suffix_len = strlen (suffix);
> +
> +  while (cp != name_end)
> +    if (*cp++ == '/')
> +      ++cnt;
> +
> +  tmp = result = malloc (name_len + 3 + suffix_len);
> +  if (result == NULL)
> +    return NULL;
> +  cp = name;
> +  while (cp != name_end)
> +    *tmp++ = __toupper_l (*cp++, _nl_C_locobj_ptr);
> +  if (cnt < 2)
> +    {
> +      *tmp++ = '/';
> +      if (cnt < 1)
> +        {
> +          *tmp++ = '/';
> +          if (suffix_len != 0)
> +            tmp = __mempcpy (tmp, suffix, suffix_len);
> +        }
> +    }
> +  *tmp = '\0';
> +  return result;
> +}

I think this is a good candidate for char_array usage [1]:

#define CHAR_ARRAY_INITIAL_SIZE 0
#include <malloc/char_array-skeleton.c>

char *
__gconv_norm_add_slashes (const char *name, size_t name_len,
                          const char *suffix)
{ 
  size_t cnt = 0; 
  for (size_t i = 0; i < name_len; i++)
    if (name[i] == '/')
      cnt++;
  
  struct char_array result;
  if (!char_array_init_str_size (&result, name, name_len))
    return NULL;
  
  for (size_t i = 0; i < char_array_length (&result); i++)
    *char_array_at (&result, i) = __toupper_l (name[i], _nl_C_locobj_ptr);
  
  if (cnt < 2)
    { 
      if (!char_array_append_str (&result, "/"))
        return NULL;
      if (cnt < 1)
        { 
          if (!char_array_append_str (&result, "/")
              | !char_array_append_str (&result, suffix))
            return NULL;
        }
    }
  
  return char_array_finalize (&result, NULL);
}

It has the advantages of index check of dynarray plus the malloc overflow
check.

[1] https://sourceware.org/ml/libc-alpha/2017-06/msg00325.html


> diff --git a/intl/dcigettext.c b/intl/dcigettext.c
> index d97746c..49127d0 100644
> --- a/intl/dcigettext.c
> +++ b/intl/dcigettext.c
> @@ -1123,25 +1123,24 @@ _nl_find_msg (struct loaded_l10nfile *domain_file,
>  		    {
>  		      size_t len;
>  		      char *charset;
> -		      const char *outcharset;
> +		      char *outcharset;
>  
>  		      charsetstr += strlen ("charset=");
>  		      len = strcspn (charsetstr, " \t\n");
>  
> -		      charset = (char *) alloca (len + 1);
> -# if defined _LIBC || HAVE_MEMPCPY
> -		      *((char *) mempcpy (charset, charsetstr, len)) = '\0';
> -# else
> -		      memcpy (charset, charsetstr, len);
> -		      charset[len] = '\0';
> -# endif
> -
> -		      outcharset = encoding;
> -
>  # ifdef _LIBC
>  		      /* We always want to use transliteration.  */
> -		      outcharset = norm_add_slashes (outcharset, "TRANSLIT");
> -		      charset = norm_add_slashes (charset, "");
> +		      charset = __gconv_norm_add_slashes (charsetstr, len, "");
> +		      outcharset = __gconv_norm_add_slashes
> +			(encoding, strlen (encoding),  "TRANSLIT");
> +		      if (charset == NULL || outcharset == NULL)
> +			{
> +			  free ((char *) encoding);

No need to cast on free.

> +			  free (outcharset);
> +			  free (charset);
> +			  goto unlock_fail;
> +			}
> +
>  		      int r = __gconv_open (outcharset, charset, &convd->conv,
>  					    GCONV_AVOID_NOCONV);
>  		      if (__builtin_expect (r != __GCONV_OK, 0))
> @@ -1153,6 +1152,8 @@ _nl_find_msg (struct loaded_l10nfile *domain_file,
>  			    {
>  			      gl_rwlock_unlock (domain->conversions_lock);
>  			      free ((char *) encoding);
> +			      free (outcharset);
> +			      free (charset);
>  			      return NULL;
>  			    }
>  
> @@ -1165,27 +1166,31 @@ _nl_find_msg (struct loaded_l10nfile *domain_file,
>  #   if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
>  	&& !defined __UCLIBC__) \
>         || _LIBICONV_VERSION >= 0x0105
> +		      charset = strndup (charsetstr, len);
>  		      if (strchr (outcharset, '/') == NULL)
>  			{
> -			  char *tmp;
> -
> -			  len = strlen (outcharset);
> -			  tmp = (char *) alloca (len + 10 + 1);
> -			  memcpy (tmp, outcharset, len);
> -			  memcpy (tmp + len, "//TRANSLIT", 10 + 1);
> -			  outcharset = tmp;
> -
> -			  convd->conv = iconv_open (outcharset, charset);
> -
> -			  freea (outcharset);
> +			  if (asprintf (&outcharset, "%s//TRANSLIT",
> +					encoding) < 0)
> +			    outcharset = NULL;
>  			}
>  		      else
> +			  outcharset = strdup (encoding);
> +		      if (charset == NULL || outcharset == NULL)
> +			{
> +			  gl_rwlock_unlock (domain->conversions_lock);
> +			  free (outcharset);
> +			  free (charset);
> +			  free ((char *) encoding);
> +			  return NULL;
> +			}
>  #   endif
> -			convd->conv = iconv_open (outcharset, charset);
> +		      convd->conv = iconv_open (outcharset, charset);
>  #  endif
>  # endif
> -
> -		      freea (charset);
> +		      free (outcharset);
> +		      free (charset);
> +		      /* Do not free encoding here because
> +                         convd->encoding takes ownership.  */
>  		    }
>  		}
>  	    }

In fact these seems another place where a char_array could find some use
to avoid all the boilerplate of managing buffers size to appendages and
buffer management (and we could get some speed up by using the stack as
well).

> diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c
> index 656cc0a..cf7f815 100644
> --- a/wcsmbs/wcsmbsload.c
> +++ b/wcsmbs/wcsmbsload.c
> @@ -160,7 +160,6 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
>      {
>        /* We must find the real functions.  */
>        const char *charset_name;
> -      const char *complete_name;
>        struct gconv_fcts *new_fcts;
>        int use_translit;
>  
> @@ -177,8 +176,11 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
>  
>        /* Normalize the name and add the slashes necessary for a
>  	 complete lookup.  */
> -      complete_name = norm_add_slashes (charset_name,
> -					use_translit ? "TRANSLIT" : "");
> +      char *complete_name = __gconv_norm_add_slashes
> +	(charset_name, strlen (charset_name),
> +	 use_translit ? "TRANSLIT" : "");
> +      if (complete_name ==NULL)
> +	goto failed;
>  
>        /* It is not necessary to use transliteration in this direction
>  	 since the internal character set is supposed to be able to
> @@ -188,6 +190,7 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
>        if (new_fcts->towc != NULL)
>  	new_fcts->tomb = __wcsmbs_getfct (complete_name, "INTERNAL",
>  					  &new_fcts->tomb_nsteps);
> +      free (complete_name);
>  
>        /* If any of the conversion functions is not available we don't
>  	 use any since this would mean we cannot convert back and
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]