avoid the need to bump the MO file major revision number

Bruno Haible bruno@clisp.org
Mon Jan 12 12:08:00 GMT 2004


Hi,

When I submitted the patch to support <inttypes.h> macros in MO files
(intl/loadmsgcat.c), I didn't think of the possibility that this might
be useful in other cases as well. But it is: The use of "%Id" in Farsi
translation catalogs is such an example, and similar ones may follow.

With the current loadmsgcat.c code, each time an extension is made to
the set of supported platform dependent substrings, the major version
of the MO files must be bumped by 1. Because if this is not done, the
get_sysdep_segment_value() function returns NULL and the caller crashes.

When a MO file with a new major version is deployed, a libc which doesn't
support this major version rejects the entire MO file. The desired behaviour,
however, is that only those messages that depend on the new extension
are rejected, and the remaining messages of the MO file are still accepted.
Without this desired behaviour, translators cannot make use of the
new features until the new glibc supporting the feature is widely enough
deployed.

I cannot avoid to bump the major version once, for the "%Id" Farsi extension,
since the current glibc is already widely in use. But for future extensions,
I'd like to avoid this extra delay in usability of new features.

Therefore here is a patch that makes loadmsgcat.c more tolerant: It
implements that only those messages that depend on a new extension
are rejected, and the remaining messages of the MO file are still accepted.

Tested inside and outside glibc. Will also be released as part of gettext-0.14.

Bruno


To reproduce:

======================== fa.po ========================
#, c-format
msgid "father of %d children"
msgstr "Vater von %Id Kindern"
=======================================================

$ mkdir -p fa/LC_MESSAGES
$ msgfmt-20040110 fa.po -o fa/LC_MESSAGES/foo.mo
; yields the following MO file:
$ uudecode <<\EOF
begin 644 fa/LC_MESSAGES/foo.mo
MWA($E0$`````````,````#`````#````,`````$````\`````0```$0```!(
M`````````````````````@```&P```!,````6````&X````6````_____X0`
M```+``````````H```#_____20!F871H97(@;V8@)60@8VAI;&1R96X`5F%T
297(@=F]N("5D($MI;F1E<FX`
`
end
EOF

===================== foo.c =================
#include <stdio.h>
#include <libintl.h>
#include <locale.h>
int main ()
{
  setlocale (LC_ALL, "fa_IR");
  textdomain ("foo");
  bindtextdomain ("foo", ".");
  puts (gettext ("father of %d children"));
  return 0;
}
==============================================

$ gcc -O -Wall foo.c

Result without the patch:

$ LANGUAGE= LC_ALL=fa_IR ./a.out
Speicherzugriffsfehler

Result with the patch:

$ LANGUAGE= LC_ALL=fa_IR ./a.out
father of %d children


2004-01-08  Bruno Haible  <bruno@clisp.org>

	* intl/loadmsgcat.c (_nl_load_domain): When a string pair uses a system
	dependent segment not known to this version of the library, ignore
	the string pair instead of crashing.

--- glibc-20031205/intl/loadmsgcat.c.bak	2003-09-22 14:17:20.000000000 +0200
+++ glibc-20031205/intl/loadmsgcat.c	2004-01-11 21:00:55.000000000 +0100
@@ -1,5 +1,5 @@
 /* Load needed message catalogs.
-   Copyright (C) 1995-2002, 2003 Free Software Foundation, Inc.
+   Copyright (C) 1995-2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -1052,12 +1052,13 @@
 		const char **sysdep_segment_values;
 		const nls_uint32 *orig_sysdep_tab;
 		const nls_uint32 *trans_sysdep_tab;
+		nls_uint32 n_inmem_sysdep_strings;
 		size_t memneed;
 		char *mem;
 		struct sysdep_string_desc *inmem_orig_sysdep_tab;
 		struct sysdep_string_desc *inmem_trans_sysdep_tab;
 		nls_uint32 *inmem_hash_tab;
-		unsigned int i;
+		unsigned int i, j;
 
 		/* Get the values of the system dependent segments.  */
 		n_sysdep_segments =
@@ -1092,153 +1093,247 @@
 		   + W (domain->must_swap, data->trans_sysdep_tab_offset));
 
 		/* Compute the amount of additional memory needed for the
-		   system dependent strings and the augmented hash table.  */
-		memneed = 2 * n_sysdep_strings
-			  * sizeof (struct sysdep_string_desc)
-			  + domain->hash_size * sizeof (nls_uint32);
-		for (i = 0; i < 2 * n_sysdep_strings; i++)
+		   system dependent strings and the augmented hash table.
+		   At the same time, also drop string pairs which refer to
+		   an undefined system dependent segment.  */
+		n_inmem_sysdep_strings = 0;
+		memneed = domain->hash_size * sizeof (nls_uint32);
+		for (i = 0; i < n_sysdep_strings; i++)
 		  {
-		    const struct sysdep_string *sysdep_string =
-		      (const struct sysdep_string *)
-		      ((char *) data
-		       + W (domain->must_swap,
-			    i < n_sysdep_strings
-			    ? orig_sysdep_tab[i]
-			    : trans_sysdep_tab[i - n_sysdep_strings]));
-		    size_t need = 0;
-		    const struct segment_pair *p = sysdep_string->segments;
-
-		    if (W (domain->must_swap, p->sysdepref) != SEGMENTS_END)
-		      for (p = sysdep_string->segments;; p++)
-			{
-			  nls_uint32 sysdepref;
-
-			  need += W (domain->must_swap, p->segsize);
-
-			  sysdepref = W (domain->must_swap, p->sysdepref);
-			  if (sysdepref == SEGMENTS_END)
-			    break;
+		    int valid = 1;
+		    size_t needs[2];
+
+		    for (j = 0; j < 2; j++)
+		      {
+			const struct sysdep_string *sysdep_string =
+			  (const struct sysdep_string *)
+			  ((char *) data
+			   + W (domain->must_swap,
+				j == 0
+				? orig_sysdep_tab[i]
+				: trans_sysdep_tab[i]));
+			size_t need = 0;
+			const struct segment_pair *p = sysdep_string->segments;
 
-			  if (sysdepref >= n_sysdep_segments)
+			if (W (domain->must_swap, p->sysdepref) != SEGMENTS_END)
+			  for (p = sysdep_string->segments;; p++)
 			    {
-			      /* Invalid.  */
-			      freea (sysdep_segment_values);
-			      goto invalid;
-			    }
+			      nls_uint32 sysdepref;
 
-			  need += strlen (sysdep_segment_values[sysdepref]);
-			}
+			      need += W (domain->must_swap, p->segsize);
 
-		    memneed += need;
-		  }
+			      sysdepref = W (domain->must_swap, p->sysdepref);
+			      if (sysdepref == SEGMENTS_END)
+				break;
+
+			      if (sysdepref >= n_sysdep_segments)
+				{
+				  /* Invalid.  */
+				  freea (sysdep_segment_values);
+				  goto invalid;
+				}
+
+			      if (sysdep_segment_values[sysdepref] == NULL)
+				{
+				  /* This particular string pair is invalid.  */
+				  valid = 0;
+				  break;
+				}
 
-		/* Allocate additional memory.  */
-		mem = (char *) malloc (memneed);
-		if (mem == NULL)
-		  goto invalid;
-
-		domain->malloced = mem;
-		inmem_orig_sysdep_tab = (struct sysdep_string_desc *) mem;
-		mem += n_sysdep_strings * sizeof (struct sysdep_string_desc);
-		inmem_trans_sysdep_tab = (struct sysdep_string_desc *) mem;
-		mem += n_sysdep_strings * sizeof (struct sysdep_string_desc);
-		inmem_hash_tab = (nls_uint32 *) mem;
-		mem += domain->hash_size * sizeof (nls_uint32);
-
-		/* Compute the system dependent strings.  */
-		for (i = 0; i < 2 * n_sysdep_strings; i++)
-		  {
-		    const struct sysdep_string *sysdep_string =
-		      (const struct sysdep_string *)
-		      ((char *) data
-		       + W (domain->must_swap,
-			    i < n_sysdep_strings
-			    ? orig_sysdep_tab[i]
-			    : trans_sysdep_tab[i - n_sysdep_strings]));
-		    const char *static_segments =
-		      (char *) data
-		      + W (domain->must_swap, sysdep_string->offset);
-		    const struct segment_pair *p = sysdep_string->segments;
+			      need += strlen (sysdep_segment_values[sysdepref]);
+			    }
 
-		    /* Concatenate the segments, and fill
-		       inmem_orig_sysdep_tab[i] (for i < n_sysdep_strings) and
-		       inmem_trans_sysdep_tab[i-n_sysdep_strings] (for
-		       i >= n_sysdep_strings).  */
+			needs[j] = need;
+			if (!valid)
+			  break;
+		      }
 
-		    if (W (domain->must_swap, p->sysdepref) == SEGMENTS_END)
+		    if (valid)
 		      {
-			/* Only one static segment.  */
-			inmem_orig_sysdep_tab[i].length =
-			  W (domain->must_swap, p->segsize);
-			inmem_orig_sysdep_tab[i].pointer = static_segments;
+			n_inmem_sysdep_strings++;
+			memneed += needs[0] + needs[1];
 		      }
-		    else
+		  }
+		memneed += 2 * n_inmem_sysdep_strings
+			   * sizeof (struct sysdep_string_desc);
+
+		if (n_inmem_sysdep_strings > 0)
+		  {
+		    unsigned int k;
+
+		    /* Allocate additional memory.  */
+		    mem = (char *) malloc (memneed);
+		    if (mem == NULL)
+		      goto invalid;
+
+		    domain->malloced = mem;
+		    inmem_orig_sysdep_tab = (struct sysdep_string_desc *) mem;
+		    mem += n_inmem_sysdep_strings
+			   * sizeof (struct sysdep_string_desc);
+		    inmem_trans_sysdep_tab = (struct sysdep_string_desc *) mem;
+		    mem += n_inmem_sysdep_strings
+			   * sizeof (struct sysdep_string_desc);
+		    inmem_hash_tab = (nls_uint32 *) mem;
+		    mem += domain->hash_size * sizeof (nls_uint32);
+
+		    /* Compute the system dependent strings.  */
+		    k = 0;
+		    for (i = 0; i < n_sysdep_strings; i++)
 		      {
-			inmem_orig_sysdep_tab[i].pointer = mem;
+			int valid = 1;
 
-			for (p = sysdep_string->segments;; p++)
+			for (j = 0; j < 2; j++)
 			  {
-			    nls_uint32 segsize =
-			      W (domain->must_swap, p->segsize);
-			    nls_uint32 sysdepref =
-			      W (domain->must_swap, p->sysdepref);
-			    size_t n;
+			    const struct sysdep_string *sysdep_string =
+			      (const struct sysdep_string *)
+			      ((char *) data
+			       + W (domain->must_swap,
+				    j == 0
+				    ? orig_sysdep_tab[i]
+				    : trans_sysdep_tab[i]));
+			    const struct segment_pair *p =
+			      sysdep_string->segments;
+
+			    if (W (domain->must_swap, p->sysdepref)
+				!= SEGMENTS_END)
+			      for (p = sysdep_string->segments;; p++)
+				{
+				  nls_uint32 sysdepref;
+
+				  sysdepref =
+				    W (domain->must_swap, p->sysdepref);
+				  if (sysdepref == SEGMENTS_END)
+				    break;
+
+				  if (sysdep_segment_values[sysdepref] == NULL)
+				    {
+				      /* This particular string pair is
+					 invalid.  */
+				      valid = 0;
+				      break;
+				    }
+				}
 
-			    if (segsize > 0)
+			    if (!valid)
+			      break;
+			  }
+
+			if (valid)
+			  {
+			    for (j = 0; j < 2; j++)
 			      {
-				memcpy (mem, static_segments, segsize);
-				mem += segsize;
-				static_segments += segsize;
+				const struct sysdep_string *sysdep_string =
+				  (const struct sysdep_string *)
+				  ((char *) data
+				   + W (domain->must_swap,
+					j == 0
+					? orig_sysdep_tab[i]
+					: trans_sysdep_tab[i]));
+				const char *static_segments =
+				  (char *) data
+				  + W (domain->must_swap, sysdep_string->offset);
+				const struct segment_pair *p =
+				  sysdep_string->segments;
+
+				/* Concatenate the segments, and fill
+				   inmem_orig_sysdep_tab[k] (for j == 0) and
+				   inmem_trans_sysdep_tab[k] (for j == 1).  */
+
+				struct sysdep_string_desc *inmem_tab_entry =
+				  (j == 0
+				   ? inmem_orig_sysdep_tab
+				   : inmem_trans_sysdep_tab)
+				  + k;
+
+				if (W (domain->must_swap, p->sysdepref)
+				    == SEGMENTS_END)
+				  {
+				    /* Only one static segment.  */
+				    inmem_tab_entry->length =
+				      W (domain->must_swap, p->segsize);
+				    inmem_tab_entry->pointer = static_segments;
+				  }
+				else
+				  {
+				    inmem_tab_entry->pointer = mem;
+
+				    for (p = sysdep_string->segments;; p++)
+				      {
+					nls_uint32 segsize =
+					  W (domain->must_swap, p->segsize);
+					nls_uint32 sysdepref =
+					  W (domain->must_swap, p->sysdepref);
+					size_t n;
+
+					if (segsize > 0)
+					  {
+					    memcpy (mem, static_segments, segsize);
+					    mem += segsize;
+					    static_segments += segsize;
+					  }
+
+					if (sysdepref == SEGMENTS_END)
+					  break;
+
+					n = strlen (sysdep_segment_values[sysdepref]);
+					memcpy (mem, sysdep_segment_values[sysdepref], n);
+					mem += n;
+				      }
+
+				    inmem_tab_entry->length =
+				      mem - inmem_tab_entry->pointer;
+				  }
 			      }
 
-			    if (sysdepref == SEGMENTS_END)
-			      break;
-
-			    n = strlen (sysdep_segment_values[sysdepref]);
-			    memcpy (mem, sysdep_segment_values[sysdepref], n);
-			    mem += n;
+			    k++;
 			  }
-
-			inmem_orig_sysdep_tab[i].length =
-			  mem - inmem_orig_sysdep_tab[i].pointer;
 		      }
-		  }
+		    if (k != n_inmem_sysdep_strings)
+		      abort ();
 
-		/* Compute the augmented hash table.  */
-		for (i = 0; i < domain->hash_size; i++)
-		  inmem_hash_tab[i] =
-		    W (domain->must_swap_hash_tab, domain->hash_tab[i]);
-		for (i = 0; i < n_sysdep_strings; i++)
-		  {
-		    const char *msgid = inmem_orig_sysdep_tab[i].pointer;
-		    nls_uint32 hash_val = __hash_string (msgid);
-		    nls_uint32 idx = hash_val % domain->hash_size;
-		    nls_uint32 incr = 1 + (hash_val % (domain->hash_size - 2));
-
-		    for (;;)
+		    /* Compute the augmented hash table.  */
+		    for (i = 0; i < domain->hash_size; i++)
+		      inmem_hash_tab[i] =
+			W (domain->must_swap_hash_tab, domain->hash_tab[i]);
+		    for (i = 0; i < n_inmem_sysdep_strings; i++)
 		      {
-			if (inmem_hash_tab[idx] == 0)
+			const char *msgid = inmem_orig_sysdep_tab[i].pointer;
+			nls_uint32 hash_val = __hash_string (msgid);
+			nls_uint32 idx = hash_val % domain->hash_size;
+			nls_uint32 incr =
+			  1 + (hash_val % (domain->hash_size - 2));
+
+			for (;;)
 			  {
-			    /* Hash table entry is empty.  Use it.  */
-			    inmem_hash_tab[idx] = 1 + domain->nstrings + i;
-			    break;
-			  }
+			    if (inmem_hash_tab[idx] == 0)
+			      {
+				/* Hash table entry is empty.  Use it.  */
+				inmem_hash_tab[idx] = 1 + domain->nstrings + i;
+				break;
+			      }
 
-			if (idx >= domain->hash_size - incr)
-			  idx -= domain->hash_size - incr;
-			else
-			  idx += incr;
+			    if (idx >= domain->hash_size - incr)
+			      idx -= domain->hash_size - incr;
+			    else
+			      idx += incr;
+			  }
 		      }
-		  }
 
-		freea (sysdep_segment_values);
+		    domain->n_sysdep_strings = n_inmem_sysdep_strings;
+		    domain->orig_sysdep_tab = inmem_orig_sysdep_tab;
+		    domain->trans_sysdep_tab = inmem_trans_sysdep_tab;
 
-		domain->n_sysdep_strings = n_sysdep_strings;
-		domain->orig_sysdep_tab = inmem_orig_sysdep_tab;
-		domain->trans_sysdep_tab = inmem_trans_sysdep_tab;
+		    domain->hash_tab = inmem_hash_tab;
+		    domain->must_swap_hash_tab = 0;
+		  }
+		else
+		  {
+		    domain->n_sysdep_strings = 0;
+		    domain->orig_sysdep_tab = NULL;
+		    domain->trans_sysdep_tab = NULL;
+		  }
 
-		domain->hash_tab = inmem_hash_tab;
-		domain->must_swap_hash_tab = 0;
+		freea (sysdep_segment_values);
 	      }
 	    else
 	      {



More information about the Libc-alpha mailing list