This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[2.23 PATCH] Desupport regexp.h (bug 18681)


-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256

I posted this a couple of weeks ago, but now that the branch has been
cut, `SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_23)` compiles, so I've actually
tested it. ;-)  Also, I merged in the corrections to NEWS and the
comment at the top of regexp.h, which I posted separately for the
2.22 branch.  And hopefully it will not get mangled this time.

I can see two ways to proceed with this patch.  First is just to go
ahead and land it now, and hope that if it breaks someone, they will
tell us about it during the 2.23 development cycle.  Second is to
attempt to search for code it will break, e.g. with a Debian archive
rebuild or a broad-spectrum code-search service.  I did try the latter,
but was not able to find a search service that could distinguish *this*
<regexp.h> from one provided by the application, and there are lots of
those.

zw

	* misc/regexp.h: This interface is no longer supported.
	Remove all contents, leaving only an #error directive.
	* misc/regexp.c (loc1, loc2, locs, step, advance):
	Demote to compatibility symbols.
- ---
 NEWS          |   8 ++-
 misc/regexp.c |  29 ++++++--
 misc/regexp.h | 212 +++-------------------------------------------------------
 3 files changed, 36 insertions(+), 213 deletions(-)

diff --git a/NEWS b/NEWS
index 6e0726c..b3a0f2e 100644
- --- a/NEWS
+++ b/NEWS
@@ -8,7 +8,11 @@ using `glibc' in the "product" field.
 Version 2.23
 
 * The following bugs are resolved with this release:
- -  18265, 18525.
+
+  18265, 18525, 18681.
+
+* The obsolete header <regexp.h> has been removed.  Programs that require
+  this header must be updated to use <regex.h> instead.
 
 Version 2.22
 
@@ -89,7 +93,7 @@ Version 2.22
   release.  Use of this header will trigger a deprecation warning.
   Application developers should update their code to use <regex.h> instead.
 
- -  This header was formerly part of SUSv2, but was deprecated in 1997 and
+  This header was formerly part of SUS, but was deprecated in 1994 and
   removed from the standard in 2001.  Also, the glibc implementation
   leaks memory.  See BZ#18681 for more details.
 
diff --git a/misc/regexp.c b/misc/regexp.c
index ee7d572..ef5e18b 100644
- --- a/misc/regexp.c
+++ b/misc/regexp.c
@@ -1,4 +1,4 @@
- -/* Define function and variables for the obsolete <regexp.h> interface.
+/* Compatibility symbols for the obsolete <regexp.h> interface.
    Copyright (C) 1996-2015 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -17,17 +17,27 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
- -/* We don't include regexp.h here because of the macros it requires, and
- -   because it now contains an unconditional #warning.  */
+/* regexp.h now contains only an #error directive, so it cannot be
+   used in this file.
+
+   The function that would produce an 'expbuf' to use as the second
+   argument to 'step' and 'advance' was defined only in regexp.h,
+   as its definition depended on macros defined by the user.  */
 
 #include <regex.h>
+#include <shlib-compat.h>
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_23)
 
 /* Define the variables used for the interface.  */
 char *loc1;
 char *loc2;
+compat_symbol (libc, loc1, loc1, GLIBC_2_0);
+compat_symbol (libc, loc2, loc2, GLIBC_2_0);
 
 /* Although we do not support the use we define this variable as well.  */
 char *locs;
+compat_symbol (libc, locs, locs, GLIBC_2_0);
 
 
 /* Find the next match in STRING.  The compiled regular expression is
@@ -35,7 +45,8 @@ char *locs;
    first character matched and `loc2' points to the next unmatched
    character.  */
 int
- -__step (const char *string, const char *expbuf)
+weak_function attribute_compat_text_section
+step (const char *string, const char *expbuf)
 {
   regmatch_t match;	/* We only need info about the full match.  */
 
@@ -50,14 +61,15 @@ __step (const char *string, const char *expbuf)
   loc2 = (char *) string + match.rm_eo;
   return 1;
 }
- -weak_alias (__step, step)
+compat_symbol (libc, step, step, GLIBC_2_0);
 
 
 /* Match the beginning of STRING with the compiled regular expression
    in EXPBUF.  If the match is successful `loc2' will contain the
    position of the first unmatched character.  */
 int
- -__advance (const char *string, const char *expbuf)
+weak_function attribute_compat_text_section
+advance (const char *string, const char *expbuf)
 {
   regmatch_t match;	/* We only need info about the full match.  */
 
@@ -74,4 +86,7 @@ __advance (const char *string, const char *expbuf)
   loc2 = (char *) string + match.rm_eo;
   return 1;
 }
- -weak_alias (__advance, advance)
+compat_symbol (libc, advance, advance, GLIBC_2_0);
+
+
+#endif /* SHLIB_COMPAT (2.0, 2.23) */
diff --git a/misc/regexp.h b/misc/regexp.h
index 3460989..9f5c413 100644
- --- a/misc/regexp.h
+++ b/misc/regexp.h
@@ -19,211 +19,15 @@
 #ifndef _REGEXP_H
 #define _REGEXP_H	1
 
- -/* The contents of this header file were standardized in the
- -   Single Unix Specification, Version 2 (1997) but marked as
- -   LEGACY; new applications were already being encouraged to
- -   use <regex.h> instead.  POSIX.1-2001 removed this header.
+/* The contents of this header file were originally standardized in
+   the Single Unix Specification, Issue 3 (1992).  In Issue 4 (1994)
+   the header was marked as TO BE WITHDRAWN, and new applications
+   were encouraged to use <regex.h> instead.  It was officially
+   withdrawn from the standard in Issue 6 (aka POSIX.1-2001).
 
- -   This header is provided only for backward compatibility.
- -   It will be removed in the next release of the GNU C Library.
- -   New code should use <regex.h> instead.  */
+   The GNU C Library provided this header through version 2.22.  */
 
- -#warning "<regexp.h> will be removed in the next release of the GNU C Library."
- -#warning "Please update your code to use <regex.h> instead (no trailing 'p')."
- -
- -#include <features.h>
- -#include <alloca.h>
- -#include <regex.h>
- -#include <stdlib.h>
- -#include <string.h>
- -
- -/* The implementation provided here emulates the needed functionality
- -   by mapping to the POSIX regular expression matcher.  The interface
- -   for the here included function is weird (this really is a harmless
- -   word).
- -
- -   The user has to provide six macros before this header file can be
- -   included:
- -
- -   INIT		Declarations vor variables which can be used by the
- -		other macros.
- -
- -   GETC()	Return the value of the next character in the regular
- -		expression pattern.  Successive calls should return
- -		successive characters.
- -
- -   PEEKC()	Return the value of the next character in the regular
- -		expression pattern.  Immediately successive calls to
- -		PEEKC() should return the same character which should
- -		also be the next character returned by GETC().
- -
- -   UNGETC(c)	Cause `c' to be returned by the next call to GETC() and
- -		PEEKC().
- -
- -   RETURN(ptr)	Used for normal exit of the `compile' function.  `ptr'
- -		is a pointer to the character after the last character of
- -		the compiled regular expression.
- -
- -   ERROR(val)	Used for abnormal return from `compile'.  `val' is the
- -		error number.  The error codes are:
- -		11	Range endpoint too large.
- -		16	Bad number.
- -		25	\digit out of range.
- -		36	Illegal or missing delimiter.
- -		41	No remembered search string.
- -		42	\( \) imbalance.
- -		43	Too many \(.
- -		44	More tan two numbers given in \{ \}.
- -		45	} expected after \.
- -		46	First number exceeds second in \{ \}.
- -		49	[ ] imbalance.
- -		50	Regular expression overflow.
- -
- -  */
- -
- -__BEGIN_DECLS
- -
- -/* Interface variables.  They contain the results of the successful
- -   calls to `setp' and `advance'.  */
- -extern char *loc1;
- -extern char *loc2;
- -
- -/* The use of this variable in the `advance' function is not
- -   supported.  */
- -extern char *locs;
- -
- -
- -#ifndef __DO_NOT_DEFINE_COMPILE
- -/* Get and compile the user supplied pattern up to end of line or
- -   string or until EOF is seen, whatever happens first.  The result is
- -   placed in the buffer starting at EXPBUF and delimited by ENDBUF.
- -
- -   This function cannot be defined in the libc itself since it depends
- -   on the macros.  */
- -char *
- -compile (char *__restrict instring, char *__restrict expbuf,
- -	 const char *__restrict endbuf, int eof)
- -{
- -  char *__input_buffer = NULL;
- -  size_t __input_size = 0;
- -  size_t __current_size = 0;
- -  int __ch;
- -  int __error;
- -  INIT
- -
- -  /* Align the expression buffer according to the needs for an object
- -     of type `regex_t'.  Then check for minimum size of the buffer for
- -     the compiled regular expression.  */
- -  regex_t *__expr_ptr;
- -# if defined __GNUC__ && __GNUC__ >= 2
- -  const size_t __req = __alignof__ (regex_t *);
- -# else
- -  /* How shall we find out?  We simply guess it and can change it is
- -     this really proofs to be wrong.  */
- -  const size_t __req = 8;
- -# endif
- -  expbuf += __req;
- -  expbuf -= (expbuf - ((char *) 0)) % __req;
- -  if (endbuf < expbuf + sizeof (regex_t))
- -    {
- -      ERROR (50);
- -    }
- -  __expr_ptr = (regex_t *) expbuf;
- -  /* The remaining space in the buffer can be used for the compiled
- -     pattern.  */
- -  __expr_ptr->__REPB_PREFIX (buffer) = expbuf + sizeof (regex_t);
- -  __expr_ptr->__REPB_PREFIX (allocated)
- -    = endbuf - (char *) __expr_ptr->__REPB_PREFIX (buffer);
- -
- -  while ((__ch = (GETC ())) != eof)
- -    {
- -      if (__ch == '\0' || __ch == '\n')
- -	{
- -	  UNGETC (__ch);
- -	  break;
- -	}
- -
- -      if (__current_size + 1 >= __input_size)
- -	{
- -	  size_t __new_size = __input_size ? 2 * __input_size : 128;
- -	  char *__new_room = (char *) alloca (__new_size);
- -	  /* See whether we can use the old buffer.  */
- -	  if (__new_room + __new_size == __input_buffer)
- -	    {
- -	      __input_size += __new_size;
- -	      __input_buffer = (char *) memcpy (__new_room, __input_buffer,
- -					       __current_size);
- -	    }
- -	  else if (__input_buffer + __input_size == __new_room)
- -	    __input_size += __new_size;
- -	  else
- -	    {
- -	      __input_size = __new_size;
- -	      __input_buffer = (char *) memcpy (__new_room, __input_buffer,
- -						__current_size);
- -	    }
- -	}
- -      __input_buffer[__current_size++] = __ch;
- -    }
- -  if (__current_size)
- -    __input_buffer[__current_size++] = '\0';
- -  else
- -    __input_buffer = "";
- -
- -  /* Now compile the pattern.  */
- -  __error = regcomp (__expr_ptr, __input_buffer, REG_NEWLINE);
- -  if (__error != 0)
- -    /* Oh well, we have to translate POSIX error codes.  */
- -    switch (__error)
- -      {
- -      case REG_BADPAT:
- -      case REG_ECOLLATE:
- -      case REG_ECTYPE:
- -      case REG_EESCAPE:
- -      case REG_BADRPT:
- -      case REG_EEND:
- -      case REG_ERPAREN:
- -      default:
- -	/* There is no matching error code.  */
- -	ERROR (36);
- -      case REG_ESUBREG:
- -	ERROR (25);
- -      case REG_EBRACK:
- -	ERROR (49);
- -      case REG_EPAREN:
- -	ERROR (42);
- -      case REG_EBRACE:
- -	ERROR (44);
- -      case REG_BADBR:
- -	ERROR (46);
- -      case REG_ERANGE:
- -	ERROR (11);
- -      case REG_ESPACE:
- -      case REG_ESIZE:
- -	ERROR (50);
- -      }
- -
- -  /* Everything is ok.  */
- -  RETURN ((char *) (__expr_ptr->__REPB_PREFIX (buffer)
- -		    + __expr_ptr->__REPB_PREFIX (used)));
- -}
- -#endif
- -
- -
- -/* Find the next match in STRING.  The compiled regular expression is
- -   found in the buffer starting at EXPBUF.  `loc1' will return the
- -   first character matched and `loc2' points to the next unmatched
- -   character.  */
- -extern int step (const char *__restrict __string,
- -		 const char *__restrict __expbuf) __THROW;
- -
- -/* Match the beginning of STRING with the compiled regular expression
- -   in EXPBUF.  If the match is successful `loc2' will contain the
- -   position of the first unmatched character.  */
- -extern int advance (const char *__restrict __string,
- -		    const char *__restrict __expbuf) __THROW;
- -
- -
- -__END_DECLS
+#error "The GNU C Library no longer implements <regexp.h>."
+#error "Please update your code to use <regex.h> instead (no trailing 'p')."
 
 #endif /* regexp.h */
- -- 
2.5.0

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2

iQIcBAEBCAAGBQJVwik2AAoJEJH8wytnaapkPyUP/1mEVmSyTkoia9fvVl1GI+6w
A2pK+58hN8jsCHhp4YWSWcMCsTdKWGqQXbExFmJHeDrlPs+E6h60LtKrdaIbR343
ucdgXRB6F6bvq0vuNIVuvFqTCgj+oMi2Nh4eu8mmc+Om+UmLqsKiopulNQ3AGEXN
jqDV+3dYnbC4ydtP8C5xdiEfpovv1uJEETWmWKOC8KkRwDHxP5tNa5Qr5zW8b+Ky
TaVVbRYANAzqKQXoolgJ54ohem1sxpsJ6BtL+/YHnBAVcctPOpZ3Ci3jaZ72/ZDf
J4rVuLZHolqULiqQoSeItRlTSSzJZKxd/XDJlhTSvnoLAes01DdvhEZtELLHYoMR
rOhYg03cvGU8g8O0WBEVzXr0xXLbV12mUcZo9vF+ftDvF5DEhqWssps1N5ScYanL
FGibVDqWJCHg8krsJvVq0+vTofydYjl8ctZpBZKNjTG52WJC9tN2Q0jF73Jqv98T
sR7SRc0sxIaO5nUnGMSYOW3e7S+Hxb5plicmCJEWW6ATaVqbM6062Uldr4keQzlP
EDyCfBDpBXWxSHj4LKYau08ODMqvYtNPEqtx8tHPXN62yT+qJ6DV352HerdmSP6y
Q4p73oifmG4nJFPMJ4BKo1GMJXqmusNn8LVe5wGiU+iPR6+LjciA86Oi4ORa7msl
o/TRGNV931M7ohc3uZID
=7aEz
-----END PGP SIGNATURE-----


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]