This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH v4] C++ify gdb/common/environ.c


On 06/14/2017 08:22 PM, Sergio Durigan Junior wrote:

> diff --git a/gdb/Makefile.in b/gdb/Makefile.in
> index 5e5fcaa..133db1a 100644
> --- a/gdb/Makefile.in
> +++ b/gdb/Makefile.in
> @@ -530,14 +530,16 @@ SUBDIR_UNITTESTS_SRCS = \
>  	unittests/offset-type-selftests.c \
>  	unittests/optional-selftests.c \
>  	unittests/ptid-selftests.c \
> -	unittests/scoped_restore-selftests.c
> +	unittests/scoped_restore-selftests.c \
> +	unittests/environ-selftests.c

Please keep the list sorted.

(I'm guilty of missing that before too.)

>  
>  SUBDIR_UNITTESTS_OBS = \
>  	function-view-selftests.o \
>  	offset-type-selftests.o \
>  	optional-selftests.o \
>  	ptid-selftests.o \
> -	scoped_restore-selftests.o
> +	scoped_restore-selftests.o \
> +	environ-selftests.o

Ditto.

> diff --git a/gdb/common/environ.c b/gdb/common/environ.c
> index 3145d01..657f2e0 100644
> --- a/gdb/common/environ.c
> +++ b/gdb/common/environ.c
> @@ -18,165 +18,102 @@
>  #include "common-defs.h"
>  #include "environ.h"
>  #include <algorithm>
> -
> +#include <utility>
>  
> -/* Return a new environment object.  */
> +/* See common/environ.h.  */
>  
> -struct gdb_environ *
> -make_environ (void)
> +gdb_environ::gdb_environ ()
>  {
> -  struct gdb_environ *e;
> +}
>  
> -  e = XNEW (struct gdb_environ);
> +/* See common/environ.h.  */
>  
> -  e->allocated = 10;
> -  e->vector = (char **) xmalloc ((e->allocated + 1) * sizeof (char *));
> -  e->vector[0] = 0;
> -  return e;
> +gdb_environ::~gdb_environ ()
> +{
> +  clear ();
>  }
>  
> -/* Free an environment and all the strings in it.  */
> +/* See common/environ.h.  */
>  
> -void
> -free_environ (struct gdb_environ *e)
> +gdb_environ::gdb_environ (gdb_environ &&e)
> +  : m_environ_vector (std::move (e.m_environ_vector))
>  {
> -  char **vector = e->vector;
> +}
>  
> -  while (*vector)
> -    xfree (*vector++);
> +/* See common/environ.h.  */
>  
> -  xfree (e->vector);
> -  xfree (e);
> +gdb_environ &
> +gdb_environ::operator= (gdb_environ &&e)
> +{
> +  clear ();
> +  m_environ_vector = std::move (e.m_environ_vector);
> +  return *this;
>  }
>  
> -/* Copy the environment given to this process into E.
> -   Also copies all the strings in it, so we can be sure
> -   that all strings in these environments are safe to free.  */
> +/* See common/environ.h.  */
>  
>  void
> -init_environ (struct gdb_environ *e)
> +gdb_environ::clear ()
>  {
> -  extern char **environ;
> -  int i;
> -
> -  if (environ == NULL)
> -    return;
> -
> -  for (i = 0; environ[i]; i++) /*EMPTY */ ;
> +  for (char *v : m_environ_vector)
> +    xfree (v);
> +  m_environ_vector.clear ();
> +}
>  
> -  if (e->allocated < i)
> -    {
> -      e->allocated = std::max (i, e->allocated + 10);
> -      e->vector = (char **) xrealloc ((char *) e->vector,
> -				      (e->allocated + 1) * sizeof (char *));
> -    }
> +/* See common/environ.h.  */
>  
> -  memcpy (e->vector, environ, (i + 1) * sizeof (char *));
> +const char *
> +gdb_environ::get (const std::string &var) const
> +{

Does this need to be a std::string instead of "const char *"?
Callers always pass a string literal down, so this is going to
force a deep string dup for no good reason, AFAICS.


> +  size_t len = var.size ();
> +  const char *var_str = var.c_str ();
>  
> -  while (--i >= 0)
> -    {
> -      int len = strlen (e->vector[i]);
> -      char *newobj = (char *) xmalloc (len + 1);
> +  for (char *el : m_environ_vector)
> +    if (el != NULL && strncmp (el, var_str, len) == 0 && el[len] == '=')
> +      return &el[len + 1];
>  
> -      memcpy (newobj, e->vector[i], len + 1);
> -      e->vector[i] = newobj;
> -    }
> +  return NULL;
>  }

> -char *
> -get_in_environ (const struct gdb_environ *e, const char *var)
> +void
> +gdb_environ::set (const std::string &var, const std::string &value)

Ditto.

>  {
> -  int len = strlen (var);
> -  char **vector = e->vector;
> -  char *s;
> -
> -  for (; (s = *vector) != NULL; vector++)
> -    if (strncmp (s, var, len) == 0 && s[len] == '=')
> -      return &s[len + 1];
> +  /* We have to unset the variable in the vector if it exists.  */
> +  unset (var);
>  
> -  return 0;
> +  /* Insert the element before the last one, which is always NULL.  */
> +  m_environ_vector.insert (m_environ_vector.end () - 1,
> +			   concat (var.c_str (), "=",
> +				   value.c_str (), NULL));
>  }
>  
> -/* Store the value in E of VAR as VALUE.  */
> +/* See common/environ.h.  */
>  
>  void
> -set_in_environ (struct gdb_environ *e, const char *var, const char *value)
> +gdb_environ::unset (const std::string &var)

Ditto.


> -
> -  return;
> +  std::string match = var + '=';
> +  const char *match_str = match.c_str ();
> +
> +  /* We iterate until '.cend () - 1' because the last element is
> +     always NULL.  */
> +  for (std::vector<char *>::const_iterator el = m_environ_vector.cbegin ();
> +       el != m_environ_vector.cend () - 1;
> +       ++el)
> +    if (startswith (*el, match_str))

In gdb_environ::set you used:

    strncmp (el, var_str, len) == 0 && el[len] == '='

It'd be better if places used the same matching code.  Maybe even put
this in a separate helper function.  The gdb_environ::set version
looks better to me for avoiding temporary heap-allocated strings.


> -/* Remove the setting for variable VAR from environment E.  */
> +/* See common/environ.h.  */
>  
> -void
> -unset_in_environ (struct gdb_environ *e, const char *var)
> +char **
> +gdb_environ::get_char_vector () const

So far, getters in gdb's classes don't have a "get_" prefix.
(except "get()" or course, but that's really a getter in
the same sense.)  Can we drop it here?  Like:

 char **gdb_environ::char_vector () const

Though I'd rename it like this instead:

 char ** gdb_environ::envp () const

Because that's what the env vector is traditionally called, e.g.,
from "man 2 execve":

     int execve(const char *filename, char *const argv[],
                  char *const envp[]);

     int main(int argc, char *argv[], char *envp[])

Likewise I'd use that name for local variables where
we call gdb_environ::get_char_vector, just to follow
traditional terminology throughout.

> +/* Class that represents the environment variables as seen by the
> +   inferior.  */
> +
> +class gdb_environ
> +{
> +public:
> +  /* Regular constructor and destructor.  */
> +  gdb_environ ();
> +  ~gdb_environ ();
> +
> +  /* Move constructor.  */
> +  gdb_environ (gdb_environ &&e);
> +
> +  /* Move assignment.  */
> +  gdb_environ &operator= (gdb_environ &&e);
> +
> +  /* Create a gdb_environ object using the host's environment
> +     variables.  */
> +  static gdb_environ from_host_environ ()
>    {

Nit: I find it a bit odd that the ctors/dtors are short but 
defined out of line, while this function is defined inline.
If I was looking at controlling what the compiler could inline,
then I'd do it the other way around -- small ctor/dtor in
the header, and this larger function out of line in the .c file.

> -    /* Number of usable slots allocated in VECTOR.
> -       VECTOR always has one slot not counted here,
> -       to hold the terminating zero.  */
> -    int allocated;
> -    /* A vector of slots, ALLOCATED + 1 of them.
> -       The first few slots contain strings "VAR=VALUE"
> -       and the next one contains zero.
> -       Then come some unused slots.  */
> -    char **vector;
> -  };
> +    extern char **environ;
> +    gdb_environ e;
> +
> +    if (environ == NULL)
> +      return e;
> +
> +    for (int i = 0; environ[i] != NULL; ++i)
> +      e.m_environ_vector.push_back (xstrdup (environ[i]));
> +
> +    /* The last element of the vector is always going to be NULL.  */
> +    e.m_environ_vector.push_back (NULL);
>  
> -extern struct gdb_environ *make_environ (void);
> +    return e;
> +  }

>    run_target->to_create_inferior (run_target, exec_file,
>  				  std::string (get_inferior_args ()),
> -				  environ_vector (current_inferior ()->environment),
> +				  current_inferior ()
> +				  ->environment.get_char_vector (),
>  				  from_tty);
> @@ -270,7 +270,6 @@ mi_cmd_inferior_tty_show (const char *command, char **argv, int argc)
>  void 
>  _initialize_mi_cmd_env (void)
>  {
> -  struct gdb_environ *environment;
>    const char *env;
>  
>    /* We want original execution path to reset to, if desired later.
> @@ -278,13 +277,10 @@ _initialize_mi_cmd_env (void)
>       current_inferior ()->environment.  Also, there's no obvious
>       place where this code can be moved such that it surely run
>       before any code possibly mangles original PATH.  */
> -  environment = make_environ ();
> -  init_environ (environment);
> -  env = get_in_environ (environment, path_var_name);
> +  env = getenv (path_var_name);
>  
>    /* Can be null if path is not set.  */
>    if (!env)
>      env = "";
>    orig_path = xstrdup (env);
> -  free_environ (environment);
>  }

Please split this change to a separate patch.  Don't we need to
update the comment just above?


> diff --git a/gdb/unittests/environ-selftests.c b/gdb/unittests/environ-selftests.c
> new file mode 100644
> index 0000000..948eacf
> --- /dev/null
> +++ b/gdb/unittests/environ-selftests.c
> @@ -0,0 +1,79 @@
> +/* Self tests for gdb_environ for GDB, the GNU debugger.
> +
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +
> +   This file is part of GDB.
> +
> +   This program is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   This program is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
> +
> +#include "defs.h"
> +#include "selftest.h"
> +#include "common/environ.h"
> +
> +namespace selftests {
> +namespace environ {

"environ" as an identifier will be problematic.  Please
pick some other name here.

On some hosts "environ" is a define.  E.g., on my Fedora's
mingw-w64 install I see:

 /usr/x86_64-w64-mingw32/sys-root/mingw/include/stdlib.h:624:#define environ _environ

and gnulib has too:

 $ srcgrep -rn environ gnulib/import/ | grep define
 gnulib/import/extra/snippet/warn-on-use.h:61:   # define environ (*rpl_environ ())
 gnulib/import/unistd.in.h:411:#   define environ (*_NSGetEnviron ())
 gnulib/import/unistd.in.h:432:#  define environ (*rpl_environ ())

I don't think "namespace (*_NSGetEnviron ())" would compile.  :-)


> +
> +static void
> +run_tests ()
> +{
> +  if (setenv ("GDB_SELFTEST_ENVIRON", "1", 1) != 0)
> +    error ("Could not set environment variable for testing.");
> +
> +  gdb_environ env;
> +

Please add a test that that checks that get_char_vector() on an
empty gdb_environ works as expected.  I.e., something like:

   gdb_environ env;
   SELF_CHECK (env.get_char_vector()[0] == NULL);

AFAICS from:

 gdb_environ::gdb_environ ()
 {
 }

 char **
 gdb_environ::get_char_vector () const
 {
   return const_cast<char **> (&m_environ_vector[0]);
 }

we end up with a bogus envp, because it points at
m_environ_vector.end(), which is not a valid pointer
to dereference.  Even ignoring that, it does not point
to NULL.  So if we pass such a pointer to execve or some syscall
that accepts an envp, then it'll try iterating the vector until
it finds a NULL entry, and of course do the wrong thing,
maybe crash if you're lucky.

Note we can get this situation from here too:

  static gdb_environ from_host_environ ()
  {
    extern char **environ;
    gdb_environ e;

    if (environ == NULL)
      return e;


The old code did not suffer from this because it always
allocated gdb_environ::vector:

 struct gdb_environ *
 make_environ (void)
 {
   struct gdb_environ *e;

   e = XNEW (struct gdb_environ);

   e->allocated = 10;
   e->vector = (char **) xmalloc ((e->allocated + 1) * sizeof (char *));
   e->vector[0] = 0;
   return e;
 }

So we either always add a NULL to the vector, or we
change gdb_environ::get_char_vector instead, like:

 char **
 gdb_environ::get_char_vector () const
 {
   if (m_environ_vector.empty ())
     {
       static const char *const empty_envp[1] = { NULL };
       return const_cast<char **> (empty_envp);
     }
   return const_cast<char **> (&m_environ_vector[0]);
 }

This is OK because execve etc. are garanteed to never change
the envp they're passed.

> +  SELF_CHECK (env.get ("PWD") == NULL);
> +
> +  env = gdb_environ::from_host_environ ();
> +
> +  SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "1") == 0);
> +
> +  env.set ("GDB_SELFTEST_ENVIRON", "test");
> +  SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "test") == 0);
> +
> +  env.unset ("GDB_SELFTEST_ENVIRON");
> +  SELF_CHECK (env.get ("GDB_SELFTEST_ENVIRON") == NULL);
> +
> +  env.set ("GDB_SELFTEST_ENVIRON", "1");
> +  SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "1") == 0);
> +
> +  env.clear ();
> +  SELF_CHECK (env.get ("GDB_SELFTEST_ENVIRON") == NULL);

Like above, after env.clear() check that this works:

   SELF_CHECK (env.get_char_vector()[0] == NULL);

> +
> +  if (setenv ("GDB_SELFTEST_ENVIRON", "1", 1) != 0)
> +    error ("Could not set environment variable for testing.");

This setenv looks like a stale copy from the one at the top?
I'm not seeing why it's necessary here.

> +
> +  env = gdb_environ::from_host_environ ();
> +  char **penv = env.get_char_vector ();
> +  bool found_var = false, found_twice = false;
> +
> +  for (size_t i = 0; penv[i] != NULL; ++i)
> +    if (strcmp (penv[i], "GDB_SELFTEST_ENVIRON=1") == 0)
> +      {
> +	if (found_var)
> +	  found_twice = true;
> +	found_var = true;
> +      }
> +  SELF_CHECK (found_var == true);
> +  SELF_CHECK (found_twice == false);

Why not simply a count:

  int found_count = 0;
  for (size_t i = 0; penv[i] != NULL; ++i)
    if (strcmp (penv[i], "GDB_SELFTEST_ENVIRON=1") == 0)
      found_count++;
  SELF_CHECK (found_count == 1);


I think that no test actually explicitly sets more than one
var in the vector.  I think you should exercise that.
Also check that removing some other var doesn't remove the
first.  Etc.  E.g., set var 1, set var 2, unset var 1,
check that  var 2 is still there.

Thanks,
Pedro Alves


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]