This is the mail archive of the
gdb-patches@sourceware.org
mailing list for the GDB project.
Re: [PATCH v4] C++ify gdb/common/environ.c
- From: Pedro Alves <palves at redhat dot com>
- To: Sergio Durigan Junior <sergiodj at redhat dot com>, GDB Patches <gdb-patches at sourceware dot org>
- Cc: Simon Marchi <simon dot marchi at polymtl dot ca>
- Date: Fri, 16 Jun 2017 16:45:44 +0100
- Subject: Re: [PATCH v4] C++ify gdb/common/environ.c
- Authentication-results: sourceware.org; auth=none
- Authentication-results: ext-mx05.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com
- Authentication-results: ext-mx05.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=palves at redhat dot com
- Dkim-filter: OpenDKIM Filter v2.11.0 mx1.redhat.com 95E8D23E6C5
- Dmarc-filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 95E8D23E6C5
- References: <20170413040455.23996-1-sergiodj@redhat.com> <20170614192219.12364-1-sergiodj@redhat.com>
On 06/14/2017 08:22 PM, Sergio Durigan Junior wrote:
> diff --git a/gdb/Makefile.in b/gdb/Makefile.in
> index 5e5fcaa..133db1a 100644
> --- a/gdb/Makefile.in
> +++ b/gdb/Makefile.in
> @@ -530,14 +530,16 @@ SUBDIR_UNITTESTS_SRCS = \
> unittests/offset-type-selftests.c \
> unittests/optional-selftests.c \
> unittests/ptid-selftests.c \
> - unittests/scoped_restore-selftests.c
> + unittests/scoped_restore-selftests.c \
> + unittests/environ-selftests.c
Please keep the list sorted.
(I'm guilty of missing that before too.)
>
> SUBDIR_UNITTESTS_OBS = \
> function-view-selftests.o \
> offset-type-selftests.o \
> optional-selftests.o \
> ptid-selftests.o \
> - scoped_restore-selftests.o
> + scoped_restore-selftests.o \
> + environ-selftests.o
Ditto.
> diff --git a/gdb/common/environ.c b/gdb/common/environ.c
> index 3145d01..657f2e0 100644
> --- a/gdb/common/environ.c
> +++ b/gdb/common/environ.c
> @@ -18,165 +18,102 @@
> #include "common-defs.h"
> #include "environ.h"
> #include <algorithm>
> -
> +#include <utility>
>
> -/* Return a new environment object. */
> +/* See common/environ.h. */
>
> -struct gdb_environ *
> -make_environ (void)
> +gdb_environ::gdb_environ ()
> {
> - struct gdb_environ *e;
> +}
>
> - e = XNEW (struct gdb_environ);
> +/* See common/environ.h. */
>
> - e->allocated = 10;
> - e->vector = (char **) xmalloc ((e->allocated + 1) * sizeof (char *));
> - e->vector[0] = 0;
> - return e;
> +gdb_environ::~gdb_environ ()
> +{
> + clear ();
> }
>
> -/* Free an environment and all the strings in it. */
> +/* See common/environ.h. */
>
> -void
> -free_environ (struct gdb_environ *e)
> +gdb_environ::gdb_environ (gdb_environ &&e)
> + : m_environ_vector (std::move (e.m_environ_vector))
> {
> - char **vector = e->vector;
> +}
>
> - while (*vector)
> - xfree (*vector++);
> +/* See common/environ.h. */
>
> - xfree (e->vector);
> - xfree (e);
> +gdb_environ &
> +gdb_environ::operator= (gdb_environ &&e)
> +{
> + clear ();
> + m_environ_vector = std::move (e.m_environ_vector);
> + return *this;
> }
>
> -/* Copy the environment given to this process into E.
> - Also copies all the strings in it, so we can be sure
> - that all strings in these environments are safe to free. */
> +/* See common/environ.h. */
>
> void
> -init_environ (struct gdb_environ *e)
> +gdb_environ::clear ()
> {
> - extern char **environ;
> - int i;
> -
> - if (environ == NULL)
> - return;
> -
> - for (i = 0; environ[i]; i++) /*EMPTY */ ;
> + for (char *v : m_environ_vector)
> + xfree (v);
> + m_environ_vector.clear ();
> +}
>
> - if (e->allocated < i)
> - {
> - e->allocated = std::max (i, e->allocated + 10);
> - e->vector = (char **) xrealloc ((char *) e->vector,
> - (e->allocated + 1) * sizeof (char *));
> - }
> +/* See common/environ.h. */
>
> - memcpy (e->vector, environ, (i + 1) * sizeof (char *));
> +const char *
> +gdb_environ::get (const std::string &var) const
> +{
Does this need to be a std::string instead of "const char *"?
Callers always pass a string literal down, so this is going to
force a deep string dup for no good reason, AFAICS.
> + size_t len = var.size ();
> + const char *var_str = var.c_str ();
>
> - while (--i >= 0)
> - {
> - int len = strlen (e->vector[i]);
> - char *newobj = (char *) xmalloc (len + 1);
> + for (char *el : m_environ_vector)
> + if (el != NULL && strncmp (el, var_str, len) == 0 && el[len] == '=')
> + return &el[len + 1];
>
> - memcpy (newobj, e->vector[i], len + 1);
> - e->vector[i] = newobj;
> - }
> + return NULL;
> }
> -char *
> -get_in_environ (const struct gdb_environ *e, const char *var)
> +void
> +gdb_environ::set (const std::string &var, const std::string &value)
Ditto.
> {
> - int len = strlen (var);
> - char **vector = e->vector;
> - char *s;
> -
> - for (; (s = *vector) != NULL; vector++)
> - if (strncmp (s, var, len) == 0 && s[len] == '=')
> - return &s[len + 1];
> + /* We have to unset the variable in the vector if it exists. */
> + unset (var);
>
> - return 0;
> + /* Insert the element before the last one, which is always NULL. */
> + m_environ_vector.insert (m_environ_vector.end () - 1,
> + concat (var.c_str (), "=",
> + value.c_str (), NULL));
> }
>
> -/* Store the value in E of VAR as VALUE. */
> +/* See common/environ.h. */
>
> void
> -set_in_environ (struct gdb_environ *e, const char *var, const char *value)
> +gdb_environ::unset (const std::string &var)
Ditto.
> -
> - return;
> + std::string match = var + '=';
> + const char *match_str = match.c_str ();
> +
> + /* We iterate until '.cend () - 1' because the last element is
> + always NULL. */
> + for (std::vector<char *>::const_iterator el = m_environ_vector.cbegin ();
> + el != m_environ_vector.cend () - 1;
> + ++el)
> + if (startswith (*el, match_str))
In gdb_environ::set you used:
strncmp (el, var_str, len) == 0 && el[len] == '='
It'd be better if places used the same matching code. Maybe even put
this in a separate helper function. The gdb_environ::set version
looks better to me for avoiding temporary heap-allocated strings.
> -/* Remove the setting for variable VAR from environment E. */
> +/* See common/environ.h. */
>
> -void
> -unset_in_environ (struct gdb_environ *e, const char *var)
> +char **
> +gdb_environ::get_char_vector () const
So far, getters in gdb's classes don't have a "get_" prefix.
(except "get()" or course, but that's really a getter in
the same sense.) Can we drop it here? Like:
char **gdb_environ::char_vector () const
Though I'd rename it like this instead:
char ** gdb_environ::envp () const
Because that's what the env vector is traditionally called, e.g.,
from "man 2 execve":
int execve(const char *filename, char *const argv[],
char *const envp[]);
int main(int argc, char *argv[], char *envp[])
Likewise I'd use that name for local variables where
we call gdb_environ::get_char_vector, just to follow
traditional terminology throughout.
> +/* Class that represents the environment variables as seen by the
> + inferior. */
> +
> +class gdb_environ
> +{
> +public:
> + /* Regular constructor and destructor. */
> + gdb_environ ();
> + ~gdb_environ ();
> +
> + /* Move constructor. */
> + gdb_environ (gdb_environ &&e);
> +
> + /* Move assignment. */
> + gdb_environ &operator= (gdb_environ &&e);
> +
> + /* Create a gdb_environ object using the host's environment
> + variables. */
> + static gdb_environ from_host_environ ()
> {
Nit: I find it a bit odd that the ctors/dtors are short but
defined out of line, while this function is defined inline.
If I was looking at controlling what the compiler could inline,
then I'd do it the other way around -- small ctor/dtor in
the header, and this larger function out of line in the .c file.
> - /* Number of usable slots allocated in VECTOR.
> - VECTOR always has one slot not counted here,
> - to hold the terminating zero. */
> - int allocated;
> - /* A vector of slots, ALLOCATED + 1 of them.
> - The first few slots contain strings "VAR=VALUE"
> - and the next one contains zero.
> - Then come some unused slots. */
> - char **vector;
> - };
> + extern char **environ;
> + gdb_environ e;
> +
> + if (environ == NULL)
> + return e;
> +
> + for (int i = 0; environ[i] != NULL; ++i)
> + e.m_environ_vector.push_back (xstrdup (environ[i]));
> +
> + /* The last element of the vector is always going to be NULL. */
> + e.m_environ_vector.push_back (NULL);
>
> -extern struct gdb_environ *make_environ (void);
> + return e;
> + }
> run_target->to_create_inferior (run_target, exec_file,
> std::string (get_inferior_args ()),
> - environ_vector (current_inferior ()->environment),
> + current_inferior ()
> + ->environment.get_char_vector (),
> from_tty);
> @@ -270,7 +270,6 @@ mi_cmd_inferior_tty_show (const char *command, char **argv, int argc)
> void
> _initialize_mi_cmd_env (void)
> {
> - struct gdb_environ *environment;
> const char *env;
>
> /* We want original execution path to reset to, if desired later.
> @@ -278,13 +277,10 @@ _initialize_mi_cmd_env (void)
> current_inferior ()->environment. Also, there's no obvious
> place where this code can be moved such that it surely run
> before any code possibly mangles original PATH. */
> - environment = make_environ ();
> - init_environ (environment);
> - env = get_in_environ (environment, path_var_name);
> + env = getenv (path_var_name);
>
> /* Can be null if path is not set. */
> if (!env)
> env = "";
> orig_path = xstrdup (env);
> - free_environ (environment);
> }
Please split this change to a separate patch. Don't we need to
update the comment just above?
> diff --git a/gdb/unittests/environ-selftests.c b/gdb/unittests/environ-selftests.c
> new file mode 100644
> index 0000000..948eacf
> --- /dev/null
> +++ b/gdb/unittests/environ-selftests.c
> @@ -0,0 +1,79 @@
> +/* Self tests for gdb_environ for GDB, the GNU debugger.
> +
> + Copyright (C) 2017 Free Software Foundation, Inc.
> +
> + This file is part of GDB.
> +
> + This program is free software; you can redistribute it and/or modify
> + it under the terms of the GNU General Public License as published by
> + the Free Software Foundation; either version 3 of the License, or
> + (at your option) any later version.
> +
> + This program is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU General Public License for more details.
> +
> + You should have received a copy of the GNU General Public License
> + along with this program. If not, see <http://www.gnu.org/licenses/>. */
> +
> +#include "defs.h"
> +#include "selftest.h"
> +#include "common/environ.h"
> +
> +namespace selftests {
> +namespace environ {
"environ" as an identifier will be problematic. Please
pick some other name here.
On some hosts "environ" is a define. E.g., on my Fedora's
mingw-w64 install I see:
/usr/x86_64-w64-mingw32/sys-root/mingw/include/stdlib.h:624:#define environ _environ
and gnulib has too:
$ srcgrep -rn environ gnulib/import/ | grep define
gnulib/import/extra/snippet/warn-on-use.h:61: # define environ (*rpl_environ ())
gnulib/import/unistd.in.h:411:# define environ (*_NSGetEnviron ())
gnulib/import/unistd.in.h:432:# define environ (*rpl_environ ())
I don't think "namespace (*_NSGetEnviron ())" would compile. :-)
> +
> +static void
> +run_tests ()
> +{
> + if (setenv ("GDB_SELFTEST_ENVIRON", "1", 1) != 0)
> + error ("Could not set environment variable for testing.");
> +
> + gdb_environ env;
> +
Please add a test that that checks that get_char_vector() on an
empty gdb_environ works as expected. I.e., something like:
gdb_environ env;
SELF_CHECK (env.get_char_vector()[0] == NULL);
AFAICS from:
gdb_environ::gdb_environ ()
{
}
char **
gdb_environ::get_char_vector () const
{
return const_cast<char **> (&m_environ_vector[0]);
}
we end up with a bogus envp, because it points at
m_environ_vector.end(), which is not a valid pointer
to dereference. Even ignoring that, it does not point
to NULL. So if we pass such a pointer to execve or some syscall
that accepts an envp, then it'll try iterating the vector until
it finds a NULL entry, and of course do the wrong thing,
maybe crash if you're lucky.
Note we can get this situation from here too:
static gdb_environ from_host_environ ()
{
extern char **environ;
gdb_environ e;
if (environ == NULL)
return e;
The old code did not suffer from this because it always
allocated gdb_environ::vector:
struct gdb_environ *
make_environ (void)
{
struct gdb_environ *e;
e = XNEW (struct gdb_environ);
e->allocated = 10;
e->vector = (char **) xmalloc ((e->allocated + 1) * sizeof (char *));
e->vector[0] = 0;
return e;
}
So we either always add a NULL to the vector, or we
change gdb_environ::get_char_vector instead, like:
char **
gdb_environ::get_char_vector () const
{
if (m_environ_vector.empty ())
{
static const char *const empty_envp[1] = { NULL };
return const_cast<char **> (empty_envp);
}
return const_cast<char **> (&m_environ_vector[0]);
}
This is OK because execve etc. are garanteed to never change
the envp they're passed.
> + SELF_CHECK (env.get ("PWD") == NULL);
> +
> + env = gdb_environ::from_host_environ ();
> +
> + SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "1") == 0);
> +
> + env.set ("GDB_SELFTEST_ENVIRON", "test");
> + SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "test") == 0);
> +
> + env.unset ("GDB_SELFTEST_ENVIRON");
> + SELF_CHECK (env.get ("GDB_SELFTEST_ENVIRON") == NULL);
> +
> + env.set ("GDB_SELFTEST_ENVIRON", "1");
> + SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "1") == 0);
> +
> + env.clear ();
> + SELF_CHECK (env.get ("GDB_SELFTEST_ENVIRON") == NULL);
Like above, after env.clear() check that this works:
SELF_CHECK (env.get_char_vector()[0] == NULL);
> +
> + if (setenv ("GDB_SELFTEST_ENVIRON", "1", 1) != 0)
> + error ("Could not set environment variable for testing.");
This setenv looks like a stale copy from the one at the top?
I'm not seeing why it's necessary here.
> +
> + env = gdb_environ::from_host_environ ();
> + char **penv = env.get_char_vector ();
> + bool found_var = false, found_twice = false;
> +
> + for (size_t i = 0; penv[i] != NULL; ++i)
> + if (strcmp (penv[i], "GDB_SELFTEST_ENVIRON=1") == 0)
> + {
> + if (found_var)
> + found_twice = true;
> + found_var = true;
> + }
> + SELF_CHECK (found_var == true);
> + SELF_CHECK (found_twice == false);
Why not simply a count:
int found_count = 0;
for (size_t i = 0; penv[i] != NULL; ++i)
if (strcmp (penv[i], "GDB_SELFTEST_ENVIRON=1") == 0)
found_count++;
SELF_CHECK (found_count == 1);
I think that no test actually explicitly sets more than one
var in the vector. I think you should exercise that.
Also check that removing some other var doesn't remove the
first. Etc. E.g., set var 1, set var 2, unset var 1,
check that var 2 is still there.
Thanks,
Pedro Alves