This is the mail archive of the
gdb-patches@sourceware.org
mailing list for the GDB project.
Re: [PATCH v4] C++ify gdb/common/environ.c
- From: Sergio Durigan Junior <sergiodj at redhat dot com>
- To: Pedro Alves <palves at redhat dot com>
- Cc: GDB Patches <gdb-patches at sourceware dot org>, Simon Marchi <simon dot marchi at polymtl dot ca>
- Date: Fri, 16 Jun 2017 14:01:54 -0400
- Subject: Re: [PATCH v4] C++ify gdb/common/environ.c
- Authentication-results: sourceware.org; auth=none
- Authentication-results: ext-mx03.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com
- Authentication-results: ext-mx03.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=sergiodj at redhat dot com
- Dkim-filter: OpenDKIM Filter v2.11.0 mx1.redhat.com 5468280F94
- Dmarc-filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 5468280F94
- References: <20170413040455.23996-1-sergiodj@redhat.com> <20170614192219.12364-1-sergiodj@redhat.com> <c80f859f-1146-a076-018f-95c8a5241ed5@redhat.com>
On Friday, June 16 2017, Pedro Alves wrote:
> On 06/14/2017 08:22 PM, Sergio Durigan Junior wrote:
>
>> diff --git a/gdb/Makefile.in b/gdb/Makefile.in
>> index 5e5fcaa..133db1a 100644
>> --- a/gdb/Makefile.in
>> +++ b/gdb/Makefile.in
>> @@ -530,14 +530,16 @@ SUBDIR_UNITTESTS_SRCS = \
>> unittests/offset-type-selftests.c \
>> unittests/optional-selftests.c \
>> unittests/ptid-selftests.c \
>> - unittests/scoped_restore-selftests.c
>> + unittests/scoped_restore-selftests.c \
>> + unittests/environ-selftests.c
>
> Please keep the list sorted.
>
> (I'm guilty of missing that before too.)
Done.
>>
>> SUBDIR_UNITTESTS_OBS = \
>> function-view-selftests.o \
>> offset-type-selftests.o \
>> optional-selftests.o \
>> ptid-selftests.o \
>> - scoped_restore-selftests.o
>> + scoped_restore-selftests.o \
>> + environ-selftests.o
>
> Ditto.
Done.
>> diff --git a/gdb/common/environ.c b/gdb/common/environ.c
>> index 3145d01..657f2e0 100644
>> --- a/gdb/common/environ.c
>> +++ b/gdb/common/environ.c
>> @@ -18,165 +18,102 @@
>> #include "common-defs.h"
>> #include "environ.h"
>> #include <algorithm>
>> -
>> +#include <utility>
>>
>> -/* Return a new environment object. */
>> +/* See common/environ.h. */
>>
>> -struct gdb_environ *
>> -make_environ (void)
>> +gdb_environ::gdb_environ ()
>> {
>> - struct gdb_environ *e;
>> +}
>>
>> - e = XNEW (struct gdb_environ);
>> +/* See common/environ.h. */
>>
>> - e->allocated = 10;
>> - e->vector = (char **) xmalloc ((e->allocated + 1) * sizeof (char *));
>> - e->vector[0] = 0;
>> - return e;
>> +gdb_environ::~gdb_environ ()
>> +{
>> + clear ();
>> }
>>
>> -/* Free an environment and all the strings in it. */
>> +/* See common/environ.h. */
>>
>> -void
>> -free_environ (struct gdb_environ *e)
>> +gdb_environ::gdb_environ (gdb_environ &&e)
>> + : m_environ_vector (std::move (e.m_environ_vector))
>> {
>> - char **vector = e->vector;
>> +}
>>
>> - while (*vector)
>> - xfree (*vector++);
>> +/* See common/environ.h. */
>>
>> - xfree (e->vector);
>> - xfree (e);
>> +gdb_environ &
>> +gdb_environ::operator= (gdb_environ &&e)
>> +{
>> + clear ();
>> + m_environ_vector = std::move (e.m_environ_vector);
>> + return *this;
>> }
>>
>> -/* Copy the environment given to this process into E.
>> - Also copies all the strings in it, so we can be sure
>> - that all strings in these environments are safe to free. */
>> +/* See common/environ.h. */
>>
>> void
>> -init_environ (struct gdb_environ *e)
>> +gdb_environ::clear ()
>> {
>> - extern char **environ;
>> - int i;
>> -
>> - if (environ == NULL)
>> - return;
>> -
>> - for (i = 0; environ[i]; i++) /*EMPTY */ ;
>> + for (char *v : m_environ_vector)
>> + xfree (v);
>> + m_environ_vector.clear ();
>> +}
>>
>> - if (e->allocated < i)
>> - {
>> - e->allocated = std::max (i, e->allocated + 10);
>> - e->vector = (char **) xrealloc ((char *) e->vector,
>> - (e->allocated + 1) * sizeof (char *));
>> - }
>> +/* See common/environ.h. */
>>
>> - memcpy (e->vector, environ, (i + 1) * sizeof (char *));
>> +const char *
>> +gdb_environ::get (const std::string &var) const
>> +{
>
> Does this need to be a std::string instead of "const char *"?
> Callers always pass a string literal down, so this is going to
> force a deep string dup for no good reason, AFAICS.
It doesn't. Changed to const char *.
>
>> + size_t len = var.size ();
>> + const char *var_str = var.c_str ();
>>
>> - while (--i >= 0)
>> - {
>> - int len = strlen (e->vector[i]);
>> - char *newobj = (char *) xmalloc (len + 1);
>> + for (char *el : m_environ_vector)
>> + if (el != NULL && strncmp (el, var_str, len) == 0 && el[len] == '=')
>> + return &el[len + 1];
>>
>> - memcpy (newobj, e->vector[i], len + 1);
>> - e->vector[i] = newobj;
>> - }
>> + return NULL;
>> }
>
>> -char *
>> -get_in_environ (const struct gdb_environ *e, const char *var)
>> +void
>> +gdb_environ::set (const std::string &var, const std::string &value)
>
> Ditto.
Likewise.
>> {
>> - int len = strlen (var);
>> - char **vector = e->vector;
>> - char *s;
>> -
>> - for (; (s = *vector) != NULL; vector++)
>> - if (strncmp (s, var, len) == 0 && s[len] == '=')
>> - return &s[len + 1];
>> + /* We have to unset the variable in the vector if it exists. */
>> + unset (var);
>>
>> - return 0;
>> + /* Insert the element before the last one, which is always NULL. */
>> + m_environ_vector.insert (m_environ_vector.end () - 1,
>> + concat (var.c_str (), "=",
>> + value.c_str (), NULL));
>> }
>>
>> -/* Store the value in E of VAR as VALUE. */
>> +/* See common/environ.h. */
>>
>> void
>> -set_in_environ (struct gdb_environ *e, const char *var, const char *value)
>> +gdb_environ::unset (const std::string &var)
>
> Ditto.
Likewise.
>
>> -
>> - return;
>> + std::string match = var + '=';
>> + const char *match_str = match.c_str ();
>> +
>> + /* We iterate until '.cend () - 1' because the last element is
>> + always NULL. */
>> + for (std::vector<char *>::const_iterator el = m_environ_vector.cbegin ();
>> + el != m_environ_vector.cend () - 1;
>> + ++el)
>> + if (startswith (*el, match_str))
>
> In gdb_environ::set you used:
I assume you meant gdb_environ::get, right?
>
> strncmp (el, var_str, len) == 0 && el[len] == '='
>
> It'd be better if places used the same matching code. Maybe even put
> this in a separate helper function. The gdb_environ::set version
> looks better to me for avoiding temporary heap-allocated strings.
Right, done.
>
>> -/* Remove the setting for variable VAR from environment E. */
>> +/* See common/environ.h. */
>>
>> -void
>> -unset_in_environ (struct gdb_environ *e, const char *var)
>> +char **
>> +gdb_environ::get_char_vector () const
>
> So far, getters in gdb's classes don't have a "get_" prefix.
> (except "get()" or course, but that's really a getter in
> the same sense.) Can we drop it here? Like:
Yeah, sure. Simon made this observation in a previous review about the
other methods, but I thought it'd make sense to keep the "get_" prefix
for this specific one.
>
> char **gdb_environ::char_vector () const
>
> Though I'd rename it like this instead:
>
> char ** gdb_environ::envp () const
>
> Because that's what the env vector is traditionally called, e.g.,
> from "man 2 execve":
>
> int execve(const char *filename, char *const argv[],
> char *const envp[]);
>
> int main(int argc, char *argv[], char *envp[])
>
> Likewise I'd use that name for local variables where
> we call gdb_environ::get_char_vector, just to follow
> traditional terminology throughout.
OK, fair enough. I'll rename it to envp then. There's just one place
where we assign the return of gdb_environ::get_char_vector to a local
variable (in infcmd.c), so I renamed it accordingly.
>> +/* Class that represents the environment variables as seen by the
>> + inferior. */
>> +
>> +class gdb_environ
>> +{
>> +public:
>> + /* Regular constructor and destructor. */
>> + gdb_environ ();
>> + ~gdb_environ ();
>> +
>> + /* Move constructor. */
>> + gdb_environ (gdb_environ &&e);
>> +
>> + /* Move assignment. */
>> + gdb_environ &operator= (gdb_environ &&e);
>> +
>> + /* Create a gdb_environ object using the host's environment
>> + variables. */
>> + static gdb_environ from_host_environ ()
>> {
>
> Nit: I find it a bit odd that the ctors/dtors are short but
> defined out of line, while this function is defined inline.
> If I was looking at controlling what the compiler could inline,
> then I'd do it the other way around -- small ctor/dtor in
> the header, and this larger function out of line in the .c file.
Question: if I define a method inside the class, does this implicitly
tell the compiler that I want to inline it, as oppose to defining the
method outside?
I'll follow your advice and define the short ctors inside the class, and
move the definition of from_host_environ to the C file.
>> - /* Number of usable slots allocated in VECTOR.
>> - VECTOR always has one slot not counted here,
>> - to hold the terminating zero. */
>> - int allocated;
>> - /* A vector of slots, ALLOCATED + 1 of them.
>> - The first few slots contain strings "VAR=VALUE"
>> - and the next one contains zero.
>> - Then come some unused slots. */
>> - char **vector;
>> - };
>> + extern char **environ;
>> + gdb_environ e;
>> +
>> + if (environ == NULL)
>> + return e;
>> +
>> + for (int i = 0; environ[i] != NULL; ++i)
>> + e.m_environ_vector.push_back (xstrdup (environ[i]));
>> +
>> + /* The last element of the vector is always going to be NULL. */
>> + e.m_environ_vector.push_back (NULL);
>>
>> -extern struct gdb_environ *make_environ (void);
>> + return e;
>> + }
>
>> run_target->to_create_inferior (run_target, exec_file,
>> std::string (get_inferior_args ()),
>> - environ_vector (current_inferior ()->environment),
>> + current_inferior ()
>> + ->environment.get_char_vector (),
>> from_tty);
>> @@ -270,7 +270,6 @@ mi_cmd_inferior_tty_show (const char *command, char **argv, int argc)
>> void
>> _initialize_mi_cmd_env (void)
>> {
>> - struct gdb_environ *environment;
>> const char *env;
>>
>> /* We want original execution path to reset to, if desired later.
>> @@ -278,13 +277,10 @@ _initialize_mi_cmd_env (void)
>> current_inferior ()->environment. Also, there's no obvious
>> place where this code can be moved such that it surely run
>> before any code possibly mangles original PATH. */
>> - environment = make_environ ();
>> - init_environ (environment);
>> - env = get_in_environ (environment, path_var_name);
>> + env = getenv (path_var_name);
>>
>> /* Can be null if path is not set. */
>> if (!env)
>> env = "";
>> orig_path = xstrdup (env);
>> - free_environ (environment);
>> }
>
> Please split this change to a separate patch. Don't we need to
> update the comment just above?
Sure, I'll split and send a separate patch. And yeah, the comment needs
updating, thanks for noticing that.
>
>> diff --git a/gdb/unittests/environ-selftests.c b/gdb/unittests/environ-selftests.c
>> new file mode 100644
>> index 0000000..948eacf
>> --- /dev/null
>> +++ b/gdb/unittests/environ-selftests.c
>> @@ -0,0 +1,79 @@
>> +/* Self tests for gdb_environ for GDB, the GNU debugger.
>> +
>> + Copyright (C) 2017 Free Software Foundation, Inc.
>> +
>> + This file is part of GDB.
>> +
>> + This program is free software; you can redistribute it and/or modify
>> + it under the terms of the GNU General Public License as published by
>> + the Free Software Foundation; either version 3 of the License, or
>> + (at your option) any later version.
>> +
>> + This program is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>> + GNU General Public License for more details.
>> +
>> + You should have received a copy of the GNU General Public License
>> + along with this program. If not, see <http://www.gnu.org/licenses/>. */
>> +
>> +#include "defs.h"
>> +#include "selftest.h"
>> +#include "common/environ.h"
>> +
>> +namespace selftests {
>> +namespace environ {
>
> "environ" as an identifier will be problematic. Please
> pick some other name here.
>
> On some hosts "environ" is a define. E.g., on my Fedora's
> mingw-w64 install I see:
>
> /usr/x86_64-w64-mingw32/sys-root/mingw/include/stdlib.h:624:#define environ _environ
>
> and gnulib has too:
>
> $ srcgrep -rn environ gnulib/import/ | grep define
> gnulib/import/extra/snippet/warn-on-use.h:61: # define environ (*rpl_environ ())
> gnulib/import/unistd.in.h:411:# define environ (*_NSGetEnviron ())
> gnulib/import/unistd.in.h:432:# define environ (*rpl_environ ())
>
> I don't think "namespace (*_NSGetEnviron ())" would compile. :-)
Hah, right :-). I'll pick "namespace gdb_environ" then.
>
>> +
>> +static void
>> +run_tests ()
>> +{
>> + if (setenv ("GDB_SELFTEST_ENVIRON", "1", 1) != 0)
>> + error ("Could not set environment variable for testing.");
>> +
>> + gdb_environ env;
>> +
>
> Please add a test that that checks that get_char_vector() on an
> empty gdb_environ works as expected. I.e., something like:
>
> gdb_environ env;
> SELF_CHECK (env.get_char_vector()[0] == NULL);
Hm, right, makes sense. I'll add that.
> AFAICS from:
>
> gdb_environ::gdb_environ ()
> {
> }
>
> char **
> gdb_environ::get_char_vector () const
> {
> return const_cast<char **> (&m_environ_vector[0]);
> }
>
> we end up with a bogus envp, because it points at
> m_environ_vector.end(), which is not a valid pointer
> to dereference. Even ignoring that, it does not point
> to NULL. So if we pass such a pointer to execve or some syscall
> that accepts an envp, then it'll try iterating the vector until
> it finds a NULL entry, and of course do the wrong thing,
> maybe crash if you're lucky.
>
> Note we can get this situation from here too:
>
> static gdb_environ from_host_environ ()
> {
> extern char **environ;
> gdb_environ e;
>
> if (environ == NULL)
> return e;
>
>
> The old code did not suffer from this because it always
> allocated gdb_environ::vector:
>
> struct gdb_environ *
> make_environ (void)
> {
> struct gdb_environ *e;
>
> e = XNEW (struct gdb_environ);
>
> e->allocated = 10;
> e->vector = (char **) xmalloc ((e->allocated + 1) * sizeof (char *));
> e->vector[0] = 0;
> return e;
> }
>
> So we either always add a NULL to the vector, or we
> change gdb_environ::get_char_vector instead, like:
>
> char **
> gdb_environ::get_char_vector () const
> {
> if (m_environ_vector.empty ())
> {
> static const char *const empty_envp[1] = { NULL };
> return const_cast<char **> (empty_envp);
> }
> return const_cast<char **> (&m_environ_vector[0]);
> }
>
> This is OK because execve etc. are garanteed to never change
> the envp they're passed.
Oh, good catch. I prefer to just initialize the vector with a NULL
value in the ctor; will do that now.
>> + SELF_CHECK (env.get ("PWD") == NULL);
>> +
>> + env = gdb_environ::from_host_environ ();
>> +
>> + SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "1") == 0);
>> +
>> + env.set ("GDB_SELFTEST_ENVIRON", "test");
>> + SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "test") == 0);
>> +
>> + env.unset ("GDB_SELFTEST_ENVIRON");
>> + SELF_CHECK (env.get ("GDB_SELFTEST_ENVIRON") == NULL);
>> +
>> + env.set ("GDB_SELFTEST_ENVIRON", "1");
>> + SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "1") == 0);
>> +
>> + env.clear ();
>> + SELF_CHECK (env.get ("GDB_SELFTEST_ENVIRON") == NULL);
>
> Like above, after env.clear() check that this works:
>
> SELF_CHECK (env.get_char_vector()[0] == NULL);
Will do.
>> +
>> + if (setenv ("GDB_SELFTEST_ENVIRON", "1", 1) != 0)
>> + error ("Could not set environment variable for testing.");
>
> This setenv looks like a stale copy from the one at the top?
> I'm not seeing why it's necessary here.
It is indeed, thanks. Removed.
>> +
>> + env = gdb_environ::from_host_environ ();
>> + char **penv = env.get_char_vector ();
>> + bool found_var = false, found_twice = false;
>> +
>> + for (size_t i = 0; penv[i] != NULL; ++i)
>> + if (strcmp (penv[i], "GDB_SELFTEST_ENVIRON=1") == 0)
>> + {
>> + if (found_var)
>> + found_twice = true;
>> + found_var = true;
>> + }
>> + SELF_CHECK (found_var == true);
>> + SELF_CHECK (found_twice == false);
>
> Why not simply a count:
>
> int found_count = 0;
> for (size_t i = 0; penv[i] != NULL; ++i)
> if (strcmp (penv[i], "GDB_SELFTEST_ENVIRON=1") == 0)
> found_count++;
> SELF_CHECK (found_count == 1);
Good idea, thanks.
> I think that no test actually explicitly sets more than one
> var in the vector. I think you should exercise that.
> Also check that removing some other var doesn't remove the
> first. Etc. E.g., set var 1, set var 2, unset var 1,
> check that var 2 is still there.
Will do.
Thanks for the review. I'll send v5 soon.
--
Sergio
GPG key ID: 237A 54B1 0287 28BF 00EF 31F4 D0EB 7628 65FC 5E36
Please send encrypted e-mail if possible
http://sergiodj.net/