[PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26341)
Adhemerval Zanella
adhemerval.zanella@linaro.org
Wed Aug 12 23:04:11 GMT 2020
On 11/08/2020 22:38, Xiaoming Ni wrote:
> On 2020/8/11 22:57, Adhemerval Zanella wrote:
>>
>>
>> On 11/08/2020 00:00, Xiaoming Ni wrote:
>>> On 2020/8/11 4:48, Adhemerval Zanella wrote:
>>>> It uses both a fixed internal buffer with PATH_MAX size to read and
>>>> copy the results of the readlink call.
>>>>
>>>> Also, if PATH_MAX is not defined it uses a default value of 1024
>>>> as for other stdlib implementations.
>>>>
>>>> The expected stack usage is about 8k on Linux, where PATH_MAX is
>>>> defined as 4096 (plus some internal function usage for local
>>>> variables).
>>>>
>>>> Checked on x86_64-linux-gnu and i686-linux-gnu.
>>>> ---
>>>> stdlib/Makefile | 3 +-
>>>> stdlib/canonicalize.c | 38 +++---
>>>> stdlib/tst-canon-bz26341.c | 108 ++++++++++++++++++
>>>> support/support_set_small_thread_stack_size.c | 12 +-
>>>> support/xthread.h | 2 +
>>>> 5 files changed, 138 insertions(+), 25 deletions(-)
>>>> create mode 100644 stdlib/tst-canon-bz26341.c
>>>>
>>>> diff --git a/stdlib/Makefile b/stdlib/Makefile
>>>> index 4615f6dfe7..7093b8a584 100644
>>>> --- a/stdlib/Makefile
>>>> +++ b/stdlib/Makefile
>>>> @@ -87,7 +87,7 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \
>>>> tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
>>>> tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
>>>> tst-setcontext6 tst-setcontext7 tst-setcontext8 \
>>>> - tst-setcontext9 tst-bz20544
>>>> + tst-setcontext9 tst-bz20544 tst-canon-bz26341
>>>> tests-internal := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
>>>> tst-tls-atexit tst-tls-atexit-nodelete
>>>> @@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library)
>>>> LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
>>>> LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
>>>> LDLIBS-test-on_exit-race = $(shared-thread-library)
>>>> +LDLIBS-tst-canon-bz26341 = $(shared-thread-library)
>>>> LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
>>>> LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)
>>>> diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
>>>> index cbd885a3c5..554ba221e4 100644
>>>> --- a/stdlib/canonicalize.c
>>>> +++ b/stdlib/canonicalize.c
>>>> @@ -28,6 +28,14 @@
>>>> #include <eloop-threshold.h>
>>>> #include <shlib-compat.h>
>>>> +#ifndef PATH_MAX
>>>> +# ifdef MAXPATHLEN
>>>> +# define PATH_MAX MAXPATHLEN
>>>> +# else
>>>> +# define PATH_MAX 1024
>>>> +# endif
>>>> +#endif
>>>> +
>>>> /* Return the canonical absolute name of file NAME. A canonical name
>>>> does not contain any `.', `..' components nor any repeated path
>>>> separators ('/') or symlinks. All path components must exist. If
>>>> @@ -42,9 +50,8 @@
>>>> char *
>>>> __realpath (const char *name, char *resolved)
>>>> {
>>>> - char *rpath, *dest, *extra_buf = NULL;
>>>> + char *rpath, *dest, extra_buf[PATH_MAX];
>>> Why does the 4 KB stack space need to be occupied? Even if there are no linked files ?
>>
>> It does not; it is a simplification to avoid decomposing the function
>> and handling symlinks as a special case. Avoiding the stack allocation
>> in the common case would require either using dynamic allocation or
>> adjusting the function so that, once it finds a symlink, it calls another
>> function to handle the loop with a stack-allocated buffer provided to it.
>> I don't think this extra code complexity really pays off.
>
>
> Extract the symlinks processing as an independent function and move extra_buf and buf to the new independent function to avoid wasting 8 KB stack space when the realpath is used to process unlinked files.
> Is this better?
Yes, my only reservation is the complexity and possible code duplication
needed to handle it. I can't see an easy way to accomplish it without
duplicating the loop code (minus the 'extra_buf' alloca) and making the
default loop call it with the stack-allocated extra_buf (and I would like
to avoid this approach).
Another possibility, which I think would be better, is to use a scratch
buffer and make some compromise between stack usage and heap allocation.
The default 1024 bytes of the scratch buffer should cover most of the
common calls (it is 1/4 of PATH_MAX), so malloc would be called only for
large paths (which should be uncommon). We can use a scratch buffer
for the readlink as well, since we might infer the required size from
the previous lstat call.
With something like the patch below we can make realpath use about
~1024 bytes of stack, or ~2048 bytes if the path contains a symbolic link:
---
diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
index cbd885a3c5..dca160f523 100644
--- a/stdlib/canonicalize.c
+++ b/stdlib/canonicalize.c
@@ -25,9 +25,56 @@
#include <errno.h>
#include <stddef.h>
+#include <scratch_buffer.h>
#include <eloop-threshold.h>
#include <shlib-compat.h>
+#ifndef PATH_MAX
+# ifdef MAXPATHLEN
+# define PATH_MAX MAXPATHLEN
+# else
+# define PATH_MAX 1024
+# endif
+#endif
+
+static bool
+realpath_readlink (const char *rpath, const char *end, size_t path_max,
+ size_t st_size, struct scratch_buffer *extra_buf)
+{
+ bool r = false;
+
+ struct scratch_buffer buf;
+ scratch_buffer_init (&buf);
+ /* Add the terminating null byte. */
+ if (!scratch_buffer_set_array_size (&buf, st_size + 1, sizeof (char)))
+ return false;
+
+ ssize_t n = __readlink (rpath, buf.data, buf.length - 1);
+ if (n < 0)
+ goto out;
+ ((char *) buf.data)[n] = '\0';
+
+ size_t len = strlen (end);
+ if (path_max - n <= len)
+ {
+ __set_errno (ENAMETOOLONG);
+ goto out;
+ }
+
+ if (!scratch_buffer_set_array_size (extra_buf, n + len + 1, sizeof (char)))
+ goto out;
+
+ /* Careful here, end may be a pointer into extra_buf... */
+ memmove ((char *) extra_buf->data + n, end, len + 1);
+ memcpy (extra_buf->data, buf.data, n);
+
+ r = true;
+
+out:
+ scratch_buffer_free (&buf);
+ return r;
+}
+
/* Return the canonical absolute name of file NAME. A canonical name
does not contain any `.', `..' components nor any repeated path
separators ('/') or symlinks. All path components must exist. If
@@ -42,10 +89,13 @@
char *
__realpath (const char *name, char *resolved)
{
- char *rpath, *dest, *extra_buf = NULL;
+ char *rpath, *dest;
const char *start, *end, *rpath_limit;
- long int path_max;
+ const size_t path_max = PATH_MAX;
int num_links = 0;
+ struct scratch_buffer extra_buf;
+
+ scratch_buffer_init (&extra_buf);
if (name == NULL)
{
@@ -65,14 +115,6 @@ __realpath (const char *name, char *resolved)
return NULL;
}
-#ifdef PATH_MAX
- path_max = PATH_MAX;
-#else
- path_max = __pathconf (name, _PC_PATH_MAX);
- if (path_max <= 0)
- path_max = 1024;
-#endif
-
if (resolved == NULL)
{
rpath = malloc (path_max);
@@ -101,7 +143,6 @@ __realpath (const char *name, char *resolved)
for (start = end = name; *start; start = end)
{
struct stat64 st;
- int n;
/* Skip sequence of multiple path-separators. */
while (*start == '/')
@@ -163,35 +204,19 @@ __realpath (const char *name, char *resolved)
if (S_ISLNK (st.st_mode))
{
- char *buf = __alloca (path_max);
- size_t len;
-
if (++num_links > __eloop_threshold ())
{
__set_errno (ELOOP);
goto error;
}
- n = __readlink (rpath, buf, path_max - 1);
- if (n < 0)
+ if (! realpath_readlink (rpath, end, path_max, st.st_size,
+ &extra_buf))
goto error;
- buf[n] = '\0';
-
- if (!extra_buf)
- extra_buf = __alloca (path_max);
- len = strlen (end);
- if (path_max - n <= len)
- {
- __set_errno (ENAMETOOLONG);
- goto error;
- }
-
- /* Careful here, end may be a pointer into extra_buf... */
- memmove (&extra_buf[n], end, len + 1);
- name = end = memcpy (extra_buf, buf, n);
+ name = end = extra_buf.data;
- if (buf[0] == '/')
+ if (((char *)extra_buf.data)[0] == '/')
dest = rpath + 1; /* It's an absolute symlink */
else
/* Back up to previous component, ignore if at root already: */
@@ -209,6 +234,8 @@ __realpath (const char *name, char *resolved)
--dest;
*dest = '\0';
+ scratch_buffer_free (&extra_buf);
+
assert (resolved == NULL || resolved == rpath);
return rpath;
@@ -216,6 +243,7 @@ error:
assert (resolved == NULL || resolved == rpath);
if (resolved == NULL)
free (rpath);
+ scratch_buffer_free (&extra_buf);
return NULL;
}
libc_hidden_def (__realpath)
More information about the Libc-alpha
mailing list