PING: [PATCH v3] Fix #27777 - now use a doubly-linked list for _IO_list_all

H.J. Lu hjl.tools@gmail.com
Mon May 13 12:43:26 GMT 2024


On Mon, May 6, 2024 at 6:52 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> From: Alexandre Ferrieux <alexandre.ferrieux@orange.com>
>
> This patch fixes BZ #27777 "fclose does a linear search, takes ages when
> many FILE* are opened".  Simply put, the master list of opened (FILE*),
> namely _IO_list_all, is a singly-linked list.  As a consequence, the
> removal of a single element is in O(N), which cripples the performance
> of fclose().  The patch switches to a doubly-linked list, yielding O(1)
> removal.  The one padding field in struct _IO_FILE, __pad5, is renamed
> to _prevchain for a doubly-linked list.  Since fields in struct _IO_FILE
> after the _lock field are internal to glibc and opaque to applications.
> We can change them as long as the size of struct _IO_FILE is unchanged,
> which is checked as the part of glibc ABI with sizes of _IO_2_1_stdin_,
> _IO_2_1_stdout_ and _IO_2_1_stderr_.
>
> NB: When _IO_vtable_offset (fp) == 0, copy relocation will cover the
> whole struct _IO_FILE.  Otherwise, only fields up to the _lock field
> will be copied to applications at run-time.  It is used to check if
> the _prevchain field can be safely accessed.
>
> As tst-fclose.c shows, after opening 2 million (FILE*), the fclose() of
> 100 of them takes quite a few seconds without the patch, and under 2
> seconds with it using "make check -jN" on a loaded machine.
>
> Co-Authored-By: H.J. Lu <hjl.tools@gmail.com>
> ---
>  libio/Makefile                 |  1 +
>  libio/bits/types/struct_FILE.h |  4 +--
>  libio/genops.c                 | 26 ++++++++++++++++
>  libio/stdfiles.c               | 15 +++++++++
>  libio/tst-fclose.c             | 56 ++++++++++++++++++++++++++++++++++
>  5 files changed, 100 insertions(+), 2 deletions(-)
>  create mode 100644 libio/tst-fclose.c
>
> diff --git a/libio/Makefile b/libio/Makefile
> index 0c1f16ee3b..7f598c840c 100644
> --- a/libio/Makefile
> +++ b/libio/Makefile
> @@ -94,6 +94,7 @@ tests = \
>    tst-eof \
>    tst-ext \
>    tst-ext2 \
> +  tst-fclose \
>    tst-fgetc-after-eof \
>    tst-fgetwc \
>    tst-fgetws \
> diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
> index 7cdaae86f8..d8d26639d1 100644
> --- a/libio/bits/types/struct_FILE.h
> +++ b/libio/bits/types/struct_FILE.h
> @@ -92,10 +92,10 @@ struct _IO_FILE_complete
>    struct _IO_wide_data *_wide_data;
>    struct _IO_FILE *_freeres_list;
>    void *_freeres_buf;
> -  size_t __pad5;
> +  struct _IO_FILE **_prevchain;
>    int _mode;
>    /* Make sure we don't get into trouble again.  */
> -  char _unused2[15 * sizeof (int) - 4 * sizeof (void *) - sizeof (size_t)];
> +  char _unused2[15 * sizeof (int) - 5 * sizeof (void *)];
>  };
>
>  /* These macros are used by bits/stdio.h and internal headers.  */
> diff --git a/libio/genops.c b/libio/genops.c
> index bc45e60a09..994ee9c0b1 100644
> --- a/libio/genops.c
> +++ b/libio/genops.c
> @@ -48,6 +48,19 @@ flush_cleanup (void *not_used)
>  }
>  #endif
>
> +/* Fields in struct _IO_FILE after the _lock field are internal to
> +   glibc and opaque to applications.  We can change them as long as
> +   the size of struct _IO_FILE is unchanged, which is checked as the
> +   part of glibc ABI with sizes of _IO_2_1_stdin_, _IO_2_1_stdout_
> +   and _IO_2_1_stderr_.
> +
> +   NB: When _IO_vtable_offset (fp) == 0, copy relocation will cover the
> +   whole struct _IO_FILE.  Otherwise, only fields up to the _lock field
> +   will be copied.  */
> +_Static_assert (offsetof (struct _IO_FILE, _prevchain)
> +               > offsetof (struct _IO_FILE, _lock),
> +               "offset of _prevchain > offset of _lock");
> +
>  void
>  _IO_un_link (struct _IO_FILE_plus *fp)
>  {
> @@ -62,6 +75,14 @@ _IO_un_link (struct _IO_FILE_plus *fp)
>  #endif
>        if (_IO_list_all == NULL)
>         ;
> +      else if (_IO_vtable_offset ((FILE *) fp) == 0)
> +       {
> +         FILE **pr = fp->file._prevchain;
> +         FILE *nx = fp->file._chain;
> +         *pr = nx;
> +         if (nx != NULL)
> +           nx->_prevchain = pr;
> +       }
>        else if (fp == _IO_list_all)
>         _IO_list_all = (struct _IO_FILE_plus *) _IO_list_all->file._chain;
>        else
> @@ -95,6 +116,11 @@ _IO_link_in (struct _IO_FILE_plus *fp)
>        _IO_flockfile ((FILE *) fp);
>  #endif
>        fp->file._chain = (FILE *) _IO_list_all;
> +      if (_IO_vtable_offset ((FILE *) fp) == 0)
> +       {
> +         fp->file._prevchain = (FILE **) &_IO_list_all;
> +         _IO_list_all->file._prevchain = &fp->file._chain;
> +       }
>        _IO_list_all = fp;
>  #ifdef _IO_MTSAFE_IO
>        _IO_funlockfile ((FILE *) fp);
> diff --git a/libio/stdfiles.c b/libio/stdfiles.c
> index cd8eca8bf3..d607fa02e0 100644
> --- a/libio/stdfiles.c
> +++ b/libio/stdfiles.c
> @@ -54,4 +54,19 @@ DEF_STDFILE(_IO_2_1_stdout_, 1, &_IO_2_1_stdin_, _IO_NO_READS);
>  DEF_STDFILE(_IO_2_1_stderr_, 2, &_IO_2_1_stdout_, _IO_NO_READS+_IO_UNBUFFERED);
>
>  struct _IO_FILE_plus *_IO_list_all = &_IO_2_1_stderr_;
> +
> +/* Finish the double-linking for stdfiles as static initialization
> +   cannot.  */
> +
> +__THROW __attribute__ ((constructor))
> +static void
> +_IO_stdfiles_init (void)
> +{
> +  struct _IO_FILE **f;
> +  for (f = (struct _IO_FILE **) &_IO_list_all;
> +       *f != NULL;
> +       f = &(*f)->_chain)
> +    (*f)->_prevchain = f;
> +}
> +
>  libc_hidden_data_def (_IO_list_all)
> diff --git a/libio/tst-fclose.c b/libio/tst-fclose.c
> new file mode 100644
> index 0000000000..bc0c216e29
> --- /dev/null
> +++ b/libio/tst-fclose.c
> @@ -0,0 +1,56 @@
> +/* Verify fclose performance with many opened files.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <support/test-driver.h>
> +
> +#define N 2000000
> +#define M 100
> +
> +static int
> +do_test (void)
> +{
> +  FILE *ff, *keep[M];
> +  int i;
> +
> +  fprintf (stderr, "Preparing %d FILEs ...\n", N);
> +  fflush (stderr);
> +  for (i = 0; i < N; i++)
> +    {
> +      ff = fdopen (STDIN_FILENO, "r");
> +      if (!ff)
> +       {
> +         fprintf (stderr, "### failed to fdopen: %m\n");
> +         return EXIT_FAILURE;
> +       }
> +      if (i < M)
> +       keep[i] = ff;
> +    }
> +  fprintf (stderr, "Now fclosing %d FILEs ...", M);
> +  fflush (stderr);
> +  for (i = 0; i < M; i++)
> +    fclose (keep[i]);
> +  fprintf (stderr, "DONE\n");
> +  fflush (stderr);
> +  return EXIT_SUCCESS;
> +}
> +
> +#define TIMEOUT 2
> +#include <support/test-driver.c>
> --
> 2.45.0
>

PING

-- 
H.J.


More information about the Libc-alpha mailing list