This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 2/2] Use cached offset in ftell when reliable


Ping!

On Thu, Feb 20, 2014 at 10:59:46PM +0530, Siddhesh Poyarekar wrote:
> Hi,
> 
> The cached offset is reliable to use in ftell when the stream handle
> is active.  We can consider a stream as being active when there is
> unbuffered data.  However, even in this case, we can use the cached
> offset only when the stream is not being written to in a+ mode,
> because this case may have unbuffered data and a stale offset; the
> previous read could have sent it off somewhere other than the end of
> the file.
> 
> There were a couple of adjustments necessary to get this to work.
> Firstly, fdopen now ceases to use _IO_file_attach because it sets the
> offset cache to the current file position.  This is not correct
> because there could be changes to the file descriptor before the
> stream handle is activated, which would not get reflected.
> 
> A similar offset caching action is done in _IO_fwide, claiming that
> wide streams have 'problems' with the file offsets.  There don't seem
> to be any obvious problems with not having the offset cache available,
> other than that it will have to be queried in a subsequent
> read/write/seek.  I have removed this as well.
> 
> The testsuite passes successfully with these changes on x86_64.
> 
> Siddhesh
> 
> 	* libio/fileops.c (do_ftell): Use cached offset when
> 	available.
> 	* libio/iofwide.c (do_ftell_wide): Likewise.
> 	* libio/iofdopen.c (_IO_new_fdopen): Don't use
> 	_IO_file_attach.
> 	* libio/wfileops.c (_IO_fwide): Don't cache offset.
> 
> ---
>  libio/fileops.c  | 34 ++++++++++++++++++++++++++++------
>  libio/iofdopen.c | 14 ++++----------
>  libio/iofwide.c  |  6 ------
>  libio/wfileops.c | 30 +++++++++++++++++++++++++++---
>  4 files changed, 59 insertions(+), 25 deletions(-)
> 
> diff --git a/libio/fileops.c b/libio/fileops.c
> index a177302..c44a5da 100644
> --- a/libio/fileops.c
> +++ b/libio/fileops.c
> @@ -952,12 +952,8 @@ get_file_offset (_IO_FILE *fp)
>  static _IO_off64_t
>  do_ftell (_IO_FILE *fp)
>  {
> -  _IO_off64_t result;
> -
> -  result = get_file_offset (fp);
> -
> -  if (result == EOF)
> -    return result;
> +  _IO_off64_t result = 0;
> +  bool use_cached_offset = false;
>  
>    /* No point looking at unflushed data if we haven't allocated buffers
>       yet.  */
> @@ -971,8 +967,34 @@ do_ftell (_IO_FILE *fp)
>  	result -= fp->_IO_read_end - fp->_IO_read_ptr;
>        else
>  	result += fp->_IO_write_ptr - fp->_IO_read_end;
> +
> +      /* It is safe to use the cached offset when available if there is
> +	 unbuffered data (indicating that the file handle is active) and the
> +	 handle is not for a file open in a+ mode.  The latter condition is
> +	 because there could be a scenario where there is a switch from read
> +	 mode to write mode using an fseek to an arbitrary position.  In this
> +	 case, there would be unbuffered data due to be appended to the end of
> +	 the file, but the offset may not necessarily be the end of the
> +	 file.  It is fine to use the cached offset when the a+ stream is in
> +	 read mode though, since the offset is maintained correctly in that
> +	 case.  Note that this is not a comprehensive set of cases when the
> +	 offset is reliable.  The offset may be reliable even in some cases
> +	 where there is no buffered input and the handle is active, but it's
> +	 just that we don't have a way to identify that condition reliably.  */
> +      use_cached_offset = (result != 0 && fp->_offset != _IO_pos_BAD
> +			   && ((fp->_flags & (_IO_IS_APPENDING | _IO_NO_READS))
> +			       == (_IO_IS_APPENDING | _IO_NO_READS)
> +			       && was_writing));
>      }
>  
> +  if (use_cached_offset)
> +    result += fp->_offset;
> +  else
> +    result += get_file_offset (fp);
> +
> +  if (result == EOF)
> +    return result;
> +
>    if (result < 0)
>      {
>        __set_errno (EINVAL);
> diff --git a/libio/iofdopen.c b/libio/iofdopen.c
> index 066ff19..4525f0f 100644
> --- a/libio/iofdopen.c
> +++ b/libio/iofdopen.c
> @@ -141,9 +141,6 @@ _IO_new_fdopen (fd, mode)
>  #ifdef _IO_MTSAFE_IO
>    new_f->fp.file._lock = &new_f->lock;
>  #endif
> -  /* Set up initially to use the `maybe_mmap' jump tables rather than using
> -     __fopen_maybe_mmap to do it, because we need them in place before we
> -     call _IO_file_attach or else it will allocate a buffer immediately.  */
>    _IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd,
>  #ifdef _G_HAVE_MMAP
>  	       (use_mmap && (read_write & _IO_NO_WRITES))
> @@ -159,13 +156,10 @@ _IO_new_fdopen (fd, mode)
>  #if  !_IO_UNIFIED_JUMPTABLES
>    new_f->fp.vtable = NULL;
>  #endif
> -  if (_IO_file_attach ((_IO_FILE *) &new_f->fp, fd) == NULL)
> -    {
> -      _IO_setb (&new_f->fp.file, NULL, NULL, 0);
> -      _IO_un_link (&new_f->fp);
> -      free (new_f);
> -      return NULL;
> -    }
> +  /* We only record the fd because _IO_file_init will have unset the offset.
> +     We don't need to get the current offset in the file now since it could
> +     change between now and when the handle is activated.  */
> +  new_f->fp.file._fileno = fd;
>    new_f->fp.file._flags &= ~_IO_DELETE_DONT_CLOSE;
>  
>    _IO_mask_flags (&new_f->fp.file, read_write,
> diff --git a/libio/iofwide.c b/libio/iofwide.c
> index 5cff632..64187e4 100644
> --- a/libio/iofwide.c
> +++ b/libio/iofwide.c
> @@ -199,12 +199,6 @@ _IO_fwide (fp, mode)
>  
>        /* From now on use the wide character callback functions.  */
>        ((struct _IO_FILE_plus *) fp)->vtable = fp->_wide_data->_wide_vtable;
> -
> -      /* One last twist: we get the current stream position.  The wide
> -	 char streams have much more problems with not knowing the
> -	 current position and so we should disable the optimization
> -	 which allows the functions without knowing the position.  */
> -      fp->_offset = _IO_SYSSEEK (fp, 0, _IO_seek_cur);
>      }
>  
>    /* Set the mode now.  */
> diff --git a/libio/wfileops.c b/libio/wfileops.c
> index eda7828..651f5ce 100644
> --- a/libio/wfileops.c
> +++ b/libio/wfileops.c
> @@ -600,6 +600,7 @@ static _IO_off64_t
>  do_ftell_wide (_IO_FILE *fp)
>  {
>    _IO_off64_t result, offset = 0;
> +  bool use_cached_offset = false;
>  
>    /* No point looking for offsets in the buffer if it hasn't even been
>       allocated.  */
> @@ -696,13 +697,36 @@ do_ftell_wide (_IO_FILE *fp)
>  	      offset += outstop - out;
>  	    }
>  
> -	  /* _IO_read_end coincides with fp._offset, so the actual file position
> -	     is fp._offset - (_IO_read_end - new_write_ptr).  */
> +	  /* _IO_read_end coincides with fp._offset, so the actual file
> +	     position is fp._offset - (_IO_read_end - new_write_ptr).  */
>  	  offset -= fp->_IO_read_end - fp->_IO_write_ptr;
> +
>  	}
> +
> +      /* It is safe to use the cached offset when available if there is
> +	 unbuffered data (indicating that the file handle is active) and
> +	 the handle is not for a file open in a+ mode.  The latter
> +	 condition is because there could be a scenario where there is a
> +	 switch from read mode to write mode using an fseek to an arbitrary
> +	 position.  In this case, there would be unbuffered data due to be
> +	 appended to the end of the file, but the offset may not
> +	 necessarily be the end of the file.  It is fine to use the cached
> +	 offset when the a+ stream is in read mode though, since the offset
> +	 is maintained correctly in that case.  Note that this is not a
> +	 comprehensive set of cases when the offset is reliable.  The
> +	 offset may be reliable even in some cases where there is no
> +	 buffered input and the handle is active, but it's just that we
> +	 don't have a way to identify that condition reliably.  */
> +      use_cached_offset = (offset != 0 && fp->_offset != _IO_pos_BAD
> +			   && ((fp->_flags & (_IO_IS_APPENDING | _IO_NO_READS))
> +			       == (_IO_IS_APPENDING | _IO_NO_READS)
> +			       && was_writing));
>      }
>  
> -  result = get_file_offset (fp);
> +  if (use_cached_offset)
> +    result = fp->_offset;
> +  else
> +    result = get_file_offset (fp);
>  
>    if (result == EOF)
>      return result;
> -- 
> 1.8.3.1
> 


Attachment: pgpzQhSsBUviy.pgp
Description: PGP signature


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]