Cygwin Filesystem Performance degradation 1.7.5 vs 1.7.7, and methods for improving performance

Yoni Londner yonihola2@gmail.com
Wed Oct 13 15:38:00 GMT 2010


Hi,

 > I will send multiple patches later today
Here is the patch that caches symlink contents and exec_state.

Yoni.

Index: fhandler_disk_file.cc
===================================================================
RCS file: /cvs/src/src/winsup/cygwin/fhandler_disk_file.cc,v
retrieving revision 1.345
diff -u -p -r1.345 fhandler_disk_file.cc
--- fhandler_disk_file.cc	7 Oct 2010 14:03:26 -0000	1.345
+++ fhandler_disk_file.cc	13 Oct 2010 15:29:36 -0000
@@ -348,7 +348,7 @@ fhandler_base::fstat_by_handle (struct _
       on the information stored in pc.fnoi.  So we overwrite them here. */
    if (get_io_handle ())
      {
-      PFILE_NETWORK_OPEN_INFORMATION pfnoi = pc.fnoi ();
+      PFILE_CYGWIN_INFORMATION pfnoi = pc.fnoi ();
        status = NtQueryInformationFile (h, &io, pfnoi, sizeof *pfnoi,
                                        FileNetworkOpenInformation);
        if (!NT_SUCCESS (status))
@@ -485,7 +485,7 @@ fhandler_base::fstat_helper (struct __st
    IO_STATUS_BLOCK st;
    FILE_COMPRESSION_INFORMATION fci;
    HANDLE h = get_stat_handle ();
-  PFILE_NETWORK_OPEN_INFORMATION pfnoi = pc.fnoi ();
+  PFILE_CYGWIN_INFORMATION pfnoi = pc.fnoi ();
    ULONG attributes = pc.file_attributes ();

    to_timestruc_t ((PFILETIME) &pfnoi->LastAccessTime, &buf->st_atim);
@@ -590,8 +590,14 @@ fhandler_base::fstat_helper (struct __st
        else
  	{
  	  buf->st_mode |= S_IFREG;
+	  file_cache_t *s = NULL;
+	  if ((s = file_cache_lookup(pc.get_win32(), pfnoi)) &&
+	      s->exec_state>=0)
+	    {
+	      pc.set_exec(s->exec_state);
+	    }
  	  /* Check suffix for executable file. */
-	  if (pc.exec_state () != is_executable)
+	  else if (pc.exec_state () != is_executable)
  	    {
  	      PUNICODE_STRING path = pc.get_nt_native_path ();

@@ -638,6 +644,11 @@ fhandler_base::fstat_helper (struct __st
  		  NtClose (h);
  		}
  	    }
+	    if (!s)
+	    {
+		file_cache_update(pc.get_win32(), pfnoi, NULL, 0,
+		    pc.exec_state());
+	    }
  	}
        if (pc.exec_state () == is_executable)
  	buf->st_mode |= STD_XBITS;
Index: path.cc
===================================================================
RCS file: /cvs/src/src/winsup/cygwin/path.cc,v
retrieving revision 1.617
diff -u -p -r1.617 path.cc
--- path.cc	9 Oct 2010 10:54:13 -0000	1.617
+++ path.cc	13 Oct 2010 15:29:39 -0000
@@ -73,6 +73,7 @@
  #include <wctype.h>

  bool dos_file_warning = true;
+bool use_file_cache = true; /* when false, file_cache_update returns at once and file_cache_lookup returns NULL */

  suffix_info stat_suffixes[] =
  {
@@ -1876,6 +1877,7 @@ symlink_info::check_sysfile (HANDLE h)
        else
  	res = posixify (srcbuf);
      }
+  debug_printf ("res %d", res);
    return res;
  }

@@ -2213,6 +2215,95 @@ symlink_info::parse_device (const char *
     Return -1 on error, 0 if PATH is not a symlink, or the length
     stored into BUF if PATH is a symlink.  */

+/* Scramble VAL with a single linear-congruential step.  The multiplier and
+   increment are taken from a rand() implementation. */
+static inline unsigned int hash_from_int(unsigned int val)
+{
+    const unsigned int multiplier = 1103515245;
+    const unsigned int increment = 12345;
+    return multiplier * val + increment;
+}
+
+/* Hash the LEN bytes at BUF.  Whole 4-byte groups are folded in first, then
+   the remaining 0-3 bytes one at a time.  Returns 0 for LEN == 0. */
+static inline unsigned int hash_from_mem(const char *buf, int len)
+{
+    unsigned int val = 0;
+    int i = 0, len4 = len & ~3;
+    for (; i<len4; i+=4)
+    {
+	/* BUF is an arbitrary char pointer: copy the word out rather than
+	   dereferencing it as (int *), which would be a potentially
+	   misaligned access through an incompatible type. */
+	int word;
+	memcpy(&word, buf+i, sizeof word);
+	val = hash_from_int(val ^ word);
+    }
+    for (; i<len; i++)
+        val = hash_from_int(val ^ buf[i]);
+    return val;
+}
+
+/* Hash the NUL-terminated string STR; the terminator itself is not hashed. */
+static inline unsigned int hash_from_str(const char *str)
+{
+    int len = strlen(str);
+    return hash_from_mem(str, len);
+}
+
+#define FILE_CACHE_SIZE 1000 /* number of buckets in file_cache[] */
+#define FILE_CACHE_BUCKET 10 /* entry slots per bucket */
+typedef struct { /* one bucket of the path-keyed file cache */
+    file_cache_t *f[FILE_CACHE_BUCKET]; /* entries, filled from index 0; NULL marks the first unused slot */
+    int i; /* slot to overwrite next once the bucket is full (round robin) */
+} file_cache_hash_t;
+static file_cache_hash_t file_cache[FILE_CACHE_SIZE]; /* indexed by hash_from_str (path) % FILE_CACHE_SIZE */
+
+/* Record what is known about PATH in the file cache.
+
+   PATH         key of the entry.
+   FNOI         file information stored with the entry; file_cache_lookup
+		 compares it against fresh information to decide whether the
+		 entry is still current.
+   CONTENTS     symlink contents; copied, and only if SYMLINK_LEN > 0.
+   SYMLINK_LEN  length of CONTENTS, or <= 0 if there are none to cache.
+   EXEC_STATE   executable state to cache, or < 0 if unknown.
+
+   If a current entry for PATH exists, the supplied information is merged
+   into it, so symlink and exec information recorded by different callers
+   do not erase each other.  Otherwise a new entry is stored in place of a
+   stale entry for PATH, in the first free slot of the bucket, or - if the
+   bucket is full - in the next round-robin slot.  An unknown EXEC_STATE is
+   stored as -1, never as 0, because readers treat any value >= 0 as valid.
+   Does nothing if the cache is disabled or memory is exhausted. */
+void file_cache_update(const char *path,
+    FILE_CYGWIN_INFORMATION *fnoi, char *contents, int symlink_len,
+    int exec_state)
+{
+    if (!use_file_cache)
+	return;
+
+    /* Copy the symlink contents up front so that an allocation failure
+       leaves the cache untouched. */
+    char *link = NULL;
+    if (contents && symlink_len>0 && !(link = strdup(contents)))
+	return;
+
+    file_cache_t *s = file_cache_lookup(path, fnoi);
+    if (s)
+    {
+	/* Still current: add what the caller knows, keep the rest. */
+	if (exec_state>=0)
+	    s->exec_state = exec_state;
+	if (link)
+	{
+	    free(s->contents);
+	    s->contents = link;
+	    s->symlink_len = symlink_len;
+	}
+	memcpy(&s->fnoi, fnoi, sizeof s->fnoi);
+	return;
+    }
+
+    s = (file_cache_t*)calloc(1, sizeof *s);
+    char *key = strdup(path);
+    if (!s || !key)
+    {
+	free(s);
+	free(key);
+	free(link);
+	return;
+    }
+    unsigned int hash = hash_from_str(path);
+    file_cache_hash_t *c = &file_cache[hash%FILE_CACHE_SIZE];
+    s->path = key;
+    s->hash = hash;
+    s->exec_state = exec_state>=0 ? exec_state : -1;
+    s->contents = link;
+    s->symlink_len = link ? symlink_len : 0;
+    memcpy(&s->fnoi, fnoi, sizeof s->fnoi);
+
+    /* Stop at a (stale) entry for the same path or at the first free slot. */
+    int i;
+    for (i=0; i<FILE_CACHE_BUCKET && c->f[i]; i++)
+	if (c->f[i]->hash==hash && !strcmp(c->f[i]->path, path))
+	    break;
+    if (i==FILE_CACHE_BUCKET)
+    {
+	/* file not yet in cache and bucket full - do round robin */
+	i = c->i;
+	c->i = (c->i+1) % FILE_CACHE_BUCKET;
+    }
+    file_cache_t *old = c->f[i];
+    if (old)
+    {
+	free(old->path);
+	free(old->contents);
+	free(old);
+    }
+    c->f[i] = s;
+}
+
+/* Return the cache entry for PATH if it is still current, otherwise NULL.
+
+   An entry counts as current when its stored CreationTime, LastWriteTime,
+   ChangeTime, AllocationSize and FileAttributes all equal those in FNOI.
+   Only the first entry whose hash and path match is examined; if that one
+   is out of date the result is NULL.  Always NULL when the cache is
+   disabled. */
+file_cache_t *file_cache_lookup(const char *path,
+    FILE_CYGWIN_INFORMATION *fnoi)
+{
+    if (!use_file_cache)
+	return NULL;
+    unsigned int key = hash_from_str(path);
+    file_cache_t **slot = file_cache[key%FILE_CACHE_SIZE].f;
+    file_cache_t **end = slot + FILE_CACHE_BUCKET;
+    /* The first empty slot ends the scan. */
+    for (; slot<end && *slot; slot++)
+    {
+	file_cache_t *entry = *slot;
+	if (entry->hash==key && !strcmp(path, entry->path))
+	{
+	    /* It's the same file - check that it has not changed. */
+	    const FILE_CYGWIN_INFORMATION *cached = &entry->fnoi;
+	    bool current =
+		fnoi->CreationTime.QuadPart==cached->CreationTime.QuadPart &&
+		fnoi->LastWriteTime.QuadPart==cached->LastWriteTime.QuadPart &&
+		fnoi->ChangeTime.QuadPart==cached->ChangeTime.QuadPart &&
+		fnoi->AllocationSize.QuadPart==cached->AllocationSize.QuadPart &&
+		fnoi->FileAttributes==cached->FileAttributes;
+	    if (current)
+		return entry;
+	    return NULL;
+	}
+    }
+    return NULL;
+}
+
  int
  symlink_info::check (char *path, const suffix_info *suffixes, fs_info &fs,
  		     path_conv_handle &conv_hdl)
@@ -2386,7 +2477,7 @@ restart:
  	    }
  	  else
  	    {
-	      PFILE_NETWORK_OPEN_INFORMATION pfnoi = conv_hdl.fnoi ();
+	      PFILE_CYGWIN_INFORMATION pfnoi = conv_hdl.fnoi ();

  	      status = NtQueryInformationFile (h, &io, pfnoi, sizeof *pfnoi,
  					       FileNetworkOpenInformation);
@@ -2500,7 +2591,7 @@ restart:
  		    }
  		  else
  		    {
-		      PFILE_NETWORK_OPEN_INFORMATION pfnoi = conv_hdl.fnoi ();
+		      PFILE_CYGWIN_INFORMATION pfnoi = conv_hdl.fnoi ();

  		      fileattr = fdi_buf.fdi.FileAttributes;
  		      memcpy (pfnoi, &fdi_buf.fdi.CreationTime, sizeof *pfnoi);
@@ -2622,18 +2713,28 @@ restart:
  	       == FILE_ATTRIBUTE_SYSTEM)
  	{
  	  HANDLE sym_h;
-
-	  status = NtOpenFile (&sym_h, SYNCHRONIZE | GENERIC_READ, &attr, &io,
-			       FILE_SHARE_VALID_FLAGS,
-			       FILE_OPEN_FOR_BACKUP_INTENT
-			       | FILE_SYNCHRONOUS_IO_NONALERT);
-
-	  if (!NT_SUCCESS (status))
-	    res = 0;
+	  file_cache_t *s = NULL;
+	  if ((s = file_cache_lookup(path, conv_hdl.fnoi ())) &&
+	      (res = s->symlink_len)>0)
+	    {
+	      strcpy(contents, s->contents);
+	      pflags |= PATH_SYMLINK;
+	    }
  	  else
  	    {
-	      res = check_sysfile (sym_h);
-	      NtClose (sym_h);
+	      status = NtOpenFile (&sym_h, SYNCHRONIZE | GENERIC_READ, &attr,
+		                   &io, FILE_SHARE_VALID_FLAGS,
+			           FILE_OPEN_FOR_BACKUP_INTENT
+			           | FILE_SYNCHRONOUS_IO_NONALERT);
+
+	      if (!NT_SUCCESS (status))
+	        res = 0;
+	      else
+	        {
+	          res = check_sysfile (sym_h);
+		  file_cache_update(path, conv_hdl.fnoi (), contents, res, -1);
+	          NtClose (sym_h);
+	        }
  	    }
  	  if (res)
  	    break;
Index: path.h
===================================================================
RCS file: /cvs/src/src/winsup/cygwin/path.h,v
retrieving revision 1.152
diff -u -p -r1.152 path.h
--- path.h	7 Oct 2010 14:03:26 -0000	1.152
+++ path.h	13 Oct 2010 15:29:41 -0000
@@ -90,23 +90,26 @@ enum path_types
  };

  class symlink_info;
-struct _FILE_NETWORK_OPEN_INFORMATION;
+/* Same leading members as FILE_NETWORK_OPEN_INFORMATION, followed by ReparseTag
+   and FileId.  We don't want to pull in ntdll.h here, though. */
+typedef struct _FILE_CYGWIN_INFORMATION
+{
+  LARGE_INTEGER CreationTime;
+  LARGE_INTEGER LastAccessTime;
+  LARGE_INTEGER LastWriteTime;
+  LARGE_INTEGER ChangeTime;
+  LARGE_INTEGER AllocationSize;
+  LARGE_INTEGER EndOfFile;
+  ULONG FileAttributes;
+  ULONG ReparseTag; /* not a member of the anonymous struct this type replaces */
+  LARGE_INTEGER FileId; /* not a member of the anonymous struct this type replaces */
+} FILE_CYGWIN_INFORMATION, *PFILE_CYGWIN_INFORMATION;

  class path_conv_handle
  {
    HANDLE      hdl;
    union {
-    /* Identical to FILE_NETWORK_OPEN_INFORMATION.  We don't want to pull in
-       ntdll.h here, though. */
-    struct {
-      LARGE_INTEGER CreationTime;
-      LARGE_INTEGER LastAccessTime;
-      LARGE_INTEGER LastWriteTime;
-      LARGE_INTEGER ChangeTime;
-      LARGE_INTEGER AllocationSize;
-      LARGE_INTEGER EndOfFile;
-      ULONG FileAttributes;
-    } _fnoi;
+    FILE_CYGWIN_INFORMATION _fnoi;
      /* For NFS. */
      fattr3 _fattr3;
    } attribs;
@@ -127,12 +130,26 @@ public:
        hdl = NULL;
    }
    inline HANDLE handle () const { return hdl; }
-  inline struct _FILE_NETWORK_OPEN_INFORMATION *fnoi ()
-  { return (struct _FILE_NETWORK_OPEN_INFORMATION *) &attribs._fnoi; }
+  inline FILE_CYGWIN_INFORMATION *fnoi ()
+  { return (FILE_CYGWIN_INFORMATION *) &attribs._fnoi; }
    inline struct fattr3 *nfsattr ()
    { return (struct fattr3 *) &attribs._fattr3; }
  };

+typedef struct { /* one cached file, keyed by path */
+    FILE_CYGWIN_INFORMATION fnoi; /* file information at caching time; file_cache_lookup compares against it */
+    char *path; /* heap copy of the key path */
+    char *contents; /* heap copy of the symlink contents, or NULL */
+    int symlink_len; /* symlink length given to file_cache_update; contents is only used if > 0 */
+    int exec_state; /* exec state given to file_cache_update; readers treat >= 0 as known */
+    unsigned int hash; /* hash_from_str (path) */
+} file_cache_t;
+
+void file_cache_update(const char *path,
+    FILE_CYGWIN_INFORMATION *fnoi, char *contents, int symlink_len,
+    int exec_state);
+file_cache_t *file_cache_lookup(const char *path, FILE_CYGWIN_INFORMATION *fnoi);
+
  class path_conv
  {
    DWORD fileattr;
@@ -320,7 +337,7 @@ class path_conv
    bool is_binary ();

    HANDLE handle () const { return conv_handle.handle (); }
-  struct _FILE_NETWORK_OPEN_INFORMATION *fnoi () { return conv_handle.fnoi (); }
+  FILE_CYGWIN_INFORMATION *fnoi () { return conv_handle.fnoi (); }
    struct fattr3 *nfsattr () { return conv_handle.nfsattr (); }
    void reset_conv_handle () { conv_handle.set (NULL); }
    void close_conv_handle () { conv_handle.close (); }



More information about the Cygwin-developers mailing list