[PATCH] Move hardlink handling out of dwz function

Tom de Vries tdevries@suse.de
Tue Mar 23 15:58:12 GMT 2021


Hi,

Currently, hardlink handling is done in the dwz function on a per-file basis,
with the analysis limited to the files that have already been processed.

Move hardlink handling out of dwz into dedicated functions detect_hardlinks
and update_hardlinks, both called from dwz_files_1.

detect_hardlinks is called before any file is processed, and
update_hardlinks is called after all files have been processed.

Hardlink handling then no longer depends on the order in which the files are
processed, which allows parallelization of the processing of the files.

Any comments?

Thanks,
- Tom

Move hardlink handling out of dwz function

2021-03-23  Tom de Vries  <tdevries@suse.de>

	* dwz.c (struct file_result): Add comment about res == -3 as
	uninitialized.  Add hardlink_to field.
	(detect_hardlinks, update_hardlinks): New functions, factored out of ...
	(dwz): ... here.  Drop resa and files parameters.
	(dwz_with_low_mem): Drop resa and files parameters.
	(dwz_one_file): Update call to dwz_with_low_mem.
	(dwz_files_1): Update calls to dwz_with_low_mem and dwz.  Add calls to
	detect_hardlinks and update_hardlinks.

---
 dwz.c | 219 ++++++++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 141 insertions(+), 78 deletions(-)

diff --git a/dwz.c b/dwz.c
index 92f8afa..245e540 100644
--- a/dwz.c
+++ b/dwz.c
@@ -15250,7 +15250,8 @@ remove_empty_pus (void)
 /* Helper structure for hardlink discovery.  */
 struct file_result
 {
-  /* -2: Already processed under different name.
+  /* -3: Uninitialized.
+     -2: Already processed under different name.
      -1: Ignore.
       0: Processed, changed.
       1: Processed, unchanged.  */
@@ -15258,6 +15259,7 @@ struct file_result
   dev_t dev;
   ino_t ino;
   nlink_t nlink;
+  size_t hardlink_to;
   unsigned int die_count;
 };
 
@@ -15266,14 +15268,16 @@ struct file_result
    the result will be written into a temporary file that is renamed
    over FILE.  */
 static int
-dwz (const char *file, const char *outfile, struct file_result *res,
-     struct file_result *resa, char **files)
+dwz (const char *file, const char *outfile, struct file_result *res)
 {
   DSO *dso;
   int ret = 0, fd;
   unsigned int i;
   struct stat st;
 
+  if (res->res == -1)
+    return 1;
+
   res->res = -1;
   fd = open (file, O_RDONLY);
   if (fd < 0)
@@ -15289,64 +15293,6 @@ dwz (const char *file, const char *outfile, struct file_result *res,
     }
 
   res->res = 1;
-  res->dev = st.st_dev;
-  res->ino = st.st_ino;
-  res->nlink = st.st_nlink;
-  /* Hardlink handling if requested.  */
-  if (resa != NULL)
-    {
-      size_t n;
-      for (n = 0; &resa[n] != res; n++)
-	if (resa[n].res >= 0
-	    && resa[n].nlink > 1
-	    && resa[n].dev == st.st_dev
-	    && resa[n].ino == st.st_ino)
-	  break;
-      if (&resa[n] != res)
-	{
-	  /* If a hardlink to this has been processed before
-	     and we didn't change it, just assume the same
-	     state.  */
-	  if (resa[n].res == 1)
-	    {
-	      if (tracing)
-		fprintf (stderr, "Skipping hardlink %s to unchanged file\n",
-			 file);
-	      close (fd);
-	      res->res = -2;
-	      return 0;
-	    }
-	  /* If it changed, try to hardlink it again.  */
-	  if (resa[n].res == 0)
-	    {
-	      size_t len = strlen (file);
-	      char *filename = alloca (len + sizeof (".#dwz#.XXXXXX"));
-	      int fd2;
-	      if (tracing)
-		fprintf (stderr, "Updating hardlink %s to changed file\n",
-			 file);
-	      memcpy (filename, file, len);
-	      memcpy (filename + len, ".#dwz#.XXXXXX",
-		      sizeof (".#dwz#.XXXXXX"));
-	      fd2 = mkstemp (filename);
-	      if (fd2 >= 0)
-		{
-		  close (fd2);
-		  unlink (filename);
-		  if (link (files[n], filename) == 0)
-		    {
-		      if (rename (filename, file) == 0)
-			{
-			  close (fd);
-			  res->res = -2;
-			  return 0;
-			}
-		      unlink (filename);
-		    }
-		}
-	    }
-	}
-    }
 
   if (tracing)
     {
@@ -16260,8 +16206,7 @@ make_temp_file (const char *name)
    is hit.  */
 static int
 dwz_with_low_mem (const char *file, const char *outfile,
-		  struct file_result *res, struct file_result *resa,
-		  char **files, bool *low_mem_p)
+		  struct file_result *res, bool *low_mem_p)
 {
   int ret;
 
@@ -16270,7 +16215,7 @@ dwz_with_low_mem (const char *file, const char *outfile,
 
   ret = (low_mem_die_limit == 0
 	 ? 2
-	 : dwz (file, outfile, res, resa, files));
+	 : dwz (file, outfile, res));
 
   if (ret == 2)
     {
@@ -16278,7 +16223,7 @@ dwz_with_low_mem (const char *file, const char *outfile,
       if (low_mem_p)
 	*low_mem_p = true;
 
-      ret = dwz (file, outfile, res, resa, files);
+      ret = dwz (file, outfile, res);
     }
 
   return ret;
@@ -16296,7 +16241,117 @@ dwz_one_file (const char *file, const char *outfile)
 
   res.die_count = 0;
 
-  return dwz_with_low_mem (file, outfile, &res, NULL, NULL, NULL);
+  return dwz_with_low_mem (file, outfile, &res, NULL);
+}
+
+/* Detect which FILES are hardlinks, and mark those in RESA.  */
+static bool
+detect_hardlinks (int nr_files, char *files[], struct file_result *resa)
+{
+  bool found = false;
+  int i;
+
+  /* Try to open all files.  */
+  for (i = 0; i < nr_files; i++)
+    {
+      struct file_result *res = &resa[i];
+      int fd;
+      struct stat st;
+
+      const char *file = files[i];
+      res->res = -1;
+
+      fd = open (file, O_RDONLY);
+      if (fd < 0)
+	error (0, errno, "Failed to open input file %s", file);
+      else if (fstat (fd, &st) < 0)
+	error (0, errno, "Failed to stat input file %s", file);
+      else
+	{
+	  res->res = 1;
+	  res->dev = st.st_dev;
+	  res->ino = st.st_ino;
+	  res->nlink = st.st_nlink;
+	}
+
+      close (fd);
+    }
+
+  /* Detect hard links.  */
+  for (i = 0; i < nr_files; i++)
+    {
+      struct file_result *res = &resa[i];
+      size_t n;
+      for (n = 0; &resa[n] != res; n++)
+	if (resa[n].res >= 0
+	    && resa[n].nlink > 1
+	    && resa[n].dev == res->dev
+	    && resa[n].ino == res->ino)
+	  break;
+      if (&resa[n] == res)
+	continue;
+      res->res = -2;
+      res->hardlink_to = n;
+      found = true;
+    }
+
+  return found;
+}
+
+/* Update the FILES marked as hardlink in RESA.  */
+static void
+update_hardlinks (int nr_files, char *files[], struct file_result *resa)
+{
+  int i;
+
+  /* Update hardlinks.  */
+  for (i = 0; i < nr_files; i++)
+    {
+      struct file_result *res = &resa[i];
+      const char *file = files[i];
+      size_t n;
+      if (res->res != -2)
+	continue;
+      n = res->hardlink_to;
+
+      /* If a hardlink to this has been processed before
+	 and we didn't change it, just assume the same
+	 state.  */
+      if (resa[n].res == 1)
+	{
+	  if (tracing)
+	    fprintf (stderr, "Skipping hardlink %s to unchanged file\n",
+		     file);
+	  continue;
+	}
+
+      /* If it changed, try to hardlink it again.  */
+      if (resa[n].res == 0)
+	{
+	  size_t len = strlen (file);
+	  char *filename = alloca (len + sizeof (".#dwz#.XXXXXX"));
+	  int fd2;
+	  if (tracing)
+	    fprintf (stderr, "Updating hardlink %s to changed file\n",
+		     file);
+	  memcpy (filename, file, len);
+	  memcpy (filename + len, ".#dwz#.XXXXXX",
+		  sizeof (".#dwz#.XXXXXX"));
+	  fd2 = mkstemp (filename);
+	  if (fd2 >= 0)
+	    {
+	      close (fd2);
+	      unlink (filename);
+	      if (link (files[n], filename) == 0)
+		{
+		  if (rename (filename, file) == 0)
+		    ;
+		  else
+		    unlink (filename);
+		}
+	    }
+	}
+    }
 }
 
 /* Dwarf-compress FILES.  If HARDLINK, detect if some files are hardlinks and
@@ -16308,11 +16363,13 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
   int ret = 0;
   int i;
   const char *file;
-  bool hardlinks = false;
   int successcount = 0;
 
   for (i = 0; i < nr_files; ++i)
-    resa[i].die_count = 0;
+    {
+      resa[i].die_count = 0;
+      resa[i].res = -3;
+    }
 
   if (multifile)
     {
@@ -16332,25 +16389,30 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
 	}
     }
 
+  if (hardlink)
+    hardlink = detect_hardlinks (nr_files, files, resa);
+
   for (i = 0; i < nr_files; i++)
     {
       int thisret;
       file = files[i];
+      struct file_result *res = &resa[i];
+      if (res->res == -2)
+	/* Skip hard links.  */
+	continue;
       if (stats_p)
 	init_stats (file);
       bool low_mem_p;
-      thisret = dwz_with_low_mem (file, NULL, &resa[i],
-				  hardlinks ? resa : NULL, files, &low_mem_p);
+      thisret = dwz_with_low_mem (file, NULL, res, &low_mem_p);
       if (thisret == 1)
 	ret = 1;
       else if (!low_mem_p && resa[i].res >= 0)
 	successcount++;
-      if (hardlink
-	  && resa[i].res >= 0
-	  && resa[i].nlink > 1)
-	hardlinks = true;
     }
 
+  if (hardlink)
+    update_hardlinks (nr_files, files, resa);
+
   if (multifile == NULL)
     return ret;
 
@@ -16386,14 +16448,15 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
 	    init_stats (file);
 	  multifile_mode = MULTIFILE_MODE_FI;
 	  /* Don't process again files that couldn't
-	     be processed successfully.  */
-	  if (resa[i].res == -1)
+	     be processed successfully.  Also skip hard links.  */
+	  if (resa[i].res == -1 || resa[i].res == -2)
 	    continue;
 	  for (cu = alt_first_cu; cu; cu = cu->cu_next)
 	    alt_clear_dups (cu->cu_die);
-	  ret |= dwz (file, NULL, &resa[i],
-		      hardlinks ? resa : NULL, files);
+	  ret |= dwz (file, NULL, &resa[i]);
 	}
+      if (hardlink)
+	update_hardlinks (nr_files, files, resa);
       elf_end (dso->elf);
       close (multi_fd);
       free (dso);


More information about the Dwz mailing list