[PATCH] Move hardlink handling out of dwz function
Tom de Vries
tdevries@suse.de
Tue Mar 23 15:58:12 GMT 2021
Hi,
Currently, hardlink handling is done in the dwz function on a per-file basis,
with the analysis limited in scope to the previously processed files.
Move hardlink handling out of dwz into dedicated functions detect_hardlinks
and update_hardlinks, both called from dwz_files_1.
The detect_hardlinks function is called before any file is processed;
update_hardlinks is called after all files have been processed.
This allows parallelization of the processing of the files.
Any comments?
Thanks,
- Tom
Move hardlink handling out of dwz function
2021-03-23 Tom de Vries <tdevries@suse.de>
* dwz.c (struct file_result): Add comment about res == -3 as
uninitialized. Add hardlink_to field.
(detect_hardlinks, update_hardlinks): New functions, factored out of ...
(dwz): ... here. Drop resa and files parameters.
(dwz_with_low_mem): Drop resa and files parameters.
(dwz_one_file): Update call to dwz_with_low_mem.
(dwz_files_1): Update calls to dwz_with_low_mem and dwz. Add calls to
detect_hardlinks and update_hardlinks.
---
dwz.c | 219 ++++++++++++++++++++++++++++++++++++++++++------------------------
1 file changed, 141 insertions(+), 78 deletions(-)
diff --git a/dwz.c b/dwz.c
index 92f8afa..245e540 100644
--- a/dwz.c
+++ b/dwz.c
@@ -15250,7 +15250,8 @@ remove_empty_pus (void)
/* Helper structure for hardlink discovery. */
struct file_result
{
- /* -2: Already processed under different name.
+ /* -3: Uninitialized.
+ -2: Already processed under different name.
-1: Ignore.
0: Processed, changed.
1: Processed, unchanged. */
@@ -15258,6 +15259,7 @@ struct file_result
dev_t dev;
ino_t ino;
nlink_t nlink;
+ size_t hardlink_to;
unsigned int die_count;
};
@@ -15266,14 +15268,16 @@ struct file_result
the result will be written into a temporary file that is renamed
over FILE. */
static int
-dwz (const char *file, const char *outfile, struct file_result *res,
- struct file_result *resa, char **files)
+dwz (const char *file, const char *outfile, struct file_result *res)
{
DSO *dso;
int ret = 0, fd;
unsigned int i;
struct stat st;
+ if (res->res == -1)
+ return 1;
+
res->res = -1;
fd = open (file, O_RDONLY);
if (fd < 0)
@@ -15289,64 +15293,6 @@ dwz (const char *file, const char *outfile, struct file_result *res,
}
res->res = 1;
- res->dev = st.st_dev;
- res->ino = st.st_ino;
- res->nlink = st.st_nlink;
- /* Hardlink handling if requested. */
- if (resa != NULL)
- {
- size_t n;
- for (n = 0; &resa[n] != res; n++)
- if (resa[n].res >= 0
- && resa[n].nlink > 1
- && resa[n].dev == st.st_dev
- && resa[n].ino == st.st_ino)
- break;
- if (&resa[n] != res)
- {
- /* If a hardlink to this has been processed before
- and we didn't change it, just assume the same
- state. */
- if (resa[n].res == 1)
- {
- if (tracing)
- fprintf (stderr, "Skipping hardlink %s to unchanged file\n",
- file);
- close (fd);
- res->res = -2;
- return 0;
- }
- /* If it changed, try to hardlink it again. */
- if (resa[n].res == 0)
- {
- size_t len = strlen (file);
- char *filename = alloca (len + sizeof (".#dwz#.XXXXXX"));
- int fd2;
- if (tracing)
- fprintf (stderr, "Updating hardlink %s to changed file\n",
- file);
- memcpy (filename, file, len);
- memcpy (filename + len, ".#dwz#.XXXXXX",
- sizeof (".#dwz#.XXXXXX"));
- fd2 = mkstemp (filename);
- if (fd2 >= 0)
- {
- close (fd2);
- unlink (filename);
- if (link (files[n], filename) == 0)
- {
- if (rename (filename, file) == 0)
- {
- close (fd);
- res->res = -2;
- return 0;
- }
- unlink (filename);
- }
- }
- }
- }
- }
if (tracing)
{
@@ -16260,8 +16206,7 @@ make_temp_file (const char *name)
is hit. */
static int
dwz_with_low_mem (const char *file, const char *outfile,
- struct file_result *res, struct file_result *resa,
- char **files, bool *low_mem_p)
+ struct file_result *res, bool *low_mem_p)
{
int ret;
@@ -16270,7 +16215,7 @@ dwz_with_low_mem (const char *file, const char *outfile,
ret = (low_mem_die_limit == 0
? 2
- : dwz (file, outfile, res, resa, files));
+ : dwz (file, outfile, res));
if (ret == 2)
{
@@ -16278,7 +16223,7 @@ dwz_with_low_mem (const char *file, const char *outfile,
if (low_mem_p)
*low_mem_p = true;
- ret = dwz (file, outfile, res, resa, files);
+ ret = dwz (file, outfile, res);
}
return ret;
@@ -16296,7 +16241,117 @@ dwz_one_file (const char *file, const char *outfile)
res.die_count = 0;
- return dwz_with_low_mem (file, outfile, &res, NULL, NULL, NULL);
+ return dwz_with_low_mem (file, outfile, &res, NULL);
+}
+
+/* Detect which FILES are hardlinks, and mark those in RESA. */
+static bool
+detect_hardlinks (int nr_files, char *files[], struct file_result *resa)
+{
+ bool found = false;
+ int i;
+
+ /* Try to open all files. */
+ for (i = 0; i < nr_files; i++)
+ {
+ struct file_result *res = &resa[i];
+ int fd;
+ struct stat st;
+
+ const char *file = files[i];
+ res->res = -1;
+
+ fd = open (file, O_RDONLY);
+ if (fd < 0)
+ error (0, errno, "Failed to open input file %s", file);
+ else if (fstat (fd, &st) < 0)
+ error (0, errno, "Failed to stat input file %s", file);
+ else
+ {
+ res->res = 1;
+ res->dev = st.st_dev;
+ res->ino = st.st_ino;
+ res->nlink = st.st_nlink;
+ }
+
+ close (fd);
+ }
+
+ /* Detect hard links. */
+ for (i = 0; i < nr_files; i++)
+ {
+ struct file_result *res = &resa[i];
+ size_t n;
+ for (n = 0; &resa[n] != res; n++)
+ if (resa[n].res >= 0
+ && resa[n].nlink > 1
+ && resa[n].dev == res->dev
+ && resa[n].ino == res->ino)
+ break;
+ if (&resa[n] == res)
+ continue;
+ res->res = -2;
+ res->hardlink_to = n;
+ found = true;
+ }
+
+ return found;
+}
+
+/* Update the FILES marked as hardlink in RESA. */
+static void
+update_hardlinks (int nr_files, char *files[], struct file_result *resa)
+{
+ int i;
+
+ /* Update hardlinks. */
+ for (i = 0; i < nr_files; i++)
+ {
+ struct file_result *res = &resa[i];
+ const char *file = files[i];
+ size_t n;
+ if (res->res != -2)
+ continue;
+ n = res->hardlink_to;
+
+ /* If a hardlink to this has been processed before
+ and we didn't change it, just assume the same
+ state. */
+ if (resa[n].res == 1)
+ {
+ if (tracing)
+ fprintf (stderr, "Skipping hardlink %s to unchanged file\n",
+ file);
+ continue;
+ }
+
+ /* If it changed, try to hardlink it again. */
+ if (resa[n].res == 0)
+ {
+ size_t len = strlen (file);
+ char *filename = alloca (len + sizeof (".#dwz#.XXXXXX"));
+ int fd2;
+ if (tracing)
+ fprintf (stderr, "Updating hardlink %s to changed file\n",
+ file);
+ memcpy (filename, file, len);
+ memcpy (filename + len, ".#dwz#.XXXXXX",
+ sizeof (".#dwz#.XXXXXX"));
+ fd2 = mkstemp (filename);
+ if (fd2 >= 0)
+ {
+ close (fd2);
+ unlink (filename);
+ if (link (files[n], filename) == 0)
+ {
+ if (rename (filename, file) == 0)
+ ;
+ else
+ unlink (filename);
+ }
+ }
+ }
+ }
}
/* Dwarf-compress FILES. If HARDLINK, detect if some files are hardlinks and
@@ -16308,11 +16363,13 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
int ret = 0;
int i;
const char *file;
- bool hardlinks = false;
int successcount = 0;
for (i = 0; i < nr_files; ++i)
- resa[i].die_count = 0;
+ {
+ resa[i].die_count = 0;
+ resa[i].res = -3;
+ }
if (multifile)
{
@@ -16332,25 +16389,30 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
}
}
+ if (hardlink)
+ hardlink = detect_hardlinks (nr_files, files, resa);
+
for (i = 0; i < nr_files; i++)
{
int thisret;
file = files[i];
+ struct file_result *res = &resa[i];
+ if (res->res == -2)
+ /* Skip hard links. */
+ continue;
if (stats_p)
init_stats (file);
bool low_mem_p;
- thisret = dwz_with_low_mem (file, NULL, &resa[i],
- hardlinks ? resa : NULL, files, &low_mem_p);
+ thisret = dwz_with_low_mem (file, NULL, res, &low_mem_p);
if (thisret == 1)
ret = 1;
else if (!low_mem_p && resa[i].res >= 0)
successcount++;
- if (hardlink
- && resa[i].res >= 0
- && resa[i].nlink > 1)
- hardlinks = true;
}
+ if (hardlink)
+ update_hardlinks (nr_files, files, resa);
+
if (multifile == NULL)
return ret;
@@ -16386,14 +16448,15 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
init_stats (file);
multifile_mode = MULTIFILE_MODE_FI;
/* Don't process again files that couldn't
- be processed successfully. */
- if (resa[i].res == -1)
+ be processed successfully. Also skip hard links. */
+ if (resa[i].res == -1 || resa[i].res == -2)
continue;
for (cu = alt_first_cu; cu; cu = cu->cu_next)
alt_clear_dups (cu->cu_die);
- ret |= dwz (file, NULL, &resa[i],
- hardlinks ? resa : NULL, files);
+ ret |= dwz (file, NULL, &resa[i]);
}
+ if (hardlink)
+ update_hardlinks (nr_files, files, resa);
elf_end (dso->elf);
close (multi_fd);
free (dso);
More information about the Dwz
mailing list