[RFC] Allow parallel multifile with -p -e
Tom de Vries
tdevries@suse.de
Fri Mar 26 16:40:51 GMT 2021
Hi,
Currently, parallel dwz is disabled when multifile is used:
...
$ dwz -m 5 3 1 2 4 -j 4
...
Enable parallel operation when the multifile characteristics (pointer size and
endianness) are specified using -p and -e:
...
$ dwz -m 5 3 1 2 4 -j 4 -p 8 -e l
...
This works around the child processes having to communicate back to the parent
the found pointer size and endianness, and doing the -p auto and -e auto
consistency checking.
So let's compare the output:
...
$ mkdir j1 j4
$ dwz -m 5 3 1 2 4 -j 1 -p 8 -e l
$ cp 1 2 3 4 5 j1
$ dwz -m 5 3 1 2 4 -j 4 -p 8 -e l
$ cp 1 2 3 4 5 j4
...
This gives us reproducible compression:
...
$ ls -la j1/*
-rwxr-xr-x 1 vries users 11432 Mar 26 17:16 j1/1
-rwxr-xr-x 1 vries users 11432 Mar 26 17:16 j1/2
-rwxr-xr-x 1 vries users 807376 Mar 26 17:16 j1/3
-rwxr-xr-x 1 vries users 807376 Mar 26 17:16 j1/4
-rw-r--r-- 1 vries users 64543 Mar 26 17:16 j1/5
$ ls -la j4/*
-rwxr-xr-x 1 vries users 11432 Mar 26 17:16 j4/1
-rwxr-xr-x 1 vries users 11432 Mar 26 17:16 j4/2
-rwxr-xr-x 1 vries users 807376 Mar 26 17:16 j4/3
-rwxr-xr-x 1 vries users 807376 Mar 26 17:16 j4/4
-rw-r--r-- 1 vries users 64543 Mar 26 17:16 j4/5
...
But it doesn't give reproducible results:
...
$ md5sum j1/*
e6e655f7b5d1078672c8b0da99ab8c41 j1/1
e6e655f7b5d1078672c8b0da99ab8c41 j1/2
d833aa3ad6ad35597e1b7d0635b401cf j1/3
d833aa3ad6ad35597e1b7d0635b401cf j1/4
d5282aa9d065f1d00fd7a46c54ebde8d j1/5
$ md5sum j4/*
de1645ce60bba6f345b2334825deb01f j4/1
de1645ce60bba6f345b2334825deb01f j4/2
ac2f16c50cf3d31be1f42f35ced4a091 j4/3
ac2f16c50cf3d31be1f42f35ced4a091 j4/4
7fc3cd2c2514c8bf1f23348a27025b8d j4/5
...
The contributions to the temporary multifile sections happen in random
order, so the multifile layout will be different, and consequently the
files referring to the multifile will be different as well.
Any comments?
Thanks,
- Tom
Allow parallel multifile with -p -e
2021-03-26 Tom de Vries <tdevries@suse.de>
PR dwz/25951
* args.c (parse_args): Allow max_forks > 1 in combination with
multifile, provided -p and -e are used.
* dwz.c (write_multifile): Lock multi_info_fd before use. Refresh
multi_*_off.
(encode_child_exit_status, decode_child_exit_status): Handle
skip_multifile.
(dwz_files_1): Allow max_forks > 1 in combination with
multifile.
---
args.c | 4 ++++
dwz.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
2 files changed, 72 insertions(+), 7 deletions(-)
diff --git a/args.c b/args.c
index d44e632..67301e9 100644
--- a/args.c
+++ b/args.c
@@ -708,4 +708,8 @@ parse_args (int argc, char *argv[], bool *hardlink, const char **outfile)
threads with only 2 cores. */
max_forks = nprocs / 2;
}
+
+ if (max_forks > 1 && multifile
+ && multifile_force_ptr_size == 0 && multifile_force_endian == 0)
+ max_forks = 0;
}
diff --git a/dwz.c b/dwz.c
index 1be4f2a..2e85861 100644
--- a/dwz.c
+++ b/dwz.c
@@ -15068,6 +15068,17 @@ write_multifile (DSO *dso, struct file_result *res)
unsigned int i;
int ret = 0;
+ if (max_forks > 1)
+ {
+ lockf (multi_info_fd, F_LOCK, 0);
+
+ multi_info_off = lseek (multi_info_fd, 0L, SEEK_END);
+ multi_abbrev_off = lseek (multi_abbrev_fd, 0L, SEEK_END);
+ multi_line_off = lseek (multi_line_fd, 0L, SEEK_END);
+ multi_str_off = lseek (multi_str_fd, 0L, SEEK_END);
+ multi_macro_off = lseek (multi_macro_fd, 0L, SEEK_END);
+ }
+
if (unlikely (progress_p))
{
report_progress ();
@@ -15091,6 +15102,8 @@ write_multifile (DSO *dso, struct file_result *res)
error (0, 0, "Multi-file optimization not allowed for different"
" pointer sizes");
multifile = NULL;
+ if (max_forks > 1)
+ lockf (multi_info_fd, F_ULOCK, 0);
return 1;
}
else
@@ -15229,6 +15242,8 @@ write_multifile (DSO *dso, struct file_result *res)
debug_sections[i].new_size = saved_new_size[i];
saved_new_data[i] = NULL;
}
+ if (max_forks > 1)
+ lockf (multi_info_fd, F_ULOCK, 0);
return ret;
}
@@ -16410,12 +16425,13 @@ update_hardlinks (int nr_files, char *files[], struct file_result *resa)
static int
encode_child_exit_status (int thisret, struct file_result *res)
{
+ assert (thisret == 0 || thisret == 1);
if (thisret == 0 && res->low_mem_p)
thisret = 2;
- assert (thisret >= 0 && thisret <= 2);
- assert (res->res >= -3);
- thisret = thisret + ((res->res + 3) << 2);
- return thisret;
+ assert (res->res >= -3 && res->res <= 1);
+ return (thisret
+ + ((res->res + 3) << 2)
+ + ((res->skip_multifile ? 1 : 0) << 5));
}
/* Decode child process exit status. */
@@ -16425,14 +16441,21 @@ decode_child_exit_status (int state, struct file_result *res)
int ret;
if (!WIFEXITED (state))
error (1, 0, "Child dwz process got killed");
- ret = WEXITSTATUS (state) & 0x3;
+ int status = WEXITSTATUS (state);
+ ret = status & 0x3;
+ status >>= 2;
+
res->low_mem_p = false;
if (ret == 2)
{
ret = 0;
res->low_mem_p = true;
}
- res->res = (int)((WEXITSTATUS (state) & ~0x3) >> 2) - 3;
+
+ res->res = (int)(status & 0x7) - 3;
+ status >>= 3;
+
+ res->skip_multifile = (status & 0x1) ? true : false;
return ret;
}
@@ -16473,7 +16496,7 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
hardlink = detect_hardlinks (nr_files, files, resa);
int nr_forks = 0;
- if (max_forks > 1 && multifile == NULL)
+ if (max_forks > 1)
{
pid_t pids[nr_files];
for (i = 0; i < nr_files; i++)
@@ -16493,6 +16516,8 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
= decode_child_exit_status (state, res);
if (thisret == 1)
ret = 1;
+ else if (!res->low_mem_p && !res->skip_multifile && res->res >= 0)
+ successcount++;
nr_forks--;
int j;
for (j = 0; j < i; ++j)
@@ -16533,6 +16558,8 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
thisret = decode_child_exit_status (state, res);
if (thisret == 1)
ret = 1;
+ else if (!res->low_mem_p && !res->skip_multifile && res->res >= 0)
+ successcount++;
}
}
else
@@ -16567,6 +16594,14 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
return ret;
}
+ if (max_forks > 1)
+ {
+ multi_info_off = lseek (multi_info_fd, 0L, SEEK_END);
+ multi_abbrev_off = lseek (multi_abbrev_fd, 0L, SEEK_END);
+ multi_line_off = lseek (multi_line_fd, 0L, SEEK_END);
+ multi_str_off = lseek (multi_str_fd, 0L, SEEK_END);
+ multi_macro_off = lseek (multi_macro_fd, 0L, SEEK_END);
+ }
if (multi_info_off == 0 && multi_str_off == 0 && multi_macro_off == 0)
{
if (!quiet)
@@ -16574,6 +16609,32 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
return ret;
}
+ if (max_forks > 1)
+ {
+ for (i = 0; i < nr_files; i++)
+ {
+ struct file_result *res = &resa[i];
+ if (!res->low_mem_p && !res->skip_multifile && res->res >= 0)
+ {
+ int fd = open (files[i], O_RDONLY);
+ if (fd < 0)
+ return ret;
+ DSO *dso = fdopen_dso (fd, files[i]);
+ if (dso == NULL)
+ {
+ close (fd);
+ return ret;
+ }
+ assert (multi_ehdr.e_ident[0] == '\0');
+ multi_ehdr = dso->ehdr;
+ break;
+ }
+ }
+
+ multi_ptr_size = multifile_force_ptr_size;
+ multi_endian = multifile_force_endian;
+ }
+
unsigned int multifile_die_count = 0;
int multi_fd = optimize_multifile (&multifile_die_count);
DSO *dso;
More information about the Dwz
mailing list