This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] New feature proposal: pureglibc
- From: Renzo Davoli <renzo at cs dot unibo dot it>
- To: libc-alpha at sourceware dot org
- Date: Tue, 22 Aug 2017 16:49:17 +0200
- Subject: [PATCH] New feature proposal: pureglibc
- Authentication-results: sourceware.org; auth=none
glibc, like many standard C library implementations, can be seen as the union of two libraries:
* the actual C library which provides high level services, using the system calls
of the underlying kernel,
* the interface library to the kernel, which dispatches the system call
to the kernel and retrieves the results.
A pure C library is a library providing only the former item.
It is useful to have a pure C library when a process needs to use self-virtualization.
In view-os, for example, using self-virtualization I can run modules like umfuseext2
further virtualizing the system calls generated by the ext2fs and glibc libraries.
In this way the file system image can be something generated on-the-fly instead of a
file, as required by the ext2fs design.
This is just an example. Many other applications can be found using coding creativity.
I am currently using a tricky and partial implementation of purelibc as an
overlay shared library redefining some glibc functions.
This is clearly a workaround.
https://sourceforge.net/p/view-os/code/HEAD/tree/trunk/purelibc/
http://wiki.v2.cs.unibo.it/wiki/index.php?title=PureLibc
The patch here attached is a draft implementation of a "pureglibc":
a global variable makes it possible to divert all the system calls generated
by glibc to a process-provided function. When this variable is non-NULL, glibc
becomes a 'pure' C library: system calls can be traced and virtualized.
The patch currently implements pureglibc for the x86_64 architecture only.
I am posting this as a proposal for a new feature, asking for comments and
for alternative (effective) ways to implement the same feature.
Thank you.
renzo
Here below: two "hello world" examples, one for tracing, one for self-virtualization, and the patch.
Example #1:
System Call tracing:
------
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdarg.h>
#include <string.h>
#include <sys/syscall.h>
#include <dlfcn.h>
/* Address of the C library's "pure_syscall" hook variable, as looked up with
 * dlsym() in main(); NULL when the symbol is not found. Storing a function
 * through this pointer installs that function as the hook. */
long (**pure_ptr)(long number, long nr, ...);
/*
 * Write one trace line for a system call to file descriptor 1.
 *
 *   number  system call number
 *   nr      number of entries of args[] to print
 *   args    the system call arguments
 *
 * The line has the form "syscall <number> - <arg0> <arg1> ...\n", each
 * argument being printed as 16 zero-padded hexadecimal digits.  The text is
 * emitted with a direct write system call rather than through stdio.
 */
void printsyscall(long number, long nr, long *args) {
	char buf[256];
	/* One byte is always kept in reserve for the trailing newline. */
	const size_t cap = sizeof buf - 1;
	size_t len;
	long i;
	int n;

	/* number and args[] are long: use the 'l' length modifier so the
	 * full value is printed instead of a truncated int. */
	n = snprintf(buf, cap, "syscall %ld -", number);
	len = n < 0 ? 0 : (size_t) n;
	if (len >= cap)
		len = cap - 1;
	for (i = 0; i < nr && len < cap - 1; i++) {
		n = snprintf(buf + len, cap - len, " %016lx", (unsigned long) args[i]);
		if (n < 0)
			break;
		len += (size_t) n;
		/* snprintf reports the untruncated length; clamp to what fits. */
		if (len >= cap)
			len = cap - 1;
	}
	buf[len++] = '\n';
	syscall(__NR_write, 1, buf, len);
}
/* Number of argument slots forwarded to syscall(). */
#define NSYSARG 6

/*
 * Tracing hook: logs the call with printsyscall() and then performs it.
 *
 *   number  system call number
 *   nr      how many variadic arguments follow (each read as a long)
 *
 * Argument slots beyond nr are passed as 0.  Returns whatever syscall()
 * returns.
 */
long mysyscall(long number, long nr, ...) {
	long arg[NSYSARG] = { 0 };
	va_list ap;
	int slot;

	va_start(ap, nr);
	/* Only the first nr slots come from the caller; the rest stay zero. */
	for (slot = 0; slot < NSYSARG && slot < nr; slot++) {
		arg[slot] = va_arg(ap, long);
	}
	va_end(ap);

	printsyscall(number, nr, arg);
	return syscall(number, arg[0], arg[1], arg[2], arg[3], arg[4], arg[5]);
}
/*
 * Look up the "pure_syscall" hook variable; when it exists, point it at
 * mysyscall.  Then print a greeting.
 */
int main() {
	void *hook_slot = dlsym(RTLD_DEFAULT, "pure_syscall");

	pure_ptr = hook_slot;
	if (hook_slot != NULL) {
		/* The announcement is printed before the hook is stored. */
		printf("PURE enabled glibc found\n");
		*pure_ptr = mysyscall;
	}

	printf("hello world\n");
	return 0;
}
-------
Example #2:
Virtualization (when this shared object is preloaded, "open" or "openat"
syscalls on /etc/passwd open /tmp/passwd instead).
-------
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdarg.h>
#include <string.h>
#include <sys/syscall.h>
#include <dlfcn.h>
/* Address of the C library's "pure_syscall" hook variable, as looked up with
 * dlsym() in init(); NULL when the symbol is not found. Storing a function
 * through this pointer installs that function as the hook. */
long (**pure_ptr)(long number, long nr, ...);
/* Number of argument slots forwarded to syscall(). */
#define NSYSARG 6

/*
 * Virtualizing hook: redirects opens of "/etc/passwd" to "/tmp/passwd" and
 * passes every other call through unchanged.
 *
 *   number  system call number
 *   nr      how many variadic arguments follow (each read as a long)
 *
 * Argument slots beyond nr are passed as 0.  Returns whatever syscall()
 * returns.
 */
long mysyscall(long number, long nr, ...) {
	long arg[NSYSARG];
	va_list ap;
	int i;

	va_start(ap, nr);
	for (i = 0; i < NSYSARG; i++)
		arg[i] = i < nr ? va_arg(ap, long) : 0;
	va_end(ap);

	/* A NULL path is forwarded untouched so that the real system call
	 * reports the error, instead of this hook crashing in strcmp(). */
#ifdef __NR_open
	/* Not every architecture defines __NR_open. */
	if (number == __NR_open && arg[0] != 0
	    && strcmp((char *) arg[0], "/etc/passwd") == 0)
		arg[0] = (long) "/tmp/passwd";
#endif
	if (number == __NR_openat && arg[1] != 0
	    && strcmp((char *) arg[1], "/etc/passwd") == 0)
		arg[1] = (long) "/tmp/passwd";

	return syscall(number, arg[0], arg[1], arg[2], arg[3], arg[4], arg[5]);
}
/*
 * Constructor: look up the "pure_syscall" hook variable and, when it exists,
 * point it at mysyscall.  Does nothing further when the symbol is not found.
 */
__attribute__((constructor))
void init(void) {
	pure_ptr = dlsym(RTLD_DEFAULT, "pure_syscall");
	if (pure_ptr == NULL)
		return;

	/* The announcement is printed before the hook is stored. */
	printf("PURE enabled glibc found\n");
	*pure_ptr = mysyscall;
}
--------
Here is the patch:
2017-08-21 Renzo Davoli <renzo@cs.unibo.it>
diff --git a/misc/Versions b/misc/Versions
index bfbda505e4..ddf3a2f887 100644
--- a/misc/Versions
+++ b/misc/Versions
@@ -156,7 +156,7 @@ libc {
gnu_dev_major; gnu_dev_minor; gnu_dev_makedev;
}
GLIBC_2.26 {
- preadv2; preadv64v2; pwritev2; pwritev64v2;
+ preadv2; preadv64v2; pwritev2; pwritev64v2; __pure_syscall; pure_syscall;
}
GLIBC_PRIVATE {
__madvise;
diff --git a/misc/init-misc.c b/misc/init-misc.c
index 02f2b0fa12..2e7bf13e69 100644
--- a/misc/init-misc.c
+++ b/misc/init-misc.c
@@ -24,6 +24,8 @@ char *__progname = (char *) "";
weak_alias (__progname_full, program_invocation_name)
weak_alias (__progname, program_invocation_short_name)
+long int (*__pure_syscall)(long name, long nr, ...) = NULL;
+weak_alias (__pure_syscall, pure_syscall)
void
__init_misc (int argc, char **argv, char **envp)
diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
index 880e496880..258f7b9e51 100644
--- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
@@ -193,7 +193,7 @@
# undef INLINE_SYSCALL
# define INLINE_SYSCALL(name, nr, args...) \
({ \
- unsigned long int resultvar = INTERNAL_SYSCALL (name, , nr, args); \
+ unsigned long int resultvar = INTERNAL_SYSCALL (name, , nr, ##args); \
if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (resultvar, ))) \
{ \
__set_errno (INTERNAL_SYSCALL_ERRNO (resultvar, )); \
@@ -221,6 +221,8 @@
/* Registers clobbered by syscall. */
# define REGISTERS_CLOBBERED_BY_SYSCALL "cc", "r11", "cx"
+extern long int (*__pure_syscall)(long name, long nr, ...);
+
# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
({ \
unsigned long int resultvar; \
@@ -233,7 +235,13 @@
(long int) resultvar; })
# undef INTERNAL_SYSCALL
# define INTERNAL_SYSCALL(name, err, nr, args...) \
- INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args)
+ ({ \
+ long int resultvar; \
+ if (__glibc_unlikely (__pure_syscall != NULL)) \
+ resultvar = __pure_syscall(__NR_##name, nr, ##args); \
+ else \
+ resultvar = INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args); \
+ resultvar; })
# define INTERNAL_SYSCALL_NCS_TYPES(name, err, nr, args...) \
({ \