This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
Re: [RFC][Patch 0/2]kprobe: kprobe-booster against 2.6.14-mm1 for i386
- From: Masami Hiramatsu <hiramatu at sdl dot hitachi dot co dot jp>
- To: "Frank Ch. Eigler" <fche at redhat dot com>, systemtap at sources dot redhat dot com
- Cc: Yumiko Sugita <sugita at sdl dot hitachi dot co dot jp>, Satoshi Oshima <soshima at redhat dot com>, Hideo Aoki <haoki at redhat dot com>
- Date: Mon, 28 Nov 2005 23:33:10 +0900
- Subject: Re: [RFC][Patch 0/2]kprobe: kprobe-booster against 2.6.14-mm1 for i386
- References: <43870DDB.8020306@sdl.hitachi.co.jp> <y0mzmnsinc8.fsf@tooth.toronto.redhat.com>
Hi, Frank
Frank Ch. Eigler wrote:
> Masami Hiramatsu wrote:
>
>>I publish a couple of patches of kprobe-booster in next mails.
>>[...]
>
> Can you describe what kinds of tests you ran on this exciting
> optimization, and their results?
I made a test program attached to this mail.
I checked correctness of kprobe-booster (take2) by using
this program.
I copied booster functions (*) from arch/i386/kernel/kprobes.c
into this test program.
(*) can_boost, arch_copy_kprobe, and resume_execution.
This program makes some "1 byte opcode" maps indicating which
opcode will be boosted by kprobe-booster. In the opcode maps,
line number means top 4 bits of opcode, and column number
means bottom 4 bits of opcode.
Here is a example of results.
opcode2:21h, offset:1 byte(s)
0h 1h 2h 3h 4h 5h 6h 7h 8h 9h ah bh ch dh eh fh
0h 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -1
1h 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
2h 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
3h 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
4h 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
5h 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
6h 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
7h -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
8h 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
9h 1 1 1 1 1 1 1 1 1 1 -1 1 -1 1 1 1
ah 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
bh 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
ch -1 -1 1 1 1 1 -1 -1 1 1 1 1 -1 -1 -1 1
dh -1 -1 -1 -1 1 1 -1 1 -1 -1 -1 -1 -1 -1 -1 -1
eh -1 -1 -1 -1 1 1 1 1 -1 1 1 1 1 1 1 1
fh -1 -1 -1 -1 -1 1 -1 -1 1 1 1 1 1 1 -1 1
In the result, there is a description line as like
"opcode2:21h, offset:1 byte(s)"
This means "the 2nd byte of opcode is 0x21. After single
step execution, EIP moves 1 byte."
After that, you get an opcode map. In this map,
"1" means that the opcode can be boosted,
and "-1" means that the opcode is not boosted.
How would you think about it?
And would you have better idea for testing?
Best Regards,
--
Masami HIRAMATSU
2nd Research Dept.
Hitachi, Ltd., Systems Development Laboratory
E-mail: hiramatu@sdl.hitachi.co.jp
#include <stdio.h>
typedef unsigned char kprobe_opcode_t;
#define MAX_INSN_SIZE 16
#define TF_MASK 1
#define IF_MASK 2
struct kprobe {
struct {
kprobe_opcode_t insn[MAX_INSN_SIZE];
int boostable;
} ainsn;
kprobe_opcode_t opcode;
kprobe_opcode_t *addr;
};
struct pt_regs {
long eip;
long esp;
long eflags;
};
struct kprobe_ctlblk {
long kprobe_old_eflags;
};
void set_jmp_op(void *a, void *b){ }
#define __kprobes
// copy code from kprobe.c
/*
* returns non-zero if opcodes can be boosted.
*/
static inline int can_boost(kprobe_opcode_t opcode)
{
switch (opcode & 0xf0 ) {
case 0x70:
return 0; /* can't boost conditional jump */
case 0x90:
/* can't boost call and pushf */
return opcode != 0x9a && opcode != 0x9c;
case 0xc0:
/* can't boost undefined opcodes and soft-interruptions */
return (0xc1 < opcode && opcode < 0xc6) ||
(0xc7 < opcode && opcode < 0xcc) || opcode == 0xcf;
case 0xd0:
/* can boost AA* and XLAT */
return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
case 0xe0:
/* can boost in/out and (may be) jmps */
return (0xe3 < opcode && opcode != 0xe8);
case 0xf0:
/* clear and set flags can be boost */
return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
default:
/* currently, can't boost 2 bytes opcodes */
return opcode != 0x0f;
}
}
static void __kprobes resume_execution(struct kprobe *p,
struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{
unsigned long *tos = (unsigned long *)®s->esp;
unsigned long copy_eip = (unsigned long)&p->ainsn.insn;
unsigned long orig_eip = (unsigned long)p->addr;
regs->eflags &= ~TF_MASK;
switch (p->ainsn.insn[0]) {
case 0x9c: /* pushfl */
*tos &= ~(TF_MASK | IF_MASK);
*tos |= kcb->kprobe_old_eflags;
break;
case 0xc3: /* ret/lret */
case 0xcb:
case 0xc2:
case 0xca:
case 0xea: /* jmp absolute -- eip is correct */
/* eip is already adjusted, no more changes required */
p->ainsn.boostable = 1;
goto no_change;
case 0xe8: /* call relative - Fix return addr */
*tos = orig_eip + (*tos - copy_eip);
break;
case 0xff:
if ((p->ainsn.insn[1] & 0x30) == 0x10) {
/* call absolute, indirect */
/* Fix return addr; eip is correct
But this is not boostable */
*tos = orig_eip + (*tos - copy_eip);
goto no_change;
} else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */
((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */
/* eip is correct. And this is boostable */
p->ainsn.boostable = 1;
goto no_change;
}
default:
break;
}
if (p->ainsn.boostable == 0) {
if ( regs->eip > copy_eip &&
(regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) {
/* these instructions can be executed directly if it
jumps back to correct address. */
set_jmp_op((void *)regs->eip,
(void *)orig_eip + (regs->eip - copy_eip));
p->ainsn.boostable = 1;
} else {
p->ainsn.boostable = -1;
}
}
regs->eip = orig_eip + (regs->eip - copy_eip);
no_change:
return ;
}
void __kprobes arch_copy_kprobe(struct kprobe *p)
{
memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
p->opcode = *p->addr;
if (can_boost(p->opcode)) {
p->ainsn.boostable = 0;
} else {
p->ainsn.boostable = -1;
}
}
int validate(int opcode, int opcode2, long offset)
{
struct kprobe kp;
struct pt_regs regs;
kprobe_opcode_t dummy_code[MAX_INSN_SIZE];
long dummy_stack[10];
struct kprobe_ctlblk kcb = {0};
dummy_code[0] = (kprobe_opcode_t)opcode;
dummy_code[1] = (kprobe_opcode_t)opcode2;
regs.esp = (long)dummy_stack;
kp.addr = (void*)dummy_code;
arch_copy_kprobe(&kp);
regs.eip = (long)kp.ainsn.insn + offset;
resume_execution(&kp, ®s, &kcb);
return kp.ainsn.boostable;
}
void validate_map(int op2, long offs)
{
int i, j, ret;
printf("opcode2:%xh, offset:%d byte(s)\n", op2, offs);
printf(" ");
for (j = 0; j <= 0xf; j++) printf("%1xh ", j);
printf("\n");
for (i = 0; i <= 0xf; i++) {
printf("%1xh ", i);
for (j = 0; j <= 0xf; j++) {
ret = validate(i<<4|j, op2, offs);
printf("%2d ", ret);
}
printf("\n");
}
}
int main(void)
{
validate_map(0,1);
validate_map(0,-1);
validate_map(0,MAX_INSN_SIZE-4);
validate_map(0x10,1);
validate_map(0x20,1);
validate_map(0x21,1);
return 0;
}