]> sourceware.org Git - systemtap.git/blob - bpf-translate.cxx
PR23860: reduce stack pressure from format strings
[systemtap.git] / bpf-translate.cxx
1 // bpf translation pass
2 // Copyright (C) 2016-2018 Red Hat Inc.
3 //
4 // This file is part of systemtap, and is free software. You can
5 // redistribute it and/or modify it under the terms of the GNU General
6 // Public License (GPL); either version 2, or (at your option) any
7 // later version.
8
9 #include "config.h"
10 #include "bpf-internal.h"
11 #include "parse.h"
12 #include "staptree.h"
13 #include "elaborate.h"
14 #include "session.h"
15 #include "translator-output.h"
16 #include "tapsets.h"
17 #include <sstream>
18 #include <unistd.h>
19 #include <fcntl.h>
20
21 extern "C" {
22 #include <libelf.h>
23 /* Unfortunately strtab manipulation functions were only officially added
24 to elfutils libdw in 0.167. Before that there were internal unsupported
25 ebl variants. While libebl.h isn't supported we'll try to use it anyway
26 if the elfutils we build against is too old. */
27 #include <elfutils/version.h>
28 #if _ELFUTILS_PREREQ (0, 167)
29 #include <elfutils/libdwelf.h>
30 typedef Dwelf_Strent Stap_Strent;
31 typedef Dwelf_Strtab Stap_Strtab;
32 #define stap_strtab_init dwelf_strtab_init
33 #define stap_strtab_add(X,Y) dwelf_strtab_add(X,Y)
34 #define stap_strtab_free dwelf_strtab_free
35 #define stap_strtab_finalize dwelf_strtab_finalize
36 #define stap_strent_offset dwelf_strent_off
37 #else
38 #include <elfutils/libebl.h>
39 typedef Ebl_Strent Stap_Strent;
40 typedef Ebl_Strtab Stap_Strtab;
41 #define stap_strtab_init ebl_strtabinit
42 #define stap_strtab_add(X,Y) ebl_strtabadd(X,Y,0)
43 #define stap_strtab_free ebl_strtabfree
44 #define stap_strtab_finalize ebl_strtabfinalize
45 #define stap_strent_offset ebl_strtaboffset
46 #endif
47 #include <linux/version.h>
48 #include <asm/ptrace.h>
49 }
50
51 #ifndef EM_BPF
52 #define EM_BPF 0xeb9f
53 #endif
54 #ifndef R_BPF_MAP_FD
55 #define R_BPF_MAP_FD 1
56 #endif
57
// File-scope string with external linkage, declared outside namespace bpf.
// NOTE(review): presumably the name of the module being translated; the
// code that assigns and reads it is not visible in this excerpt -- confirm.
58 std::string module_name;
59
60 namespace bpf {
61
62 struct side_effects_visitor : public expression_visitor
63 {
64 bool side_effects;
65
66 side_effects_visitor() : side_effects(false) { }
67
68 void visit_expression(expression *) { }
69 void visit_pre_crement(pre_crement *) { side_effects = true; }
70 void visit_post_crement(post_crement *) { side_effects = true; }
71 void visit_assignment (assignment *) { side_effects = true; }
72 void visit_functioncall (functioncall *) { side_effects = true; }
73 void visit_print_format (print_format *) { side_effects = true; }
74 void visit_stat_op (stat_op *) { side_effects = true; }
75 void visit_hist_op (hist_op *) { side_effects = true; }
76 };
77
78 struct init_block : public ::block
79 {
80 // This block contains statements that initialize global variables
81 // with default values. It should be visited first among any
82 // begin probe bodies. Note that initialization of internal globals
83 // (ex. the exit status) is handled by the stapbpf runtime.
84 init_block(globals &glob);
85 ~init_block();
86 bool empty() { return this->statements.empty(); }
87 };
88
89 init_block::init_block(globals &glob)
90 {
91 for (auto i = glob.globals.begin(); i != glob.globals.end(); ++i)
92 {
93 struct vardecl *v = i->first;
94
95 if (v->init && v->type == pe_long)
96 {
97 struct literal_number *num = static_cast<literal_number *>(v->init);
98 struct symbol *sym = new symbol;
99 struct assignment *asgn = new assignment;
100 struct expr_statement *stmt = new expr_statement;
101
102 sym->referent = v;
103 asgn->type = pe_long;
104 asgn->op = "=";
105 asgn->left = sym;
106 asgn->right = num;
107 stmt->value = asgn;
108 this->statements.push_back(stmt);
109 }
110 }
111 }
112
113 init_block::~init_block()
114 {
115 for (auto i = this->statements.begin(); i != this->statements.end(); ++i)
116 {
117 struct expr_statement *stmt = static_cast<expr_statement *>(*i);
118 struct assignment *asgn = static_cast<assignment *>(stmt->value);
119 struct symbol *sym = static_cast<symbol *>(asgn->left);
120
121 // referent and right are not owned by this.
122 sym->referent = NULL;
123 asgn->right = NULL;
124 delete sym;
125 delete asgn;
126 delete stmt;
127 }
128 }
129
130 static bool
131 has_side_effects (expression *e)
132 {
133 side_effects_visitor t;
134 e->visit (&t);
135 return t.side_effects;
136 }
137
138 /* Forward declarations.  asm_stmt (the record produced by the
   embedded-code assembler's parser) is defined further down in this
   file, but bpf_unparser's member declarations already refer to it.
   The two print_format_add_tag overloads are defined outside this
   excerpt -- NOTE(review): confirm their behaviour at the definition.  */
139 struct asm_stmt;
140 static void print_format_add_tag(std::string&);
141 static void print_format_add_tag(print_format*);
142
143 struct bpf_unparser : public throwing_visitor
144 {
145 // The visitor class isn't as helpful as it might be. As a consequence,
146 // the RESULT member is set after visiting any expression type. Use the
147 // emit_expr helper to return the result properly.
148 value *result;
149
150 // The program into which we are emitting code.
151 program &this_prog;
152 globals &glob;
153 value *this_in_arg0;
154
155 // The "current" block into which we are currently emitting code.
156 insn_append_inserter this_ins;
157 void set_block(block *b)
158 { this_ins.b = b; this_ins.i = b->last; }
159 void clear_block()
160 { this_ins.b = NULL; this_ins.i = NULL; }
161 bool in_block() const
162 { return this_ins.b != NULL; }
163
164 // Destinations for "break", "continue", and "return" respectively.
165 std::vector<block *> loop_break;
166 std::vector<block *> loop_cont;
167 std::vector<block *> func_return;
168 std::vector<value *> func_return_val;
169 std::vector<functiondecl *> func_calls;
170
171 // Local variable declarations.
172 typedef std::unordered_map<vardecl *, value *> locals_map;
173 locals_map *this_locals;
174
175 // Return 0.
176 block *ret0_block;
177 block *exit_block;
178 block *get_ret0_block();
179 block *get_exit_block();
180
181 // TODO General triage of bpf-possible functionality:
182 virtual void visit_block (::block *s);
183 // TODO visit_try_block -> UNHANDLED
184 virtual void visit_embeddedcode (embeddedcode *s);
185 virtual void visit_null_statement (null_statement *s);
186 virtual void visit_expr_statement (expr_statement *s);
187 virtual void visit_if_statement (if_statement* s);
188 virtual void visit_for_loop (for_loop* s);
189 virtual void visit_foreach_loop (foreach_loop* s);
190 virtual void visit_return_statement (return_statement* s);
191 virtual void visit_delete_statement (delete_statement* s);
192 // TODO visit_next_statement -> UNHANDLED
193 virtual void visit_break_statement (break_statement* s);
194 virtual void visit_continue_statement (continue_statement* s);
195 virtual void visit_literal_string (literal_string *e);
196 virtual void visit_literal_number (literal_number* e);
197 // TODO visit_embedded_expr -> UNHANDLED, could treat as embedded_code
198 virtual void visit_binary_expression (binary_expression* e);
199 virtual void visit_unary_expression (unary_expression* e);
200 virtual void visit_pre_crement (pre_crement* e);
201 virtual void visit_post_crement (post_crement* e);
202 virtual void visit_logical_or_expr (logical_or_expr* e);
203 virtual void visit_logical_and_expr (logical_and_expr* e);
204 virtual void visit_array_in (array_in* e);
205 // ??? visit_regex_query -> UNHANDLED, requires new kernel functionality
206 virtual void visit_compound_expression (compound_expression *e);
207 virtual void visit_comparison (comparison* e);
208 // TODO visit_concatenation -> (2) pseudo-LOOP: copy the strings while concatenating
209 virtual void visit_ternary_expression (ternary_expression* e);
210 virtual void visit_assignment (assignment* e);
211 virtual void visit_symbol (symbol* e);
212 virtual void visit_target_register (target_register* e);
213 virtual void visit_target_deref (target_deref* e);
214 // visit_target_bitfield -> ?? should already be handled in earlier pass?
215 // visit_target_symbol -> ?? should already be handled in earlier pass
216 virtual void visit_arrayindex (arrayindex *e);
217 virtual void visit_functioncall (functioncall* e);
218 virtual void visit_print_format (print_format* e);
219 // TODO visit_stat_op -> (3) possibly userspace-only :: get the correct stat value out of BPF_MAP_TYPE_PERCPU_?
220 // TODO visit_hist_op -> implement as a userspace-only helper
221 // visit_atvar_op -> ?? should already be handled in earlier pass
222 // visit_cast_op -> ?? should already be handled in earlier pass
223 // visit_autocast_op -> ?? should already be handled in earlier pass
224 // visit_defined_op -> ?? should already be handled in earlier pass
225 // visit_entry_op -> ?? should already be handled in earlier pass
226 // visit_perf_op -> ?? should already be handled in earlier pass
227
228 // TODO: Other bpf functionality to take advantage of in tapsets, or as alternate implementations:
229 // - backtrace.stp :: BPF_MAP_TYPE_STACKTRACE + bpf_getstackid
230 // - BPF_MAP_TYPE_LRU_HASH :: for size-limited maps
231 // - BPF_MAP_GET_NEXT_KEY :: for user-space iteration through maps
232 // see https://ferrisellis.com/posts/ebpf_syscall_and_maps/#ebpf-map-types
233
234 void emit_stmt(statement *s);
235 void emit_mov(value *d, value *s);
236 void emit_jmp(block *b);
237 void emit_cond(expression *e, block *t, block *f);
238 void emit_store(expression *dest, value *src);
239 value *emit_expr(expression *e);
240 value *emit_bool(expression *e);
241 value *emit_context_var(bpf_context_vardecl *v);
242
243 value *emit_functioncall(functiondecl *f, const std::vector<value *> &args);
244 value *emit_print_format(const std::string &format,
245 const std::vector<value *> &actual,
246 bool print_to_stream = true);
247
248 // Used for the embedded-code assembler:
249 int64_t parse_imm (const asm_stmt &stmt, const std::string &str);
250 size_t parse_asm_stmt (embeddedcode *s, size_t start,
251 /*OUT*/asm_stmt &stmt);
252 value *emit_asm_arg(const asm_stmt &stmt, const std::string &arg,
253 bool allow_imm = true, bool allow_emit = true);
254 value *emit_asm_reg(const asm_stmt &stmt, const std::string &reg);
255 value *get_asm_reg(const asm_stmt &stmt, const std::string &reg);
256 void emit_asm_opcode(const asm_stmt &stmt,
257 std::map<std::string, block *> label_map);
258
259 // Used for the embedded-code assembler's diagnostics:
260 source_loc adjusted_loc;
261 size_t adjust_pos;
262 std::vector<token *> adjusted_toks; // track for deallocation
263
264 // Used for string data:
265 value *emit_literal_string(const std::string &str, const token *tok);
266 value *emit_string_copy(value *dest, int ofs, value *src, bool zero_pad = false);
267
268 // Used for passing long and string arguments on the stack where an address is expected:
269 void emit_long_arg(value *arg, int ofs, value *val);
270 void emit_str_arg(value *arg, int ofs, value *str);
271
272 void add_prologue();
273 locals_map *new_locals(const std::vector<vardecl *> &);
274
275 bpf_unparser (program &c, globals &g);
276 virtual ~bpf_unparser ();
277 };
278
279 bpf_unparser::bpf_unparser(program &p, globals &g)
280 : throwing_visitor ("unhandled statement or expression type"),
281 result(NULL), this_prog(p), glob(g), this_locals(NULL),
282 ret0_block(NULL), exit_block(NULL)
283 { }
284
285 bpf_unparser::~bpf_unparser()
286 {
287 delete this_locals;
288 }
289
290 bpf_unparser::locals_map *
291 bpf_unparser::new_locals(const std::vector<vardecl *> &vars)
292 {
293 locals_map *m = new locals_map;
294
295 for (std::vector<vardecl *>::const_iterator i = vars.begin ();
296 i != vars.end (); ++i)
297 {
298 const locals_map::value_type v (*i, this_prog.new_reg());
299 auto ok = m->insert (v);
300 assert (ok.second);
301 }
302
303 return m;
304 }
305
306 block *
307 bpf_unparser::get_exit_block()
308 {
309 if (exit_block)
310 return exit_block;
311
312 block *b = this_prog.new_block();
313 insn_append_inserter ins(b, "exit_block");
314
315 this_prog.mk_exit(ins);
316
317 exit_block = b;
318 return b;
319 }
320
321 block *
322 bpf_unparser::get_ret0_block()
323 {
324 if (ret0_block)
325 return ret0_block;
326
327 block *b = this_prog.new_block();
328 insn_append_inserter ins(b, "ret0_block");
329
330 this_prog.mk_mov(ins, this_prog.lookup_reg(BPF_REG_0), this_prog.new_imm(0));
331 b->fallthru = new edge(b, get_exit_block());
332
333 ret0_block = b;
334 return b;
335 }
336
337 void
338 bpf_unparser::emit_stmt(statement *s)
339 {
340 if (s)
341 s->visit (this);
342 }
343
344 value *
345 bpf_unparser::emit_expr(expression *e)
346 {
347 e->visit (this);
348 value *v = result;
349 result = NULL;
350 return v;
351 }
352
353 void
354 bpf_unparser::emit_mov(value *d, value *s)
355 {
356 this_prog.mk_mov(this_ins, d, s);
357 }
358
359 void
360 bpf_unparser::emit_jmp(block *b)
361 {
362 // Begin by hoping that we can simply place the destination as fallthru.
363 // If this assumption doesn't hold, it'll be fixed by reorder_blocks.
364 block *this_block = this_ins.get_block ();
365 this_block->fallthru = new edge(this_block, b);
366 clear_block ();
367 }
368
369 void
370 bpf_unparser::emit_cond(expression *e, block *t_dest, block *f_dest)
371 {
372 condition cond;
373 value *s0, *s1;
374
375 // Look for and handle logical operators first.
376 if (logical_or_expr *l = dynamic_cast<logical_or_expr *>(e))
377 {
378 block *cont_block = this_prog.new_block ();
379 emit_cond (l->left, t_dest, cont_block);
380 set_block (cont_block);
381 emit_cond (l->right, t_dest, f_dest);
382 return;
383 }
384 if (logical_and_expr *l = dynamic_cast<logical_and_expr *>(e))
385 {
386 block *cont_block = this_prog.new_block ();
387 emit_cond (l->left, cont_block, f_dest);
388 set_block (cont_block);
389 emit_cond (l->right, t_dest, f_dest);
390 return;
391 }
392 if (unary_expression *u = dynamic_cast<unary_expression *>(e))
393 if (u->op == "!")
394 {
395 emit_cond (u->operand, f_dest, t_dest);
396 return;
397 }
398
399 // What is left must generate a comparison + conditional branch.
400 if (comparison *c = dynamic_cast<comparison *>(e))
401 {
402 s0 = emit_expr (c->left);
403 s1 = emit_expr (c->right);
404 if (c->op == "==")
405 cond = EQ;
406 else if (c->op == "!=")
407 cond = NE;
408 else if (c->op == "<")
409 cond = LT;
410 else if (c->op == "<=")
411 cond = LE;
412 else if (c->op == ">")
413 cond = GT;
414 else if (c->op == ">=")
415 cond = GE;
416 else
417 throw SEMANTIC_ERROR (_("unhandled comparison operator"), e->tok);
418 }
419 else
420 {
421 binary_expression *bin = dynamic_cast<binary_expression *>(e);
422 if (bin && bin->op == "&")
423 {
424 s0 = emit_expr (bin->left);
425 s1 = emit_expr (bin->right);
426 cond = TEST;
427 }
428 else
429 {
430 // Fall back to E != 0.
431 s0 = emit_expr (e);
432 s1 = this_prog.new_imm(0);
433 cond = NE;
434 }
435 }
436
437 this_prog.mk_jcond (this_ins, cond, s0, s1, t_dest, f_dest);
438 clear_block ();
439 }
440
441 value *
442 bpf_unparser::emit_bool (expression *e)
443 {
444 block *else_block = this_prog.new_block ();
445 block *join_block = this_prog.new_block ();
446 value *r = this_prog.new_reg();
447
448 emit_mov (r, this_prog.new_imm(1));
449 emit_cond (e, join_block, else_block);
450
451 set_block (else_block);
452 emit_mov (r, this_prog.new_imm(0));
453 emit_jmp (join_block);
454
455 set_block(join_block);
456 return r;
457 }
458
459 void
460 bpf_unparser::emit_store(expression *e, value *val)
461 {
462 if (symbol *s = dynamic_cast<symbol *>(e)) // scalar lvalue
463 {
464 vardecl *var = s->referent;
465 assert (var->arity == 0);
466
467 auto g = glob.globals.find (var);
468 if (g != glob.globals.end())
469 {
470 value *frame = this_prog.lookup_reg(BPF_REG_10);
471 int key_ofs, val_ofs;
472
473 // BPF_FUNC_map_update_elem will dereference the address
474 // passed in BPF_REG_3:
475 switch (var->type)
476 {
477 case pe_long:
478 // Store the long on the stack and pass its address:
479 val_ofs = -8;
480 emit_long_arg(this_prog.lookup_reg(BPF_REG_3), val_ofs, val);
481 break;
482 case pe_string:
483 // Zero-pad and copy the string to the stack and pass its address:
484 val_ofs = -BPF_MAXSTRINGLEN;
485 emit_str_arg(this_prog.lookup_reg(BPF_REG_3), val_ofs, val);
486 this_prog.use_tmp_space(BPF_MAXSTRINGLEN);
487 break;
488 // ??? pe_stats -> TODO (3) unknown (but stats could be implemented as BPF_MAP_TYPE_PERCPU_ARRAY)
489 default:
490 goto err;
491 }
492
493 key_ofs = val_ofs - 4;
494 this_prog.mk_st(this_ins, BPF_W, frame, key_ofs,
495 this_prog.new_imm(g->second.second));
496 this_prog.use_tmp_space(-key_ofs);
497
498 this_prog.load_map(this_ins, this_prog.lookup_reg(BPF_REG_1),
499 g->second.first);
500 this_prog.mk_binary(this_ins, BPF_ADD,
501 this_prog.lookup_reg(BPF_REG_2),
502 frame, this_prog.new_imm(key_ofs));
503 emit_mov(this_prog.lookup_reg(BPF_REG_4), this_prog.new_imm(0));
504 this_prog.mk_call(this_ins, BPF_FUNC_map_update_elem, 4);
505 return;
506 }
507
508 auto i = this_locals->find (var);
509 if (i != this_locals->end ())
510 {
511 emit_mov (i->second, val);
512 return;
513 }
514 }
515 else if (arrayindex *a = dynamic_cast<arrayindex *>(e)) // array lvalue
516 {
517 if (symbol *a_sym = dynamic_cast<symbol *>(a->base))
518 {
519 vardecl *v = a_sym->referent;
520 int key_ofs, val_ofs;
521
522 if (v->arity != 1)
523 throw SEMANTIC_ERROR(_("unhandled multi-dimensional array"), v->tok);
524
525 auto g = glob.globals.find(v);
526 if (g == glob.globals.end())
527 throw SEMANTIC_ERROR(_("unknown array variable"), v->tok);
528
529 value *idx = emit_expr(a->indexes[0]);
530 switch (v->index_types[0])
531 {
532 case pe_long:
533 // Store the long on the stack and pass its address:
534 key_ofs = -8;
535 emit_long_arg(this_prog.lookup_reg(BPF_REG_2), key_ofs, idx);
536 break;
537 case pe_string:
538 // Zero-pad and copy the string to the stack and pass its address:
539 key_ofs = -BPF_MAXSTRINGLEN;
540 emit_str_arg(this_prog.lookup_reg(BPF_REG_2), key_ofs, idx);
541 break;
542 default:
543 throw SEMANTIC_ERROR(_("unhandled index type"), e->tok);
544 }
545 switch (v->type)
546 {
547 case pe_long:
548 // Store the long on the stack and pass its address:
549 val_ofs = key_ofs - 8;
550 emit_long_arg(this_prog.lookup_reg(BPF_REG_3), val_ofs, val);
551 break;
552 case pe_string:
553 // Zero-pad and copy the string to the stack and pass its address:
554 val_ofs = key_ofs - BPF_MAXSTRINGLEN;
555 emit_str_arg(this_prog.lookup_reg(BPF_REG_3), val_ofs, val);
556 this_prog.use_tmp_space(BPF_MAXSTRINGLEN);
557 break;
558 default:
559 throw SEMANTIC_ERROR(_("unhandled array type"), v->tok);
560 }
561
562 this_prog.use_tmp_space(-val_ofs);
563 this_prog.load_map(this_ins, this_prog.lookup_reg(BPF_REG_1),
564 g->second.first);
565 emit_mov(this_prog.lookup_reg(BPF_REG_4), this_prog.new_imm(0));
566 this_prog.mk_call(this_ins, BPF_FUNC_map_update_elem, 4);
567 return;
568 }
569 }
570 err:
571 throw SEMANTIC_ERROR (_("unknown lvalue"), e->tok);
572 }
573
574 void
575 bpf_unparser::visit_block (::block *s)
576 {
577 unsigned n = s->statements.size();
578 for (unsigned i = 0; i < n; ++i)
579 emit_stmt (s->statements[i]);
580 }
581
582 /* WORK IN PROGRESS: A simple eBPF assembler.
583
584 In order to effectively write eBPF tapset functions, we want to use
585 embedded-code assembly rather than compile from SystemTap code. At
586 the same time, we want to hook into stapbpf functionality to
587 reserve stack memory, allocate virtual registers or signal errors.
588
589 The assembler syntax will probably take a couple of attempts to get
590 just right. This attempt keeps things as close as possible to the
591 first embedded-code assembler, with a few more features and a
592 disgustingly lenient parser that allows things like
593 $ this is all one "**identifier**" believe-it!-or-not
594
595 Ahh for the days of 1960s FORTRAN.
596
597 ??? It might make more sense to implement an assembler based on
598 the syntax used in official eBPF subsystem docs. */
599
600 /* Supported assembly statement types include:
601
602 <stmt> ::= label, <dest=label>;
603 <stmt> ::= alloc, <dest=reg>, <imm=imm>;
604 <stmt> ::= call, <dest=optreg>, <param[0]=function name>, <param[1]=arg>, ...;
605 <stmt> ::= <code=integer opcode>, <dest=reg>, <src1=reg>,
606 <off/jmp_target=off>, <imm=imm>;
607
608 Supported argument types include:
609
610 <arg> ::= <reg> | <imm>
611 <optreg> ::= <reg> | -
612 <reg> ::= <register index> | r<register index> |
613 $<identifier> | $<integer constant> | $$ | <string constant>
614 <imm> ::= <integer constant> | BPF_MAXSTRINGLEN | -
615 <off> ::= <imm> | <jump label>
616
617 */
618
619 // #define BPF_ASM_DEBUG
620
621 struct asm_stmt {
622 std::string kind;
623
624 unsigned code;
625 std::string dest, src1;
626 int64_t off, imm;
627
628 // metadata for jmp instructions
629 // ??? The logic around these flags could be pruned a bit.
630 bool has_jmp_target = false;
631 bool has_fallthrough = false;
632 std::string jmp_target, fallthrough;
633
634 // metadata for call, error instructions
635 std::vector<std::string> params;
636
637 token *tok;
638 };
639
640 std::ostream&
641 operator << (std::ostream& o, const asm_stmt& stmt)
642 {
643 if (stmt.kind == "label")
644 o << "label, " << stmt.dest << ";";
645 else if (stmt.kind == "opcode")
646 {
647 o << std::hex << stmt.code << ", "
648 << stmt.dest << ", "
649 << stmt.src1 << ", ";
650 if (stmt.off != 0 || stmt.jmp_target == "")
651 o << stmt.off;
652 else if (stmt.off != 0) // && stmt.jmp_target != ""
653 o << stmt.off << "/";
654 if (stmt.jmp_target != "")
655 o << "label:" << stmt.jmp_target;
656 o << ", "
657 << stmt.imm << ";"
658 << (stmt.has_fallthrough ? " +FALLTHROUGH " + stmt.fallthrough : "");
659 }
660 else if (stmt.kind == "alloc")
661 {
662 o << "alloc, " << stmt.dest << ", " << stmt.imm << ";";
663 }
664 else if (stmt.kind == "call")
665 {
666 o << "call, " << stmt.dest << ", ";
667 for (unsigned k = 0; k < stmt.params.size(); k++)
668 {
669 o << stmt.params[k];
670 o << (k >= stmt.params.size() - 1 ? ";" : ", ");
671 }
672 }
673 else
674 o << "<unknown asm_stmt kind '" << stmt.kind << "'>";
675 return o;
676 }
677
// Report whether STR in its entirety is an integer literal that stol
// accepts with automatic base detection (decimal, 0x... hexadecimal or
// 0... octal).  Strings that do not start with a number, that are out
// of range for long, or that have characters left over after the
// number all yield false.
bool
is_numeric (const std::string &str)
{
  size_t consumed = 0;

  try
    {
      stol (str, &consumed, 0);
    }
  catch (const std::invalid_argument &)
    {
      return false;
    }
  catch (const std::out_of_range &)
    {
      /* XXX: probably numeric but not valid; give up */
      return false;
    }
  catch (...)
    {
      /* XXX: handle other errors the same way */
      std::cerr << "BUG: bpf assembler -- is_numeric() saw unexpected exception" << std::endl;
      return false;
    }

  // Only a match covering the whole string counts.
  const bool whole_string = (consumed == str.size ());
  return whole_string;
}
696
697 int64_t
698 bpf_unparser::parse_imm (const asm_stmt &stmt, const std::string &str)
699 {
700 int64_t val;
701 if (str == "BPF_MAXSTRINGLEN")
702 val = BPF_MAXSTRINGLEN;
703 else if (str == "-")
704 val = 0;
705 else try {
706 val = stol(str);
707 } catch (std::exception &e) { // XXX: invalid_argument, out_of_range
708 throw SEMANTIC_ERROR (_F("invalid bpf embeddedcode operand '%s'",
709 str.c_str()), stmt.tok);
710 }
711 return val;
712 }
713
714 /* Parse an assembly statement starting from position start in code,
715 then write the output in stmt. Returns a position immediately after
716 the parsed statement. */
717 size_t
718 bpf_unparser::parse_asm_stmt (embeddedcode *s, size_t start,
719 /*OUT*/asm_stmt &stmt)
720 {
721 const interned_string &code = s->code;
722
723 retry:
724 std::vector<std::string> args;
725 unsigned n = code.size();
726 size_t pos;
727 bool in_comment = false;
728 bool in_string = false;
729
730 // ??? As before, parser is extremely non-rigorous and could do
731 // with some tightening in terms of the inputs it accepts.
732 std::string arg = "";
733 size_t save_start = start; // -- position for diagnostics
734 for (pos = start; pos < n; pos++)
735 {
736 char c = code[pos];
737 char c2 = pos + 1 < n ? code [pos + 1] : 0;
738 if (isspace(c) && !in_string)
739 continue; // skip
740 else if (in_comment)
741 {
742 if (c == '*' && c2 == '/')
743 ++pos, in_comment = false;
744 // else skip
745 }
746 else if (in_string)
747 {
748 // resulting string will be processed by translate_escapes()
749 if (c == '"')
750 arg.push_back(c), in_string = false; // include quote
751 else if (c == '\\' && c2 == '"')
752 ++pos, arg.push_back(c), arg.push_back(c2);
753 else // accept any char, including whitespace
754 arg.push_back(c);
755 }
756 else if (c == '/' && c2 == '*')
757 ++pos, in_comment = true;
758 else if (c == '"') // found a literal string
759 {
760 if (arg.empty() && args.empty())
761 save_start = pos; // start of first argument
762
763 // XXX: This allows '"' inside an arg and will treat the
764 // string as a sequence of weird identifier characters. A
765 // more rigorous parser would error on mixing strings and
766 // regular chars.
767 arg.push_back(c); // include quote
768 in_string = true;
769 }
770 else if (c == ',') // reached end of argument
771 {
772 // XXX: This strips out empty args. A more rigorous parser would error.
773 if (arg != "")
774 args.push_back(arg);
775 arg = "";
776 }
777 else if (c == ';') // reached end of statement
778 {
779 // XXX: This strips out empty args. A more rigorous parser would error.
780 if (arg != "")
781 args.push_back(arg);
782 arg = "";
783 pos++; break;
784 }
785 else // found (we assume) a regular char
786 {
787 if (arg.empty() && args.empty())
788 save_start = pos; // start of first argument
789
790 // XXX: As before, this strips whitespace within args
791 // (so '$ab', '$ a b' and '$a b' are equivalent).
792 //
793 // A more rigorous parser would track in_arg
794 // and after_arg states and error on whitespace within args.
795 arg.push_back(c);
796 }
797 }
798 // final ';' is optional, so we watch for a trailing arg:
799 if (arg != "") args.push_back(arg);
800
801 // handle the case with no args
802 if (args.empty() && pos >= n)
803 return std::string::npos; // finished parsing
804 else if (args.empty())
805 {
806 // XXX: This skips an empty statement.
807 // A more rigorous parser would error.
808 start = pos;
809 goto retry;
810 }
811
812 // compute token with adjusted source location for diagnostics
813 // TODO: needs some attention to how multiline tokens are printed in error reporting -- with this code, caret aligns incorrectly
814 for (/* use saved adjust_pos */; adjust_pos < save_start && adjust_pos < n; adjust_pos++)
815 {
816 char c = code[adjust_pos];
817 if (c == '\n')
818 {
819 adjusted_loc.line++;
820 adjusted_loc.column = 1;
821 }
822 else
823 adjusted_loc.column++;
824 }
825
826 // Now populate the statement data.
827
828 stmt = asm_stmt(); // clear pre-existing data
829
830 // set token with adjusted source location
831 stmt.tok = s->tok->adjust_location(adjusted_loc);
832 adjusted_toks.push_back(stmt.tok);
833
834 #ifdef BPF_ASM_DEBUG
835 std::cerr << "bpf_asm parse_asm_stmt: tokenizer got ";
836 for (unsigned k = 0; k < args.size(); k++)
837 std::cerr << args[k] << ", ";
838 std::cerr << std::endl;
839 #endif
840 if (args[0] == "label")
841 {
842 if (args.size() != 2)
843 throw SEMANTIC_ERROR (_F("invalid bpf embeddedcode syntax (label expects 1 arg, found %llu)", (long long) args.size()-1), stmt.tok);
844 stmt.kind = args[0];
845 stmt.dest = args[1];
846 }
847 else if (args[0] == "alloc")
848 {
849 if (args.size() != 3)
850 throw SEMANTIC_ERROR (_F("invalid bpf embeddedcode syntax (alloc expects 2 args, found %llu)", (long long) args.size()-1), stmt.tok);
851 stmt.kind = args[0];
852 stmt.dest = args[1];
853 stmt.imm = parse_imm(stmt, args[2]);
854 }
855 else if (args[0] == "call")
856 {
857 if (args.size() < 3)
858 throw SEMANTIC_ERROR (_F("invalid bpf embeddedcode syntax (call expects at least 2 args, found %llu)", (long long) args.size()-1), stmt.tok);
859 stmt.kind = args[0];
860 stmt.dest = args[1];
861 assert(stmt.params.empty());
862 for (unsigned k = 2; k < args.size(); k++)
863 stmt.params.push_back(args[k]);
864 }
865 else if (is_numeric(args[0]))
866 {
867 if (args.size() != 5)
868 throw SEMANTIC_ERROR (_F("invalid bpf embeddedcode syntax (opcode expects 4 args, found %llu)", (long long) args.size()-1), stmt.tok);
869 stmt.kind = "opcode";
870 try {
871 stmt.code = stoul(args[0], 0, 0);
872 } catch (std::exception &e) { // XXX: invalid_argument, out_of_range
873 throw SEMANTIC_ERROR (_F("invalid bpf embeddedcode opcode '%s'",
874 args[0].c_str()), stmt.tok);
875 }
876 stmt.dest = args[1];
877 stmt.src1 = args[2];
878
879 stmt.has_jmp_target =
880 BPF_CLASS(stmt.code) == BPF_JMP
881 && BPF_OP(stmt.code) != BPF_EXIT
882 && BPF_OP(stmt.code) != BPF_CALL;
883 stmt.has_fallthrough = // only for jcond
884 stmt.has_jmp_target
885 && BPF_OP(stmt.code) != BPF_JA;
886 // XXX: stmt.fallthrough is computed by visit_embeddedcode
887
888 if (stmt.has_jmp_target)
889 {
890 stmt.off = 0;
891 stmt.jmp_target = args[3];
892 }
893 else
894 stmt.off = parse_imm(stmt, args[3]);
895
896 stmt.imm = parse_imm(stmt, args[4]);
897 }
898 else
899 throw SEMANTIC_ERROR (_F("unknown bpf embeddedcode operator '%s'",
900 args[0].c_str()), stmt.tok);
901
902 // we returned one statement, there may be more parsing to be done
903 return pos;
904 }
905
906 /* forward declaration */
907 std::string translate_escapes (const interned_string &str);
908
909 /* Convert a <reg> or <imm> operand to a value.
910 May emit code to store a string constant on the stack. */
911 value *
912 bpf_unparser::emit_asm_arg (const asm_stmt &stmt, const std::string &arg,
913 bool allow_imm, bool allow_emit)
914 {
915 if (arg == "$$")
916 {
917 /* arg is a return value */
918 if (func_return.empty())
919 throw SEMANTIC_ERROR (_("no return value outside function"), stmt.tok);
920 return func_return_val.back();
921 }
922 else if (arg[0] == '$')
923 {
924 /* assume arg is a variable */
925 std::string var = arg.substr(1);
926 for (auto i = this_locals->begin(); i != this_locals->end(); ++i)
927 {
928 vardecl *v = i->first;
929 if (var == v->unmangled_name)
930 return i->second;
931 }
932
933 /* if it's an unknown variable, allocate a temporary */
934 struct vardecl *vd = new vardecl;
935 vd->name = "__bpfasm__local_" + var;
936 vd->unmangled_name = var;
937 vd->type = pe_long;
938 vd->arity = 0;
939 value *reg = this_prog.new_reg();
940 const locals_map::value_type v (vd, reg);
941 auto ok = this_locals->insert (v);
942 assert (ok.second);
943 return reg;
944 }
945 else if (is_numeric(arg) && allow_imm)
946 {
947 /* arg is an immediate constant */
948 long imm = stol(arg, 0, 0);
949 return this_prog.new_imm(imm);
950 }
951 else if (is_numeric(arg) || arg[0] == 'r')
952 {
953 /* arg is a register number */
954 std::string reg = arg[0] == 'r' ? arg.substr(1) : arg;
955 unsigned long num;
956 bool parsed = false;
957 try {
958 num = stoul(reg, 0, 0);
959 parsed = true;
960 } catch (std::exception &e) {} // XXX: invalid_argument, out_of_range
961 if (!parsed || num > 10)
962 throw SEMANTIC_ERROR (_F("invalid bpf register '%s'",
963 arg.c_str()), stmt.tok);
964 return this_prog.lookup_reg(num);
965 }
966 else if (arg[0] == '"')
967 {
968 if (!allow_emit)
969 throw SEMANTIC_ERROR (_F("invalid bpf argument %s "
970 "(string literal not allowed here)",
971 arg.c_str()), stmt.tok);
972
973 /* arg is a string constant */
974 if (arg[arg.size() - 1] != '"')
975 throw SEMANTIC_ERROR (_F("BUG: improper string %s",
976 arg.c_str()), stmt.tok);
977 std::string escaped_str = arg.substr(1,arg.size()-2); /* strip quotes */
978 std::string str = translate_escapes(escaped_str);
979 return emit_literal_string(str, stmt.tok);
980 }
981 else if (arg == "BPF_MAXSTRINGLEN")
982 {
983 /* arg is BPF_MAXSTRINGLEN */
984 if (!allow_imm)
985 throw SEMANTIC_ERROR (_F("invalid bpf register '%s'",
986 arg.c_str()), stmt.tok);
987 return this_prog.new_imm(BPF_MAXSTRINGLEN);
988 }
989 else if (arg == "-")
990 {
991 /* arg is null a.k.a '0' */
992 if (!allow_imm)
993 throw SEMANTIC_ERROR (_F("invalid bpf register '%s'",
994 arg.c_str()), stmt.tok);
995 return this_prog.new_imm(0);
996 }
997 else if (allow_imm)
998 throw SEMANTIC_ERROR (_F("invalid bpf argument '%s'",
999 arg.c_str()), stmt.tok);
1000 else
1001 throw SEMANTIC_ERROR (_F("invalid bpf register '%s'",
1002 arg.c_str()), stmt.tok);
1003
1004 }
1005
1006 /* As above, but don't accept immediate values.
1007 Do accept string constants (since they're stored in a register). */
1008 value *
1009 bpf_unparser::emit_asm_reg (const asm_stmt &stmt, const std::string &reg)
1010 {
1011 return emit_asm_arg(stmt, reg, /*allow_imm=*/false);
1012 }
1013
1014 /* As above, but don't allow string constants or anything that emits code.
1015 Useful if the context requires an lvalue. */
1016 value *
1017 bpf_unparser::get_asm_reg (const asm_stmt &stmt, const std::string &reg)
1018 {
1019 return emit_asm_arg(stmt, reg, /*allow_imm=*/false, /*allow_emit=*/false);
1020 }
1021
1022 void
1023 bpf_unparser::emit_asm_opcode (const asm_stmt &stmt,
1024 std::map<std::string, block *> label_map)
1025 {
1026 if (stmt.code > 0xff && stmt.code != BPF_LD_MAP)
1027 throw SEMANTIC_ERROR (_("invalid bpf code"), stmt.tok);
1028
1029 bool r_dest = false, r_src0 = false, r_src1 = false, i_src1 = false;
1030 bool op_jmp = false, op_jcond = false; condition c;
1031 switch (BPF_CLASS (stmt.code))
1032 {
1033 case BPF_LDX:
1034 r_dest = r_src1 = true;
1035 break;
1036 case BPF_STX:
1037 r_src0 = r_src1 = true;
1038 break;
1039 case BPF_ST:
1040 r_src0 = i_src1 = true;
1041 break;
1042
1043 case BPF_ALU:
1044 case BPF_ALU64:
1045 r_dest = true;
1046 if (stmt.code & BPF_X)
1047 r_src1 = true;
1048 else
1049 i_src1 = true;
1050 switch (BPF_OP (stmt.code))
1051 {
1052 case BPF_NEG:
1053 case BPF_MOV:
1054 break;
1055 case BPF_END:
1056 /* X/K bit repurposed as LE/BE. */
1057 i_src1 = false, r_src1 = true;
1058 break;
1059 default:
1060 r_src0 = true;
1061 }
1062 break;
1063
1064 case BPF_JMP:
1065 switch (BPF_OP (stmt.code))
1066 {
1067 case BPF_EXIT:
1068 // no special treatment needed
1069 break;
1070 case BPF_CALL:
1071 i_src1 = true;
1072 break;
1073 case BPF_JA:
1074 op_jmp = true;
1075 break;
1076 default:
1077 // XXX: assume this is a jcond op
1078 op_jcond = true;
1079 r_src0 = true;
1080 if (stmt.code & BPF_X)
1081 r_src1 = true;
1082 else
1083 i_src1 = true;
1084 }
1085
1086 // compute jump condition c
1087 switch (BPF_OP (stmt.code))
1088 {
1089 case BPF_JEQ: c = EQ; break;
1090 case BPF_JNE: c = NE; break;
1091 case BPF_JGT: c = GTU; break;
1092 case BPF_JGE: c = GEU; break;
1093 case BPF_JLT: c = LTU; break;
1094 case BPF_JLE: c = LEU; break;
1095 case BPF_JSGT: c = GT; break;
1096 case BPF_JSGE: c = GE; break;
1097 case BPF_JSLT: c = LT; break;
1098 case BPF_JSLE: c = LE; break;
1099 case BPF_JSET: c = TEST; break;
1100 default:
1101 if (op_jcond)
1102 throw SEMANTIC_ERROR (_("invalid branch in bpf code"), stmt.tok);
1103 }
1104 break;
1105
1106 default:
1107 if (stmt.code == BPF_LD_MAP)
1108 r_dest = true, i_src1 = true;
1109 else
1110 throw SEMANTIC_ERROR (_F("unknown opcode '%d' in bpf code",
1111 stmt.code), stmt.tok);
1112 }
1113
1114 value *v_dest = NULL;
1115 if (r_dest || r_src0)
1116 v_dest = get_asm_reg(stmt, stmt.dest);
1117 else if (stmt.dest != "0" && stmt.dest != "-")
1118 throw SEMANTIC_ERROR (_F("invalid register field '%s' in bpf code",
1119 stmt.dest.c_str()), stmt.tok);
1120
1121 value *v_src1 = NULL;
1122 if (r_src1)
1123 v_src1 = emit_asm_reg(stmt, stmt.src1);
1124 else
1125 {
1126 if (stmt.src1 != "0" && stmt.src1 != "-")
1127 throw SEMANTIC_ERROR (_F("invalid register field '%s' in bpf code",
1128 stmt.src1.c_str()), stmt.tok);
1129 if (i_src1)
1130 v_src1 = this_prog.new_imm(stmt.imm);
1131 else if (stmt.imm != 0)
1132 throw SEMANTIC_ERROR (_("invalid immediate field in bpf code"), stmt.tok);
1133 }
1134
1135 if (stmt.off != (int16_t)stmt.off)
1136 throw SEMANTIC_ERROR (_F("offset field '%lld' out of range in bpf code", (long long) stmt.off), stmt.tok);
1137
1138 if (op_jmp)
1139 {
1140 block *target = label_map[stmt.jmp_target];
1141 this_prog.mk_jmp(this_ins, target);
1142 }
1143 else if (op_jcond)
1144 {
1145 if (label_map.count(stmt.jmp_target) == 0)
1146 throw SEMANTIC_ERROR(_F("undefined jump target '%s' in bpf code",
1147 stmt.jmp_target.c_str()), stmt.tok);
1148 if (label_map.count(stmt.fallthrough) == 0)
1149 throw SEMANTIC_ERROR(_F("BUG: undefined fallthrough target '%s'",
1150 stmt.fallthrough.c_str()), stmt.tok);
1151 block *target = label_map[stmt.jmp_target];
1152 block *fallthrough = label_map[stmt.fallthrough];
1153 this_prog.mk_jcond(this_ins, c, v_dest, v_src1, target, fallthrough);
1154 }
1155 else // regular opcode
1156 {
1157 insn *i = this_ins.new_insn();
1158 i->code = stmt.code;
1159 i->dest = (r_dest ? v_dest : NULL);
1160 i->src0 = (r_src0 ? v_dest : NULL);
1161 i->src1 = v_src1;
1162 i->off = stmt.off;
1163 }
1164 }
1165
1166 void
1167 bpf_unparser::visit_embeddedcode (embeddedcode *s)
1168 {
1169 #ifdef DEBUG_CODEGEN
1170 this_ins.notes.push("asm");
1171 #endif
1172 std::vector<asm_stmt> statements;
1173 asm_stmt stmt;
1174
1175 // track adjusted source location for each stmt
1176 adjusted_loc = s->tok->location;
1177 adjust_pos = 0;
1178
1179 size_t pos = 0;
1180 while ((pos = parse_asm_stmt(s, pos, stmt)) != std::string::npos)
1181 {
1182 statements.push_back(stmt);
1183 }
1184
1185 // build basic block table
1186 std::map<std::string, block *> label_map;
1187 block *entry_block = this_ins.b;
1188 label_map[";;entry"] = entry_block;
1189
1190 bool after_label = true;
1191 asm_stmt *after_jump = NULL;
1192 unsigned fallthrough_count = 0;
1193 for (std::vector<asm_stmt>::iterator it = statements.begin();
1194 it != statements.end(); it++)
1195 {
1196 stmt = *it;
1197
1198 if (after_jump != NULL && stmt.kind == "label")
1199 {
1200 after_jump->has_fallthrough = true;
1201 after_jump->fallthrough = stmt.dest;
1202 }
1203 else if (after_jump != NULL)
1204 {
1205 block *b = this_prog.new_block();
1206
1207 // generate unique label for fallthrough edge
1208 std::ostringstream oss;
1209 oss << "fallthrough;;" << fallthrough_count++;
1210 std::string fallthrough_label = oss.str();
1211 // XXX: semicolons prevent collision with programmer-defined labels
1212
1213 label_map[fallthrough_label] = b;
1214 set_block(b);
1215
1216 after_jump->has_fallthrough = true;
1217 after_jump->fallthrough = fallthrough_label;
1218 }
1219
1220 if (stmt.kind == "label" && after_label)
1221 {
1222 // avoid creating multiple blocks for consecutive labels
1223 label_map[stmt.dest] = this_ins.b;
1224 after_jump = NULL;
1225 }
1226 else if (stmt.kind == "label")
1227 {
1228 block *b = this_prog.new_block();
1229 label_map[stmt.dest] = b;
1230 set_block(b);
1231 after_label = true;
1232 after_jump = NULL;
1233 }
1234 else if (stmt.has_fallthrough)
1235 {
1236 after_label = false;
1237 after_jump = &*it; // be sure to refer to original, not copied stmt
1238 }
1239 else if (stmt.kind == "opcode" && BPF_CLASS(stmt.code) == BPF_JMP
1240 && BPF_OP(stmt.code) != BPF_CALL /* CALL stays in the same block */)
1241 {
1242 after_label = false;
1243 after_jump = &*it; // be sure to refer to original, not copied stmt
1244 }
1245 else
1246 {
1247 after_label = false;
1248 after_jump = NULL;
1249 }
1250 }
1251 if (after_jump != NULL) // ??? should just fall through to exit
1252 throw SEMANTIC_ERROR (_("BUG: bpf embeddedcode doesn't support "
1253 "fallthrough on final asm_stmt"), stmt.tok);
1254
1255 // emit statements
1256 bool jumped_already = false;
1257 set_block(entry_block);
1258 for (std::vector<asm_stmt>::iterator it = statements.begin();
1259 it != statements.end(); it++)
1260 {
1261 stmt = *it;
1262 #ifdef BPF_ASM_DEBUG
1263 std::cerr << "bpf_asm visit_embeddedcode: " << stmt << std::endl;
1264 #endif
1265 if (stmt.kind == "label")
1266 {
1267 if (!jumped_already)
1268 emit_jmp (label_map[stmt.dest]);
1269 set_block(label_map[stmt.dest]);
1270 }
1271 else if (stmt.kind == "alloc")
1272 {
1273 /* Reserve stack space and store its address in dest. */
1274 int ofs = -this_prog.max_tmp_space - stmt.imm;
1275 this_prog.use_tmp_space(-ofs);
1276 // ??? Consider using a storage allocator and this_prog.new_obj().
1277
1278 value *dest = get_asm_reg(stmt, stmt.dest);
1279 this_prog.mk_binary(this_ins, BPF_ADD, dest,
1280 this_prog.lookup_reg(BPF_REG_10) /*frame*/,
1281 this_prog.new_imm(ofs));
1282 }
1283 else if (stmt.kind == "call")
1284 {
1285 assert (!stmt.params.empty());
1286 std::string func_name = stmt.params[0];
1287 bpf_func_id hid = bpf_function_id(func_name);
1288 if (hid != __BPF_FUNC_MAX_ID)
1289 {
1290 // ??? For diagnostics: check if the number of arguments is correct.
1291 regno r = BPF_REG_1; unsigned nargs = 0;
1292 for (unsigned k = 1; k < stmt.params.size(); k++)
1293 {
1294 // ??? Could make params optional to avoid the MOVs,
1295 // ??? since the calling convention is well-known.
1296 value *from_reg = emit_asm_arg(stmt, stmt.params[k]);
1297 value *to_reg = this_prog.lookup_reg(r);
1298 this_prog.mk_mov(this_ins, to_reg, from_reg);
1299 nargs++; r++;
1300 }
1301 this_prog.mk_call(this_ins, hid, nargs);
1302 if (stmt.dest != "-")
1303 {
1304 value *dest = get_asm_reg(stmt, stmt.dest);
1305 this_prog.mk_mov(this_ins, dest,
1306 this_prog.lookup_reg(BPF_REG_0) /* returnval */);
1307 }
1308 // ??? For diagnostics: check other cases with stmt.dest.
1309 }
1310 else if (func_name == "printf" || func_name == "sprintf")
1311 {
1312 if (stmt.params.size() < 2)
1313 throw SEMANTIC_ERROR (_F("bpf embeddedcode '%s' expects format string, "
1314 "none provided", func_name.c_str()),
1315 stmt.tok);
1316 std::string format = stmt.params[1];
1317 if (format.size() < 2 || format[0] != '"'
1318 || format[format.size()-1] != '"')
1319 throw SEMANTIC_ERROR (_F("bpf embeddedcode '%s' expects format string, "
1320 "but first parameter is not a string literal",
1321 func_name.c_str()), stmt.tok);
1322 format = format.substr(1,format.size()-2); /* strip quotes */
1323 format = translate_escapes(format);
1324
1325 bool print_to_stream = (func_name == "printf");
1326 if (print_to_stream)
1327 print_format_add_tag(format);
1328
1329 size_t format_bytes = format.size() + 1;
1330 if (format_bytes > BPF_MAXFORMATLEN)
1331 throw SEMANTIC_ERROR(_("Format string for print too long"), stmt.tok);
1332
1333 std::vector<value *> args;
1334 for (unsigned k = 2; k < stmt.params.size(); k++)
1335 args.push_back(emit_asm_arg(stmt, stmt.params[k]));
1336 if (args.size() > 3)
1337 throw SEMANTIC_ERROR(_NF("additional argument to print",
1338 "too many arguments to print (%zu)",
1339 args.size(), args.size()), stmt.tok);
1340
1341 value *retval = emit_print_format(format, args, print_to_stream);
1342 if (retval != NULL && stmt.dest != "-")
1343 {
1344 value *dest = get_asm_reg(stmt, stmt.dest);
1345 this_prog.mk_mov(this_ins, dest, retval);
1346
1347 }
1348 // ??? For diagnostics: check other cases with retval and stmt.dest.
1349 }
1350 else
1351 {
1352 // TODO: Experimental code for supporting basic functioncalls.
1353 // Needs improvement and simplification to work with full generality.
1354 // But thus far, it is sufficient for calling exit().
1355 #if 1
1356 if (func_name != "exit")
1357 throw SEMANTIC_ERROR(_("BUG: bpf embeddedcode non-helper 'call' operation only supports printf(),sprintf(),exit() for now"), stmt.tok);
1358 #elif 1
1359 throw SEMANTIC_ERROR(_("BUG: bpf embeddedcode non-helper 'call' operation only supports printf(),sprintf() for now"), stmt.tok);
1360 #endif
1361 #if 1
1362 // ???: Passing systemtap_session through all the way to here
1363 // seems intrusive, but less intrusive than moving
1364 // embedded-code assembly to the translate_globals() pass.
1365 symresolution_info sym (*glob.session);
1366 functioncall *call = new functioncall;
1367 call->tok = stmt.tok;
1368 unsigned nargs = stmt.params.size() - 1;
1369 std::vector<functiondecl*> fds
1370 = sym.find_functions (call, func_name, nargs, stmt.tok);
1371 delete call;
1372
1373 if (fds.empty())
1374 // ??? Could call levenshtein_suggest() as in
1375 // symresolution_info::visit_functioncall().
1376 throw SEMANTIC_ERROR(_("bpf embeddedcode unresolved function call"), stmt.tok);
1377 if (fds.size() > 1)
1378 throw SEMANTIC_ERROR(_("bpf embeddedcode unhandled function overloading"), stmt.tok);
1379 functiondecl *f = fds[0];
1380 // TODO: Imitation of semantic_pass_symbols, does not
1381 // cover full generality of the lookup process.
1382 update_visitor_loop (*glob.session, glob.session->code_filters, f->body);
1383 sym.current_function = f; sym.current_probe = 0;
1384 f->body->visit (&sym);
1385
1386 // ??? For now, always inline the function call.
1387 for (auto i = func_calls.begin(); i != func_calls.end(); ++i)
1388 if (f == *i)
1389 throw SEMANTIC_ERROR (_("unhandled function recursion"), stmt.tok);
1390
1391 // Collect the function arguments.
1392 std::vector<value *> args;
1393 for (unsigned k = 1; k < stmt.params.size(); k++)
1394 args.push_back(emit_asm_arg(stmt, stmt.params[k]));
1395
1396 if (args.size () != f->formal_args.size())
1397 throw SEMANTIC_ERROR(_F("bpf embeddedcode call to function '%s' "
1398 "expected %zu arguments, got %zu",
1399 func_name.c_str(),
1400 f->formal_args.size(), args.size()),
1401 stmt.tok);
1402
1403 value *retval = emit_functioncall(f, args);
1404 if (stmt.dest != "-")
1405 {
1406 value *dest = get_asm_reg(stmt, stmt.dest);
1407 this_prog.mk_mov(this_ins, dest, retval);
1408 }
1409 // ??? For diagnostics: check other cases with retval and stmt.dest.
1410 #endif
1411 }
1412 }
1413 else if (stmt.kind == "opcode")
1414 {
1415 emit_asm_opcode (stmt, label_map);
1416 }
1417 else
1418 throw SEMANTIC_ERROR (_F("BUG: bpf embeddedcode contains unexpected "
1419 "asm_stmt kind '%s'", stmt.kind.c_str()),
1420 stmt.tok);
1421 if (stmt.has_fallthrough)
1422 {
1423 jumped_already = true;
1424 set_block(label_map[stmt.fallthrough]);
1425 }
1426 else
1427 jumped_already = false;
1428 }
1429
1430 // housekeeping -- deallocate adjusted_toks along with statements
1431 for (std::vector<token *>::iterator it = adjusted_toks.begin();
1432 it != adjusted_toks.end(); it++)
1433 delete *it;
1434 adjusted_toks.clear();
1435
1436 #ifdef DEBUG_CODEGEN
1437 this_ins.notes.pop(); // asm
1438 #endif
1439 }
1440
1441 void
1442 bpf_unparser::visit_null_statement (null_statement *)
1443 { }
1444
1445 void
1446 bpf_unparser::visit_expr_statement (expr_statement *s)
1447 {
1448 (void) emit_expr (s->value);
1449 }
1450
1451 void
1452 bpf_unparser::visit_if_statement (if_statement* s)
1453 {
1454 block *then_block = this_prog.new_block ();
1455 block *join_block = this_prog.new_block ();
1456
1457 if (s->elseblock)
1458 {
1459 block *else_block = this_prog.new_block ();
1460 emit_cond (s->condition, then_block, else_block);
1461
1462 set_block (then_block);
1463 emit_stmt (s->thenblock);
1464 if (in_block ())
1465 emit_jmp (join_block);
1466
1467 set_block (else_block);
1468 emit_stmt (s->elseblock);
1469 if (in_block ())
1470 emit_jmp (join_block);
1471 }
1472 else
1473 {
1474 emit_cond (s->condition, then_block, join_block);
1475
1476 set_block (then_block);
1477 emit_stmt (s->thenblock);
1478 if (in_block ())
1479 emit_jmp (join_block);
1480 }
1481 set_block (join_block);
1482 }
1483
1484 void
1485 bpf_unparser::visit_for_loop (for_loop* s)
1486 {
1487 block *body_block = this_prog.new_block ();
1488 block *iter_block = this_prog.new_block ();
1489 block *test_block = this_prog.new_block ();
1490 block *join_block = this_prog.new_block ();
1491
1492 emit_stmt (s->init);
1493 if (!in_block ())
1494 return;
1495 emit_jmp (test_block);
1496
1497 loop_break.push_back (join_block);
1498 loop_cont.push_back (iter_block);
1499
1500 set_block (body_block);
1501 emit_stmt (s->block);
1502 if (in_block ())
1503 emit_jmp (iter_block);
1504
1505 loop_cont.pop_back ();
1506 loop_break.pop_back ();
1507
1508 set_block (iter_block);
1509 emit_stmt (s->incr);
1510 if (in_block ())
1511 emit_jmp (test_block);
1512
1513 set_block (test_block);
1514 emit_cond (s->cond, body_block, join_block);
1515
1516 set_block (join_block);
1517 }
1518
1519 void
1520 bpf_unparser::visit_foreach_loop(foreach_loop* s)
1521 {
1522 if (s->indexes.size() != 1)
1523 throw SEMANTIC_ERROR(_("unhandled multi-dimensional array"), s->tok);
1524
1525 vardecl *keydecl = s->indexes[0]->referent;
1526 auto i = this_locals->find(keydecl);
1527 if (i == this_locals->end())
1528 throw SEMANTIC_ERROR(_("unknown index"), keydecl->tok);
1529
1530 symbol *a;
1531 if (! (a = dynamic_cast<symbol *>(s->base)))
1532 throw SEMANTIC_ERROR(_("unknown type"), s->base->tok);
1533 vardecl *arraydecl = a->referent;
1534
1535 auto g = glob.globals.find(arraydecl);
1536 if (g == glob.globals.end())
1537 throw SEMANTIC_ERROR(_("unknown array"), arraydecl->tok);
1538
1539 int map_id = g->second.first;
1540 value *limit = this_prog.new_reg();
1541 value *key = i->second;
1542 value *i0 = this_prog.new_imm(0);
1543 value *key_ofs = this_prog.new_imm(-8);
1544 value *newkey_ofs = this_prog.new_imm(-16);
1545 value *frame = this_prog.lookup_reg(BPF_REG_10);
1546 block *body_block = this_prog.new_block ();
1547 block *load_block = this_prog.new_block();
1548 block *iter_block = this_prog.new_block ();
1549 block *join_block = this_prog.new_block ();
1550
1551 // Track iteration limit.
1552 if (s->limit)
1553 this_prog.mk_mov(this_ins, limit, emit_expr(s->limit));
1554 else
1555 this_prog.mk_mov(this_ins, limit, this_prog.new_imm(-1));
1556
1557 // Get the first key.
1558 this_prog.load_map (this_ins, this_prog.lookup_reg(BPF_REG_1), map_id);
1559 this_prog.mk_mov (this_ins, this_prog.lookup_reg(BPF_REG_2), i0);
1560 this_prog.mk_binary (this_ins, BPF_ADD, this_prog.lookup_reg(BPF_REG_3),
1561 frame, newkey_ofs);
1562 this_prog.mk_mov (this_ins, this_prog.lookup_reg(BPF_REG_4),
1563 this_prog.new_imm(s->sort_direction));
1564 this_prog.mk_mov (this_ins, this_prog.lookup_reg(BPF_REG_5), limit);
1565 this_prog.mk_call (this_ins, BPF_FUNC_map_get_next_key, 5);
1566 this_prog.mk_jcond (this_ins, NE, this_prog.lookup_reg(BPF_REG_0), i0,
1567 join_block, load_block);
1568
1569 this_prog.use_tmp_space(16);
1570
1571 emit_jmp(load_block);
1572
1573 // Do loop body
1574 loop_break.push_back (join_block);
1575 loop_cont.push_back (iter_block);
1576
1577 set_block(body_block);
1578 emit_stmt(s->block);
1579 if (in_block ())
1580 emit_jmp(iter_block);
1581
1582 loop_cont.pop_back ();
1583 loop_break.pop_back ();
1584
1585 // Call map_get_next_key, exit loop if it doesn't return 0
1586 set_block(iter_block);
1587
1588 this_prog.load_map (this_ins, this_prog.lookup_reg(BPF_REG_1), map_id);
1589 this_prog.mk_st (this_ins, BPF_DW, frame, -8, key);
1590 this_prog.mk_binary (this_ins, BPF_ADD, this_prog.lookup_reg(BPF_REG_2),
1591 frame, key_ofs);
1592 this_prog.mk_binary (this_ins, BPF_ADD, this_prog.lookup_reg(BPF_REG_3),
1593 frame, newkey_ofs);
1594 this_prog.mk_mov (this_ins, this_prog.lookup_reg(BPF_REG_4),
1595 this_prog.new_imm(s->sort_direction));
1596 this_prog.mk_mov (this_ins, this_prog.lookup_reg(BPF_REG_5), limit);
1597 this_prog.mk_call (this_ins, BPF_FUNC_map_get_next_key, 5);
1598 this_prog.mk_jcond (this_ins, NE, this_prog.lookup_reg(BPF_REG_0), i0,
1599 join_block, load_block);
1600
1601 // Load next key, decrement limit if applicable
1602 set_block(load_block);
1603 this_prog.mk_ld (this_ins, BPF_DW, key, frame, -16);
1604
1605 if (s->limit)
1606 this_prog.mk_binary (this_ins, BPF_ADD, limit, limit, this_prog.new_imm(-1));
1607
1608 emit_jmp(body_block);
1609 set_block(join_block);
1610 }
1611
1612
1613 void
1614 bpf_unparser::visit_break_statement (break_statement* s)
1615 {
1616 if (loop_break.empty ())
1617 throw SEMANTIC_ERROR (_("cannot 'break' outside loop"), s->tok);
1618 emit_jmp (loop_break.back ());
1619 }
1620
1621 void
1622 bpf_unparser:: visit_continue_statement (continue_statement* s)
1623 {
1624 if (loop_cont.empty ())
1625 throw SEMANTIC_ERROR (_("cannot 'continue' outside loop"), s->tok);
1626 emit_jmp (loop_cont.back ());
1627 }
1628
1629 void
1630 bpf_unparser::visit_return_statement (return_statement* s)
1631 {
1632 if (func_return.empty ())
1633 throw SEMANTIC_ERROR (_("cannot 'return' outside function"), s->tok);
1634 assert (!func_return_val.empty ());
1635 if (s->value)
1636 emit_mov (func_return_val.back (), emit_expr (s->value));
1637 emit_jmp (func_return.back ());
1638 }
1639
1640 void
1641 bpf_unparser::visit_delete_statement (delete_statement *s)
1642 {
1643 expression *e = s->value;
1644 if (symbol *s = dynamic_cast<symbol *>(e))
1645 {
1646 vardecl *var = s->referent;
1647 if (var->arity != 0)
1648 throw SEMANTIC_ERROR (_("unimplemented delete of array"), s->tok);
1649
1650 auto g = glob.globals.find (var);
1651 if (g != glob.globals.end())
1652 {
1653 value *frame = this_prog.lookup_reg(BPF_REG_10);
1654 int key_ofs, val_ofs;
1655
1656 switch (var->type)
1657 {
1658 case pe_long:
1659 val_ofs = -8;
1660 this_prog.mk_st(this_ins, BPF_DW, frame, val_ofs,
1661 this_prog.new_imm(0));
1662 this_prog.mk_binary(this_ins, BPF_ADD,
1663 this_prog.lookup_reg(BPF_REG_3),
1664 frame, this_prog.new_imm(val_ofs));
1665 break;
1666 // ??? pe_string -> (2) TODO delete ref (but leave the storage for later cleanup of the entire containing struct?)
1667 default:
1668 goto err;
1669 }
1670
1671 key_ofs = val_ofs - 4;
1672 this_prog.mk_st(this_ins, BPF_W, frame, key_ofs,
1673 this_prog.new_imm(g->second.second));
1674 this_prog.use_tmp_space(-key_ofs);
1675
1676 this_prog.load_map(this_ins, this_prog.lookup_reg(BPF_REG_1),
1677 g->second.first);
1678 this_prog.mk_binary(this_ins, BPF_ADD,
1679 this_prog.lookup_reg(BPF_REG_2),
1680 frame, this_prog.new_imm(key_ofs));
1681 emit_mov(this_prog.lookup_reg(BPF_REG_4), this_prog.new_imm(0));
1682 this_prog.mk_call(this_ins, BPF_FUNC_map_update_elem, 4);
1683 return;
1684 }
1685
1686 auto i = this_locals->find (var);
1687 if (i != this_locals->end ())
1688 {
1689 emit_mov (i->second, this_prog.new_imm(0));
1690 return;
1691 }
1692 }
1693 else if (arrayindex *a = dynamic_cast<arrayindex *>(e))
1694 {
1695 if (symbol *a_sym = dynamic_cast<symbol *>(a->base))
1696 {
1697 vardecl *v = a_sym->referent;
1698 int key_ofs;
1699
1700 if (v->arity != 1)
1701 throw SEMANTIC_ERROR(_("unhandled multi-dimensional array"), v->tok);
1702
1703 auto g = glob.globals.find(v);
1704 if (g == glob.globals.end())
1705 throw SEMANTIC_ERROR(_("unknown array variable"), v->tok);
1706
1707 value *idx = emit_expr(a->indexes[0]);
1708 switch (v->index_types[0])
1709 {
1710 case pe_long:
1711 // Store the long on the stack and pass its address:
1712 key_ofs = -8;
1713 emit_long_arg(this_prog.lookup_reg(BPF_REG_2), key_ofs, idx);
1714 break;
1715 case pe_string:
1716 // Zero-pad and copy the string to the stack and pass its address:
1717 key_ofs = -BPF_MAXSTRINGLEN;
1718 emit_str_arg(this_prog.lookup_reg(BPF_REG_2), key_ofs, idx);
1719 break;
1720 default:
1721 throw SEMANTIC_ERROR(_("unhandled index type"), e->tok);
1722 }
1723
1724 this_prog.use_tmp_space(-key_ofs);
1725 this_prog.load_map(this_ins, this_prog.lookup_reg(BPF_REG_1),
1726 g->second.first);
1727 this_prog.mk_call(this_ins, BPF_FUNC_map_delete_elem, 2);
1728 return;
1729 }
1730 }
1731 err:
1732 throw SEMANTIC_ERROR (_("unknown lvalue"), e->tok);
1733 }
1734
1735 // Translate string escape characters.
1736 // Accepts strings produced by parse.cxx lexer::scan and
1737 // by the eBPF embedded-code assembler.
1738 //
1739 // PR23559: This is currently an eBPF-only version of the function
1740 // that does not translate octal escapes.
1741 std::string
1742 translate_escapes (const interned_string &str)
1743 {
1744 std::string result;
1745 bool saw_esc = false;
1746 for (interned_string::const_iterator j = str.begin();
1747 j != str.end(); ++j)
1748 {
1749 if (saw_esc)
1750 {
1751 saw_esc = false;
1752 switch (*j)
1753 {
1754 case 'f': result += '\f'; break;
1755 case 'n': result += '\n'; break;
1756 case 'r': result += '\r'; break;
1757 case 't': result += '\t'; break;
1758 case 'v': result += '\v'; break;
1759 default: result += *j; break;
1760 }
1761 }
1762 else if (*j == '\\')
1763 saw_esc = true;
1764 else
1765 result += *j;
1766 }
1767 return result;
1768 }
1769
1770 value *
1771 bpf_unparser::emit_literal_string (const std::string &str, const token *tok)
1772 {
1773 size_t str_bytes = str.size() + 1;
1774 if (str_bytes > BPF_MAXSTRINGLEN)
1775 throw SEMANTIC_ERROR(_("string literal too long"), tok);
1776 return this_prog.new_str(str); // will be lowered to a pointer by bpf-opt.cxx
1777 }
1778
1779 void
1780 bpf_unparser::visit_literal_string (literal_string* e)
1781 {
1782 interned_string v = e->value;
1783 std::string str = translate_escapes(v);
1784 result = emit_literal_string(str, e->tok);
1785 }
1786
1787 void
1788 bpf_unparser::visit_literal_number (literal_number* e)
1789 {
1790 result = this_prog.new_imm(e->value);
1791 }
1792
1793 void
1794 bpf_unparser::visit_binary_expression (binary_expression* e)
1795 {
1796 int code;
1797 if (e->op == "+")
1798 code = BPF_ADD;
1799 else if (e->op == "-")
1800 code = BPF_SUB;
1801 else if (e->op == "*")
1802 code = BPF_MUL;
1803 else if (e->op == "&")
1804 code = BPF_AND;
1805 else if (e->op == "|")
1806 code = BPF_OR;
1807 else if (e->op == "^")
1808 code = BPF_XOR;
1809 else if (e->op == "<<")
1810 code = BPF_LSH;
1811 else if (e->op == ">>")
1812 code = BPF_ARSH;
1813 else if (e->op == ">>>")
1814 code = BPF_RSH;
1815 else if (e->op == "/")
1816 code = BPF_DIV;
1817 else if (e->op == "%")
1818 code = BPF_MOD;
1819 else
1820 throw SEMANTIC_ERROR (_("unhandled binary operator"), e->tok);
1821
1822 value *s0 = this_prog.new_reg();
1823 // copy e->left into a seperate reg in case evaluating e->right
1824 // causes e->left to mutate (ex. x + x++).
1825 this_prog.mk_mov(this_ins, s0, emit_expr (e->left));
1826
1827 value *s1 = emit_expr (e->right);
1828 value *d = this_prog.new_reg ();
1829 this_prog.mk_binary (this_ins, code, d, s0, s1);
1830 result = d;
1831 }
1832
1833 void
1834 bpf_unparser::visit_unary_expression (unary_expression* e)
1835 {
1836 if (e->op == "-")
1837 {
1838 // Note that negative literals appear in the script langauge as
1839 // unary negations over positive literals.
1840 if (literal_number *lit = dynamic_cast<literal_number *>(e))
1841 result = this_prog.new_imm(-(uint64_t)lit->value);
1842 else
1843 {
1844 value *s = emit_expr (e->operand);
1845 value *d = this_prog.new_reg();
1846 this_prog.mk_unary (this_ins, BPF_NEG, d, s);
1847 result = d;
1848 }
1849 }
1850 else if (e->op == "~")
1851 {
1852 value *s1 = this_prog.new_imm(-1);
1853 value *s0 = emit_expr (e->operand);
1854 value *d = this_prog.new_reg ();
1855 this_prog.mk_binary (this_ins, BPF_XOR, d, s0, s1);
1856 result = d;
1857 }
1858 else if (e->op == "!")
1859 result = emit_bool (e);
1860 else if (e->op == "+")
1861 result = emit_expr (e->operand);
1862 else
1863 throw SEMANTIC_ERROR (_("unhandled unary operator"), e->tok);
1864 }
1865
1866 void
1867 bpf_unparser::visit_pre_crement (pre_crement* e)
1868 {
1869 int dir;
1870 if (e->op == "++")
1871 dir = 1;
1872 else if (e->op == "--")
1873 dir = -1;
1874 else
1875 throw SEMANTIC_ERROR (_("unhandled crement operator"), e->tok);
1876
1877 value *c = this_prog.new_imm(dir);
1878 value *v = emit_expr (e->operand);
1879 this_prog.mk_binary (this_ins, BPF_ADD, v, v, c);
1880 emit_store (e->operand, v);
1881 result = v;
1882 }
1883
1884 void
1885 bpf_unparser::visit_post_crement (post_crement* e)
1886 {
1887 int dir;
1888 if (e->op == "++")
1889 dir = 1;
1890 else if (e->op == "--")
1891 dir = -1;
1892 else
1893 throw SEMANTIC_ERROR (_("unhandled crement operator"), e->tok);
1894
1895 value *c = this_prog.new_imm(dir);
1896 value *r = this_prog.new_reg ();
1897 value *v = emit_expr (e->operand);
1898
1899 emit_mov (r, v);
1900 this_prog.mk_binary (this_ins, BPF_ADD, v, v, c);
1901 emit_store (e->operand, v);
1902 result = r;
1903 }
1904
1905 void
1906 bpf_unparser::visit_logical_or_expr (logical_or_expr* e)
1907 {
1908 result = emit_bool (e);
1909 }
1910
1911 void
1912 bpf_unparser::visit_logical_and_expr (logical_and_expr* e)
1913 {
1914 result = emit_bool (e);
1915 }
1916
1917 // ??? This matches the code in translate.cxx, but it looks like the
1918 // functionality has been disabled in the SystemTap parser.
1919 void
1920 bpf_unparser::visit_compound_expression (compound_expression* e)
1921 {
1922 e->left->visit(this);
1923 e->right->visit(this); // overwrite result of first expression
1924 }
1925
1926 void
1927 bpf_unparser::visit_comparison (comparison* e)
1928 {
1929 result = emit_bool (e);
1930 }
1931
1932 void
1933 bpf_unparser::visit_ternary_expression (ternary_expression* e)
1934 {
1935 block *join_block = this_prog.new_block ();
1936 value *r = this_prog.new_reg ();
1937
1938 if (!has_side_effects (e->truevalue))
1939 {
1940 block *else_block = this_prog.new_block ();
1941
1942 emit_mov (r, emit_expr (e->truevalue));
1943 emit_cond (e->cond, join_block, else_block);
1944
1945 set_block (else_block);
1946 emit_mov (r, emit_expr (e->falsevalue));
1947 emit_jmp (join_block);
1948 }
1949 else if (!has_side_effects (e->falsevalue))
1950 {
1951 block *then_block = this_prog.new_block ();
1952
1953 emit_mov (r, emit_expr (e->falsevalue));
1954 emit_cond (e->cond, join_block, then_block);
1955
1956 set_block (then_block);
1957 emit_mov (r, emit_expr (e->truevalue));
1958 emit_jmp (join_block);
1959 }
1960 else
1961 {
1962 block *then_block = this_prog.new_block ();
1963 block *else_block = this_prog.new_block ();
1964 emit_cond (e->cond, then_block, else_block);
1965
1966 set_block (then_block);
1967 emit_mov (r, emit_expr (e->truevalue));
1968 emit_jmp (join_block);
1969
1970 set_block (else_block);
1971 emit_mov (r, emit_expr (e->falsevalue));
1972 emit_jmp (join_block);
1973 }
1974
1975 set_block (join_block);
1976 result = r;
1977 }
1978
1979 void
1980 bpf_unparser::visit_assignment (assignment* e)
1981 {
1982 value *r = emit_expr (e->right);
1983
1984 if (e->op != "=")
1985 {
1986 int code;
1987 if (e->op == "+=")
1988 code = BPF_ADD;
1989 else if (e->op == "-=")
1990 code = BPF_SUB;
1991 else if (e->op == "*=")
1992 code = BPF_MUL;
1993 else if (e->op == "/=")
1994 code = BPF_DIV;
1995 else if (e->op == "%=")
1996 code = BPF_MOD;
1997 else if (e->op == "<<=")
1998 code = BPF_LSH;
1999 else if (e->op == ">>=")
2000 code = BPF_ARSH;
2001 else if (e->op == "&=")
2002 code = BPF_AND;
2003 else if (e->op == "^=")
2004 code = BPF_XOR;
2005 else if (e->op == "|=")
2006 code = BPF_OR;
2007 else
2008 throw SEMANTIC_ERROR (_("unhandled assignment operator"), e->tok);
2009
2010 value *l = emit_expr (e->left);
2011 this_prog.mk_binary (this_ins, code, l, l, r);
2012 r = l;
2013 }
2014
2015 emit_store (e->left, r);
2016 result = r;
2017 }
2018
2019 value *
2020 bpf_unparser::emit_context_var(bpf_context_vardecl *v)
2021 {
2022 // similar to visit_target_deref but the size/offset info
2023 // is given in v->size/v->offset instead of an expression.
2024 value *d = this_prog.new_reg();
2025
2026 if (v->size > 8)
2027 {
2028 // Compute a pointer but do not dereference. Needed
2029 // for array context variables.
2030 this_prog.mk_binary (this_ins, BPF_ADD, d, this_in_arg0,
2031 this_prog.new_imm(v->offset));
2032
2033 return d;
2034 }
2035
2036 value *frame = this_prog.lookup_reg(BPF_REG_10);
2037
2038 this_prog.mk_binary (this_ins, BPF_ADD, this_prog.lookup_reg(BPF_REG_3),
2039 this_in_arg0, this_prog.new_imm(v->offset));
2040 this_prog.mk_mov (this_ins, this_prog.lookup_reg(BPF_REG_2),
2041 this_prog.new_imm(v->size));
2042 this_prog.mk_binary (this_ins, BPF_ADD, this_prog.lookup_reg(BPF_REG_1),
2043 frame, this_prog.new_imm(-v->size));
2044 this_prog.use_tmp_space (v->size);
2045
2046 this_prog.mk_call (this_ins, BPF_FUNC_probe_read, 3);
2047
2048 int opc;
2049 switch (v->size)
2050 {
2051 case 1: opc = BPF_B; break;
2052 case 2: opc = BPF_H; break;
2053 case 4: opc = BPF_W; break;
2054 case 8: opc = BPF_DW; break;
2055
2056 default: assert(0);
2057 }
2058
2059 this_prog.mk_ld (this_ins, opc, d, frame, -v->size);
2060
2061 if (v->is_signed && v->size < 8)
2062 {
2063 value *sh = this_prog.new_imm ((8 - v->size) * 8);
2064 this_prog.mk_binary (this_ins, BPF_LSH, d, d, sh);
2065 this_prog.mk_binary (this_ins, BPF_ARSH, d, d, sh);
2066 }
2067
2068 return d;
2069 }
2070
2071 void
2072 bpf_unparser::visit_symbol (symbol *s)
2073 {
2074 vardecl *v = s->referent;
2075 assert (v->arity < 1);
2076
2077 if (bpf_context_vardecl *c = dynamic_cast<bpf_context_vardecl*>(v))
2078 {
2079 result = emit_context_var(c);
2080 return;
2081 }
2082
2083 auto g = glob.globals.find (v);
2084 if (g != glob.globals.end())
2085 {
2086 value *frame = this_prog.lookup_reg(BPF_REG_10);
2087 this_prog.mk_st(this_ins, BPF_W, frame, -4,
2088 this_prog.new_imm(g->second.second));
2089 this_prog.use_tmp_space(4);
2090
2091 this_prog.load_map(this_ins, this_prog.lookup_reg(BPF_REG_1),
2092 g->second.first);
2093 this_prog.mk_binary(this_ins, BPF_ADD, this_prog.lookup_reg(BPF_REG_2),
2094 frame, this_prog.new_imm(-4));
2095 this_prog.mk_call(this_ins, BPF_FUNC_map_lookup_elem, 2);
2096
2097 value *r0 = this_prog.lookup_reg(BPF_REG_0);
2098 value *i0 = this_prog.new_imm(0);
2099 block *cont_block = this_prog.new_block();
2100 block *exit_block = get_exit_block();
2101
2102 // Note that the kernel bpf verifier requires that we check that
2103 // the pointer is non-null.
2104 this_prog.mk_jcond(this_ins, EQ, r0, i0, exit_block, cont_block);
2105
2106 set_block(cont_block);
2107
2108 result = this_prog.new_reg();
2109 switch (v->type)
2110 {
2111 case pe_long:
2112 this_prog.mk_ld(this_ins, BPF_DW, result, r0, 0);
2113 break;
2114 case pe_string:
2115 // Just return the address of the string within the map:
2116 emit_mov(result, r0);
2117 break;
2118 default:
2119 throw SEMANTIC_ERROR (_("unhandled global variable type"), s->tok);
2120 }
2121 return;
2122 }
2123
2124 // ??? Maybe use result = this_locals.at (v);
2125 // to throw std::out_of_range on lookup failure.
2126 auto l = this_locals->find (v);
2127 if (l != this_locals->end())
2128 {
2129 result = (*l).second;
2130 return;
2131 }
2132 throw SEMANTIC_ERROR (_("unknown variable"), s->tok);
2133 }
2134
2135 void
2136 bpf_unparser::visit_arrayindex(arrayindex *e)
2137 {
2138 if (symbol *sym = dynamic_cast<symbol *>(e->base))
2139 {
2140 vardecl *v = sym->referent;
2141
2142 if (v->arity != 1)
2143 throw SEMANTIC_ERROR(_("unhandled multi-dimensional array"), v->tok);
2144
2145 auto g = glob.globals.find(v);
2146 if (g == glob.globals.end())
2147 throw SEMANTIC_ERROR(_("unknown array variable"), v->tok);
2148
2149 value *idx = emit_expr(e->indexes[0]);
2150 switch (v->index_types[0])
2151 {
2152 case pe_long:
2153 // Store the long on the stack and pass its address:
2154 emit_long_arg(this_prog.lookup_reg(BPF_REG_2), -8, idx);
2155 this_prog.use_tmp_space(8);
2156 break;
2157 case pe_string:
2158 // Zero-pad and copy the string to the stack and pass its address:
2159 emit_str_arg(this_prog.lookup_reg(BPF_REG_2), -BPF_MAXSTRINGLEN, idx);
2160 this_prog.use_tmp_space(BPF_MAXSTRINGLEN);
2161 break;
2162 default:
2163 throw SEMANTIC_ERROR(_("unhandled index type"), e->tok);
2164 }
2165
2166 this_prog.load_map(this_ins, this_prog.lookup_reg(BPF_REG_1),
2167 g->second.first);
2168
2169 value *r0 = this_prog.lookup_reg(BPF_REG_0);
2170 value *i0 = this_prog.new_imm(0);
2171 block *t_block = this_prog.new_block();
2172 block *f_block = this_prog.new_block();
2173 block *join_block = this_prog.new_block();
2174 result = this_prog.new_reg();
2175
2176 this_prog.mk_call(this_ins, BPF_FUNC_map_lookup_elem, 2);
2177 this_prog.mk_jcond(this_ins, EQ, r0, i0, t_block, f_block);
2178
2179 // Key is not in the array. Evaluate to 0.
2180 set_block(t_block);
2181 emit_mov(result, i0);
2182 emit_jmp(join_block);
2183
2184 // Key is in the array. Get value from stack.
2185 set_block(f_block);
2186 if (v->type == pe_long)
2187 this_prog.mk_ld(this_ins, BPF_DW, result, r0, 0);
2188 else
2189 emit_mov(result, r0);
2190
2191 emit_jmp(join_block);
2192 set_block(join_block);
2193 }
2194 else
2195 throw SEMANTIC_ERROR(_("unhandled arrayindex expression"), e->tok);
2196 }
2197
2198 void
2199 bpf_unparser::visit_array_in(array_in* e)
2200 {
2201 arrayindex *a = e->operand;
2202
2203 if (symbol *s = dynamic_cast<symbol *>(a->base))
2204 {
2205 vardecl *v = s->referent;
2206
2207 if (v->arity != 1)
2208 throw SEMANTIC_ERROR(_("unhandled multi-dimensional array"), v->tok);
2209
2210 auto g = glob.globals.find (v);
2211
2212 if (g == glob.globals.end())
2213 throw SEMANTIC_ERROR(_("unknown variable"), v->tok);
2214
2215 value *idx = emit_expr(a->indexes[0]);
2216
2217 switch(v->index_types[0])
2218 {
2219 case pe_long:
2220 // Store the long on the stack and pass its address:
2221 emit_long_arg(this_prog.lookup_reg(BPF_REG_2), -8, idx);
2222 this_prog.use_tmp_space(8);
2223 break;
2224 case pe_string:
2225 // Zero-pad and copy the string to the stack and pass its address:
2226 emit_str_arg(this_prog.lookup_reg(BPF_REG_2), -BPF_MAXSTRINGLEN, idx);
2227 this_prog.use_tmp_space(BPF_MAXSTRINGLEN);
2228 break;
2229 default:
2230 throw SEMANTIC_ERROR(_("unhandled index type"), e->tok);
2231 }
2232
2233 this_prog.load_map(this_ins, this_prog.lookup_reg(BPF_REG_1),
2234 g->second.first);
2235 this_prog.mk_call(this_ins, BPF_FUNC_map_lookup_elem, 2);
2236
2237 value *r0 = this_prog.lookup_reg(BPF_REG_0);
2238 value *i0 = this_prog.new_imm(0);
2239 value *i1 = this_prog.new_imm(1);
2240 value *d = this_prog.new_reg();
2241
2242 block *b0 = this_prog.new_block();
2243 block *b1 = this_prog.new_block();
2244 block *cont_block = this_prog.new_block();
2245
2246 this_prog.mk_jcond(this_ins, EQ, r0, i0, b0, b1);
2247
2248 // d = 0
2249 set_block(b0);
2250 this_prog.mk_mov(this_ins, d, i0);
2251 b0->fallthru = new edge(b0, cont_block);
2252
2253 // d = 1
2254 set_block(b1);
2255 this_prog.mk_mov(this_ins, d, i1);
2256 b1->fallthru = new edge(b1, cont_block);
2257
2258 set_block(cont_block);
2259 result = d;
2260
2261 return;
2262 }
2263 /// ??? hist_op
2264
2265 throw SEMANTIC_ERROR(_("unhandled operand type"), a->base->tok);
2266 }
2267
2268 void
2269 bpf_unparser::visit_target_deref (target_deref* e)
2270 {
2271 // ??? For some hosts, including x86_64, it works to read userspace
2272 // and kernelspace with the same function. For others, like s390x,
2273 // this only works to read kernelspace.
2274
2275 value *src = emit_expr (e->addr);
2276 value *frame = this_prog.lookup_reg (BPF_REG_10);
2277
2278 this_prog.mk_mov (this_ins, this_prog.lookup_reg(BPF_REG_3), src);
2279 this_prog.mk_mov (this_ins, this_prog.lookup_reg(BPF_REG_2),
2280 this_prog.new_imm (e->size));
2281 this_prog.mk_binary (this_ins, BPF_ADD, this_prog.lookup_reg(BPF_REG_1),
2282 frame, this_prog.new_imm (-(int64_t)e->size));
2283 this_prog.use_tmp_space(e->size);
2284
2285 this_prog.mk_call(this_ins, BPF_FUNC_probe_read, 3);
2286
2287 value *d = this_prog.new_reg ();
2288 int opc;
2289 switch (e->size)
2290 {
2291 case 1: opc = BPF_B; break;
2292 case 2: opc = BPF_H; break;
2293 case 4: opc = BPF_W; break;
2294 case 8: opc = BPF_DW; break;
2295 default:
2296 throw SEMANTIC_ERROR(_("unhandled deref size"), e->tok);
2297 }
2298 this_prog.mk_ld (this_ins, opc, d, frame, -e->size);
2299
2300 if (e->signed_p && e->size < 8)
2301 {
2302 value *sh = this_prog.new_imm ((8 - e->size) * 8);
2303 this_prog.mk_binary (this_ins, BPF_LSH, d, d, sh);
2304 this_prog.mk_binary (this_ins, BPF_ARSH, d, d, sh);
2305 }
2306 result = d;
2307 }
2308
2309 void
2310 bpf_unparser::visit_target_register (target_register* e)
2311 {
2312 // ??? Should not hard-code register size.
2313 int size = sizeof(void *);
2314 // ??? Should not hard-code register offsets in pr_regs.
2315 int ofs = 0;
2316 switch (e->regno)
2317 {
2318 #if defined(__i386__)
2319 case 0: ofs = offsetof(pt_regs, eax); break;
2320 case 1: ofs = offsetof(pt_regs, ecx); break;
2321 case 2: ofs = offsetof(pt_regs, edx); break;
2322 case 3: ofs = offsetof(pt_regs, ebx); break;
2323 case 4: ofs = offsetof(pt_regs, esp); break;
2324 case 5: ofs = offsetof(pt_regs, ebp); break;
2325 case 6: ofs = offsetof(pt_regs, esi); break;
2326 case 7: ofs = offsetof(pt_regs, edi); break;
2327 case 8: ofs = offsetof(pt_regs, eip); break;
2328 #elif defined(__x86_64__)
2329 case 0: ofs = offsetof(pt_regs, rax); break;
2330 case 1: ofs = offsetof(pt_regs, rdx); break;
2331 case 2: ofs = offsetof(pt_regs, rcx); break;
2332 case 3: ofs = offsetof(pt_regs, rbx); break;
2333 case 4: ofs = offsetof(pt_regs, rsi); break;
2334 case 5: ofs = offsetof(pt_regs, rdi); break;
2335 case 6: ofs = offsetof(pt_regs, rbp); break;
2336 case 7: ofs = offsetof(pt_regs, rsp); break;
2337 case 8: ofs = offsetof(pt_regs, r8); break;
2338 case 9: ofs = offsetof(pt_regs, r9); break;
2339 case 10: ofs = offsetof(pt_regs, r10); break;
2340 case 11: ofs = offsetof(pt_regs, r11); break;
2341 case 12: ofs = offsetof(pt_regs, r12); break;
2342 case 13: ofs = offsetof(pt_regs, r13); break;
2343 case 14: ofs = offsetof(pt_regs, r14); break;
2344 case 15: ofs = offsetof(pt_regs, r15); break;
2345 case 16: ofs = offsetof(pt_regs, rip); break;
2346 #elif defined(__arm__)
2347 case 0: ofs = offsetof(pt_regs, uregs[0]); break;
2348 case 1: ofs = offsetof(pt_regs, uregs[1]); break;
2349 case 2: ofs = offsetof(pt_regs, uregs[2]); break;
2350 case 3: ofs = offsetof(pt_regs, uregs[3]); break;
2351 case 4: ofs = offsetof(pt_regs, uregs[4]); break;
2352 case 5: ofs = offsetof(pt_regs, uregs[5]); break;
2353 case 6: ofs = offsetof(pt_regs, uregs[6]); break;
2354 case 7: ofs = offsetof(pt_regs, uregs[7]); break;
2355 case 8: ofs = offsetof(pt_regs, uregs[8]); break;
2356 case 9: ofs = offsetof(pt_regs, uregs[9]); break;
2357 case 10: ofs = offsetof(pt_regs, uregs[10]); break;
2358 case 11: ofs = offsetof(pt_regs, uregs[11]); break;
2359 case 12: ofs = offsetof(pt_regs, uregs[12]); break;
2360 case 13: ofs = offsetof(pt_regs, uregs[13]); break;
2361 case 14: ofs = offsetof(pt_regs, uregs[14]); break;
2362 case 15: ofs = offsetof(pt_regs, uregs[15]); break;
2363 #elif defined(__aarch64__)
2364 case 0: ofs = offsetof(user_pt_regs, regs[0]); break;
2365 case 1: ofs = offsetof(user_pt_regs, regs[1]); break;
2366 case 2: ofs = offsetof(user_pt_regs, regs[2]); break;
2367 case 3: ofs = offsetof(user_pt_regs, regs[3]); break;
2368 case 4: ofs = offsetof(user_pt_regs, regs[4]); break;
2369 case 5: ofs = offsetof(user_pt_regs, regs[5]); break;
2370 case 6: ofs = offsetof(user_pt_regs, regs[6]); break;
2371 case 7: ofs = offsetof(user_pt_regs, regs[7]); break;
2372 case 8: ofs = offsetof(user_pt_regs, regs[8]); break;
2373 case 9: ofs = offsetof(user_pt_regs, regs[9]); break;
2374 case 10: ofs = offsetof(user_pt_regs, regs[10]); break;
2375 case 11: ofs = offsetof(user_pt_regs, regs[11]); break;
2376 case 12: ofs = offsetof(user_pt_regs, regs[12]); break;
2377 case 13: ofs = offsetof(user_pt_regs, regs[13]); break;
2378 case 14: ofs = offsetof(user_pt_regs, regs[14]); break;
2379 case 15: ofs = offsetof(user_pt_regs, regs[15]); break;
2380 case 16: ofs = offsetof(user_pt_regs, regs[16]); break;
2381 case 17: ofs = offsetof(user_pt_regs, regs[17]); break;
2382 case 18: ofs = offsetof(user_pt_regs, regs[18]); break;
2383 case 19: ofs = offsetof(user_pt_regs, regs[19]); break;
2384 case 20: ofs = offsetof(user_pt_regs, regs[20]); break;
2385 case 21: ofs = offsetof(user_pt_regs, regs[21]); break;
2386 case 22: ofs = offsetof(user_pt_regs, regs[22]); break;
2387 case 23: ofs = offsetof(user_pt_regs, regs[23]); break;
2388 case 24: ofs = offsetof(user_pt_regs, regs[24]); break;
2389 case 25: ofs = offsetof(user_pt_regs, regs[25]); break;
2390 case 26: ofs = offsetof(user_pt_regs, regs[26]); break;
2391 case 27: ofs = offsetof(user_pt_regs, regs[27]); break;
2392 case 28: ofs = offsetof(user_pt_regs, regs[28]); break;
2393 case 29: ofs = offsetof(user_pt_regs, regs[29]); break;
2394 case 30: ofs = offsetof(user_pt_regs, regs[30]); break;
2395 case 31: ofs = offsetof(user_pt_regs, sp); break;
2396 #elif defined(__powerpc__)
2397 case 0: ofs = offsetof(pt_regs, gpr[0]); break;
2398 case 1: ofs = offsetof(pt_regs, gpr[1]); break;
2399 case 2: ofs = offsetof(pt_regs, gpr[2]); break;
2400 case 3: ofs = offsetof(pt_regs, gpr[3]); break;
2401 case 4: ofs = offsetof(pt_regs, gpr[4]); break;
2402 case 5: ofs = offsetof(pt_regs, gpr[5]); break;
2403 case 6: ofs = offsetof(pt_regs, gpr[6]); break;
2404 case 7: ofs = offsetof(pt_regs, gpr[7]); break;
2405 case 8: ofs = offsetof(pt_regs, gpr[8]); break;
2406 case 9: ofs = offsetof(pt_regs, gpr[9]); break;
2407 case 10: ofs = offsetof(pt_regs, gpr[10]); break;
2408 case 11: ofs = offsetof(pt_regs, gpr[11]); break;
2409 case 12: ofs = offsetof(pt_regs, gpr[12]); break;
2410 case 13: ofs = offsetof(pt_regs, gpr[13]); break;
2411 case 14: ofs = offsetof(pt_regs, gpr[14]); break;
2412 case 15: ofs = offsetof(pt_regs, gpr[15]); break;
2413 case 16: ofs = offsetof(pt_regs, gpr[16]); break;
2414 case 17: ofs = offsetof(pt_regs, gpr[17]); break;
2415 case 18: ofs = offsetof(pt_regs, gpr[18]); break;
2416 case 19: ofs = offsetof(pt_regs, gpr[19]); break;
2417 case 20: ofs = offsetof(pt_regs, gpr[20]); break;
2418 case 21: ofs = offsetof(pt_regs, gpr[21]); break;
2419 case 22: ofs = offsetof(pt_regs, gpr[22]); break;
2420 case 23: ofs = offsetof(pt_regs, gpr[23]); break;
2421 case 24: ofs = offsetof(pt_regs, gpr[24]); break;
2422 case 25: ofs = offsetof(pt_regs, gpr[25]); break;
2423 case 26: ofs = offsetof(pt_regs, gpr[26]); break;
2424 case 27: ofs = offsetof(pt_regs, gpr[27]); break;
2425 case 28: ofs = offsetof(pt_regs, gpr[28]); break;
2426 case 29: ofs = offsetof(pt_regs, gpr[29]); break;
2427 case 30: ofs = offsetof(pt_regs, gpr[30]); break;
2428 case 31: ofs = offsetof(pt_regs, gpr[31]); break;
2429 case 64: ofs = offsetof(pt_regs, ccr); break;
2430 case 66: ofs = offsetof(pt_regs, msr); break;
2431 case 101: ofs = offsetof(pt_regs, xer); break;
2432 case 108: ofs = offsetof(pt_regs, link); break;
2433 case 109: ofs = offsetof(pt_regs, ctr); break;
2434 case 118: ofs = offsetof(pt_regs, dsisr); break;
2435 case 119: ofs = offsetof(pt_regs, dar); break;
2436 # if !defined(__powerpc64__)
2437 case 100: ofs = offsetof(pt_regs, mq); break;
2438 # endif
2439 // ??? NIP is not assigned to a dwarf register number at all.
2440 #elif defined(__s390__)
2441 case 0: ofs = offsetof(user_regs_struct, gprs[0]); break;
2442 case 1: ofs = offsetof(user_regs_struct, gprs[1]); break;
2443 case 2: ofs = offsetof(user_regs_struct, gprs[2]); break;
2444 case 3: ofs = offsetof(user_regs_struct, gprs[3]); break;
2445 case 4: ofs = offsetof(user_regs_struct, gprs[4]); break;
2446 case 5: ofs = offsetof(user_regs_struct, gprs[5]); break;
2447 case 6: ofs = offsetof(user_regs_struct, gprs[6]); break;
2448 case 7: ofs = offsetof(user_regs_struct, gprs[7]); break;
2449 case 8: ofs = offsetof(user_regs_struct, gprs[8]); break;
2450 case 9: ofs = offsetof(user_regs_struct, gprs[9]); break;
2451 case 10: ofs = offsetof(user_regs_struct, gprs[10]); break;
2452 case 11: ofs = offsetof(user_regs_struct, gprs[11]); break;
2453 case 12: ofs = offsetof(user_regs_struct, gprs[12]); break;
2454 case 13: ofs = offsetof(user_regs_struct, gprs[13]); break;
2455 case 14: ofs = offsetof(user_regs_struct, gprs[14]); break;
2456 case 15: ofs = offsetof(user_regs_struct, gprs[15]); break;
2457 // Note that the FPRs are not numbered sequentially
2458 case 16: ofs = offsetof(user_regs_struct, fp_regs.fprs[0]); break;
2459 case 17: ofs = offsetof(user_regs_struct, fp_regs.fprs[2]); break;
2460 case 18: ofs = offsetof(user_regs_struct, fp_regs.fprs[4]); break;
2461 case 19: ofs = offsetof(user_regs_struct, fp_regs.fprs[6]); break;
2462 case 20: ofs = offsetof(user_regs_struct, fp_regs.fprs[1]); break;
2463 case 21: ofs = offsetof(user_regs_struct, fp_regs.fprs[3]); break;
2464 case 22: ofs = offsetof(user_regs_struct, fp_regs.fprs[5]); break;
2465 case 23: ofs = offsetof(user_regs_struct, fp_regs.fprs[7]); break;
2466 case 24: ofs = offsetof(user_regs_struct, fp_regs.fprs[8]); break;
2467 case 25: ofs = offsetof(user_regs_struct, fp_regs.fprs[10]); break;
2468 case 26: ofs = offsetof(user_regs_struct, fp_regs.fprs[12]); break;
2469 case 27: ofs = offsetof(user_regs_struct, fp_regs.fprs[14]); break;
2470 case 28: ofs = offsetof(user_regs_struct, fp_regs.fprs[9]); break;
2471 case 29: ofs = offsetof(user_regs_struct, fp_regs.fprs[11]); break;
2472 case 30: ofs = offsetof(user_regs_struct, fp_regs.fprs[13]); break;
2473 case 31: ofs = offsetof(user_regs_struct, fp_regs.fprs[15]); break;
2474 // ??? Omitting CTRs (not in user_regs_struct)
2475 // ??? Omitting ACRs (lazy, and unlikely to appear in unwind)
2476 case 64: ofs = offsetof(user_regs_struct, psw.mask); break;
2477 case 65: ofs = offsetof(user_regs_struct, psw.addr); break;
2478 #endif
2479 default:
2480 throw SEMANTIC_ERROR(_("unhandled register number"), e->tok);
2481 }
2482
2483 value *frame = this_prog.lookup_reg (BPF_REG_10);
2484 this_prog.mk_binary (this_ins, BPF_ADD, this_prog.lookup_reg(BPF_REG_3),
2485 this_in_arg0, this_prog.new_imm (ofs));
2486 this_prog.mk_mov (this_ins, this_prog.lookup_reg(BPF_REG_2),
2487 this_prog.new_imm (size));
2488 this_prog.mk_binary (this_ins, BPF_ADD, this_prog.lookup_reg(BPF_REG_1),
2489 frame, this_prog.new_imm (-size));
2490 this_prog.use_tmp_space(size);
2491
2492 this_prog.mk_call(this_ins, BPF_FUNC_probe_read, 3);
2493
2494 value *d = this_prog.new_reg ();
2495 int opc;
2496 switch (size)
2497 {
2498 case 4: opc = BPF_W; break;
2499 case 8: opc = BPF_DW; break;
2500 default:
2501 throw SEMANTIC_ERROR(_("unhandled register size"), e->tok);
2502 }
2503 this_prog.mk_ld (this_ins, opc, d, frame, -size);
2504 result = d;
2505 }
2506
2507 // Emit unrolled-loop code to write string literal from src to
2508 // dest[+ofs] in 4-byte chunks, with optional zero-padding up to
2509 // BPF_MAXSTRINGLEN.
2510 //
2511 // ??? Could use 8-byte chunks if we're starved for instruction count.
2512 // ??? Endianness of the target comes into play here.
2513 value *
2514 emit_simple_literal_str(program &this_prog, insn_inserter &this_ins,
2515 value *dest, int ofs, std::string &src, bool zero_pad)
2516 {
2517 #ifdef DEBUG_CODEGEN
2518 this_ins.notes.push("str");
2519 #endif
2520
2521 size_t str_bytes = src.size() + 1;
2522 size_t str_words = (str_bytes + 3) / 4;
2523
2524 for (unsigned i = 0; i < str_words; ++i)
2525 {
2526 uint32_t word = 0;
2527 for (unsigned j = 0; j < 4; ++j)
2528 if (i * 4 + j < str_bytes - 1)
2529 {
2530 // ??? assuming little-endian target
2531 word |= (uint32_t)src[i * 4 + j] << (j * 8);
2532 }
2533 this_prog.mk_st(this_ins, BPF_W,
2534 dest, (int32_t)i * 4 + ofs,
2535 this_prog.new_imm(word));
2536 }
2537
2538 // XXX: bpf_map_update_elem and bpf_map_lookup_elem will always copy
2539 // exactly BPF_MAXSTRINGLEN bytes, which can cause problems with
2540 // garbage data beyond the end of the string, particularly for map
2541 // keys. The silliest way to solve this is by padding every string
2542 // constant to BPF_MAXSTRINGLEN bytes, but the stack isn't really
2543 // big enough for this to work with practical programs.
2544 //
2545 // So instead we have this optional code to pad the string, and
2546 // enable the option only when copying a string to a map key.
2547 if (zero_pad)
2548 {
2549 for (unsigned i = str_words; i < BPF_MAXSTRINGLEN / 4; i++)
2550 {
2551 this_prog.mk_st(this_ins, BPF_W,
2552 dest, (int32_t)i * 4 + ofs,
2553 this_prog.new_imm(0));
2554 }
2555 }
2556
2557 value *out = this_prog.new_reg();
2558 this_prog.mk_binary(this_ins, BPF_ADD, out,
2559 dest, this_prog.new_imm(ofs));
2560
2561 #ifdef DEBUG_CODEGEN
2562 this_ins.notes.pop(); // str
2563 #endif
2564 return out;
2565 }
2566
2567 // Emit unrolled-loop code to write string value from src to
2568 // dest[+ofs] in 4-byte chunks, with optional zero-padding up to
2569 // BPF_MAXSTRINGLEN.
2570 //
2571 // TODO (PR23860): This code does not work when the source and target
2572 // regions overlap.
2573 //
2574 // ??? Could use 8-byte chunks if we're starved for instruction count.
2575 // ??? Endianness of the target may come into play here.
2576 value *
2577 bpf_unparser::emit_string_copy(value *dest, int ofs, value *src, bool zero_pad)
2578 {
2579 if (src->is_str())
2580 {
2581 /* If src is a string literal, its exact length is known and
2582 we can emit simpler, unconditional string copying code. */
2583 std::string str = src->str();
2584 return emit_simple_literal_str(this_prog, this_ins,
2585 dest, ofs, str, zero_pad);
2586 }
2587
2588 #ifdef DEBUG_CODEGEN
2589 this_ins.notes.push(zero_pad ? "strcpy_zero_pad" : "strcpy");
2590 #endif
2591
2592 size_t str_bytes = BPF_MAXSTRINGLEN;
2593 size_t str_words = (str_bytes + 3) / 4;
2594
2595 block *join_block = this_prog.new_block();
2596
2597 /* block_A[i] copies src[4*i] to dest[4*i+ofs];
2598 block_B[i] copies 0 to dest[4*i+ofs].
2599 Since block_B[0] is never branched to, we set it to NULL. */
2600 std::vector<block *> block_A, block_B;
2601 block_A.push_back(this_ins.get_block());
2602 if (zero_pad) block_B.push_back(NULL);
2603
2604 for (unsigned i = 0; i < str_words; ++i)
2605 {
2606 block *next_block;
2607 if (i < str_words - 1)
2608 {
2609 /* Create block_A[i+1], block_B[i+1]: */
2610 block_A.push_back(this_prog.new_block());
2611 if (zero_pad) block_B.push_back(this_prog.new_block());
2612 next_block = block_A[i+1];
2613 }
2614 else
2615 {
2616 next_block = join_block;
2617 }
2618
2619 set_block(block_A[i]);
2620
2621 value *word = this_prog.new_reg();
2622 this_prog.mk_ld(this_ins, BPF_W, word,
2623 src, (int32_t)i * 4);
2624 this_prog.mk_st(this_ins, BPF_W,
2625 dest, (int32_t)i * 4 + ofs,
2626 word);
2627
2628 /* Finish unconditionally after copying BPF_MAXSTRINGLEN bytes: */
2629 if (i == str_words - 1)
2630 {
2631 emit_jmp(next_block);
2632 continue;
2633 }
2634
2635 // Determining whether a word contains a NUL byte is a neat bit-fiddling puzzle.
2636 // Kudos go to Valgrind and Memcheck for showing the way, along the lines of:
2637 //
2638 // b1 := word & 0xff; nz1 := (-b1)|b1; all_nz = nz1
2639 // b2 := (word >> 8) & 0xff; nz2 := (-b2)|b2; all_nz = all_nz & nz2
2640 // b3 := (word >> 16) & 0xff; nz3 := (-b3)|b3; all_nz = all_nz & nz3
2641 // b4 := (word >> 24) & 0xff; nz4 := (-b4)|b4; all_nz = all_nz & nz4
2642 // all_nz := nz1 & nz2 & nz3 & nz4
2643 //
2644 // Here, nzX is 0 iff bX is NUL, all_nz is 0 iff word contains a NUL byte.
2645 value *all_nz = this_prog.new_reg();
2646 value *bN = this_prog.new_reg();
2647 value *nZ = this_prog.new_reg();
2648 for (unsigned j = 0; j < 4; j++)
2649 {
2650 unsigned shift = 8*j;
2651 if (shift != 0)
2652 {
2653 this_prog.mk_binary(this_ins, BPF_RSH, bN, word, this_prog.new_imm(shift));
2654 }
2655 else
2656 {
2657 emit_mov(bN, word);
2658 }
2659 this_prog.mk_binary(this_ins, BPF_AND, bN, bN, this_prog.new_imm(0xff));
2660 this_prog.mk_unary(this_ins, BPF_NEG, nZ, bN);
2661 this_prog.mk_binary(this_ins, BPF_OR, nZ, nZ, bN);
2662 if (j == 0)
2663 {
2664 emit_mov(all_nz, nZ);
2665 }
2666 else
2667 {
2668 this_prog.mk_binary(this_ins, BPF_AND, all_nz, all_nz, nZ);
2669 }
2670 }
2671
2672 this_prog.mk_jcond(this_ins, EQ, all_nz, this_prog.new_imm(0),
2673 zero_pad ? block_B[i+1] : join_block, next_block);
2674 }
2675
2676 // XXX: Zero-padding is only used under specific circumstances;
2677 // see the corresponding comment in emit_simple_literal_str().
2678 if (zero_pad)
2679 {
2680 for (unsigned i = 0; i < str_words; ++i)
2681 {
2682 /* Since block_B[0] is never branched to, it was set to NULL. */
2683 if (block_B[i] == NULL) continue;
2684
2685 set_block(block_B[i]);
2686 this_prog.mk_st(this_ins, BPF_W,
2687 dest, (int32_t)i * 4 + ofs,
2688 this_prog.new_imm(0));
2689
2690 emit_jmp(i < str_words - 1 ? block_B[i+1] : join_block);
2691 }
2692 }
2693
2694 set_block(join_block);
2695
2696 value *out = this_prog.new_reg();
2697 this_prog.mk_binary(this_ins, BPF_ADD, out,
2698 dest, this_prog.new_imm(ofs));
2699 #ifdef DEBUG_CODEGEN
2700 this_ins.notes.pop(); // strcpy
2701 #endif
2702 return out;
2703 }
2704
2705 // Used for passing long arguments on the stack where an address is
2706 // expected. Store val in a stack slot at offset ofs and store the
2707 // stack address of val in arg.
2708 void
2709 bpf_unparser::emit_long_arg(value *arg, int ofs, value *val)
2710 {
2711 value *frame = this_prog.lookup_reg(BPF_REG_10);
2712 this_prog.mk_st(this_ins, BPF_DW, frame, ofs, val);
2713 this_prog.mk_binary(this_ins, BPF_ADD, arg,
2714 frame, this_prog.new_imm(ofs));
2715 }
2716
2717 // Used for passing string arguments on the stack where an address is
2718 // expected. Zero-pad and copy str to the stack at offset ofs and
2719 // store the stack address of str in arg. Zero-padding is required
2720 // since functions such as map_update_elem will expect a fixed-length
2721 // value of BPF_MAXSTRINGLEN for string map keys.
2722 void
2723 bpf_unparser::emit_str_arg(value *arg, int ofs, value *str)
2724 {
2725 value *frame = this_prog.lookup_reg(BPF_REG_10);
2726 value *out = emit_string_copy(frame, ofs, str, true /* zero pad */);
2727 emit_mov(arg, out);
2728 }
2729
2730 value *
2731 bpf_unparser::emit_functioncall (functiondecl *f, const std::vector<value *>& args)
2732 {
2733 // Create a new map for the function's local variables.
2734 locals_map *locals = new_locals(f->locals);
2735
2736 // Install locals in the map.
2737 unsigned n = args.size();
2738 for (unsigned i = 0; i < n; ++i)
2739 {
2740 const locals_map::value_type v (f->formal_args[i], args[i]);
2741 auto ok = locals->insert (v);
2742 assert (ok.second);
2743 }
2744
2745 locals_map *old_locals = this_locals;
2746 this_locals = locals;
2747
2748 block *join_block = this_prog.new_block ();
2749 value *retval = this_prog.new_reg ();
2750
2751 func_calls.push_back (f);
2752 func_return.push_back (join_block);
2753 func_return_val.push_back (retval);
2754 emit_stmt (f->body);
2755 func_return_val.pop_back ();
2756 func_return.pop_back ();
2757 func_calls.pop_back ();
2758
2759 if (in_block ())
2760 emit_jmp (join_block);
2761 set_block (join_block);
2762
2763 this_locals = old_locals;
2764 delete locals;
2765
2766 return retval;
2767 }
2768
2769 void
2770 bpf_unparser::visit_functioncall (functioncall *e)
2771 {
2772 // ??? Function overloading isn't handled.
2773 if (e->referents.size () != 1)
2774 throw SEMANTIC_ERROR (_("unhandled function overloading"), e->tok);
2775 functiondecl *f = e->referents[0];
2776
2777 // ??? For now, always inline the function call.
2778 for (auto i = func_calls.begin(); i != func_calls.end(); ++i)
2779 if (f == *i)
2780 throw SEMANTIC_ERROR (_("unhandled function recursion"), e->tok);
2781
2782 // XXX: Should have been checked in earlier pass.
2783 assert (e->args.size () == f->formal_args.size ());
2784
2785 // Evaluate and collect the function arguments.
2786 std::vector<value *> args;
2787 for (unsigned n = e->args.size (), i = 0; i < n; ++i)
2788 {
2789 value *r = this_prog.new_reg ();
2790 emit_mov (r, emit_expr (e->args[i]));
2791 args.push_back(r);
2792 }
2793
2794 result = emit_functioncall(f, args);
2795 }
2796
2797 static void
2798 print_format_add_tag(std::string& format)
2799 {
2800 // surround the string with <MODNAME>...</MODNAME> to facilitate
2801 // stapbpf recovering it from debugfs.
2802 std::string start_tag = module_name;
2803 start_tag = "<" + start_tag.erase(4,1) + ">";
2804 std::string end_tag = start_tag + "\n";
2805 end_tag.insert(1, "/");
2806 format = start_tag + format + end_tag;
2807 }
2808
2809 static void
2810 print_format_add_tag(print_format *e)
2811 {
2812 if (e->tag)
2813 return;
2814
2815 e->tag = true;
2816 // surround the string with <MODNAME>...</MODNAME> to facilitate
2817 // stapbpf recovering it from debugfs.
2818 std::string start_tag = module_name;
2819 start_tag = "<" + start_tag.erase(4, 1) + ">";
2820 std::string end_tag = start_tag + "\n";
2821 end_tag.insert(1, "/");
2822 e->raw_components.insert(0, start_tag);
2823 e->raw_components.append(end_tag);
2824
2825 if (e->components.empty())
2826 {
2827 print_format::format_component c;
2828 c.literal_string = start_tag + end_tag;
2829 e->components.insert(e->components.begin(), c);
2830 }
2831 else
2832 {
2833 if (e->components[0].type == print_format::conv_literal)
2834 {
2835 std::string s = start_tag
2836 + e->components[0].literal_string.to_string();
2837 e->components[0].literal_string = s;
2838 }
2839 else
2840 {
2841 print_format::format_component c;
2842 c.literal_string = start_tag;
2843 e->components.insert(e->components.begin(), c);
2844 }
2845
2846 if (e->components.back().type == print_format::conv_literal)
2847 {
2848 std::string s = end_tag
2849 + e->components.back().literal_string.to_string();
2850 e->components.back().literal_string = s;
2851 }
2852 else
2853 {
2854 print_format::format_component c;
2855 c.literal_string = end_tag;
2856 e->components.insert(e->components.end(), c);
2857 }
2858 }
2859 }
2860
2861 value *
2862 bpf_unparser::emit_print_format (const std::string& format,
2863 const std::vector<value *>& actual,
2864 bool print_to_stream)
2865 {
2866 size_t nargs = actual.size();
2867
2868 // The bpf verifier requires that the format string be stored on the
2869 // bpf program stack. This is handled by bpf-opt.cxx lowering STR values.
2870 size_t format_bytes = format.size() + 1;
2871 this_prog.mk_mov(this_ins, this_prog.lookup_reg(BPF_REG_1),
2872 this_prog.new_str(format, true /*format_str*/));
2873 emit_mov(this_prog.lookup_reg(BPF_REG_2), this_prog.new_imm(format_bytes));
2874 for (size_t i = 0; i < nargs; ++i)
2875 emit_mov(this_prog.lookup_reg(BPF_REG_3 + i), actual[i]);
2876
2877 if (print_to_stream)
2878 this_prog.mk_call(this_ins, BPF_FUNC_trace_printk, nargs + 2);
2879 else
2880 {
2881 this_prog.mk_call(this_ins, BPF_FUNC_sprintf, nargs + 2);
2882 return this_prog.lookup_reg(BPF_REG_0);
2883 }
2884 return NULL;
2885 }
2886
2887 void
2888 bpf_unparser::visit_print_format (print_format *e)
2889 {
2890 if (e->hist)
2891 throw SEMANTIC_ERROR (_("unhandled histogram print"), e->tok);
2892
2893 if (e->print_to_stream)
2894 print_format_add_tag(e);
2895
2896 // ??? Traditional stap allows max 32 args; trace_printk allows only 3.
2897 // ??? Could split the print into multiple calls, such that each is
2898 // under the limit.
2899 size_t nargs = e->args.size();
2900 size_t i;
2901 if (nargs > 3)
2902 throw SEMANTIC_ERROR(_NF("additional argument to print",
2903 "too many arguments to print (%zu)",
2904 e->args.size(), e->args.size()), e->tok);
2905
2906 std::vector<value *> actual;
2907 for (i = 0; i < nargs; ++i)
2908 actual.push_back(emit_expr(e->args[i]));
2909
2910 std::string format;
2911 if (e->print_with_format)
2912 {
2913 // ??? If this is a long string with no actual arguments,
2914 // intern the string as a global and use "%s" as the format.
2915 interned_string fstr = e->raw_components;
2916 format += translate_escapes(fstr);
2917 }
2918 else
2919 {
2920 // Synthesize a print-format string if the user didn't
2921 // provide one; the synthetic string simply contains one
2922 // directive for each argument.
2923 std::string delim;
2924 if (e->print_with_delim)
2925 {
2926 interned_string dstr = e->delimiter;
2927 for (interned_string::const_iterator j = dstr.begin();
2928 j != dstr.end(); ++j)
2929 {
2930 if (*j == '%')
2931 delim += '%';
2932 delim += *j;
2933 }
2934 }
2935
2936 for (i = 0; i < nargs; ++i)
2937 {
2938 if (i > 0 && e->print_with_delim)
2939 format += delim;
2940 switch (e->args[i]->type)
2941 {
2942 default:
2943 case pe_unknown:
2944 throw SEMANTIC_ERROR(_("cannot print unknown expression type"),
2945 e->args[i]->tok);
2946 case pe_stats:
2947 throw SEMANTIC_ERROR(_("cannot print a raw stats object"),
2948 e->args[i]->tok);
2949 case pe_long:
2950 format += "%lld";
2951 break;
2952 case pe_string:
2953 format += "%s";
2954 break;
2955 }
2956 }
2957 if (e->print_with_newline)
2958 format += '\n';
2959
2960 if (e->print_to_stream)
2961 print_format_add_tag(format);
2962 }
2963
2964 size_t format_bytes = format.size() + 1;
2965 if (format_bytes > BPF_MAXFORMATLEN)
2966 throw SEMANTIC_ERROR(_("Format string for print too long"), e->tok);
2967
2968 value *retval = emit_print_format(format, actual, e->print_to_stream);
2969 if (retval != NULL)
2970 result = retval;
2971 }
2972
2973 // } // anon namespace
2974
2975 void
2976 build_internal_globals(globals& glob)
2977 {
2978 struct vardecl exit;
2979 exit.name = "__global___STAPBPF_exit";
2980 exit.unmangled_name = "__STAPBPF_exit";
2981 exit.type = pe_long;
2982 exit.arity = 0;
2983 glob.internal_exit = exit;
2984
2985 glob.globals.insert(std::pair<vardecl *, globals::map_slot>
2986 (&glob.internal_exit,
2987 globals::map_slot(0, globals::EXIT)));
2988 glob.maps.push_back
2989 ({ BPF_MAP_TYPE_HASH, 4, 8, globals::NUM_INTERNALS, 0 });
2990 }
2991
2992 static void
2993 translate_globals (globals &glob, systemtap_session& s)
2994 {
2995 int long_map = -1; // -- for scalar long variables
2996 int str_map = -1; // -- for scalar string variables
2997 build_internal_globals(glob);
2998
2999 for (auto i = s.globals.begin(); i != s.globals.end(); ++i)
3000 {
3001 vardecl *v = *i;
3002 int this_map, this_idx;
3003
3004 switch (v->arity)
3005 {
3006 case 0: // scalars
3007 switch (v->type)
3008 {
3009 case pe_long:
3010 if (long_map < 0)
3011 {
3012 globals::bpf_map_def m = {
3013 BPF_MAP_TYPE_ARRAY, 4, 8, 0, 0
3014 };
3015 long_map = glob.maps.size();
3016 glob.maps.push_back(m);
3017 }
3018 this_map = long_map;
3019 this_idx = glob.maps[long_map].max_entries++;
3020 break;
3021
3022 case pe_string:
3023 if (str_map < 0)
3024 {
3025 globals::bpf_map_def m = {
3026 BPF_MAP_TYPE_ARRAY, 4, BPF_MAXSTRINGLEN, 0, 0
3027 };
3028 str_map = glob.maps.size();
3029 glob.maps.push_back(m);
3030 }
3031 this_map = str_map;
3032 this_idx = glob.maps[str_map].max_entries++;
3033 break;
3034
3035 // ??? pe_stats -> TODO (3) exists as a BPF_MAP_TYPE_PERCPU_ARRAY
3036 default:
3037 throw SEMANTIC_ERROR (_("unhandled scalar type"), v->tok);
3038 }
3039 break;
3040
3041 case 1: // single dimension array
3042 {
3043 globals::bpf_map_def m = { BPF_MAP_TYPE_HASH, 0, 0, 0, 0 };
3044
3045 switch (v->index_types[0])
3046 {
3047 case pe_long:
3048 m.key_size = 8;
3049 break;
3050 case pe_string:
3051 m.key_size = BPF_MAXSTRINGLEN;
3052 break;
3053 default:
3054 throw SEMANTIC_ERROR (_("unhandled index type"), v->tok);
3055 }
3056 switch (v->type)
3057 {
3058 case pe_long:
3059 m.value_size = 8;
3060 break;
3061 case pe_string:
3062 m.value_size = BPF_MAXSTRINGLEN;
3063 break;
3064 // ??? pe_stats -> TODO (3) map is BPF_MAP_TYPE_PERCPU_{HASH,ARRAY}, value_size is unknown
3065 default:
3066 throw SEMANTIC_ERROR (_("unhandled array element type"), v->tok);
3067 }
3068
3069 m.max_entries = v->maxsize > 0 ? v->maxsize : BPF_MAXMAPENTRIES;
3070 this_map = glob.maps.size();
3071 glob.maps.push_back(m);
3072 this_idx = 0;
3073 }
3074 break;
3075
3076 default:
3077 // Multi-dimensional arrays not supported for now.
3078 throw SEMANTIC_ERROR (_("unhandled multi-dimensional array"), v->tok);
3079 }
3080
3081 assert(this_map != globals::internal_map_idx);
3082 auto ok = (glob.globals.insert
3083 (std::pair<vardecl *, globals::map_slot>
3084 (v, globals::map_slot(this_map, this_idx))));
3085 assert(ok.second);
3086 }
3087 }
3088
3089 struct BPF_Section
3090 {
3091 Elf_Scn *scn;
3092 Elf64_Shdr *shdr;
3093 std::string name;
3094 Stap_Strent *name_ent;
3095 Elf_Data *data;
3096 bool free_data; // NB: then data must have been malloc()'d!
3097
3098 BPF_Section(const std::string &n);
3099 ~BPF_Section();
3100 };
3101
3102 BPF_Section::BPF_Section(const std::string &n)
3103 : scn(0), name(n), name_ent(0), data(0), free_data(false)
3104 { }
3105
3106 BPF_Section::~BPF_Section()
3107 {
3108 if (free_data)
3109 free(data->d_buf);
3110 }
3111
3112 struct BPF_Symbol
3113 {
3114 std::string name;
3115 Stap_Strent *name_ent;
3116 Elf64_Sym sym;
3117
3118 BPF_Symbol(const std::string &n, BPF_Section *, long);
3119 };
3120
3121 BPF_Symbol::BPF_Symbol(const std::string &n, BPF_Section *sec, long off)
3122 : name(n), name_ent(0)
3123 {
3124 memset(&sym, 0, sizeof(sym));
3125 sym.st_shndx = elf_ndxscn(sec->scn);
3126 sym.st_value = off;
3127 }
3128
3129 struct BPF_Output
3130 {
3131 Elf *elf;
3132 Elf64_Ehdr *ehdr;
3133 Stap_Strtab *str_tab;
3134
3135 std::vector<BPF_Section *> sections;
3136 std::vector<BPF_Symbol *> symbols;
3137
3138 BPF_Output(int fd);
3139 ~BPF_Output();
3140 BPF_Section *new_scn(const std::string &n);
3141 BPF_Symbol *new_sym(const std::string &n, BPF_Section *, long);
3142 BPF_Symbol *append_sym(const std::string &n, BPF_Section *, long);
3143 };
3144
3145 BPF_Output::BPF_Output(int fd)
3146 : elf(elf_begin(fd, ELF_C_WRITE_MMAP, NULL)),
3147 ehdr(elf64_newehdr(elf)),
3148 str_tab(stap_strtab_init(true))
3149 {
3150 ehdr->e_type = ET_REL;
3151 ehdr->e_machine = EM_BPF;
3152 }
3153
3154 BPF_Output::~BPF_Output()
3155 {
3156 stap_strtab_free(str_tab);
3157
3158 for (auto i = symbols.begin(); i != symbols.end(); ++i)
3159 delete *i;
3160 for (auto i = sections.begin(); i != sections.end(); ++i)
3161 delete *i;
3162
3163 elf_end(elf);
3164 }
3165
3166 BPF_Section *
3167 BPF_Output::new_scn(const std::string &name)
3168 {
3169 BPF_Section *n = new BPF_Section(name);
3170 Elf_Scn *scn = elf_newscn(elf);
3171
3172 n->scn = scn;
3173 n->shdr = elf64_getshdr(scn);
3174 n->data = elf_newdata(scn);
3175 n->name_ent = stap_strtab_add(str_tab, n->name.c_str());
3176
3177 sections.push_back(n);
3178 return n;
3179 }
3180
3181 BPF_Symbol *
3182 BPF_Output::new_sym(const std::string &name, BPF_Section *sec, long off)
3183 {
3184 BPF_Symbol *s = new BPF_Symbol(name, sec, off);
3185 s->name_ent = stap_strtab_add(str_tab, s->name.c_str());
3186 return s;
3187 }
3188
3189 BPF_Symbol *
3190 BPF_Output::append_sym(const std::string &name, BPF_Section *sec, long off)
3191 {
3192 BPF_Symbol *s = new_sym(name, sec, off);
3193 symbols.push_back(s);
3194 return s;
3195 }
3196
3197 static void
3198 output_kernel_version(BPF_Output &eo, const std::string &base_version)
3199 {
3200 unsigned long maj = 0, min = 0, rel = 0;
3201 char *q;
3202
3203 maj = strtoul(base_version.c_str(), &q, 10);
3204 if (*q == '.')
3205 {
3206 min = strtoul(q + 1, &q, 10);
3207 if (*q == '.')
3208 rel = strtoul(q + 1, NULL, 10);
3209 }
3210
3211 BPF_Section *so = eo.new_scn("version");
3212 Elf_Data *data = so->data;
3213 data->d_buf = malloc(sizeof(uint32_t));
3214 assert (data->d_buf);
3215 * (uint32_t*) data->d_buf = KERNEL_VERSION(maj, min, rel);
3216 data->d_type = ELF_T_BYTE;
3217 data->d_size = 4;
3218 data->d_align = 4;
3219 so->free_data = true;
3220 so->shdr->sh_type = SHT_PROGBITS;
3221 so->shdr->sh_entsize = 4;
3222 }
3223
3224 static void
3225 output_license(BPF_Output &eo)
3226 {
3227 BPF_Section *so = eo.new_scn("license");
3228 Elf_Data *data = so->data;
3229 data->d_buf = (void *)"GPL";
3230 data->d_type = ELF_T_BYTE;
3231 data->d_size = 4;
3232 so->shdr->sh_type = SHT_PROGBITS;
3233 }
3234
3235 static void
3236 output_stapbpf_script_name(BPF_Output &eo, const std::string script_name)
3237 {
3238 BPF_Section *so = eo.new_scn("stapbpf_script_name");
3239 Elf_Data *data = so->data;
3240 size_t script_name_len = strlen(script_name.c_str());
3241 data->d_buf = (void *)malloc(script_name_len + 1);
3242 char *script_name_buf = (char *)data->d_buf;
3243 script_name.copy(script_name_buf, script_name_len);
3244 script_name_buf[script_name_len] = '\0';
3245 data->d_size = script_name_len + 1;
3246 so->free_data = true;
3247 so->shdr->sh_type = SHT_PROGBITS;
3248 }
3249
3250 static void
3251 output_maps(BPF_Output &eo, globals &glob)
3252 {
3253 unsigned nmaps = glob.maps.size();
3254 if (nmaps == 0)
3255 return;
3256
3257 assert(sizeof(unsigned) == sizeof(Elf64_Word));
3258
3259 const size_t bpf_map_def_sz = sizeof(globals::bpf_map_def);
3260 BPF_Section *so = eo.new_scn("maps");
3261 Elf_Data *data = so->data;
3262 data->d_buf = glob.maps.data();
3263 data->d_type = ELF_T_BYTE;
3264 data->d_size = nmaps * bpf_map_def_sz;
3265 data->d_align = 4;
3266 so->shdr->sh_type = SHT_PROGBITS;
3267 so->shdr->sh_entsize = bpf_map_def_sz;
3268
3269 // Allow the global arrays to have their actual names.
3270 eo.symbols.reserve(nmaps);
3271 for (unsigned i = 0; i < nmaps; ++i)
3272 eo.symbols.push_back(NULL);
3273
3274 for (auto i = glob.globals.begin(); i != glob.globals.end(); ++i)
3275 {
3276 vardecl *v = i->first;
3277 if (v->arity <= 0)
3278 continue;
3279 unsigned m = i->second.first;
3280 assert(eo.symbols[m] == NULL);
3281
3282 BPF_Symbol *s = eo.new_sym(v->name, so, m * bpf_map_def_sz);
3283 s->sym.st_info = ELF64_ST_INFO(STB_LOCAL, STT_OBJECT);
3284 s->sym.st_size = bpf_map_def_sz;
3285 eo.symbols[m] = s;
3286 }
3287
3288 // Give internal names to other maps.
3289 for (unsigned i = 0; i < nmaps; ++i)
3290 {
3291 if (eo.symbols[i] != NULL)
3292 continue;
3293
3294 BPF_Symbol *s = eo.new_sym(std::string("map.") + std::to_string(i),
3295 so, i * bpf_map_def_sz);
3296 s->sym.st_info = ELF64_ST_INFO(STB_LOCAL, STT_OBJECT);
3297 s->sym.st_size = bpf_map_def_sz;
3298 eo.symbols[i] = s;
3299 }
3300 }
3301
3302 void
3303 bpf_unparser::add_prologue()
3304 {
3305 value *i0 = this_prog.new_imm(0);
3306
3307 // lookup exit global
3308 value *frame = this_prog.lookup_reg(BPF_REG_10);
3309 this_prog.mk_st(this_ins, BPF_W, frame, -4, i0);
3310 this_prog.use_tmp_space(4);
3311
3312 this_prog.load_map(this_ins, this_prog.lookup_reg(BPF_REG_1), 0);
3313 this_prog.mk_binary(this_ins, BPF_ADD, this_prog.lookup_reg(BPF_REG_2),
3314 frame, this_prog.new_imm(-4));
3315 this_prog.mk_call(this_ins, BPF_FUNC_map_lookup_elem, 2);
3316
3317 value *r0 = this_prog.lookup_reg(BPF_REG_0);
3318 block *cont_block = this_prog.new_block();
3319 block *exit_block = get_exit_block();
3320
3321 // check that map_lookup_elem returned non-null ptr
3322 this_prog.mk_jcond(this_ins, EQ, r0, i0, exit_block, cont_block);
3323 set_block(cont_block);
3324
3325 // load exit status from ptr
3326 value *exit_status = this_prog.new_reg();
3327 this_prog.mk_ld(this_ins, BPF_DW, exit_status, r0, 0);
3328
3329 // if exit_status == 1 jump to exit, else continue with handler
3330 cont_block = this_prog.new_block();
3331 this_prog.mk_jcond(this_ins, EQ, exit_status, this_prog.new_imm(1),
3332 exit_block, cont_block);
3333 set_block(cont_block);
3334 }
3335
3336 static void
3337 translate_probe(program &prog, globals &glob, derived_probe *dp)
3338 {
3339 bpf_unparser u(prog, glob);
3340 u.this_locals = u.new_locals(dp->locals);
3341
3342 u.set_block(prog.new_block ());
3343
3344 // Save the input argument early.
3345 // ??? Ideally this would be deleted as dead code if it were unused;
3346 // we don't implement that at the moment. Nor is it easy to support
3347 // inserting a new start block that would enable retroactively saving
3348 // this only when needed.
3349 u.this_in_arg0 = prog.lookup_reg(BPF_REG_6);
3350 prog.mk_mov(u.this_ins, u.this_in_arg0, prog.lookup_reg(BPF_REG_1));
3351
3352 u.add_prologue();
3353
3354 dp->body->visit (&u);
3355 if (u.in_block())
3356 u.emit_jmp(u.get_ret0_block());
3357 }
3358
3359 static void
3360 translate_probe_v(program &prog, globals &glob,
3361 const std::vector<derived_probe *> &v)
3362 {
3363 bpf_unparser u(prog, glob);
3364 block *this_block;
3365
3366 if (prog.blocks.empty())
3367 this_block = prog.new_block();
3368 else
3369 {
3370 u.set_block(prog.blocks.back());
3371 this_block = prog.new_block();
3372 u.emit_jmp(this_block);
3373 }
3374
3375 for (size_t n = v.size(), i = 0; i < n; ++i)
3376 {
3377 u.set_block(this_block);
3378
3379 derived_probe *dp = v[i];
3380 u.this_locals = u.new_locals(dp->locals);
3381 dp->body->visit (&u);
3382 delete u.this_locals;
3383 u.this_locals = NULL;
3384
3385 if (i == n - 1)
3386 this_block = u.get_ret0_block();
3387 else
3388 this_block = prog.new_block();
3389 if (u.in_block())
3390 u.emit_jmp(this_block);
3391 }
3392 }
3393
3394 static void
3395 translate_init_and_probe_v(program &prog, globals &glob, init_block &b,
3396 const std::vector<derived_probe *> &v)
3397 {
3398 bpf_unparser u(prog, glob);
3399 block *this_block = prog.new_block();
3400
3401 u.set_block(this_block);
3402 b.visit(&u);
3403
3404 if (!v.empty())
3405 translate_probe_v(prog, glob, v);
3406 else
3407 {
3408 this_block = u.get_ret0_block();
3409 assert(u.in_block());
3410 u.emit_jmp(this_block);
3411 }
3412 }
3413
3414 static BPF_Section *
3415 output_probe(BPF_Output &eo, program &prog,
3416 const std::string &name, unsigned flags)
3417 {
3418 unsigned ninsns = 0, nreloc = 0;
3419
3420 // Count insns and relocations; drop in jump offset.
3421 for (auto i = prog.blocks.begin(); i != prog.blocks.end(); ++i)
3422 {
3423 block *b = *i;
3424
3425 for (insn *j = b->first; j != NULL; j = j->next)
3426 {
3427 unsigned code = j->code;
3428 if ((code & 0xff) == (BPF_LD | BPF_IMM | BPF_DW))
3429 {
3430 if (code == BPF_LD_MAP)
3431 nreloc += 1;
3432 ninsns += 2;
3433 }
3434 else
3435 {
3436 if (j->is_jmp())
3437 j->off = b->taken->next->first->id - (j->id + 1);
3438 else if (j->is_call())
3439 j->off = 0;
3440 ninsns += 1;
3441 }
3442 }
3443 }
3444
3445 bpf_insn *buf = (bpf_insn*) calloc (sizeof(bpf_insn), ninsns);
3446 assert (buf);
3447 Elf64_Rel *rel = (Elf64_Rel*) calloc (sizeof(Elf64_Rel), nreloc);
3448 assert (rel);
3449
3450 unsigned i = 0, r = 0;
3451 for (auto bi = prog.blocks.begin(); bi != prog.blocks.end(); ++bi)
3452 {
3453 block *b = *bi;
3454
3455 for (insn *j = b->first; j != NULL; j = j->next)
3456 {
3457 unsigned code = j->code;
3458 value *d = j->dest;
3459 value *s = j->src1;
3460
3461 if (code == BPF_LD_MAP)
3462 {
3463 unsigned val = s->imm();
3464
3465 // Note that we arrange for the map symbols to be first.
3466 rel[r].r_offset = i * sizeof(bpf_insn);
3467 rel[r].r_info = ELF64_R_INFO(val + 1, R_BPF_MAP_FD);
3468 r += 1;
3469
3470 buf[i + 0].code = code;
3471 buf[i + 0].dst_reg = d->reg();
3472 buf[i + 0].src_reg = code >> 8;
3473 i += 2;
3474 }
3475 else if (code == (BPF_LD | BPF_IMM | BPF_DW))
3476 {
3477 uint64_t val = s->imm();
3478 buf[i + 0].code = code;
3479 buf[i + 0].dst_reg = d->reg();
3480 buf[i + 0].src_reg = code >> 8;
3481 buf[i + 0].imm = val;
3482 buf[i + 1].imm = val >> 32;
3483 i += 2;
3484 }
3485 else
3486 {
3487 buf[i].code = code;
3488 if (!d)
3489 d = j->src0;
3490 if (d)
3491 buf[i].dst_reg = d->reg();
3492 if (s)
3493 {
3494 if (s->is_reg())
3495 buf[i].src_reg = s->reg();
3496 else
3497 buf[i].imm = s->imm();
3498 }
3499 buf[i].off = j->off;
3500 i += 1;
3501 }
3502 }
3503 }
3504 assert(i == ninsns);
3505 assert(r == nreloc);
3506
3507 BPF_Section *so = eo.new_scn(name);
3508 Elf_Data *data = so->data;
3509 data->d_buf = buf;
3510 data->d_type = ELF_T_BYTE;
3511 data->d_size = ninsns * sizeof(bpf_insn);
3512 data->d_align = 8;
3513 so->free_data = true;
3514 so->shdr->sh_type = SHT_PROGBITS;
3515 so->shdr->sh_flags = SHF_EXECINSTR | flags;
3516
3517 if (nreloc)
3518 {
3519 BPF_Section *ro = eo.new_scn(std::string(".rel.") + name);
3520 Elf_Data *rdata = ro->data;
3521 rdata->d_buf = rel;
3522 rdata->d_type = ELF_T_REL;
3523 rdata->d_size = nreloc * sizeof(Elf64_Rel);
3524 ro->free_data = true;
3525 ro->shdr->sh_type = SHT_REL;
3526 ro->shdr->sh_info = elf_ndxscn(so->scn);
3527 }
3528
3529 return so;
3530 }
3531
3532 static void
3533 output_symbols_sections(BPF_Output &eo)
3534 {
3535 BPF_Section *str = eo.new_scn(".strtab");
3536 str->shdr->sh_type = SHT_STRTAB;
3537 str->shdr->sh_entsize = 1;
3538
3539 unsigned nsym = eo.symbols.size();
3540 unsigned isym = 0;
3541 if (nsym > 0)
3542 {
3543 BPF_Section *sym = eo.new_scn(".symtab");
3544 sym->shdr->sh_type = SHT_SYMTAB;
3545 sym->shdr->sh_link = elf_ndxscn(str->scn);
3546 sym->shdr->sh_info = nsym + 1;
3547
3548 Elf64_Sym *buf = new Elf64_Sym[nsym + 1];
3549 memset(buf, 0, sizeof(Elf64_Sym));
3550
3551 sym->data->d_buf = buf;
3552 sym->data->d_type = ELF_T_SYM;
3553 sym->data->d_size = (nsym + 1) * sizeof(Elf64_Sym);
3554
3555 stap_strtab_finalize(eo.str_tab, str->data);
3556
3557 for (unsigned i = 0; i < nsym; ++i)
3558 {
3559 BPF_Symbol *s = eo.symbols[i];
3560 Elf64_Sym *b = buf + (i + 1);
3561 *b = s->sym;
3562 b->st_name = stap_strent_offset(s->name_ent);
3563 }
3564
3565 isym = elf_ndxscn(sym->scn);
3566 }
3567 else
3568 stap_strtab_finalize(eo.str_tab, str->data);
3569
3570 eo.ehdr->e_shstrndx = elf_ndxscn(str->scn);
3571
3572 for (auto i = eo.sections.begin(); i != eo.sections.end(); ++i)
3573 {
3574 BPF_Section *s = *i;
3575 s->shdr->sh_name = stap_strent_offset(s->name_ent);
3576 if (s->shdr->sh_type == SHT_REL)
3577 s->shdr->sh_link = isym;
3578 }
3579 }
3580
3581 } // namespace bpf
3582
3583 int
3584 translate_bpf_pass (systemtap_session& s)
3585 {
3586 using namespace bpf;
3587
3588 init_bpf_helper_tables();
3589
3590 if (elf_version(EV_CURRENT) == EV_NONE)
3591 return 1;
3592
3593 module_name = s.module_name;
3594 const std::string module = s.tmpdir + "/" + s.module_filename();
3595 int fd = open(module.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0666);
3596 if (fd < 0)
3597 return 1;
3598
3599 BPF_Output eo(fd);
3600 globals glob; glob.session = &s;
3601 int ret = 0;
3602 const token* t = 0;
3603 try
3604 {
3605 translate_globals(glob, s);
3606 output_maps(eo, glob);
3607
3608 if (s.be_derived_probes || !glob.empty())
3609 {
3610 std::vector<derived_probe *> begin_v, end_v;
3611 sort_for_bpf(s.be_derived_probes, begin_v, end_v);
3612 init_block init(glob);
3613
3614 if (!init.empty())
3615 {
3616 if (!begin_v.empty())
3617 t = begin_v[0]->tok;
3618
3619 program p;
3620 translate_init_and_probe_v(p, glob, init, begin_v);
3621 p.generate();
3622 output_probe(eo, p, "stap_begin", 0);
3623 }
3624 else if (!begin_v.empty())
3625 {
3626 t = begin_v[0]->tok;
3627 program p;
3628 translate_probe_v(p, glob, begin_v);
3629 p.generate();
3630 output_probe(eo, p, "stap_begin", 0);
3631 }
3632
3633 if (!end_v.empty())
3634 {
3635 t = end_v[0]->tok;
3636 program p;
3637 translate_probe_v(p, glob, end_v);
3638 p.generate();
3639 output_probe(eo, p, "stap_end", 0);
3640 }
3641 }
3642
3643 if (s.generic_kprobe_derived_probes)
3644 {
3645 sort_for_bpf_probe_arg_vector kprobe_v;
3646 sort_for_bpf(s.generic_kprobe_derived_probes, kprobe_v);
3647
3648 for (auto i = kprobe_v.begin(); i != kprobe_v.end(); ++i)
3649 {
3650 t = i->first->tok;
3651 program p;
3652 translate_probe(p, glob, i->first);
3653 p.generate();
3654 output_probe(eo, p, i->second, SHF_ALLOC);
3655 }
3656 }
3657
3658 if (s.perf_derived_probes)
3659 {
3660 sort_for_bpf_probe_arg_vector perf_v;
3661 sort_for_bpf(s.perf_derived_probes, perf_v);
3662
3663 for (auto i = perf_v.begin(); i != perf_v.end(); ++i)
3664 {
3665 t = i->first->tok;
3666 program p;
3667 translate_probe(p, glob, i->first);
3668 p.generate();
3669 output_probe(eo, p, i->second, SHF_ALLOC);
3670 }
3671 }
3672
3673 if (s.hrtimer_derived_probes || s.timer_derived_probes)
3674 {
3675 sort_for_bpf_probe_arg_vector timer_v;
3676 sort_for_bpf(s.hrtimer_derived_probes,
3677 s.timer_derived_probes, timer_v);
3678
3679 for (auto i = timer_v.begin(); i != timer_v.end(); ++i)
3680 {
3681 t = i->first->tok;
3682 program p;
3683 translate_probe(p, glob, i->first);
3684 p.generate();
3685 output_probe(eo, p, i->second, SHF_ALLOC);
3686 }
3687 }
3688
3689 if (s.tracepoint_derived_probes)
3690 {
3691 sort_for_bpf_probe_arg_vector trace_v;
3692 sort_for_bpf(s.tracepoint_derived_probes, trace_v);
3693
3694 for (auto i = trace_v.begin(); i != trace_v.end(); ++i)
3695 {
3696 t = i->first->tok;
3697 program p;
3698 translate_probe(p, glob, i->first);
3699 p.generate();
3700 output_probe(eo, p, i->second, SHF_ALLOC);
3701 }
3702 }
3703
3704 if (s.uprobe_derived_probes)
3705 {
3706 sort_for_bpf_probe_arg_vector uprobe_v;
3707 sort_for_bpf(s.uprobe_derived_probes, uprobe_v);
3708
3709 for (auto i = uprobe_v.begin(); i != uprobe_v.end(); ++i)
3710 {
3711 t = i->first->tok;
3712 program p;
3713 translate_probe(p, glob, i->first);
3714 p.generate();
3715 output_probe(eo, p, i->second, SHF_ALLOC);
3716 }
3717 }
3718
3719 output_kernel_version(eo, s.kernel_base_release);
3720 output_license(eo);
3721 output_stapbpf_script_name(eo, escaped_literal_string(s.script_basename()));
3722 output_symbols_sections(eo);
3723
3724 int64_t r = elf_update(eo.elf, ELF_C_WRITE_MMAP);
3725 if (r < 0)
3726 {
3727 std::clog << "Error writing output file: "
3728 << elf_errmsg(elf_errno()) << std::endl;
3729 ret = 1;
3730 }
3731 }
3732 catch (const semantic_error &e)
3733 {
3734 s.print_error(e);
3735 ret = 1;
3736 }
3737 catch (const std::runtime_error &e)
3738 {
3739 semantic_error er(ERR_SRC, _F("bpf translation failure: %s", e.what()), t);
3740 s.print_error(er);
3741 ret = 1;
3742 }
3743 catch (...)
3744 {
3745 std::cerr << "bpf translation internal error" << std::endl;
3746 ret = 1;
3747 }
3748
3749 close(fd);
3750 if (ret == 1)
3751 unlink(s.translated_source.c_str());
3752 return ret;
3753 }
This page took 0.218128 seconds and 5 git commands to generate.