From a58390d23514c4fe9744456a7fb4043449c8d369 Mon Sep 17 00:00:00 2001 From: Sagar Patel Date: Thu, 29 Aug 2019 11:32:17 -0400 Subject: [PATCH] PR24926: correct printing of utf-8 characters on stapbpf There were two bugs corrupting the string bytes and instructions. The first bug was the implicit sign extension of negative char values when they were cast to uint32_t. The second bug was a faulty optimization (fixup_operands) that moved the operand into a register but left the incorrect instruction opcode in place. Fixes: 1) Cast char to unsigned char before casting to uint32_t. 2) Change the opcode of the optimized instruction to (BPF_STX | BPF_MEM | BPF_W). --- bpf-opt.cxx | 6 +++++- bpf-translate.cxx | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/bpf-opt.cxx b/bpf-opt.cxx index 5c7acb6b9..225d1d746 100644 --- a/bpf-opt.cxx +++ b/bpf-opt.cxx @@ -136,7 +136,11 @@ fixup_operands(program &p) insn_before_inserter ins(b, j, "opt"); p.mk_mov(ins, n, s1); j->src1 = s1 = n; - } + + // Since the content is in the src register, we need + // to use BPF_STX instead of BPF_ST + j->code = BPF_STX | BPF_MEM | BPF_W; + } if (value *s0 = j->src0) { diff --git a/bpf-translate.cxx b/bpf-translate.cxx index 2cd097663..11ccd7627 100644 --- a/bpf-translate.cxx +++ b/bpf-translate.cxx @@ -2869,8 +2869,13 @@ emit_simple_literal_str(program &this_prog, insn_inserter &this_ins, if (i * 4 + j < str_bytes - 1) { // ??? assuming little-endian target - word |= (uint32_t)src[i * 4 + j] << (j * 8); + // + // Must cast each signed char in src to unsigned char first + // in order to avoid the implicit sign extension resulting + // from the uint32_t cast. + word |= ((uint32_t)(unsigned char)src[i * 4 + j]) << (j * 8); } + this_prog.mk_st(this_ins, BPF_W, dest, (int32_t)i * 4 + ofs, this_prog.new_imm(word)); -- 2.43.5