我正在玩gobpf并且在计算跟踪的用户空间函数的持续时间方面存在问题。我使用bpf_ktime_get_ns()来读取时间然后尝试计算delta,但是获得了大量的数字,尽管跟踪函数只能休眠1秒。
这是经过测试的C程序,它具有一个名为“ameba”的功能。
#include <stdio.h>
#include <strings.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
char * ameba(char * s1, char * s2);
int main(void) {
time_t rawtime;
struct tm * timeinfo;
time(&rawtime);
timeinfo = localtime(&rawtime);
printf("enter: %s", asctime (timeinfo));
printf("%s\n", ameba("lessqqmorepewpew", "good luck, have fun"));
time(&rawtime);
timeinfo = localtime(&rawtime);
printf("return: %s", asctime(timeinfo));
}
char * ameba(char * s1, char * s2) {
char *s;
s = (char *) malloc(128);
sleep(1);
snprintf(s, 128, "phrase1: %s, phrase2: %s", s1, s2);
return s;
}
去代码
package main
import (
"bytes"
"encoding/binary"
"fmt"
"os"
"os/signal"
"time"
bpf "github.com/iovisor/gobpf/bcc"
)
const source string = `
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
struct val_t {
u32 pid;
char comm[TASK_COMM_LEN];
u64 ts;
};
struct data_t {
u32 pid;
char comm[TASK_COMM_LEN];
u64 delta;
};
BPF_HASH(start, u32, struct val_t);
BPF_PERF_OUTPUT(ameba_events);
int do_entry(struct pt_regs *ctx) {
if (!PT_REGS_PARM1(ctx))
return 0;
struct val_t val = {};
u32 pid = bpf_get_current_pid_tgid();
if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
val.pid = bpf_get_current_pid_tgid();
val.ts = bpf_ktime_get_ns();
start.update(&pid, &val);
}
return 0;
}
int do_return(struct pt_regs *ctx) {
struct val_t *valp;
struct data_t data = {};
u64 delta;
u32 pid = bpf_get_current_pid_tgid();
u64 tsp = bpf_ktime_get_ns();
valp = start.lookup(&pid);
if (valp == 0)
return 0; // missed start
bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
data.pid = valp->pid;
data.delta = tsp - valp->ts;
ameba_events.perf_submit(ctx, &data, sizeof(data));
start.delete(&pid);
return 0;
}
`
type amebaEvent struct {
Pid uint32
Comm [16]byte
Delta uint64
}
func main() {
m := bpf.NewModule(source, []string{})
defer m.Close()
amebaUprobe, err := m.LoadUprobe("do_entry")
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to load do_entry: %s\n", err)
os.Exit(1)
}
amebaUretprobe, err := m.LoadUprobe("do_return")
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to load do_return: %s\n", err)
os.Exit(1)
}
err = m.AttachUprobe("/tmp/sandbox/ameba", "ameba", amebaUprobe, -1)
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to attach do_entry uprobe: %s\n", err)
os.Exit(1)
}
err = m.AttachUretprobe("/tmp/sandbox/ameba", "ameba", amebaUretprobe, -1)
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to attach do_return uretprobe: %s\n", err)
os.Exit(1)
}
table := bpf.NewTable(m.TableId("ameba_events"), m)
channel := make(chan []byte)
perfMap, err := bpf.InitPerfMap(table, channel)
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to init perf map: %s\n", err)
os.Exit(1)
}
sig := make(chan os.Signal, 1)
signal.Notify(sig, os.Interrupt, os.Kill)
fmt.Printf("%10s\t%s\t%s\t%s\n", "PID", "COMMAND", "DURATION", "RAW")
go func() {
var event amebaEvent
for {
data := <-channel
err := binary.Read(bytes.NewBuffer(data), binary.LittleEndian, &event)
if err != nil {
fmt.Printf("failed to decode received data: %s\n", err)
continue
}
// Convert C string (null-terminated) to Go string
comm := string(event.Comm[:bytes.IndexByte(event.Comm[:], 0)])
fmt.Printf("%10d\t%s\t%s\t%d\n", event.Pid, comm, time.Duration(event.Delta), event.Delta)
}
}()
perfMap.Start()
<-sig
perfMap.Stop()
}
程序输出示例:
PID命令持续时间 15515 ameba 1193179h21m9.457496929s 4295445669457496929 15550 ameba 1193198h27m37.653709676s 4295514457653709676
这个错误在哪里?
这看起来像是由内核和用户端之间的paddings不匹配引起的。 data_t
结构实际上在编译时填充,等同于以下内容:
struct data_t {
u32 pid;
char padding[4];
char comm[TASK_COMM_LEN];
u64 delta;
};
如果您在Go侧明确添加相同的填充,则问题将消失:
type amebaEvent struct {
Pid uint32
Pad [4]byte
Comm [16]byte
Delta uint64
}
生产:
PID COMMAND DURATION RAW
8258 a 1.000179625s 1000179625
8260 a 1.000158337s 1000158337
正如您在评论中提到的,另一种解决方案是使用__attribute__((packed))
打包C结构以删除填充。
它与bcc:https://github.com/iovisor/bcc/issues/2017上的这个问题密切相关 - 尽管不完全相同。