td-agent 的 ruby 进程 CPU 占用率持续为 100%。代理本身工作正常并输出预期结果,但它消耗的系统资源拖慢了整台机器。
通过 systemctl 重启代理不起作用:进程不会结束,服务会卡在“停止中”状态。用 killall 或 pkill 发送 SIGTERM 或 SIGKILL,进程同样没有响应。重启机器也无法解决问题,进程在开机时一启动,CPU 占用就会再次飙升到 100%。(补充一点:完全相同的配置和权限方案正在 125 台以上的其他机器上运行,只有 2 台出现此问题)
我的配置
## match tag=debug.** and dump to console
<match debug.**>
@type stdout
@id output_stdout
</match>
####
## HTTP Source:
# HTTP input
# POST http://localhost:8888/<tag>?json=<json>
# POST http://localhost:8888/td.myapp.login?json={"user"%3A"me"}
# @see http://docs.fluentd.org/articles/in_http
<source>
@type http
@id input_http
port 8888
</source>
####
## live debugging agent:
<source>
@type debug_agent
@id input_debug_agent
bind 127.0.0.1
port 24230
</source>
####
##ALL LOGS
##input
#Source
# Tail every file directly under /var/log whose name ends in "log".
# Lines are emitted unparsed (parse type none) and tagged td.<expanded path>,
# e.g. td.var.log.syslog, which the grep filters below rely on.
<source>
@type tail
@id input_tail_all
<parse>
@type none
</parse>
path /var/log/*log
# The glob above also matches the binary login-accounting databases
# (lastlog, faillog, tallylog). These are not text logs; they are sparse files
# indexed by UID, so on hosts with very large UIDs their apparent size can be
# enormous and in_tail ends up reading long runs of NUL bytes, pinning a CPU.
# NOTE(review): confirm which file the busy descriptor points at with
# `ls -l /proc/<pid>/fd/<fd>` and extend this list if another binary file matches.
exclude_path ["/var/log/lastlog", "/var/log/faillog", "/var/log/tallylog"]
pos_file /var/log/td-agent/tmp/all.log.pos
pos_file_compaction_interval 72h
enable_stat_watcher false
tag td.*
</source>
##Clean
#silence datadog
<filter td.var.log.syslog>
@type grep
<and>
<exclude>
key message
pattern pkg/collector/python/datadog_agent
</exclude>
</and>
</filter>
<filter td.var.log.syslog>
@type grep
<and>
<exclude>
key message
pattern chef-client
</exclude>
</and>
</filter>
############ This portion is only used for testing. Uncomment if you need to output all logs flagged by td-agent #############
# ##output
# <match td.var.**>
# @type file
# @id output_file
# <buffer>
# @type file
# timekey 1h
# timekey_use_utc true
# </buffer>
# path /var/log/fluent/all_logs/
# </match>
################################################################################################################################
####
##User Logins
#Source files
<source>
@type tail
@id in_tail_logins
<parse>
@type syslog
parser_type regexp
</parse>
path /var/log/auth.log
pos_file /var/log/td-agent/tmp/auth.log.pos
pos_file_compaction_interval 72h
enable_stat_watcher false
tag td.auth
</source>
##clean
#silence docker
<filter td.auth>
@type grep
<exclude>
key message
pattern docker
</exclude>
</filter>
##tag sub-types
#
# Re-tag td.auth events according to the content of their "message" field so
# that each event type (sudo, ssh, nomachine, logout) can be handled by its
# own filter and output stanzas further down.
# NOTE(review): how events that match none of the rules are treated is decided
# by the rewrite_tag_filter plugin, not by this config — verify if they matter.
<match td.auth>
@type rewrite_tag_filter
# sudo invocations: message contains "COMMAND"
<rule>
key message
pattern /COMMAND/
tag sudo
</rule>
# SSH public-key logins
<rule>
key message
pattern /Accepted.publickey.+ssh.+/
tag ssh
</rule>
# NoMachine (nx) session opened
<rule>
key message
pattern /.nx\:session.*session.opened+/
tag nomachine
</rule>
# Any "session closed for user" message
<rule>
key message
pattern /session.closed.for.user+/
tag logout
</rule>
</match>
##transform output
#transform sudo
<filter sudo>
@type parser
key_name message
<parse>
@type regexp
expression /(?<user>[^ ]+)[^ ]* [^ ]* [^ ]* [^ ]* [^ ]* ; USER=(?<sudoer>[^ ]+) ; COMMAND=(?<command>.*)$/
</parse>
</filter>
#transform ssh
<filter ssh>
@type parser
key_name message
<parse>
@type regexp
expression /(?<user>(?<=for ).[^ ]+)/
</parse>
</filter>
<filter ssh>
@type record_transformer
<record>
login-type ssh
</record>
</filter>
#transform nomachine
<filter nomachine>
@type parser
key_name message
<parse>
@type regexp
expression /(?<user>(?<=for.user ).[^ ]+)/
</parse>
</filter>
<filter nomachine>
@type record_transformer
<record>
login-type nomachine
</record>
</filter>
#transform logout
<filter logout>
@type grep
<exclude>
key message
pattern /root/
</exclude>
</filter>
<filter logout>
@type grep
<exclude>
key message
pattern /cron/
</exclude>
</filter>
# Drop logout events whose message matches /su/.
# NOTE(review): the pattern is unanchored, so it matches "su" anywhere in the
# message (e.g. "sudo", "sum", a user named "susan"), not only su sessions.
# Presumably the intent is to drop su session closes — confirm, and tighten
# the pattern if so.
<filter logout>
@type grep
<exclude>
key message
pattern /su/
</exclude>
</filter>
<filter logout>
@type parser
key_name message
<parse>
@type regexp
expression /(?<user>(?<=for.user ).[^ ]+)/
</parse>
</filter>
<filter logout>
@type record_transformer
<record>
login-type logout
</record>
</filter>
##output
#output sudo
<match sudo>
@type file
@id auth_output_file
<buffer>
timekey 1d
timekey_use_utc true
</buffer>
path /var/log/fluent/sudo/
</match>
#output ssh nomachine
<match ssh nomachine logout>
@type file
@id ssh_login_output
<buffer>
timekey 1d
timekey_use_utc true
</buffer>
path /var/log/fluent/logins/
</match>
STRACE 样本
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
futex(0x7f5700328e94, FUTEX_WAKE_PRIVATE, 1) = 1
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
futex(0x7f5700328e90, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f570781ea80, FUTEX_WAIT_PRIVATE, 0, NULL) = 0
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 0
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
futex(0x7f5700328e94, FUTEX_WAKE_PRIVATE, 1) = 1
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
futex(0x7f570781ea10, FUTEX_WAIT_PRIVATE, 2, NULL) = -1 EAGAIN (Resource temporarily unavailable)
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f570781ea84, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f5700328e90, FUTEX_WAKE_PRIVATE, 1) = 1
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
futex(0x7f5700328e94, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f570781ea80, FUTEX_WAIT_PRIVATE, 0, NULL) = 0
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f5700328e90, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 1
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
futex(0x7f5707858214, FUTEX_WAIT_PRIVATE, 0, {tv_sec=0, tv_nsec=31753510}) = 0
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f570781ea84, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f5700328e94, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 1
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
futex(0x7f5700328b90, FUTEX_WAKE_PRIVATE, 1) = 1
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
futex(0x7f5700328b94, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f570781ea80, FUTEX_WAIT_PRIVATE, 0, NULL) = 0
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f5707858214, FUTEX_WAIT_PRIVATE, 0, {tv_sec=0, tv_nsec=41667651}) = 0
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 0
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
futex(0x7f5700328e90, FUTEX_WAKE_PRIVATE, 1) = 1
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
futex(0x7f5700328e94, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f570781ea84, FUTEX_WAIT_PRIVATE, 0, NULL) = 0
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f570781ea10, FUTEX_WAKE_PRIVATE, 1) = 0
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
read(29, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192