我在 gin-gonic/gin Web 服务器中遇到了潜在的内存泄漏。我尝试通过创建一个简单的 /health_check 端点来复制该问题。每秒都会命中端点 /health_check。当可用 Pod 内存耗尽时,该问题会导致内存不足 (OOM) 情况。 Pod 中没有其他容器正在运行。
我还公开了 pprof 和 prometheus 指标来了解该问题,但找不到任何内容。没有看到任何其他列出的问题报告相同的问题,所以我希望有人可以帮助我隔离问题。
我没有看到堆或堆栈内存有任何增加,但进程内存不断增加。我可以使用 RSS 看到增加,并使用 pmap 看到相应的内存块,但我无法追踪为什么内存没有清除或分配的内存用于什么。
具有相关端点的简单代码示例:
package server
import (
"fmt"
"net/http"
_ "net/http/pprof"
"github.com/gin-gonic/gin"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
func setupRouter() *gin.Engine {
router := gin.New()
router.GET("/health_check", func(c *gin.Context) {
c.String(http.StatusOK, "Hello World!")
})
router.GET("/debug/pprof", gin.WrapF(http.DefaultServeMux.ServeHTTP))
router.GET("/debug/pprof/:pprofType", gin.WrapF(http.DefaultServeMux.ServeHTTP))
router.GET("/metrics", func(c *gin.Context) {
handler := promhttp.Handler()
handler.ServeHTTP(c.Writer, c.Request)
})
return router
}
func Start() {
router := setupRouter()
err := router.Run(":8080")
if err != nil {
fmt.Printf("Error starting server: %v\n", err)
}
}
package main
import (
"example.com/health-check/server"
)
func main() {
server.Start()
}
构建命令:
go build -ldflags="-s -w" -race -o health-check main.go
Pod 资源限制:
我期望内存使用量保持一致。有些波动是可以接受的,但我希望内存使用情况基本保持一致,不会出现内存不足的情况。
由于 OOM 在大约 90 分钟内持续增加内存,Pod 崩溃了。下图中的每个峰值都表示 Pod 由于 OOM 而重新启动。
协程
goroutine 8334 [running]:
runtime/pprof.writeGoroutineStacks({0x7f0e9e220b20, 0xc000506200})
/usr/local/go/src/runtime/pprof/pprof.go:703 +0x75
runtime/pprof.writeGoroutine({0x7f0e9e220b20, 0xc000506200}, 0x2)
/usr/local/go/src/runtime/pprof/pprof.go:692 +0x45
runtime/pprof.(*Profile).WriteTo(0x1509f00, {0x7f0e9e220b20, 0xc000506200}, 0xc?)
/usr/local/go/src/runtime/pprof/pprof.go:329 +0x1b1
net/http/pprof.handler.ServeHTTP({0xc000051601, 0x9}, {0x7f0e5462fd18, 0xc000506200}, 0x0?)
/usr/local/go/src/net/http/pprof/pprof.go:267 +0x58a
net/http/pprof.Index({0x7f0e5462fd18?, 0xc000506200}, 0xc000506600)
/usr/local/go/src/net/http/pprof/pprof.go:384 +0x129
net/http.HandlerFunc.ServeHTTP(0xf53ac8, {0x7f0e5462fd18, 0xc000506200}, 0xc0000b0500?)
/usr/local/go/src/net/http/server.go:2136 +0x48
net/http.(*ServeMux).ServeHTTP(0xc0004a3740?, {0x7f0e5462fd18, 0xc000506200}, 0xc000506600)
/usr/local/go/src/net/http/server.go:2514 +0xbd
example.com/health-check/server.setupRouter.WrapF.func4(0xc000506200)
/go/pkg/mod/github.com/gin-gonic/[email protected]/utils.go:42 +0x97
github.com/gin-gonic/gin.(*Context).Next(...)
/go/pkg/mod/github.com/gin-gonic/[email protected]/context.go:174
github.com/gin-gonic/gin.(*Engine).handleHTTPRequest(0xc0001871e0, 0xc000506200)
/go/pkg/mod/github.com/gin-gonic/[email protected]/gin.go:620 +0xb91
github.com/gin-gonic/gin.(*Engine).ServeHTTP(0xc0001871e0, {0x105b2e0?, 0xc00027e540}, 0xc000506600)
/go/pkg/mod/github.com/gin-gonic/[email protected]/gin.go:576 +0x425
net/http.serverHandler.ServeHTTP({0xc000600db0?}, {0x105b2e0, 0xc00027e540}, 0xc000506600)
/usr/local/go/src/net/http/server.go:2938 +0x2a2
net/http.(*conn).serve(0xc000240900, {0x105c478, 0xc0001ad5c0})
/usr/local/go/src/net/http/server.go:2009 +0xc25
created by net/http.(*Server).Serve in goroutine 1
/usr/local/go/src/net/http/server.go:3086 +0x80d
goroutine 1 [IO wait]:
internal/poll.runtime_pollWait(0x7f0e9ea9d410, 0x72)
/usr/local/go/src/runtime/netpoll.go:343 +0x85
internal/poll.(*pollDesc).wait(0xc0003a62a0, 0x4ae001?, 0x0)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:84 +0xb1
internal/poll.(*pollDesc).waitRead(...)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:89
internal/poll.(*FD).Accept(0xc0003a6280)
/usr/local/go/src/internal/poll/fd_unix.go:611 +0x405
net.(*netFD).accept(0xc0003a6280)
/usr/local/go/src/net/fd_unix.go:172 +0x3e
net.(*TCPListener).accept(0xc00024d480)
/usr/local/go/src/net/tcpsock_posix.go:152 +0x3e
net.(*TCPListener).Accept(0xc00024d480)
/usr/local/go/src/net/tcpsock.go:315 +0x65
net/http.(*Server).Serve(0xc00054c000, {0x105b520, 0xc00024d480})
/usr/local/go/src/net/http/server.go:3056 +0x57f
net/http.(*Server).ListenAndServe(0xc00054c000)
/usr/local/go/src/net/http/server.go:2985 +0xbd
net/http.ListenAndServe(...)
/usr/local/go/src/net/http/server.go:3239
github.com/gin-gonic/gin.(*Engine).Run(0xc0001871e0, {0xc0003bbef8, 0x1, 0x1})
/go/pkg/mod/github.com/gin-gonic/[email protected]/gin.go:386 +0x38d
example.com/health-check/server.Start()
/app/server/server.go:49 +0x52
main.main()
/app/main.go:8 +0x1d
goroutine 82 [IO wait]:
internal/poll.runtime_pollWait(0x7f0e9ea9d318, 0x72)
/usr/local/go/src/runtime/netpoll.go:343 +0x85
internal/poll.(*pollDesc).wait(0xc0002c60a0, 0xc000568000?, 0x0)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:84 +0xb1
internal/poll.(*pollDesc).waitRead(...)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:89
internal/poll.(*FD).Read(0xc0002c6080, {0xc000568000, 0x1000, 0x1000})
/usr/local/go/src/internal/poll/fd_unix.go:164 +0x3e5
net.(*netFD).Read(0xc0002c6080, {0xc000568000, 0x1000, 0x1000})
/usr/local/go/src/net/fd_posix.go:55 +0x4b
net.(*conn).Read(0xc000514010, {0xc000568000, 0x1000, 0x1000})
/usr/local/go/src/net/net.go:179 +0xad
net/http.(*connReader).Read(0xc0002c4450, {0xc000568000, 0x1000, 0x1000})
/usr/local/go/src/net/http/server.go:791 +0x2b5
bufio.(*Reader).fill(0xc000536d20)
/usr/local/go/src/bufio/bufio.go:113 +0x29a
bufio.(*Reader).Peek(0xc000536d20, 0x4)
/usr/local/go/src/bufio/bufio.go:151 +0xc7
net/http.(*conn).serve(0xc0002e21b0, {0x105c478, 0xc0001ad5c0})
/usr/local/go/src/net/http/server.go:2044 +0xe7c
created by net/http.(*Server).Serve in goroutine 1
/usr/local/go/src/net/http/server.go:3086 +0x80d
goroutine 8335 [IO wait]:
internal/poll.runtime_pollWait(0x7f0e9ea9d128, 0x72)
/usr/local/go/src/runtime/netpoll.go:343 +0x85
internal/poll.(*pollDesc).wait(0xc0002c62a0, 0xc000600dc1?, 0x0)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:84 +0xb1
internal/poll.(*pollDesc).waitRead(...)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:89
internal/poll.(*FD).Read(0xc0002c6280, {0xc000600dc1, 0x1, 0x1})
/usr/local/go/src/internal/poll/fd_unix.go:164 +0x3e5
net.(*netFD).Read(0xc0002c6280, {0xc000600dc1, 0x1, 0x1})
/usr/local/go/src/net/fd_posix.go:55 +0x4b
net.(*conn).Read(0xc0000b04d0, {0xc000600dc1, 0x1, 0x1})
/usr/local/go/src/net/net.go:179 +0xad
net/http.(*connReader).backgroundRead(0xc000600db0)
/usr/local/go/src/net/http/server.go:683 +0x83
created by net/http.(*connReader).startBackgroundRead in goroutine 8334
/usr/local/go/src/net/http/server.go:679 +0x246
感谢任何帮助或指导,非常感谢。
我们也面临着非常相似的问题。我们怀疑不调用 c.Abort() 而只使用 c.String()/c.JSON() 是否是根本原因。