Varnish 坚持向不健康的后端发送请求

Question

我正在排查 Varnish（版本 4）的一个问题：它只把流量发送到单个后端主机，即使该主机处于不健康状态。

这是 VCL：

    vcl 4.0;

import std;
import directors;

include "backends.vcl";

# Runs once when the VCL is loaded; delegates director setup to
# backends_init, which is defined in the included backends.vcl.
sub vcl_init {
    call backends_init;
}

# Client-side request handling: normalises the request, decides between
# pipe / pass / purge / cache lookup, and filters cookies.
# NOTE(review): nothing in this subroutine assigns req.backend_hint, so the
# "vdir" round-robin director built in backends_init is never selected here.
sub vcl_recv {
    # Remove the first ":<digits>" sequence (the port) from the Host header.
    set req.http.Host = regsub(req.http.Host, ":[0-9]+", "");
    # Drop any client-supplied Proxy header.
    unset req.http.proxy;
    # Sort the query string, then strip a trailing bare "?".
    set req.url = std.querysort(req.url);
    set req.url = regsub(req.url, "\?$", "");
    # Advertise ESI/1.0 support to the backend.
    set req.http.Surrogate-Capability = "key=ESI/1.0";

    # Health check with an empty body: the only statement is commented out,
    # so this block currently has no effect.
    if (std.healthy(req.backend_hint)) {
        #set req.grace = 10s;
    }

    # Fill in X-Forwarded-Proto when it was not supplied upstream.
    # NOTE(review): both branches assign "https", so the port test below does
    # not change the outcome - confirm whether the else branch should be "http".
    if (!req.http.X-Forwarded-Proto) {
        if(std.port(server.ip) == 443 || std.port(server.ip) == 8443) {
            set req.http.X-Forwarded-Proto = "https";
        } else {
            set req.http.X-Forwarded-Proto = "https";
        }
    }

    # WebSocket upgrades are piped straight through.
    if (req.http.Upgrade ~ "(?i)websocket") {
        return (pipe);
    }

    # Strip the listed tracking/search parameters from the query string, then
    # tidy up a leftover "?&" or trailing "?".
    if (req.url ~ "(\?|&)(utm_source|utm_medium|utm_campaign|utm_content|gclid|cx|ie|cof|siteurl)=") {
        set req.url = regsuball(req.url, "&(utm_source|utm_medium|utm_campaign|utm_content|gclid|cx|ie|cof|siteurl)=([A-z0-9_\-\.%25]+)", "");
        set req.url = regsuball(req.url, "\?(utm_source|utm_medium|utm_campaign|utm_content|gclid|cx|ie|cof|siteurl)=([A-z0-9_\-\.%25]+)", "?");
        set req.url = regsub(req.url, "\?&", "?");
        set req.url = regsub(req.url, "\?$", "");
    }

    # PURGE is only honoured for clients matching the "purge" ACL; everyone
    # else gets a synthetic 405.
    if (req.method == "PURGE") {
        if (!client.ip ~ purge) {
            return (synth(405, "Cannot purge cache from here"));
        }
        return (purge);
    }

    # Any method outside this list is piped to the backend.
    if (req.method != "GET" &&
        req.method != "HEAD" &&
        req.method != "PUT" &&
        req.method != "POST" &&
        req.method != "TRACE" &&
        req.method != "OPTIONS" &&
        req.method != "PATCH" &&
        req.method != "DELETE") {
        return (pipe);
    }

    # Only GET and HEAD go through cache lookup; the rest are passed.
    if (req.method != "GET" && req.method != "HEAD") {
        return (pass);
    }

    # Static files (matched by extension in the path): drop cookies and go
    # straight to the cache lookup.
    if (req.url ~ "^[^?]*\.(7z|avi|bmp|bz2|css|csv|doc|docx|eot|flac|flv|gif|gz|ico|jpeg|jpg|js|less|mka|mkv|mov|mp3|mp4|mpeg|mpg|odt|ogg|ogm|opus|otf|pdf|png|ppt|pptx|rar|rtf|svg|svgz|swf|tar|tbz|tgz|ttf|txt|txz|wav|webm|webp|woff|woff2|xls|xlsx|xml|xz|zip)(\?.*)?$") {
        unset req.http.Cookie;
        return(hash);
    }

    # Remove all cookies except for PHP session cookie
    if (req.http.Cookie) {
        # 1. Prefix with ";" so every cookie is preceded by a delimiter.
        set req.http.Cookie = ";" + req.http.Cookie;
        # 2. Collapse "; " into ";".
        set req.http.Cookie = regsuball(req.http.Cookie, "; +", ";");
        # 3. Mark the cookies to keep (PHPSESSID, SSESS*) by re-inserting a
        #    space after their ";".
        set req.http.Cookie = regsuball(req.http.Cookie, ";(PHPSESSID|SSESS[^=]*)=", "; \1=");
        # 4. Delete every cookie that was not marked (";" directly followed
        #    by a non-space character).
        set req.http.Cookie = regsuball(req.http.Cookie, ";[^ ][^;]*", "");
        # 5. Trim leading and trailing ";" and spaces.
        set req.http.Cookie = regsuball(req.http.Cookie, "^[; ]+|[; ]+$", "");
 
        if (req.http.Cookie == "") {
            unset req.http.Cookie;
        }
    }

    # Drop a Cookie header that is empty or whitespace-only.
    if (req.http.cookie ~ "^\s*$") {
        unset req.http.cookie;
    }
}

# Adds the X-Forwarded-Proto header value to the cache key. There is no
# return statement in this sub.
sub vcl_hash {
    hash_data(req.http.X-Forwarded-Proto);
}

# Post-processes every backend response before it is stored/delivered.
sub vcl_backend_response {
    # Apply a 6-hour grace period to every fetched object.
    set beresp.grace = 6h;

    # The backend signalled ESI/1.0 via Surrogate-Control: enable ESI
    # processing and consume the header.
    if (beresp.http.Surrogate-Control ~ "ESI/1.0") {
        set beresp.do_esi = true;
        unset beresp.http.Surrogate-Control;
    }

    # Static files (matched by extension in the path): give them a one-day
    # TTL and never let them set cookies.
    if (bereq.url ~ "^[^?]*\.(7z|avi|bmp|bz2|css|csv|doc|docx|eot|flac|flv|gif|gz|ico|jpeg|jpg|js|less|mka|mkv|mov|mp3|mp4|mpeg|mpg|odt|ogg|ogm|opus|otf|pdf|png|ppt|pptx|rar|rtf|svg|svgz|swf|tar|tbz|tgz|ttf|txt|txz|wav|webm|webp|woff|woff2|xls|xlsx|xml|xz|zip)(\?.*)?$") {
        set beresp.ttl = 1d;
        unset beresp.http.Set-Cookie;
    }
}

以及上面默认VCL顶部引用的后端VCL

# First origin server, with an HTTP health probe.
backend Server1 {
        # Instance: i-XXXXXXXXXXXXXXXXXX
        .host = "10.135.49.20";

        .port = "80";
        .max_connections = 300; # Upper limit on connections to this backend
        .probe = {
                #.url = "/"; # short easy way (GET /)
                # We prefer to only do a HEAD /
                .request =
                        "HEAD / HTTP/1.1"
                        "Host: localhost"
                        "Connection: close"
                        "User-Agent: Varnish Health Probe";
                .interval = 10s; # probe this backend every 10 seconds
                .timeout = 5s; # probe timeout is 5 seconds
                # If 3 out of the last 5 polls succeeded the backend is considered healthy, otherwise it will be marked as sick
                .window = 5;
                .threshold = 3;
                }
        .first_byte_timeout     = 90s;   # Max wait for the first byte of the backend response
        .connect_timeout        = 5s;    # Max wait for the backend connection to be established
        .between_bytes_timeout  = 2s;    # Max wait between bytes received from the backend


}
# Second origin server, with an HTTP health probe.
backend Server2 {
        # Instance: i-XXXXXXXXXXXXXX
        .host = "10.135.49.137";

        .port = "80";
        .max_connections = 300; # Upper limit on connections to this backend
        .probe = {
                #.url = "/"; # short easy way (GET /)
                # We prefer to only do a HEAD /
                .request =
                        "HEAD / HTTP/1.1"
                        "Host: localhost"
                        "Connection: close"
                        "User-Agent: Varnish Health Probe";
                .interval = 10s; # probe this backend every 10 seconds
                .timeout = 5s; # probe timeout is 5 seconds
                # If 3 out of the last 5 polls succeeded the backend is considered healthy, otherwise it will be marked as sick
                .window = 5;
                .threshold = 3;
                }
        .first_byte_timeout     = 90s;   # Max wait for the first byte of the backend response
        .connect_timeout        = 5s;    # Max wait for the backend connection to be established
        .between_bytes_timeout  = 2s;    # Max wait between bytes received from the backend


}


# Called from vcl_init: creates the round-robin director "vdir" and registers
# both backends with it. Creating the director does not by itself route
# traffic through it.
sub backends_init {
        new vdir = directors.round_robin();

        vdir.add_backend(Server1);
        vdir.add_backend(Server2);
}

# Clients allowed to issue PURGE requests (matched against client.ip in
# vcl_recv).
acl purge {
        "localhost";
        "10.135.49.20";   # same address as backend Server1
        "10.135.49.137";  # same address as backend Server2
        "10.135.48.160";
}

如果我查看后端的健康状况，会看到以下内容：

varnishlog -g raw -i backend_health
         0 Backend_health - Server1 Still sick ------- 0 3 5 0.000000 0.002185
         0 Backend_health - Server2 Still healthy 4--X-RH 5 3 5 0.001479 0.001719 HTTP/1.1 200 OK
         0 Backend_health - Server1 Still sick ------- 0 3 5 0.000000 0.002185
         0 Backend_health - Server2 Still healthy 4--X-RH 5 3 5 0.001306 0.001616 HTTP/1.1 200 OK

一台服务器不健康（sick），一台健康，这符合预期，因为我已经在 Server1 上停止了 Apache。然而，当我请求页面时，却收到“503 Backend fetch failed”（后端获取失败）。如果我终止 Server1 实例，它会自动从后端配置中移除，Varnish 随即开始把流量路由到剩下的那台健康主机。如果我再添加第二台后端主机，流量仍然只会发送到其中一台后端；一旦我让那台后端变得不健康，就会收到 503 错误。

我认为问题一定出在我的后端配置上，但我看不出问题究竟出在哪里。

Answer 1

我就知道肯定是个很简单的问题，事实也确实如此。

我在 sub vcl_recv 中漏掉了 `set req.backend_hint = vdir.backend();` 这一行。下面是包含该修复的完整默认 VCL。

vcl 4.0;

import std;
import directors;

include "backends.vcl";

# Runs once when the VCL is loaded; delegates director setup to
# backends_init, which is defined in the included backends.vcl.
sub vcl_init {
    call backends_init;
}

# Client-side request handling: selects the backend director, normalises the
# request, decides between pipe / pass / purge / cache lookup, and filters
# cookies.
sub vcl_recv {
    # Route through the round-robin director. This is assigned first so that
    # every later use of req.backend_hint (including the health check below)
    # refers to the director rather than to the default backend.
    set req.backend_hint = vdir.backend();

    # Remove the first ":<digits>" sequence (the port) from the Host header.
    set req.http.Host = regsub(req.http.Host, ":[0-9]+", "");
    # Drop any client-supplied Proxy header.
    unset req.http.proxy;
    # Sort the query string, then strip a trailing bare "?".
    set req.url = std.querysort(req.url);
    set req.url = regsub(req.url, "\?$", "");
    # Advertise ESI/1.0 support to the backend.
    set req.http.Surrogate-Capability = "key=ESI/1.0";

    # Health check with an empty body: the only statement is commented out,
    # so this block currently has no effect.
    if (std.healthy(req.backend_hint)) {
        #set req.grace = 10s;
    }

    # Fill in X-Forwarded-Proto when it was not supplied upstream. Requests
    # arriving on a TLS port are "https", everything else is "http"; vcl_hash
    # uses this header, so the two schemes get separate cache entries.
    if (!req.http.X-Forwarded-Proto) {
        if (std.port(server.ip) == 443 || std.port(server.ip) == 8443) {
            set req.http.X-Forwarded-Proto = "https";
        } else {
            set req.http.X-Forwarded-Proto = "http";
        }
    }

    # WebSocket upgrades are piped straight through.
    if (req.http.Upgrade ~ "(?i)websocket") {
        return (pipe);
    }

    # Strip the listed tracking/search parameters from the query string, then
    # tidy up a leftover "?&" or trailing "?".
    if (req.url ~ "(\?|&)(utm_source|utm_medium|utm_campaign|utm_content|gclid|cx|ie|cof|siteurl)=") {
        set req.url = regsuball(req.url, "&(utm_source|utm_medium|utm_campaign|utm_content|gclid|cx|ie|cof|siteurl)=([A-z0-9_\-\.%25]+)", "");
        set req.url = regsuball(req.url, "\?(utm_source|utm_medium|utm_campaign|utm_content|gclid|cx|ie|cof|siteurl)=([A-z0-9_\-\.%25]+)", "?");
        set req.url = regsub(req.url, "\?&", "?");
        set req.url = regsub(req.url, "\?$", "");
    }

    # PURGE is only honoured for clients matching the "purge" ACL; everyone
    # else gets a synthetic 405.
    if (req.method == "PURGE") {
        if (!client.ip ~ purge) {
            return (synth(405, "Cannot purge cache from here"));
        }
        return (purge);
    }

    # Any method outside this list is piped to the backend.
    if (req.method != "GET" &&
        req.method != "HEAD" &&
        req.method != "PUT" &&
        req.method != "POST" &&
        req.method != "TRACE" &&
        req.method != "OPTIONS" &&
        req.method != "PATCH" &&
        req.method != "DELETE") {
        return (pipe);
    }

    # Only GET and HEAD go through cache lookup; the rest are passed.
    if (req.method != "GET" && req.method != "HEAD") {
        return (pass);
    }

    # Static files (matched by extension in the path): drop cookies and go
    # straight to the cache lookup.
    if (req.url ~ "^[^?]*\.(7z|avi|bmp|bz2|css|csv|doc|docx|eot|flac|flv|gif|gz|ico|jpeg|jpg|js|less|mka|mkv|mov|mp3|mp4|mpeg|mpg|odt|ogg|ogm|opus|otf|pdf|png|ppt|pptx|rar|rtf|svg|svgz|swf|tar|tbz|tgz|ttf|txt|txz|wav|webm|webp|woff|woff2|xls|xlsx|xml|xz|zip)(\?.*)?$") {
        unset req.http.Cookie;
        return(hash);
    }

    # Remove all cookies except for PHP session cookie
    if (req.http.Cookie) {
        # 1. Prefix with ";" so every cookie is preceded by a delimiter.
        set req.http.Cookie = ";" + req.http.Cookie;
        # 2. Collapse "; " into ";".
        set req.http.Cookie = regsuball(req.http.Cookie, "; +", ";");
        # 3. Mark the cookies to keep (PHPSESSID, SSESS*) by re-inserting a
        #    space after their ";".
        set req.http.Cookie = regsuball(req.http.Cookie, ";(PHPSESSID|SSESS[^=]*)=", "; \1=");
        # 4. Delete every cookie that was not marked (";" directly followed
        #    by a non-space character).
        set req.http.Cookie = regsuball(req.http.Cookie, ";[^ ][^;]*", "");
        # 5. Trim leading and trailing ";" and spaces.
        set req.http.Cookie = regsuball(req.http.Cookie, "^[; ]+|[; ]+$", "");

        if (req.http.Cookie == "") {
            unset req.http.Cookie;
        }
    }

    # Drop a Cookie header that is empty or whitespace-only.
    if (req.http.cookie ~ "^\s*$") {
        unset req.http.cookie;
    }
}

# Adds the X-Forwarded-Proto header value to the cache key. There is no
# return statement in this sub.
sub vcl_hash {
    hash_data(req.http.X-Forwarded-Proto);
}

# Post-processes every backend response before it is stored/delivered.
sub vcl_backend_response {
    # Apply a 6-hour grace period to every fetched object.
    set beresp.grace = 6h;

    # The backend signalled ESI/1.0 via Surrogate-Control: enable ESI
    # processing and consume the header.
    if (beresp.http.Surrogate-Control ~ "ESI/1.0") {
        set beresp.do_esi = true;
        unset beresp.http.Surrogate-Control;
    }

    # Static files (matched by extension in the path): give them a one-day
    # TTL and never let them set cookies.
    if (bereq.url ~ "^[^?]*\.(7z|avi|bmp|bz2|css|csv|doc|docx|eot|flac|flv|gif|gz|ico|jpeg|jpg|js|less|mka|mkv|mov|mp3|mp4|mpeg|mpg|odt|ogg|ogm|opus|otf|pdf|png|ppt|pptx|rar|rtf|svg|svgz|swf|tar|tbz|tgz|ttf|txt|txz|wav|webm|webp|woff|woff2|xls|xlsx|xml|xz|zip)(\?.*)?$") {
        set beresp.ttl = 1d;
        unset beresp.http.Set-Cookie;
    }
}

Varnish 坚持向不健康的后端发送请求

问题描述投票：0回答：1

1个回答

最新问题

Varnish 坚持向不健康的后端发送请求

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1