Grafana Loki 失败的 loki-write pod - terraform iac

问题描述 投票:0回答:1

我正在尝试使用 Terraform 和配置来部署 Grafana Loki:

loki.tf

# Deploys the Loki chart (v5.10.0) from the official Grafana Helm repo.
# Values come from templates/loki.yaml; the templatefile() variable map
# is empty, so the template is rendered verbatim.
resource "helm_release" "loki" {
  name       = "loki"
  repository = "https://grafana.github.io/helm-charts"
  chart      = "loki"
  version    = "5.10.0"
  
  values = [
    templatefile("${path.module}/templates/loki.yaml", {

    })
  ]
}

# Deploys Promtail, pointing it at the Loki release defined above.
resource "helm_release" "promtail" {
  chart      = "promtail"
  name       = "promtail"
  repository = "https://grafana.github.io/helm-charts"
  version    = "6.15.5"

  values = [
    templatefile("${path.module}/templates/promtail.yaml", {
      # Direct reference — a bare "${...}" wrapper is redundant since
      # Terraform 0.12 and is flagged by `terraform fmt`/validate. The
      # reference also creates the implicit dependency on the release.
      loki_svc = helm_release.loki.name
    })
  ]

  # Redundant with the implicit dependency above, but kept so callers
  # relying on the explicit ordering see no change.
  depends_on = [helm_release.loki]
}

loki.yaml

# Loki configuration rendered by templatefile() from loki.tf.
# auth_enabled: true means every request must carry an X-Scope-OrgID
# (tenant) header — both Promtail and Grafana must send one.
auth_enabled: true

server:
  http_listen_port: 3100

common:
  ring:
    instance_addr: 127.0.0.1
    kvstore:
      store: inmemory
  replication_factor: 1
  path_prefix: /loki

schema_config:
  configs:
  - from: "2020-05-15"   # quoted so YAML keeps a string, not a date value
    store: tsdb
    object_store: s3
    schema: v13
    index:
      prefix: index_
      period: 24h

storage_config:
  aws:
    # Placeholders must be quoted: an unquoted {x} is parsed by YAML as
    # a flow mapping ({x: null}), not as the string "{x}", so the chart
    # receives a map where a string is expected.
    bucketnames: "{name-of-my-bucket}"
    endpoint: s3.us-east-1.amazonaws.com
    region: us-east-1
    access_key_id: "{my-acces-key}"
    secret_access_key: "{my-secret-key}"
    insecure: false
    http_config:
      idle_conn_timeout: 90s
      response_header_timeout: 0s
      insecure_skip_verify: false
    #s3forcepathstyle: false

grafana.tf

# Deploys Grafana, provisioned with Prometheus and Loki datasources.
resource "helm_release" "grafana" {
  chart      = "grafana"
  name       = "grafana"
  repository = "https://grafana.github.io/helm-charts"
  version    = "6.33.1"

  values = [
    templatefile("${path.module}/templates/grafana-values.yaml", {
      admin_existing_secret = kubernetes_secret.grafana.metadata[0].name
      admin_user_key        = "{my-user-key}"
      # Fixed: closing brace was missing ("{my-password-key").
      admin_password_key    = "{my-password-key}"
      # Interpolation is needed here (string concatenation with "-server").
      prometheus_svc        = "${helm_release.prometheus.name}-server"
      # Direct reference — a bare "${...}" wrapper is redundant.
      loki_svc              = helm_release.loki.name
      replicas              = 1
      root_url              = "/grafana"
    })
  ]

  depends_on = [
    helm_release.prometheus,
    helm_release.loki,
    helm_release.promtail
  ]
}

grafana-values.yaml

# Grafana datasource provisioning, rendered by templatefile() from
# grafana.tf with ${prometheus_svc} substituted.
datasources:
  datasources.yaml:
    apiVersion: 1
    datasources:
    - name: Prometheus
      type: prometheus
      url: http://${prometheus_svc}
      access: proxy
      isDefault: true
    - name: Loki
      type: loki
      access: proxy
      # NOTE(review): hard-coded service DNS name; the loki_svc variable
      # passed in from grafana.tf is not used here — confirm intended.
      url: http://loki-gateway.default.svc.cluster.local
      jsonData:
        # Loki runs with auth_enabled: true, so queries must carry a
        # tenant header; its value is set in secureJsonData below.
        httpHeaderName1: 'X-Scope-OrgID'
        maxLines: 1000
      secureJsonData:
        httpHeaderValue1: '1'
        tlsCACert: ""
        tlsClientCert: ""
        tlsClientKey: ""
      version: 1

promtail.yaml

--- # DaemonSet.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: promtail-daemonset
spec:
  selector:
    matchLabels:
      name: promtail
  template:
    metadata:
      labels:
        name: promtail
    spec:
      # serviceAccountName is the current field; "serviceAccount" is the
      # deprecated alias.
      serviceAccountName: promtail-serviceaccount
      containers:
      - name: promtail-container
        # NOTE(review): untagged image floats to :latest — pin a version.
        image: grafana/promtail
        args:
        - -config.file=/etc/promtail/promtail.yaml
        env:
        # Promtail reads the HOSTNAME env var to decide which targets
        # discovered via kubernetes_sd_configs run on this node; an
        # arbitrary name here ("my-website-url") breaks that filtering
        # and no pod logs are tailed.
        - name: HOSTNAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        volumeMounts:
        - name: logs
          mountPath: /var/log
        - name: promtail-config
          mountPath: /etc/promtail
        - name: varlibdockercontainers
          mountPath: /var/lib/docker/containers
          readOnly: true
      volumes:
      - name: logs
        hostPath:
          path: /var/log
      - name: varlibdockercontainers
        hostPath:
          path: /var/lib/docker/containers
      - name: promtail-config
        configMap:
          name: promtail-config
--- # ConfigMap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: promtail-config
data:
  promtail.yaml: |
    server:
      http_listen_port: 9080
      grpc_listen_port: 0

    clients:
    # http, not https: the loki-gateway service serves plain HTTP (the
    # Grafana datasource in this setup also uses http://).
    - url: http://loki-gateway/loki/api/v1/push
      # Loki runs with auth_enabled: true, so every push must carry an
      # X-Scope-OrgID header; tenant_id makes Promtail send one. "1"
      # matches the tenant the Grafana datasource queries with.
      tenant_id: "1"

    positions:
      filename: /tmp/positions.yaml
    target_config:
      sync_period: 10s
    scrape_configs:
    - job_name: pod-logs
      kubernetes_sd_configs:
        - role: pod
      pipeline_stages:
        - docker: {}
      relabel_configs:
        # Tag each target with its node so only pods on this node are
        # tailed by this daemonset instance.
        - source_labels:
            - __meta_kubernetes_pod_node_name
          target_label: __host__
        # Copy every pod label onto the log stream.
        - action: labelmap
          regex: __meta_kubernetes_pod_label_(.+)
        # job = "<namespace>/<pod>"
        - action: replace
          replacement: $1
          separator: /
          source_labels:
            - __meta_kubernetes_namespace
            - __meta_kubernetes_pod_name
          target_label: job
        - action: replace
          source_labels:
            - __meta_kubernetes_namespace
          target_label: namespace
        - action: replace
          source_labels:
            - __meta_kubernetes_pod_name
          target_label: pod
        - action: replace
          source_labels:
            - __meta_kubernetes_pod_container_name
          target_label: container
        # Host path of the log file: /var/log/pods/*<uid>/<container>/*.log
        - replacement: /var/log/pods/*$1/*.log
          separator: /
          source_labels:
            - __meta_kubernetes_pod_uid
            - __meta_kubernetes_pod_container_name
          target_label: __path__

--- # ClusterRole.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: promtail-clusterrole
# Read-only access to the core objects Promtail's kubernetes_sd_configs
# discovery watches.
rules:
  - apiGroups: [""]
    resources:
    - nodes
    - services
    - pods
    verbs:
    - get
    - watch
    - list

--- # ServiceAccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  # Referenced by the DaemonSet pod spec and the ClusterRoleBinding.
  name: promtail-serviceaccount

--- # ClusterRoleBinding.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: promtail-clusterrolebinding
# Grants the promtail service account the read-only discovery role.
subjects:
    - kind: ServiceAccount
      name: promtail-serviceaccount
      namespace: default
roleRef:
    kind: ClusterRole
    name: promtail-clusterrole
    apiGroup: rbac.authorization.k8s.io

我尝试了多个 loki.yaml 配置文件,但都无法绕过这个问题。现在错误是:

level=error ts=2024-04-23T12:54:33.875038295Z caller=flush.go:144 org_id=self-monitoring msg="failed to flush" err="failed to flush chunks: store put chunk: RequestError: send request failed caused by: Put \"https://chunks.s3.dummy.amazonaws.com/self-monitoring/904272f770f14454/18ec8593247%3A18ec8638029%3A20e8c351%5C%5C\": dial tcp: lookup chunks.s3.dummy.amazonaws.com on 172.20.0.10:53: no such host", num_chunks: 1, labels: {app_kubernetes_io_component=\"read\", app_kubernetes_io_instance=\"loki\", app_kubernetes_io_name=\"loki\", app_kubernetes_io_part_of=\"memberlist\", cluster=\"loki\", container=\"loki\", filename=\"/var/log/pods/default_loki-read-84566c7646-lkfjw_19badfe5-087a-458d-a421-58b580e79cdd/loki/0.log\", job=\"default/loki-read\", namespace=\"default\", pod=\"loki-read-84566c7646-lkfjw\", pod_template_hash=\"84566c7646\", stream=\"stderr\"}"

我在这里和不同的网站上看到了真正的问题,但没有任何帮助。我知道可以将日志添加到 s3 存储桶,但我不知道如何解决这个问题。

amazon-s3 terraform grafana-loki infrastructure-as-code promtail
1个回答
0
投票

我不知道为什么,但我首先添加了

# Chart-structured values for the loki Helm chart (v5.x): storage is
# configured under the top-level `loki:` key, not as raw Loki config.
loki:
  commonConfig:
    replication_factor: 3
  storage:
    bucketNames:
      chunks: chunks
      ruler: ruler
      admin: admin
    type: 's3'
    s3:
      endpoint: s3.us-east-1.amazonaws.com
      region: us-east-1
      # Placeholders quoted: an unquoted {key} parses as a YAML flow
      # mapping ({key: null}), not as the string "{key}". Trailing
      # whitespace (scraping artifact) removed throughout.
      secretAccessKey: "{key}"
      accessKeyId: "{KeyID}"
      s3ForcePathStyle: false
      insecure: false
singleBinary:
  replicas: 3

之后我运行 terraform apply 然后我添加了其余的代码,然后所有代码都开始工作。

希望这对某人有帮助

© www.soinside.com 2019 - 2024. All rights reserved.