fluentd-gcp-configmap-old.yaml

Documentation: k8s.io/kubernetes/cluster/addons/fluentd-gcp

     1# This ConfigMap is used to ingest logs against old resources like
     2# "gke_container" and "gce_instance" when $LOGGING_STACKDRIVER_RESOURCE_TYPES is
     3# set to "old".
     4# When $LOGGING_STACKDRIVER_RESOURCE_TYPES is set to "new", the ConfigMap in
     5# fluentd-gcp-configmap.yaml will be used for ingesting logs against new
     6# resources like "k8s_container" and "k8s_node".
     7kind: ConfigMap
     8apiVersion: v1
     9data:
    10  containers.input.conf: |-
    11    # This configuration file for Fluentd is used
    12    # to watch changes to Docker log files that live in the
    13    # directory /var/lib/docker/containers/ and are symbolically
    14    # linked to from the /var/log/containers directory using names that capture the
    15    # pod name and container name. These logs are then submitted to
    16    # Google Cloud Logging which assumes the installation of the cloud-logging plug-in.
    17    #
    18    # Example
    19    # =======
    20    # A line in the Docker log file might look like this JSON:
    21    #
    22    # {"log":"2014/09/25 21:15:03 Got request with path wombat\\n",
    23    #  "stream":"stderr",
    24    #   "time":"2014-09-25T21:15:03.499185026Z"}
    25    #
    26    # The original tag is derived from the log file's location.
    27    # For example a Docker container's logs might be in the directory:
    28    #  /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b
    29    # and in the file:
    30    #  997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
    31    # where 997599971ee6... is the Docker ID of the running container.
    32    # The Kubernetes kubelet makes a symbolic link to this file on the host
    33    # machine in the /var/log/containers directory which includes the pod name,
    34    # the namespace name and the Kubernetes container name:
    35    #    synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
    36    #    ->
    37    #    /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
    38    # The /var/log directory on the host is mapped to the /var/log directory in the container
    39    # running this instance of Fluentd and we end up collecting the file:
    40    #   /var/log/containers/synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
    41    # This results in the tag:
    42    #  var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
    43    # where 'synthetic-logger-0.25lps-pod' is the pod name, 'default' is the
    44    # namespace name, 'synth-lgr' is the container name and '997599971ee6..' is
    45    # the container ID.
    46    # The record reformer is used is discard the var.log.containers prefix and
    47    # the Docker container ID suffix and "kubernetes." is pre-pended giving the tag:
    48    #   kubernetes.synthetic-logger-0.25lps-pod_default_synth-lgr
    49    # Tag is then parsed by google_cloud plugin and translated to the metadata,
    50    # visible in the log viewer
    51
    52    # Json Log Example:
    53    # {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"}
    54    # CRI Log Example:
    55    # 2016-02-17T00:04:05.931087621Z stdout F [info:2016-02-16T16:04:05.930-08:00] Some log text here
    56    <source>
    57      @type tail
    58      path /var/log/containers/*.log
    59      pos_file /var/log/gcp-containers.log.pos
    60      # Tags at this point are in the format of:
    61      # reform.var.log.containers.<POD_NAME>_<NAMESPACE_NAME>_<CONTAINER_NAME>-<CONTAINER_ID>.log
    62      tag reform.*
    63      read_from_head true
    64      <parse>
    65        @type multi_format
    66        <pattern>
    67          format json
    68          time_key time
    69          time_format %Y-%m-%dT%H:%M:%S.%NZ
    70        </pattern>
    71        <pattern>
    72          format /^(?<time>.+) (?<stream>stdout|stderr) [^ ]* (?<log>.*)$/
    73          time_format %Y-%m-%dT%H:%M:%S.%N%:z
    74        </pattern>
    75      </parse>
    76    </source>
    77
    78    <filter reform.**>
    79      @type parser
    80      format /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<log>.*)/
    81      reserve_data true
    82      suppress_parse_error_log true
    83      emit_invalid_record_to_error false
    84      key_name log
    85    </filter>
    86
    87    <match reform.**>
    88      @type record_reformer
    89      enable_ruby true
    90      # Tags at this point are in the format of:
    91      # 'raw.kubernetes.<POD_NAME>_<NAMESPACE_NAME>_<CONTAINER_NAME>'.
    92      tag raw.kubernetes.${tag_suffix[4].split('-')[0..-2].join('-')}
    93    </match>
    94
    95    # Detect exceptions in the log output and forward them as one log entry.
    96    <match raw.kubernetes.**>
    97      @type detect_exceptions
    98
    99      remove_tag_prefix raw
   100      message log
   101      stream stream
   102      multiline_flush_interval 5
   103      max_bytes 500000
   104      max_lines 1000
   105    </match>
   106  system.input.conf: |-
   107    # Example:
   108    # Dec 21 23:17:22 gke-foo-1-1-4b5cbd14-node-4eoj startupscript: Finished running startup script /var/run/google.startup.script
   109    <source>
   110      @type tail
   111      format syslog
   112      path /var/log/startupscript.log
   113      pos_file /var/log/gcp-startupscript.log.pos
   114      tag startupscript
   115    </source>
   116
   117    # Examples:
   118    # time="2016-02-04T06:51:03.053580605Z" level=info msg="GET /containers/json"
   119    # time="2016-02-04T07:53:57.505612354Z" level=error msg="HTTP Error" err="No such image: -f" statusCode=404
   120    # TODO(random-liu): Remove this after cri container runtime rolls out.
   121    <source>
   122      @type tail
   123      format /^time="(?<time>[^)]*)" level=(?<severity>[^ ]*) msg="(?<message>[^"]*)"( err="(?<error>[^"]*)")?( statusCode=($<status_code>\d+))?/
   124      path /var/log/docker.log
   125      pos_file /var/log/gcp-docker.log.pos
   126      tag docker
   127    </source>
   128
   129    # Example:
   130    # 2016/02/04 06:52:38 filePurge: successfully removed file /var/etcd/data/member/wal/00000000000006d0-00000000010a23d1.wal
   131    <source>
   132      @type tail
   133      # Not parsing this, because it doesn't have anything particularly useful to
   134      # parse out of it (like severities).
   135      format none
   136      path /var/log/etcd.log
   137      pos_file /var/log/gcp-etcd.log.pos
   138      tag etcd
   139    </source>
   140
   141    # Multi-line parsing is required for all the kube logs because very large log
   142    # statements, such as those that include entire object bodies, get split into
   143    # multiple lines by glog.
   144
   145    # Example:
   146    # I0204 07:32:30.020537    3368 server.go:1048] POST /stats/container/: (13.972191ms) 200 [[Go-http-client/1.1] 10.244.1.3:40537]
   147    <source>
   148      @type tail
   149      format multiline
   150      multiline_flush_interval 5s
   151      format_firstline /^\w\d{4}/
   152      format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
   153      time_format %m%d %H:%M:%S.%N
   154      path /var/log/kubelet.log
   155      pos_file /var/log/gcp-kubelet.log.pos
   156      tag kubelet
   157    </source>
   158
   159    # Example:
   160    # I1118 21:26:53.975789       6 proxier.go:1096] Port "nodePort for kube-system/default-http-backend:http" (:31429/tcp) was open before and is still needed
   161    <source>
   162      @type tail
   163      format multiline
   164      multiline_flush_interval 5s
   165      format_firstline /^\w\d{4}/
   166      format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
   167      time_format %m%d %H:%M:%S.%N
   168      path /var/log/kube-proxy.log
   169      pos_file /var/log/gcp-kube-proxy.log.pos
   170      tag kube-proxy
   171    </source>
   172
   173    # Example:
   174    # I0204 07:00:19.604280       5 handlers.go:131] GET /api/v1/nodes: (1.624207ms) 200 [[kube-controller-manager/v1.1.3 (linux/amd64) kubernetes/6a81b50] 127.0.0.1:38266]
   175    <source>
   176      @type tail
   177      format multiline
   178      multiline_flush_interval 5s
   179      format_firstline /^\w\d{4}/
   180      format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
   181      time_format %m%d %H:%M:%S.%N
   182      path /var/log/kube-apiserver.log
   183      pos_file /var/log/gcp-kube-apiserver.log.pos
   184      tag kube-apiserver
   185    </source>
   186
   187    # Example:
   188    # I0204 06:55:31.872680       5 servicecontroller.go:277] LB already exists and doesn't need update for service kube-system/kube-ui
   189    <source>
   190      @type tail
   191      format multiline
   192      multiline_flush_interval 5s
   193      format_firstline /^\w\d{4}/
   194      format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
   195      time_format %m%d %H:%M:%S.%N
   196      path /var/log/kube-controller-manager.log
   197      pos_file /var/log/gcp-kube-controller-manager.log.pos
   198      tag kube-controller-manager
   199    </source>
   200
   201    # Example:
   202    # W0204 06:49:18.239674       7 reflector.go:245] pkg/scheduler/factory/factory.go:193: watch of *api.Service ended with: 401: The event in requested index is outdated and cleared (the requested history has been cleared [2578313/2577886]) [2579312]
   203    <source>
   204      @type tail
   205      format multiline
   206      multiline_flush_interval 5s
   207      format_firstline /^\w\d{4}/
   208      format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
   209      time_format %m%d %H:%M:%S.%N
   210      path /var/log/kube-scheduler.log
   211      pos_file /var/log/gcp-kube-scheduler.log.pos
   212      tag kube-scheduler
   213    </source>
   214
   215    # Example:
   216    # I0603 15:31:05.793605       6 cluster_manager.go:230] Reading config from path /etc/gce.conf
   217    <source>
   218      @type tail
   219      format multiline
   220      multiline_flush_interval 5s
   221      format_firstline /^\w\d{4}/
   222      format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
   223      time_format %m%d %H:%M:%S.%N
   224      path /var/log/glbc.log
   225      pos_file /var/log/gcp-glbc.log.pos
   226      tag glbc
   227    </source>
   228
   229    # Example:
   230    # I0603 15:31:05.793605       6 cluster_manager.go:230] Reading config from path /etc/gce.conf
   231    <source>
   232      @type tail
   233      format multiline
   234      multiline_flush_interval 5s
   235      format_firstline /^\w\d{4}/
   236      format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
   237      time_format %m%d %H:%M:%S.%N
   238      path /var/log/cluster-autoscaler.log
   239      pos_file /var/log/gcp-cluster-autoscaler.log.pos
   240      tag cluster-autoscaler
   241    </source>
   242
   243    # Logs from systemd-journal for interesting services.
   244    # TODO(random-liu): Keep this for compatibility, remove this after
   245    # cri container runtime rolls out.
   246    <source>
   247      @type systemd
   248      filters [{ "_SYSTEMD_UNIT": "docker.service" }]
   249      pos_file /var/log/gcp-journald-docker.pos
   250      read_from_head true
   251      tag docker
   252    </source>
   253
   254    <source>
   255      @type systemd
   256      filters [{ "_SYSTEMD_UNIT": "{{ fluentd_container_runtime_service }}.service" }]
   257      pos_file /var/log/gcp-journald-container-runtime.pos
   258      read_from_head true
   259      tag container-runtime
   260    </source>
   261
   262    <source>
   263      @type systemd
   264      filters [{ "_SYSTEMD_UNIT": "kubelet.service" }]
   265      pos_file /var/log/gcp-journald-kubelet.pos
   266      read_from_head true
   267      tag kubelet
   268    </source>
   269
   270    <source>
   271      @type systemd
   272      filters [{ "_SYSTEMD_UNIT": "node-problem-detector.service" }]
   273      pos_file /var/log/gcp-journald-node-problem-detector.pos
   274      read_from_head true
   275      tag node-problem-detector
   276    </source>
   277
   278    # BEGIN_NODE_JOURNAL
   279    # Whether to include node-journal or not is determined when starting the
   280    # cluster. It is not changed when the cluster is already running.
   281    <source>
   282      @type systemd
   283      pos_file /var/log/gcp-journald.pos
   284      read_from_head true
   285      tag node-journal
   286    </source>
   287
   288    <filter node-journal>
   289      @type grep
   290      <exclude>
   291        key _SYSTEMD_UNIT
   292        pattern ^(docker|{{ fluentd_container_runtime_service }}|kubelet|node-problem-detector)\.service$
   293      </exclude>
   294    </filter>
   295    # END_NODE_JOURNAL
   296  monitoring.conf: |-
   297    # This source is used to acquire approximate process start timestamp,
   298    # which purpose is explained before the corresponding output plugin.
   299    <source>
   300      @type exec
   301      command /bin/sh -c 'date +%s'
   302      tag process_start
   303      time_format %Y-%m-%d %H:%M:%S
   304      keys process_start_timestamp
   305    </source>
   306
   307    # This filter is used to convert process start timestamp to integer
   308    # value for correct ingestion in the prometheus output plugin.
   309    <filter process_start>
   310      @type record_transformer
   311      enable_ruby true
   312      auto_typecast true
   313      <record>
   314        process_start_timestamp ${record["process_start_timestamp"].to_i}
   315      </record>
   316    </filter>
   317  output.conf: |-
   318    # This match is placed before the all-matching output to provide metric
   319    # exporter with a process start timestamp for correct exporting of
   320    # cumulative metrics to Stackdriver.
   321    <match process_start>
   322      @type prometheus
   323
   324      <metric>
   325        type gauge
   326        name process_start_time_seconds
   327        desc Timestamp of the process start in seconds
   328        key process_start_timestamp
   329      </metric>
   330    </match>
   331
   332    # This filter allows to count the number of log entries read by fluentd
   333    # before they are processed by the output plugin. This in turn allows to
   334    # monitor the number of log entries that were read but never sent, e.g.
   335    # because of liveness probe removing buffer.
   336    <filter **>
   337      @type prometheus
   338      <metric>
   339        type counter
   340        name logging_entry_count
   341        desc Total number of log entries generated by either application containers or system components
   342      </metric>
   343    </filter>
   344
   345    # TODO(instrumentation): Reconsider this workaround later.
   346    # Trim the entries which exceed slightly less than 100KB, to avoid
   347    # dropping them. It is a necessity, because Stackdriver only supports
   348    # entries that are up to 100KB in size.
   349    <filter kubernetes.**>
   350      @type record_transformer
   351      enable_ruby true
   352      <record>
   353        log ${record['log'].length > 100000 ? "[Trimmed]#{record['log'][0..100000]}..." : record['log']}
   354      </record>
   355    </filter>
   356
   357    # Do not collect fluentd's own logs to avoid infinite loops.
   358    <match fluent.**>
   359      @type null
   360    </match>
   361
   362    # We use 2 output stanzas - one to handle the container logs and one to handle
   363    # the node daemon logs, the latter of which explicitly sends its logs to the
   364    # compute.googleapis.com service rather than container.googleapis.com to keep
   365    # them separate since most users don't care about the node logs.
   366    <match kubernetes.**>
   367      @type google_cloud
   368
   369      # Try to detect JSON formatted log entries.
   370      detect_json true
   371      # Collect metrics in Prometheus registry about plugin activity.
   372      enable_monitoring true
   373      monitoring_type prometheus
   374      # Allow log entries from multiple containers to be sent in the same request.
   375      split_logs_by_tag false
   376      # Set the buffer type to file to improve the reliability and reduce the memory consumption
   377      buffer_type file
   378      buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
   379      # Set queue_full action to block because we want to pause gracefully
   380      # in case of the off-the-limits load instead of throwing an exception
   381      buffer_queue_full_action block
   382      # Set the chunk limit conservatively to avoid exceeding the recommended
   383      # chunk size of 5MB per write request.
   384      buffer_chunk_limit 1M
   385      # Cap the combined memory usage of this buffer and the one below to
   386      # 1MiB/chunk * (6 + 2) chunks = 8 MiB
   387      buffer_queue_limit 6
   388      # Never wait more than 5 seconds before flushing logs in the non-error case.
   389      flush_interval 5s
   390      # Never wait longer than 30 seconds between retries.
   391      max_retry_wait 30
   392      # Disable the limit on the number of retries (retry forever).
   393      disable_retry_limit
   394      # Use multiple threads for processing.
   395      num_threads 2
   396      use_grpc true
   397    </match>
   398
   399    # Keep a smaller buffer here since these logs are less important than the user's
   400    # container logs.
   401    <match **>
   402      @type google_cloud
   403
   404      detect_json true
   405      enable_monitoring true
   406      monitoring_type prometheus
   407      # Allow entries from multiple system logs to be sent in the same request.
   408      split_logs_by_tag false
   409      detect_subservice false
   410      buffer_type file
   411      buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
   412      buffer_queue_full_action block
   413      buffer_chunk_limit 1M
   414      buffer_queue_limit 2
   415      flush_interval 5s
   416      max_retry_wait 30
   417      disable_retry_limit
   418      num_threads 2
   419      use_grpc true
   420    </match>
   421metadata:
   422  name: fluentd-gcp-config-old-v1.2.5
   423  namespace: kube-system
   424  labels:
   425    addonmanager.kubernetes.io/mode: Reconcile
View as plain text