---
# OpenTelemetryCollector custom resource (OpenTelemetry Operator, v1alpha1).
#
# Runs one collector pod per node (mode: daemonset, hostNetwork) with three
# pipelines defined under `config`:
#   metrics -> otlphttp/metrics (+ debug)
#   logs    -> loki
#   traces  -> otlp (+ spanmetrics connector, which feeds the metrics pipeline)
# Exporter queues are persisted through the `db_storage` extension.
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
metadata:
  name: o11y-platform-otel-collector
spec:
  image: otel/opentelemetry-collector-contrib:0.120.0
  ports:
    - targetPort: 55679
      name: zpages
      port: 55679
  mode: daemonset
  serviceAccount: otel-collector
  hostNetwork: true
  env:
    # Node *name*: consumed by k8sattributes `filter.node_from_env_var` and
    # used as the value of the `node` attribute in the resource/* processors.
    - name: K8S_NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    # Node *IP*: used only to reach the local kubelet (kubeletstats receiver).
    - name: K8S_NODE_IP
      valueFrom:
        fieldRef:
          fieldPath: status.hostIP
    # Pod IP: the OTLP receiver binds to this address.
    - name: MY_POD_IP
      valueFrom:
        fieldRef:
          apiVersion: v1
          fieldPath: status.podIP
  volumeMounts:
    - mountPath: /var/log/pods
      name: varlog
      readOnly: true
    - mountPath: /var/lib/docker/containers
      name: varlibdockercontainers
      readOnly: true
    # Host root filesystem, read by the hostmetrics receiver (root_path).
    - name: hostfs
      mountPath: /hostfs
      readOnly: true
      mountPropagation: HostToContainer
  volumes:
    - name: varlog
      hostPath:
        path: /var/log/pods
    - name: varlibdockercontainers
      hostPath:
        path: /var/lib/docker/containers
    - name: hostfs
      hostPath:
        path: /
  config: |
    connectors:
      # Derives metrics from spans; listed as a traces exporter and as a
      # metrics receiver in service.pipelines.
      spanmetrics:
        exemplars:
          enabled: true
        namespace: span.metrics

    exporters:
      debug:
        verbosity: detailed
      otlphttp/metrics:
        headers:
          X-Scope-OrgID: o11y-platform
        auth:
          authenticator: basicauth/prw
        endpoint: http://xxxxxxx:80/otlp
        tls:
          insecure: true
        retry_on_failure:
          enabled: true
          initial_interval: 5s
          max_interval: 30s
          max_elapsed_time: 300s
        sending_queue:
          enabled: true
          queue_size: 25000000
          num_consumers: 10
          storage: db_storage
      loki:
        headers:
          X-Scope-OrgID: o11y-platform
        tls:
          insecure: true
          insecure_skip_verify: true
        auth:
          authenticator: basicauth/lokiauth
        endpoint: http://xxxxxxx:3100/loki/api/v1/push
        sending_queue:
          storage: db_storage
          enabled: true
          queue_size: 25920000
          num_consumers: 10
        # NOTE(review): max_elapsed_time equals max_interval here (30s), unlike
        # otlphttp/metrics (300s) - confirm the short retry budget is intended.
        retry_on_failure:
          enabled: true
          initial_interval: 5s
          max_interval: 30s
          max_elapsed_time: 30s
      # NOTE(review): basicauth/tempoauth is defined and enabled below but no
      # exporter references it - confirm whether this exporter should use it.
      otlp:
        headers:
          X-Scope-OrgID: o11y-platform
        tls:
          insecure: true
        endpoint: 108.141.204.6:4317
        sending_queue:
          storage: db_storage
          enabled: true
          queue_size: 25920000
          num_consumers: 10
        retry_on_failure:
          enabled: true
          initial_interval: 5s
          max_interval: 30s
          max_elapsed_time: 30s

    extensions:
      # NOTE(review): credentials below are inline in the manifest; consider
      # sourcing them from a Kubernetes Secret via ${env:...} instead.
      db_storage:
        driver: "pgx"
        datasource: "postgres://otelcol:xxxxxx@postgres.otel-collector:5432/otelcol_db?sslmode=disable"
      basicauth/prw:
        client_auth:
          username: admin
          password: xxxxxx
      basicauth/lokiauth:
        client_auth:
          username: loki
          password: xxxx
      basicauth/tempoauth:
        client_auth:
          username: tempo
          password: xxxx
      zpages:
        endpoint: ':55679'
      # file_storage:
      #   directory: /data
      #   create_directory: true
      #   compaction:
      #     on_rebound: true
      #     max_transaction_size: 65_536
      #     rebound_needed_threshold_mib: 6
      #     rebound_trigger_threshold_mib: 11
      health_check:
        endpoint: 0.0.0.0:13133

    service:
      extensions:
        - basicauth/prw
        # - file_storage
        - basicauth/lokiauth
        - basicauth/tempoauth
        - zpages
        - db_storage
        # Listed so that the health_check extension configured above is
        # actually started; an extension that is only defined never runs.
        - health_check
      telemetry:
        metrics:
          address: 127.0.0.1:8888
          level: detailed
        logs:
          level: debug
      pipelines:
        metrics:
          receivers:
            - otlp
            - prometheus
            - k8s_cluster
            - kubeletstats
            - spanmetrics
            - hostmetrics
            - azuremonitor
          exporters:
            - otlphttp/metrics
            - debug
          processors:
            - memory_limiter
            - attributes/metrics
            - k8sattributes
            - resource/metrics
            - batch/metrics
        logs:
          receivers:
            - filelog
            - otlp
            - azureeventhub
          exporters:
            - loki
          processors:
            - resource/logs
        traces:
          receivers:
            - otlp
          exporters:
            - otlp
            - spanmetrics
          processors:
            - memory_limiter
            - k8sattributes
            - resource/traces
            - batch/traces

    processors:
      # NOTE(review): not referenced by any pipeline above; the gcp/eks
      # detectors also look inconsistent with cloud_provider: AKS - confirm.
      resourcedetection/system:
        detectors: [env, system, gcp, eks]
        timeout: 2s
        override: false
      attributes/metrics:
        actions:
          - action: insert
            key: environment
            value: xxxx
          - action: insert
            from_attribute: k8s.pod.uid
            key: service.instance.id
          - action: insert
            key: cluster_name
            value: xxxxxx
          - action: insert
            key: cloud_provider
            value: AKS
          - action: insert
            key: X_Scope_OrgID
            value: o11y-platform
          - action: insert
            key: cluster
            value: xxxxxxxx
      resource/metrics:
        attributes:
          - action: insert
            key: node
            value: ${env:K8S_NODE_NAME}
      resource/logs:
        attributes:
          - action: insert
            from_attribute: k8s.pod.uid
            key: service.instance.id
          - action: insert
            key: cluster_name
            value: xxxxxx
          - action: insert
            key: X_Scope_OrgID
            value: o11y-platform
          - action: insert
            key: cloud_provider
            value: AKS
          - action: insert
            key: environment
            value: xxxxxx
          - action: insert
            key: node
            value: ${env:K8S_NODE_NAME}
          - action: insert
            key: loki.format
            value: raw
          - action: insert
            key: loki.resource.labels
            value: pod, namespace, container, filename, cluster_name, X_Scope_OrgID, cloud_provider, environment, node
      resource/traces:
        attributes:
          - action: insert
            from_attribute: k8s.pod.uid
            key: service.instance.id
          - action: insert
            key: cluster_name
            value: xxxxxxxx
          - action: insert
            key: X_Scope_OrgID
            value: o11y-platform
          - action: insert
            key: cloud_provider
            value: AKS
          - action: insert
            key: environment
            value: xxxxxxx
          - action: insert
            key: node
            value: ${env:K8S_NODE_NAME}
      batch/metrics:
        send_batch_size: 8192
        timeout: 200ms
      batch/traces:
        timeout: 10s
        send_batch_size: 1024
      memory_limiter:
        check_interval: 1s
        limit_percentage: 80
        spike_limit_percentage: 20
      # NOTE(review): presumably passthrough mode only tags the pod IP and
      # leaves metadata lookup (e.g. k8s.pod.uid) to a downstream collector;
      # confirm that is intended since the resource/* processors above read
      # k8s.pod.uid.
      k8sattributes:
        auth_type: "serviceAccount"
        passthrough: true
        filter:
          # Must hold the node name (spec.nodeName), not the node IP.
          node_from_env_var: K8S_NODE_NAME

    receivers:
      kubeletstats:
        collection_interval: 10s
        auth_type: "serviceAccount"
        # Talks to the kubelet on this pod's own node via the host IP.
        endpoint: ${env:K8S_NODE_IP}:10250
        insecure_skip_verify: true
        metric_groups:
          - container
          - pod
          - volume
          - node
        extra_metadata_labels:
          - container.id
          - k8s.volume.type
      # NOTE(review): this receiver is cluster-scoped while the collector runs
      # as a daemonset, so every node's pod will report the same cluster-level
      # metrics - confirm duplicates are handled or move it to a single replica.
      k8s_cluster:
        auth_type: serviceAccount
        collection_interval: 10s
        node_conditions_to_report: [Ready, MemoryPressure, DiskPressure, NetworkUnavailable]
        allocatable_types_to_report: [cpu, memory, storage, ephemeral-storage]
      k8s_events:
        auth_type: "serviceAccount"
      otlp:
        protocols:
          grpc:
            endpoint: ${env:MY_POD_IP}:4317
          http:
            endpoint: ${env:MY_POD_IP}:4318
      # NOTE(review): the node-role jobs below discover every node from each
      # daemonset pod; confirm the resulting duplicate scrapes are acceptable.
      # `$$` is the collector's escape for a literal `$` in relabel templates.
      prometheus:
        config:
          scrape_configs:
            - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
              job_name: integrations/kubernetes/cadvisor
              kubernetes_sd_configs:
                - role: node
              relabel_configs:
                - replacement: kubernetes.default.svc.cluster.local:443
                  target_label: __address__
                - regex: (.+)
                  replacement: /api/v1/nodes/$${1}/proxy/metrics/cadvisor
                  source_labels:
                    - __meta_kubernetes_node_name
                  target_label: __metrics_path__
              scheme: https
              tls_config:
                ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
                insecure_skip_verify: false
                server_name: kubernetes
            - job_name: integrations/kubernetes/kube-state-metrics
              kubernetes_sd_configs:
                - role: pod
              relabel_configs:
                # NOTE(review): target_label has no effect on a `keep` rule;
                # a separate `replace` rule (as in the node_exporter job) may
                # have been intended - confirm.
                - action: keep
                  regex: kube-state-metrics
                  source_labels:
                    - __meta_kubernetes_pod_label_app_kubernetes_io_name
                  target_label: namespace
            - job_name: integrations/node_exporter
              kubernetes_sd_configs:
                - role: pod
              relabel_configs:
                - action: keep
                  regex: prometheus-node-exporter.*
                  source_labels:
                    - __meta_kubernetes_pod_label_app_kubernetes_io_name
                - action: replace
                  source_labels:
                    - __meta_kubernetes_pod_node_name
                  target_label: instance
                - action: replace
                  source_labels:
                    - __meta_kubernetes_namespace
                  target_label: namespace
            - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
              job_name: integrations/kubernetes/kubelet
              kubernetes_sd_configs:
                - role: node
              relabel_configs:
                - replacement: kubernetes.default.svc.cluster.local:443
                  target_label: __address__
                - regex: (.+)
                  replacement: /api/v1/nodes/$${1}/proxy/metrics
                  source_labels:
                    - __meta_kubernetes_node_name
                  target_label: __metrics_path__
              scheme: https
              tls_config:
                ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
                insecure_skip_verify: false
                server_name: kubernetes
            - job_name: "kubernetes-apiservers"
              kubernetes_sd_configs:
                - role: endpoints
                  namespaces:
                    names:
                      - default
              scheme: https
              tls_config:
                ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
                insecure_skip_verify: true
              bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
              relabel_configs:
                - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
                  action: keep
                  regex: kubernetes;https
                - action: replace
                  source_labels:
                    - __meta_kubernetes_namespace
                  target_label: Namespace
                - action: replace
                  source_labels:
                    - __meta_kubernetes_service_name
                  target_label: Service
      hostmetrics:
        # Reads host stats through the /hostfs mount declared in volumeMounts.
        root_path: /hostfs
        collection_interval: 10s
        scrapers:
          cpu:
            metrics:
              system.cpu.utilization:
                enabled: true
          disk: null
          load:
          filesystem:
            exclude_fs_types:
              fs_types:
                - autofs
                - binfmt_misc
                - bpf
                - cgroup2
                - configfs
                - debugfs
                - devpts
                - devtmpfs
                - fusectl
                - hugetlbfs
                - iso9660
                - mqueue
                - nsfs
                - overlay
                - proc
                - procfs
                - pstore
                - rpc_pipefs
                - securityfs
                - selinuxfs
                - squashfs
                - sysfs
                - tracefs
              match_type: strict
            exclude_mount_points:
              match_type: regexp
              mount_points:
                - /dev/*
                - /proc/*
                - /sys/*
                - /run/k3s/containerd/*
                - /var/lib/docker/*
                - /var/lib/kubelet/*
                - /snap/*
            metrics:
              system.filesystem.utilization:
                enabled: true
          memory:
            metrics:
              system.memory.utilization:
                enabled: true
          network:
          paging:
      filelog:
        include:
          - /var/log/pods/*/*/*.log
        start_at: beginning
        include_file_path: true
        include_file_name: false
        operators:
          # Routes each line to a format-specific parser based on its prefix.
          - type: router
            id: get-format
            routes:
              - output: parser-docker
                expr: 'body matches "^\\{"'
              - output: parser-crio
                expr: 'body matches "^[^ Z]+ "'
              - output: parser-containerd
                expr: 'body matches "^[^ Z]+Z"'
          - type: regex_parser
            id: parser-crio
            regex: '^(?P