|
3 | 3 | Airbyte supports two ways to collect metrics: Datadog or OpenTelemetry.
|
4 | 4 | Fill in `METRIC_CLIENT` field in `.env` file to get started!
|
5 | 5 |
|
| 6 | +**Prerequisite:** |
| 7 | +To get metrics from Airbyte, we need to deploy a container / pod that runs the `metrics-reporter` image, as shown below: |
| 8 | +``` |
| 9 | +airbyte-metrics: |
| 10 | + image: airbyte/metrics-reporter:${VERSION} |
| 11 | + container_name: airbyte-metrics |
| 12 | + environment: |
| 13 | + - METRIC_CLIENT=${METRIC_CLIENT} |
| 14 | + - OTEL_COLLECTOR_ENDPOINT=${OTEL_COLLECTOR_ENDPOINT} |
| 15 | +``` |
| 16 | + |
| 17 | + |
6 | 18 | # Open Telemetry
|
7 | 19 |
|
8 | 20 | 1. In `.env` change `METRIC_CLIENT` to `otel`.
|
@@ -53,6 +65,191 @@ OTEL_COLLECTOR_ENDPOINT=<address>
|
53 | 65 | If you started open telemetry collector in the link above, the address should be `http://otel-collector:4317`.
|
54 | 66 | Note the format: unlike in the base `.env`, values in the `.env` file under Kubernetes are not quoted.
|
55 | 67 |
|
| 68 | + |
| 69 | +## Tutorial |
| 70 | + |
| 71 | +Deploy the Airbyte metrics pod: |
| 72 | +``` |
| 73 | +--- |
| 74 | +apiVersion: apps/v1 |
| 75 | +kind: Deployment |
| 76 | +metadata: |
| 77 | + name: {{ .Release.Name }}-metrics |
| 78 | + namespace: {{ .Release.Namespace }} |
| 79 | + labels: {{ set . "component" "metrics" | include "labels" | nindent 4 }} |
| 80 | +spec: |
| 81 | + selector: |
| 82 | + matchLabels: {{ set . "component" "metrics" | include "labels" | nindent 6 }} |
| 83 | + template: |
| 84 | + metadata: |
| 85 | + labels: {{ set . "component" "metrics" | include "labels" | nindent 8 }} |
| 86 | + spec: |
| 87 | + containers: |
| 88 | + - name: airbyte-metrics |
| 89 | + image: "airbyte/metrics-reporter:latest" |
| 90 | + imagePullPolicy: IfNotPresent |
| 91 | + env: |
| 92 | + - name: AIRBYTE_VERSION |
| 93 | + value: latest |
| 94 | + - name: DATABASE_PASSWORD |
| 95 | + valueFrom: |
| 96 | + secretKeyRef: |
| 97 | + name: {{ include "airbyte.database.secret.name" . }} |
| 98 | + key: DATABASE_PASSWORD |
| 99 | + - name: DATABASE_URL |
| 100 | + value: {{ include "airbyte.database.url" . | quote }} |
| 101 | + - name: DATABASE_USER |
| 102 | + valueFrom: |
| 103 | + secretKeyRef: |
| 104 | + name: {{ .Release.Name }}-secrets |
| 105 | + key: DATABASE_USER |
| 106 | + - name: CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION |
| 107 | + value: 0.35.15.001 |
| 108 | + - name: METRIC_CLIENT |
| 109 | + value: otel |
| 110 | + - name: OTEL_COLLECTOR_ENDPOINT |
| 111 | + value: http://otel-collector:4317 |
| 112 | +``` |
| 113 | + |
| 114 | + |
| 115 | +Deploy an OpenTelemetry collector pod, as shown below: |
| 116 | + |
| 117 | +``` |
| 118 | +apiVersion: apps/v1 |
| 119 | +kind: Deployment |
| 120 | +metadata: |
| 121 | + name: otel-collector |
| 122 | + namespace: {{ .Release.Namespace }} |
| 123 | + labels: {{ set . "component" "otel-collector" | include "labels" | nindent 4 }} |
| 124 | +spec: |
| 125 | + selector: |
| 126 | + matchLabels: {{ set . "component" "otel-collector" | include "labels" | nindent 6 }} |
| 127 | + replicas: 1 |
| 128 | + template: |
| 129 | + metadata: |
| 130 | + labels: {{ set . "component" "otel-collector" | include "labels" | nindent 8 }} |
| 131 | + spec: |
| 132 | + containers: |
| 133 | + - command: |
| 134 | + - "/otelcol" |
| 135 | + - "--config=/conf/otel-collector-config.yaml" |
| 136 | + image: "otel/opentelemetry-collector:latest" |
| 137 | + name: otel-collector |
| 138 | + ports: |
| 139 | + - containerPort: 4317 # Default endpoint for OpenTelemetry receiver. |
| 140 | + - containerPort: 8889 # Port for Prometheus instance |
| 141 | + volumeMounts: |
| 142 | + - name: config |
| 143 | + mountPath: /conf |
| 144 | + volumes: |
| 145 | + - configMap: |
| 146 | + name: otel-collector-conf |
| 147 | + items: |
| 148 | + - key: otel-collector-config |
| 149 | + path: otel-collector-config.yaml |
| 150 | + name: config |
| 151 | +``` |
| 152 | + |
| 153 | +With this ConfigMap: |
| 154 | + |
| 155 | +``` |
| 156 | +apiVersion: v1 |
| 157 | +kind: ConfigMap |
| 158 | +metadata: |
| 159 | + name: otel-collector-conf |
| 160 | + namespace: {{ .Release.Namespace }} |
| 161 | + labels: {{ set . "component" "otel-collector" | include "labels" | nindent 4 }} |
| 162 | +data: |
| 163 | + otel-collector-config: | |
| 164 | + receivers: |
| 165 | + otlp: |
| 166 | + protocols: |
| 167 | + grpc: |
| 168 | + endpoint: "0.0.0.0:4317" |
| 169 | + processors: |
| 170 | + batch: |
| 171 | + memory_limiter: |
| 172 | + limit_mib: 1500 |
| 173 | + spike_limit_mib: 512 |
| 174 | + check_interval: 5s |
| 175 | + exporters: |
| 176 | + prometheus: |
| 177 | + endpoint: "0.0.0.0:8889" |
| 178 | + namespace: "default" |
| 179 | + service: |
| 180 | + pipelines: |
| 181 | + metrics: |
| 182 | + receivers: [otlp] |
| 183 | + processors: [memory_limiter, batch] |
| 184 | + exporters: [prometheus] |
| 185 | +``` |
| 186 | + |
| 187 | +Then we need a Service to expose both the OpenTelemetry gRPC receiver and the Prometheus exporter: |
| 188 | +``` |
| 189 | +apiVersion: v1 |
| 190 | +kind: Service |
| 191 | +metadata: |
| 192 | + name: otel-collector |
| 193 | + namespace: {{ .Release.Namespace }} |
| 194 | + labels: {{ set . "component" "otel-collector" | include "labels" | nindent 4 }} |
| 195 | +spec: |
| 196 | + ports: |
| 197 | + - name: otlp-grpc # Default endpoint for OpenTelemetry gRPC receiver. |
| 198 | + port: 4317 |
| 199 | + protocol: TCP |
| 200 | + targetPort: 4317 |
| 201 | + - name: prometheus |
| 202 | + port: 8889 |
| 203 | + selector: {{ set . "component" "otel-collector" | include "labels" | nindent 6 }} |
| 204 | +``` |
| 205 | + |
| 206 | +Finally, we can add a ServiceMonitor so that Prometheus receives the metrics, and optionally add some Prometheus rules to generate alerts. |
| 207 | +Replace `<your_prometheus_name>` with the name of your Prometheus instance. |
| 208 | +``` |
| 209 | +apiVersion: monitoring.coreos.com/v1 |
| 210 | +kind: ServiceMonitor |
| 211 | +metadata: |
| 212 | + name: {{ .Release.Name }} |
| 213 | + namespace: {{ .Release.Namespace }} |
| 214 | + labels: |
| 215 | + {{ set . "component" "metrics" | include "labels" | nindent 4 }} |
| 216 | + prometheus: <your_prometheus_name> |
| 217 | +spec: |
| 218 | + endpoints: |
| 219 | + - interval: 30s |
| 220 | + port: prometheus |
| 221 | + path: /metrics |
| 222 | + relabelings: |
| 223 | + - action: labeldrop |
| 224 | + regex: (service|endpoint|namespace|container) |
| 225 | + selector: |
| 226 | + matchLabels: {{ set . "component" "otel-collector" | include "labels" | nindent 6 }} |
| 227 | +``` |
| 228 | + |
| 229 | +Example rule: |
| 230 | +``` |
| 231 | +apiVersion: monitoring.coreos.com/v1 |
| 232 | +kind: PrometheusRule |
| 233 | +metadata: |
| 234 | + name: {{ .Release.Name }}-rules |
| 235 | + namespace: {{ .Release.Namespace }} |
| 236 | + labels: |
| 237 | + {{ set . "component" "prometheus-rules" | include "labels" | nindent 4 }} |
| 238 | + prometheus: <your_prometheus_name> |
| 239 | +spec: |
| 240 | + groups: |
| 241 | + - name: airbyte |
| 242 | + rules: |
| 243 | + - alert: AirbyteJobFail |
| 244 | + for: 0m |
| 245 | + expr: min(airbyte_job_failed_by_release_stage) > 0 |
| 246 | + labels: |
| 247 | + priority: P2 |
| 248 | + annotations: |
| 249 | + summary: {{ `"An Airbyte Job has failed"` }} |
| 250 | +``` |
| 251 | + |
| 252 | + |
56 | 253 | # Datadog
|
57 | 254 | TBD
|
58 | 255 |
|
|
0 commit comments