---
# Prometheus alerting rules built on metrics named probe_* and
# blackbox_exporter_* (all rules live in the "BlackboxExporter" group).
groups:
  - name: BlackboxExporter
    rules:
      # Fires when the summed probe_success over all series labelled
      # host_type="wan" is 0, i.e. none of those probes reports success.
      - alert: BlackboxAllWanProbesFailed
        expr: 'sum by (host_type) (probe_success{host_type="wan"})==0'
        for: 5s
        labels:
          severity: critical
        annotations:
          summary: Lost internet access
          description: Failed to contact any wan probes

      # Fires immediately (for: 0m) for any single series with probe_success == 0.
      - alert: BlackboxProbeFailed
        expr: 'probe_success == 0'
        for: 0m
        labels:
          severity: error
        annotations:
          summary: 'Unable to reach (instance {{ $labels.instance }})'
          description: "Probe failed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the last configuration reload is not flagged as successful.
      - alert: BlackboxConfigurationReloadFailure
        expr: 'blackbox_exporter_config_last_reload_successful != 1'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'Blackbox configuration reload failure (instance {{ $labels.instance }})'
          description: "Blackbox configuration reload failure\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the 1-minute average probe duration stays above 1 for 1 minute.
      - alert: BlackboxSlowProbe
        expr: 'avg_over_time(probe_duration_seconds[1m]) > 1'
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: 'Getting slow responses from (instance {{ $labels.instance }})'
          description: "Blackbox probe took more than 1s to complete\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires for any HTTP status code outside the 200-399 range.
      - alert: BlackboxProbeHttpFailure
        expr: 'probe_http_status_code <= 199 OR probe_http_status_code >= 400'
        for: 0m
        labels:
          severity: error
        annotations:
          summary: 'HTTP failure (instance {{ $labels.instance }})'
          description: "HTTP status code is not 200-399\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Certificate expiry rules: (expiry - time()) / 86400 converts the
      # remaining seconds to days, rounded to the nearest 0.1.
      # Warning band: at least 3 and fewer than 20 days remaining.
      - alert: BlackboxSslCertificateWillExpireSoon
        expr: '3 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 20'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'SSL certificate will expire soon (instance {{ $labels.instance }})'
          description: "SSL certificate expires in less than 20 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Error band: at least 0 and fewer than 3 days remaining.
      # NOTE(review): shares its alert name with the warning-band rule above;
      # the two differ only by threshold and severity label. Confirm this is
      # intentional before renaming, since routing may key on the name.
      - alert: BlackboxSslCertificateWillExpireSoon
        expr: '0 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 3'
        for: 0m
        labels:
          severity: error
        annotations:
          summary: 'SSL certificate expiry imminent (instance {{ $labels.instance }})'
          description: "SSL certificate expires in less than 3 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires once the computed days-remaining value is negative.
      - alert: BlackboxSslCertificateExpired
        expr: 'round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 0'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: 'SSL certificate expired (instance {{ $labels.instance }})'
          description: "SSL certificate has expired already\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the 1-minute average HTTP duration stays above 1 for 1 minute.
      - alert: BlackboxProbeSlowHttp
        expr: 'avg_over_time(probe_http_duration_seconds[1m]) > 1'
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: 'Slow HTTP responses from (instance {{ $labels.instance }})'
          description: "HTTP request took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the 1-minute average ICMP duration stays above 1 for 1 minute.
      - alert: BlackboxProbeSlowPing
        expr: 'avg_over_time(probe_icmp_duration_seconds[1m]) > 1'
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: 'Slow ping responses from (instance {{ $labels.instance }})'
          description: "Blackbox ping took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"