diff --git a/roles/alertmanager/README.md b/roles/alertmanager/README.md
new file mode 100644
index 0000000..a34128d
--- /dev/null
+++ b/roles/alertmanager/README.md
@@ -0,0 +1 @@
+Sets up a prometheus alertmanager docker container.
diff --git a/roles/alertmanager/defaults/main.yml b/roles/alertmanager/defaults/main.yml
new file mode 100644
index 0000000..46f490f
--- /dev/null
+++ b/roles/alertmanager/defaults/main.yml
@@ -0,0 +1,10 @@
+---
+
+alertmanager_storage_retention: "{{ prometheus_storage_retention | default('87600h') }}"
+
+alertmanager_smtp_server: "{{ smtp_server | default('') }}"
+alertmanager_smtp_from: "{{ smtp_from | default('') }}"
+alertmanager_smtp_user: "{{ smtp_user | default('') }}"
+alertmanager_smtp_pw: "{{ smtp_pw | default('') }}"
+
+alertmanager_routes: []
diff --git a/roles/alertmanager/meta/argument_specs.yml b/roles/alertmanager/meta/argument_specs.yml
new file mode 100644
index 0000000..93749e1
--- /dev/null
+++ b/roles/alertmanager/meta/argument_specs.yml
@@ -0,0 +1,91 @@
+---
+
+argument_specs:
+  main:
+    short_description: Prometheus alertmanager docker container
+    options:
+      alertmanager_storage_retention:
+        description: Period of time for which alertmanager data is stored. Passed to --data.retention as a Go duration (units s, m, h), so d, w and y are not accepted.
+        type: str
+        required: false
+        default: "{{ prometheus_storage_retention | default('87600h') }}"
+
+      alertmanager_smtp_server:
+        description: Smtp server to use for sending mail. Must be reachable on port 587. Emails not sent if not defined
+        type: str
+        required: false
+        default: "{{ smtp_server | default('') }}"
+      alertmanager_smtp_from:
+        description: Address to send mail from. Required if sending emails.
+        type: str
+        required: false
+        default: "{{ smtp_from | default('') }}"
+      alertmanager_smtp_user:
+        description: User to login to smtp server with. No authentication if not defined.
+        type: str
+        required: false
+        default: "{{ smtp_user | default('') }}"
+      alertmanager_smtp_pw:
+        description: Password for the smtp user
+        type: str
+        required: false
+        default: "{{ smtp_pw | default('') }}"
+
+      alertmanager_routes:
+        description: Child routes of the root route, written to the alertmanager config as is
+        type: list
+        elements: dict
+        required: false
+        default: []
+      alertmanager_receivers:
+        description: Receivers written to the alertmanager config. Must include the uumas_email receiver referenced by the root route.
+        type: list
+        elements: dict
+        required: true
+      smtp_server:
+        description: Global smtp server value, default for alertmanager_smtp_server
+        type: str
+        required: false
+      smtp_from:
+        description: Global smtp from value, default for alertmanager_smtp_from
+        type: str
+        required: "{{ alertmanager_smtp_server | length > 0 and alertmanager_smtp_from | length == 0 }}"
+      smtp_user:
+        description: Global smtp user value, default for alertmanager_smtp_user
+        type: str
+        required: false
+      smtp_pw:
+        description: Global smtp password value, default for alertmanager_smtp_pw
+        type: str
+        required: "{{ alertmanager_smtp_server | length > 0 and alertmanager_smtp_user | length > 0 and alertmanager_smtp_pw | length == 0 }}"
+
+      # All options after this will be passed directly to the container role
+      docker_service_suffix:
+        description: "Passed to container role"
+        required: false
+      docker_host_user:
+        description: "Passed to container role"
+        required: false
+
+      database_passwords:
+        description: "Passed to container role"
+        required: false
+      docker_additional_services:
+        description: "Passed to container role"
+        required: false
+
+      docker_volume_type:
+        description: "Passed to container role"
+        required: false
+      reverse_proxy_type:
+        description: "Passed to container role"
+        required: false
+      ports:
+        description: "Passed to container role"
+        required: false
+      docker_vhost_domains:
+        description: "Passed to container role"
+        required: false
+      docker_entrypoint:
+        description: "Passed to container role"
+        required: false
diff --git a/roles/alertmanager/tasks/main.yml b/roles/alertmanager/tasks/main.yml
new file mode 100644
index 0000000..b4965a5
--- /dev/null
+++ b/roles/alertmanager/tasks/main.yml
@@ -0,0 +1,19 @@
+---
+
+- name: Prometheus alertmanager container
+  include_role:
+    name: container
+  vars:
+    docker_service: alertmanager
+    docker_image: prom/alertmanager
+    reverse_proxy_type: none
+    docker_command:
+      - "--config.file=/etc/alertmanager/alertmanager.yml"
+      - "--storage.path=/alertmanager"
+      - "--data.retention={{ alertmanager_storage_retention }}"
+
+    docker_mounts:
+      - name: data
+        path: /alertmanager
+      - template: alertmanager.yml
+        path: /etc/alertmanager/alertmanager.yml
diff --git a/roles/alertmanager/templates/alertmanager.yml.j2 b/roles/alertmanager/templates/alertmanager.yml.j2
new file mode 100644
index 0000000..93cf4e5
--- /dev/null
+++ b/roles/alertmanager/templates/alertmanager.yml.j2
@@ -0,0 +1,68 @@
+---
+
+# {{ ansible_managed }}
+
+global:
+  # The smarthost and SMTP sender used for mail notifications.
+{% if alertmanager_smtp_server | length > 0 %}
+  smtp_smarthost: '{{ alertmanager_smtp_server }}:587'
+  smtp_from: {{ alertmanager_smtp_from | to_json }}
+{% if alertmanager_smtp_user | length > 0 %}
+  smtp_auth_username: {{ alertmanager_smtp_user | to_json }}
+  smtp_auth_password: {{ alertmanager_smtp_pw | to_json }}
+{% endif %}
+{% endif %}
+
+# The directory from which notification templates are read.
+templates:
+  - '/etc/alertmanager/template/*.tmpl'
+
+# The root route on which each incoming alert enters.
+route:
+  # The labels by which incoming alerts are grouped together. For example,
+  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
+  # be batched into a single group.
+  #
+  # To aggregate by all possible labels use '...' as the sole label name.
+  # This effectively disables aggregation entirely, passing through all
+  # alerts as-is. This is unlikely to be what you want, unless you have
+  # a very low alert volume or your upstream notification system performs
+  # its own grouping. Example: group_by: [...]
+  group_by: ['alertname', 'cluster', 'service']
+
+  # When a new group of alerts is created by an incoming alert, wait at
+  # least 'group_wait' to send the initial notification.
+  # This ensures that multiple alerts for the same group that start firing
+  # shortly after one another are batched together in the first
+  # notification.
+  group_wait: 30s
+
+  # When the first notification was sent, wait 'group_interval' to send a batch
+  # of new alerts that started firing for that group.
+  group_interval: 5m
+
+  # If an alert has successfully been sent, wait 'repeat_interval' to
+  # resend it.
+  repeat_interval: 3h
+
+  # The default receiver; it must be defined in alertmanager_receivers.
+  receiver: uumas_email
+
+  # All the above attributes are inherited by all child routes and can be
+  # overwritten on each.
+
+  # The child route trees, rendered as JSON (a subset of YAML).
+  routes: {{ alertmanager_routes | to_json }}
+
+inhibit_rules:
+  - source_matchers: [severity="critical"]
+    target_matchers: [severity="warning"]
+    # Apply inhibition if the alertname is the same.
+    # CAUTION:
+    #   If all label names listed in `equal` are missing
+    #   from both the source and target alerts,
+    #   the inhibition rule will apply!
+    equal: [alertname, cluster, service]
+
+# Notification receivers, rendered as JSON (a subset of YAML).
+receivers: {{ alertmanager_receivers | to_json }}