diff --git a/README.md b/README.md index 7ccbafd4..98577756 100644 --- a/README.md +++ b/README.md @@ -17,14 +17,16 @@ core applications: https://status.twinnation.org/ - [Usage](#usage) - [Configuration](#configuration) - [Conditions](#conditions) + - [Alerting](#alerting) + - [Configuring Slack alerts](#configuring-slack-alerts) + - [Configuring PagerDuty alerts](#configuring-pagerduty-alerts) + - [Configuring Twilio alerts](#configuring-twilio-alerts) + - [Configuring custom alerts](#configuring-custom-alerts) - [Docker](#docker) - [Running the tests](#running-the-tests) - [Using in Production](#using-in-production) - [FAQ](#faq) - [Sending a GraphQL request](#sending-a-graphql-request) - - [Configuring Slack alerts](#configuring-slack-alerts) - - [Configuring Twilio alerts](#configuring-twilio-alerts) - - [Configuring custom alerts](#configuring-custom-alerts) ## Features @@ -72,35 +74,36 @@ Note that you can also add environment variables in the configuration file (i.e. ### Configuration -| Parameter | Description | Default | -| -------------------------------------- | --------------------------------------------------------------- | -------------- | -| `debug` | Whether to enable debug logs | `false` | -| `metrics` | Whether to expose metrics at /metrics | `false` | -| `services` | List of services to monitor | Required `[]` | -| `services[].name` | Name of the service. Can be anything. | Required `""` | -| `services[].url` | URL to send the request to | Required `""` | -| `services[].conditions` | Conditions used to determine the health of the service | `[]` | -| `services[].interval` | Duration to wait between every status check | `60s` | -| `services[].method` | Request method | `GET` | -| `services[].graphql` | Whether to wrap the body in a query param (`{"query":"$body"}`) | `false` | -| `services[].body` | Request body | `""` | -| `services[].headers` | Request headers | `{}` | -| `services[].alerts[].type` | Type of alert. Valid types: `slack`, `twilio`, `custom` | Required `""` | -| `services[].alerts[].enabled` | Whether to enable the alert | `false` | -| `services[].alerts[].threshold` | Number of failures in a row needed before triggering the alert | `3` | -| `services[].alerts[].description` | Description of the alert. Will be included in the alert sent | `""` | -| `services[].alerts[].send-on-resolved` | Whether to send a notification once a triggered alert subsides | `false` | -| `alerting` | Configuration for alerting | `{}` | -| `alerting.slack` | Webhook to use for alerts of type `slack` | `""` | -| `alerting.twilio` | Settings for alerts of type `twilio` | `""` | -| `alerting.twilio.sid` | Twilio account SID | Required `""` | -| `alerting.twilio.token` | Twilio auth token | Required `""` | -| `alerting.twilio.from` | Number to send Twilio alerts from | Required `""` | -| `alerting.twilio.to` | Number to send twilio alerts to | Required `""` | -| `alerting.custom` | Configuration for custom actions on failure or alerts | `""` | -| `alerting.custom.url` | Custom alerting request url | `""` | -| `alerting.custom.body` | Custom alerting request body. 
| `""`           |
-| `alerting.custom.headers`              | Custom alerting request headers                                  | `{}`           |
+| Parameter                                      | Description                                                                  | Default        |
+| ---------------------------------------------- | ---------------------------------------------------------------------------- | -------------- |
+| `debug`                                        | Whether to enable debug logs                                                  | `false`        |
+| `metrics`                                      | Whether to expose metrics at /metrics                                         | `false`        |
+| `services`                                     | List of services to monitor                                                   | Required `[]`  |
+| `services[].name`                              | Name of the service. Can be anything.                                         | Required `""`  |
+| `services[].url`                               | URL to send the request to                                                    | Required `""`  |
+| `services[].conditions`                        | Conditions used to determine the health of the service                        | `[]`           |
+| `services[].interval`                          | Duration to wait between every status check                                   | `60s`          |
+| `services[].method`                            | Request method                                                                | `GET`          |
+| `services[].graphql`                           | Whether to wrap the body in a query param (`{"query":"$body"}`)               | `false`        |
+| `services[].body`                              | Request body                                                                  | `""`           |
+| `services[].headers`                           | Request headers                                                               | `{}`           |
+| `services[].alerts[].type`                     | Type of alert. Valid types: `slack`, `pagerduty`, `twilio`, `custom`          | Required `""`  |
+| `services[].alerts[].enabled`                  | Whether to enable the alert                                                   | `false`        |
+| `services[].alerts[].threshold`                | Number of failures in a row needed before triggering the alert                | `3`            |
+| `services[].alerts[].description`              | Description of the alert. Will be included in the alert sent                  | `""`           |
+| `services[].alerts[].send-on-resolved`         | Whether to send a notification once a triggered alert subsides                | `false`        |
+| `services[].alerts[].success-before-resolved`  | Number of successes in a row needed before sending a resolved notification    | `2`            |
+| `alerting`                                     | Configuration for alerting                                                    | `{}`           |
+| `alerting.slack`                               | Webhook to use for alerts of type `slack`                                     | `""`           |
+| `alerting.pagerduty`                           | PagerDuty Events API routing key for alerts of type `pagerduty`               | `""`           |
+| `alerting.twilio`                              | Settings for alerts of type `twilio`                                          | `""`           |
+| `alerting.twilio.sid`                          | Twilio account SID                                                            | Required `""`  |
+| `alerting.twilio.token`                        | Twilio auth token                                                             | Required `""`  |
+| `alerting.twilio.from`                         | Number to send Twilio alerts from                                             | Required `""`  |
+| `alerting.twilio.to`                           | Number to send Twilio alerts to                                               | Required `""`  |
+| `alerting.custom`                              | Configuration for custom actions on failure or alerts                         | `""`           |
+| `alerting.custom.url`                          | Custom alerting request URL                                                   | `""`           |
+| `alerting.custom.body`                         | Custom alerting request body.                                                 
| `""`           |
+| `alerting.custom.headers`                      | Custom alerting request headers                                               | `{}`           |
 
 
 ### Conditions
 
@@ -121,6 +124,136 @@ Here are some examples of conditions you can use:
 | `len([BODY].name) == 8` | String at jsonpath `$.name` has a length of 8 | `{"name":"john.doe"}` | `{"name":"bob"}` |
 
 
+### Alerting
+
+
+#### Configuring Slack alerts
+
+```yaml
+alerting:
+  slack: "https://hooks.slack.com/services/**********/**********/**********"
+services:
+  - name: twinnation
+    interval: 30s
+    url: "https://twinnation.org/health"
+    alerts:
+      - type: slack
+        enabled: true
+        description: "healthcheck failed 3 times in a row"
+        send-on-resolved: true
+      - type: slack
+        enabled: true
+        threshold: 5
+        description: "healthcheck failed 5 times in a row"
+        send-on-resolved: true
+    conditions:
+      - "[STATUS] == 200"
+      - "[BODY].status == UP"
+      - "[RESPONSE_TIME] < 300"
+```
+
+Here's an example of what the notifications look like:
+
+![Slack notifications](.github/assets/slack-alerts.png)
+
+
+#### Configuring PagerDuty alerts
+
+It is highly recommended to set `services[].alerts[].send-on-resolved` to `true` for alerts
+of type `pagerduty`: unlike the other alert types, the resolved notification does not create
+another incident, but instead marks the existing incident as resolved on PagerDuty.
+
+```yaml
+alerting:
+  pagerduty: "********************************"
+services:
+  - name: twinnation
+    interval: 30s
+    url: "https://twinnation.org/health"
+    alerts:
+      - type: pagerduty
+        enabled: true
+        threshold: 3
+        description: "healthcheck failed 3 times in a row"
+        send-on-resolved: true
+        success-before-resolved: 5
+    conditions:
+      - "[STATUS] == 200"
+      - "[BODY].status == UP"
+      - "[RESPONSE_TIME] < 300"
+```
+
+
+#### Configuring Twilio alerts
+
+```yaml
+alerting:
+  twilio:
+    sid: "..."
+    token: "..."
+    from: "+1-234-567-8901"
+    to: "+1-234-567-8901"
+services:
+  - name: twinnation
+    interval: 30s
+    url: "https://twinnation.org/health"
+    alerts:
+      - type: twilio
+        enabled: true
+        threshold: 5
+        description: "healthcheck failed 5 times in a row"
+    conditions:
+      - "[STATUS] == 200"
+      - "[BODY].status == UP"
+      - "[RESPONSE_TIME] < 300"
+```
+
+
+#### Configuring custom alerts
+
+While they're called alerts, you can use this feature to call anything.
+
+For instance, you could automate rollbacks by having an application that keeps track of new deployments, and by
+leveraging Gatus, have Gatus call that application's endpoint when a service starts failing. Your application
+would then check whether the service that started failing was recently deployed and, if it was, automatically
+roll it back.
+
+The placeholders `[ALERT_DESCRIPTION]` and `[SERVICE_NAME]` are automatically replaced by the alert description and the
+service name respectively, in both the body (`alerting.custom.body`) and the URL (`alerting.custom.url`).
+
+If you have `send-on-resolved` set to `true`, you may want to use `[ALERT_TRIGGERED_OR_RESOLVED]` to differentiate
+the notifications. It will be replaced by either `TRIGGERED` or `RESOLVED`, depending on the situation.
+
+For all intents and purposes, we'll configure the custom alert with a Slack webhook, but you can call anything you want.
+ +```yaml +alerting: + custom: + url: "https://hooks.slack.com/services/**********/**********/**********" + method: "POST" + body: | + { + "text": "[ALERT_TRIGGERED_OR_RESOLVED]: [SERVICE_NAME] - [ALERT_DESCRIPTION]" + } +services: + - name: twinnation + interval: 30s + url: "https://twinnation.org/health" + alerts: + - type: custom + enabled: true + threshold: 10 + send-on-resolved: true + description: "healthcheck failed 10 times in a row" + conditions: + - "[STATUS] == 200" + - "[BODY].status == UP" + - "[RESPONSE_TIME] < 300" +``` + + ## Docker ``` @@ -173,101 +306,3 @@ will send a `POST` request to `http://localhost:8080/playground` with the follow ```json {"query":" {\n user(gender: \"female\") {\n id\n name\n gender\n avatar\n }\n }"} ``` - - -### Configuring Slack alerts - -```yaml -alerting: - slack: "https://hooks.slack.com/services/**********/**********/**********" -services: - - name: twinnation - interval: 30s - url: "https://twinnation.org/health" - alerts: - - type: slack - enabled: true - description: "healthcheck failed 3 times in a row" - send-on-resolved: true - - type: slack - enabled: true - threshold: 5 - description: "healthcheck failed 5 times in a row" - send-on-resolved: true - conditions: - - "[STATUS] == 200" - - "[BODY].status == UP" - - "[RESPONSE_TIME] < 300" -``` - -Here's an example of what the notifications look like: - -![Slack notifications](.github/assets/slack-alerts.png) - - -### Configuring Twilio alerts - -```yaml -alerting: - twilio: - sid: "..." - token: "..." - from: "+1-234-567-8901" - to: "+1-234-567-8901" -services: - - name: twinnation - interval: 30s - url: "https://twinnation.org/health" - alerts: - - type: twilio - enabled: true - threshold: 5 - description: "healthcheck failed 5 times in a row" - conditions: - - "[STATUS] == 200" - - "[BODY].status == UP" - - "[RESPONSE_TIME] < 300" -``` - - -### Configuring custom alerts - -While they're called alerts, you can use this feature to call anything. - -For instance, you could automate rollbacks by having an application that keeps tracks of new deployments, and by -leveraging Gatus, you could have Gatus call that application endpoint when a service starts failing. Your application -would then check if the service that started failing was recently deployed, and if it was, then automatically -roll it back. - -The values `[ALERT_DESCRIPTION]` and `[SERVICE_NAME]` are automatically substituted for the alert description and the -service name respectively in the body (`alerting.custom.body`) as well as the url (`alerting.custom.url`). - -If you have `send-on-resolved` set to `true`, you may want to use `[ALERT_TRIGGERED_OR_RESOLVED]` to differentiate -the notifications. It will be replaced for either `TRIGGERED` or `RESOLVED`, based on the situation. - -For all intents and purpose, we'll configure the custom alert with a Slack webhook, but you can call anything you want. 
- -```yaml -alerting: - custom: - url: "https://hooks.slack.com/services/**********/**********/**********" - method: "POST" - body: | - { - "text": "[ALERT_TRIGGERED_OR_RESOLVED]: [SERVICE_NAME] - [ALERT_DESCRIPTION]" - } -services: - - name: twinnation - interval: 30s - url: "https://twinnation.org/health" - alerts: - - type: custom - enabled: true - threshold: 10 - send-on-resolved: true - description: "healthcheck failed 10 times in a row" - conditions: - - "[STATUS] == 200" - - "[BODY].status == UP" - - "[RESPONSE_TIME] < 300" -``` diff --git a/alerting/alerting.go b/alerting/alerting.go new file mode 100644 index 00000000..24f20f98 --- /dev/null +++ b/alerting/alerting.go @@ -0,0 +1,158 @@ +package alerting + +import ( + "encoding/json" + "fmt" + "github.com/TwinProduction/gatus/config" + "github.com/TwinProduction/gatus/core" + "log" +) + +// Handle takes care of alerts to resolve and alerts to trigger based on result success or failure +func Handle(service *core.Service, result *core.Result) { + cfg := config.Get() + if cfg.Alerting == nil { + return + } + if result.Success { + handleAlertsToResolve(service, result, cfg) + } else { + handleAlertsToTrigger(service, result, cfg) + } +} + +func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *config.Config) { + service.NumberOfSuccessesInARow = 0 + service.NumberOfFailuresInARow++ + for _, alert := range service.Alerts { + // If the alert hasn't been triggered, move to the next one + if !alert.Enabled || alert.Threshold != service.NumberOfFailuresInARow { + continue + } + if alert.Triggered { + if cfg.Debug { + log.Printf("[alerting][handleAlertsToTrigger] Alert with description='%s' has already been triggered, skipping", alert.Description) + } + continue + } + var alertProvider *core.CustomAlertProvider + if alert.Type == core.SlackAlert { + if len(cfg.Alerting.Slack) > 0 { + log.Printf("[alerting][handleAlertsToTrigger] Sending Slack alert because alert with description='%s' has been triggered", alert.Description) + alertProvider = core.CreateSlackCustomAlertProvider(cfg.Alerting.Slack, service, alert, result, false) + } else { + log.Printf("[alerting][handleAlertsToTrigger] Not sending Slack alert despite being triggered, because there is no Slack webhook configured") + } + } else if alert.Type == core.PagerDutyAlert { + if len(cfg.Alerting.PagerDuty) > 0 { + log.Printf("[alerting][handleAlertsToTrigger] Sending PagerDuty alert because alert with description='%s' has been triggered", alert.Description) + alertProvider = core.CreatePagerDutyCustomAlertProvider(cfg.Alerting.PagerDuty, "trigger", "", service, fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description)) + } else { + log.Printf("[alerting][handleAlertsToTrigger] Not sending PagerDuty alert despite being triggered, because PagerDuty isn't configured properly") + } + } else if alert.Type == core.TwilioAlert { + if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() { + log.Printf("[alerting][handleAlertsToTrigger] Sending Twilio alert because alert with description='%s' has been triggered", alert.Description) + alertProvider = core.CreateTwilioCustomAlertProvider(cfg.Alerting.Twilio, fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description)) + } else { + log.Printf("[alerting][handleAlertsToTrigger] Not sending Twilio alert despite being triggered, because Twilio config settings missing") + } + } else if alert.Type == core.CustomAlert { + if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() { + 
log.Printf("[alerting][handleAlertsToTrigger] Sending custom alert because alert with description='%s' has been triggered", alert.Description) + alertProvider = &core.CustomAlertProvider{ + Url: cfg.Alerting.Custom.Url, + Method: cfg.Alerting.Custom.Method, + Body: cfg.Alerting.Custom.Body, + Headers: cfg.Alerting.Custom.Headers, + } + } else { + log.Printf("[alerting][handleAlertsToTrigger] Not sending custom alert despite being triggered, because there is no custom url configured") + } + } + if alertProvider != nil { + // TODO: retry on error + var err error + if alert.Type == core.PagerDutyAlert { + var body []byte + body, err = alertProvider.Send(service.Name, alert.Description, true) + if err == nil { + var response pagerDutyResponse + err = json.Unmarshal(body, &response) + if err != nil { + log.Printf("[alerting][handleAlertsToTrigger] Ran into error unmarshaling pager duty response: %s", err.Error()) + } else { + alert.ResolveKey = response.DedupKey + } + } + } else { + _, err = alertProvider.Send(service.Name, alert.Description, false) + } + if err != nil { + log.Printf("[alerting][handleAlertsToTrigger] Ran into error sending an alert: %s", err.Error()) + } else { + alert.Triggered = true + } + } + } +} + +func handleAlertsToResolve(service *core.Service, result *core.Result, cfg *config.Config) { + service.NumberOfSuccessesInARow++ + for _, alert := range service.Alerts { + if !alert.Enabled || !alert.Triggered || alert.SuccessBeforeResolved > service.NumberOfSuccessesInARow { + continue + } + alert.Triggered = false + if !alert.SendOnResolved { + continue + } + var alertProvider *core.CustomAlertProvider + if alert.Type == core.SlackAlert { + if len(cfg.Alerting.Slack) > 0 { + log.Printf("[alerting][handleAlertsToResolve] Sending Slack alert because alert with description='%s' has been resolved", alert.Description) + alertProvider = core.CreateSlackCustomAlertProvider(cfg.Alerting.Slack, service, alert, result, true) + } else { + log.Printf("[alerting][handleAlertsToResolve] Not sending Slack alert despite being resolved, because there is no Slack webhook configured") + } + } else if alert.Type == core.PagerDutyAlert { + if len(cfg.Alerting.PagerDuty) > 0 { + log.Printf("[alerting][handleAlertsToResolve] Sending PagerDuty alert because alert with description='%s' has been resolved", alert.Description) + alertProvider = core.CreatePagerDutyCustomAlertProvider(cfg.Alerting.PagerDuty, "resolve", alert.ResolveKey, service, fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description)) + } else { + log.Printf("[alerting][handleAlertsToResolve] Not sending PagerDuty alert despite being resolved, because PagerDuty isn't configured properly") + } + } else if alert.Type == core.TwilioAlert { + if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() { + log.Printf("[alerting][handleAlertsToResolve] Sending Twilio alert because alert with description='%s' has been resolved", alert.Description) + alertProvider = core.CreateTwilioCustomAlertProvider(cfg.Alerting.Twilio, fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description)) + } else { + log.Printf("[alerting][handleAlertsToResolve] Not sending Twilio alert despite being resolved, because Twilio isn't configured properly") + } + } else if alert.Type == core.CustomAlert { + if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() { + log.Printf("[alerting][handleAlertsToResolve] Sending custom alert because alert with description='%s' has been resolved", alert.Description) + alertProvider = 
&core.CustomAlertProvider{
+					Url:     cfg.Alerting.Custom.Url,
+					Method:  cfg.Alerting.Custom.Method,
+					Body:    cfg.Alerting.Custom.Body,
+					Headers: cfg.Alerting.Custom.Headers,
+				}
+			} else {
+				log.Printf("[alerting][handleAlertsToResolve] Not sending custom alert despite being resolved, because the custom provider isn't configured properly")
+			}
+		}
+		if alertProvider != nil {
+			// TODO: retry on error
+			_, err := alertProvider.Send(service.Name, alert.Description, true)
+			if err != nil {
+				log.Printf("[alerting][handleAlertsToResolve] Ran into error sending an alert: %s", err.Error())
+			} else {
+				if alert.Type == core.PagerDutyAlert {
+					alert.ResolveKey = ""
+				}
+			}
+		}
+	}
+	service.NumberOfFailuresInARow = 0
+}
diff --git a/alerting/pagerduty.go b/alerting/pagerduty.go
new file mode 100644
index 00000000..9ef4df03
--- /dev/null
+++ b/alerting/pagerduty.go
@@ -0,0 +1,7 @@
+package alerting
+
+type pagerDutyResponse struct {
+	Status   string `json:"status"`
+	Message  string `json:"message"`
+	DedupKey string `json:"dedup_key"`
+}
diff --git a/core/alert.go b/core/alert.go
index ebded0a7..9a2e585f 100644
--- a/core/alert.go
+++ b/core/alert.go
@@ -16,12 +16,24 @@ type Alert struct {
 
 	// SendOnResolved defines whether to send a second notification when the issue has been resolved
 	SendOnResolved bool `yaml:"send-on-resolved"`
+
+	// SuccessBeforeResolved defines the number of successes in a row required before sending a resolved notification
+	SuccessBeforeResolved int `yaml:"success-before-resolved"`
+
+	// ResolveKey is an optional field that is used by some providers (e.g. PagerDuty's dedup_key) to resolve
+	// ongoing/triggered incidents
+	ResolveKey string
+
+	// Triggered is used to determine whether an alert has been triggered. When an alert is resolved, this value
+	// should be set back to false. It is used to prevent the same alert from going out twice.
+ Triggered bool } type AlertType string const ( - SlackAlert AlertType = "slack" - TwilioAlert AlertType = "twilio" - CustomAlert AlertType = "custom" + SlackAlert AlertType = "slack" + PagerDutyAlert AlertType = "pagerduty" + TwilioAlert AlertType = "twilio" + CustomAlert AlertType = "custom" ) diff --git a/core/alerting.go b/core/alerting.go index 06daa331..332f89d2 100644 --- a/core/alerting.go +++ b/core/alerting.go @@ -5,15 +5,17 @@ import ( "encoding/base64" "fmt" "github.com/TwinProduction/gatus/client" + "io/ioutil" "net/http" "net/url" "strings" ) type AlertingConfig struct { - Slack string `yaml:"slack"` - Twilio *TwilioAlertProvider `yaml:"twilio"` - Custom *CustomAlertProvider `yaml:"custom"` + Slack string `yaml:"slack"` + PagerDuty string `yaml:"pagerduty"` + Twilio *TwilioAlertProvider `yaml:"twilio"` + Custom *CustomAlertProvider `yaml:"custom"` } type TwilioAlertProvider struct { @@ -75,26 +77,32 @@ func (provider *CustomAlertProvider) buildRequest(serviceName, alertDescription return request } -func (provider *CustomAlertProvider) Send(serviceName, alertDescription string, resolved bool) error { +// Send a request to the alert provider and return the body +func (provider *CustomAlertProvider) Send(serviceName, alertDescription string, resolved bool) ([]byte, error) { request := provider.buildRequest(serviceName, alertDescription, resolved) response, err := client.GetHttpClient().Do(request) if err != nil { - return err + return nil, err } if response.StatusCode > 399 { - return fmt.Errorf("call to provider alert returned status code %d", response.StatusCode) + body, err := ioutil.ReadAll(response.Body) + if err != nil { + return nil, fmt.Errorf("call to provider alert returned status code %d", response.StatusCode) + } else { + return nil, fmt.Errorf("call to provider alert returned status code %d: %s", response.StatusCode, string(body)) + } } - return nil + return ioutil.ReadAll(response.Body) } func CreateSlackCustomAlertProvider(slackWebHookUrl string, service *Service, alert *Alert, result *Result, resolved bool) *CustomAlertProvider { var message string var color string if resolved { - message = fmt.Sprintf("An alert for *%s* has been resolved after %d failures in a row", service.Name, service.NumberOfFailuresInARow) + message = fmt.Sprintf("An alert for *%s* has been resolved after passing successfully %d time(s) in a row", service.Name, alert.SuccessBeforeResolved) color = "#36A64F" } else { - message = fmt.Sprintf("An alert for *%s* has been triggered", service.Name) + message = fmt.Sprintf("An alert for *%s* has been triggered due to having failed %d time(s) in a row", service.Name, alert.Threshold) color = "#DD0000" } var results string @@ -147,3 +155,24 @@ func CreateTwilioCustomAlertProvider(provider *TwilioAlertProvider, message stri }, } } + +// https://developer.pagerduty.com/docs/events-api-v2/trigger-events/ +func CreatePagerDutyCustomAlertProvider(routingKey, eventAction, resolveKey string, service *Service, message string) *CustomAlertProvider { + return &CustomAlertProvider{ + Url: "https://events.pagerduty.com/v2/enqueue", + Method: "POST", + Body: fmt.Sprintf(`{ + "routing_key": "%s", + "dedup_key": "%s", + "event_action": "%s", + "payload": { + "summary": "%s", + "source": "%s", + "severity": "critical" + } +}`, routingKey, resolveKey, eventAction, message, service.Name), + Headers: map[string]string{ + "Content-Type": "application/json", + }, + } +} diff --git a/core/service.go b/core/service.go index 2df3b05e..52e7aab6 100644 --- a/core/service.go 
+++ b/core/service.go @@ -46,7 +46,8 @@ type Service struct { // Alerts is the alerting configuration for the service in case of failure Alerts []*Alert `yaml:"alerts"` - NumberOfFailuresInARow int + NumberOfFailuresInARow int + NumberOfSuccessesInARow int } func (service *Service) Validate() { @@ -64,6 +65,9 @@ func (service *Service) Validate() { if alert.Threshold <= 0 { alert.Threshold = 3 } + if alert.SuccessBeforeResolved <= 0 { + alert.SuccessBeforeResolved = 2 + } } if len(service.Url) == 0 { panic(ErrNoUrl) diff --git a/watchdog/watchdog.go b/watchdog/watchdog.go index 2b5bb45e..8d4f6a97 100644 --- a/watchdog/watchdog.go +++ b/watchdog/watchdog.go @@ -3,6 +3,7 @@ package watchdog import ( "encoding/json" "fmt" + "github.com/TwinProduction/gatus/alerting" "github.com/TwinProduction/gatus/config" "github.com/TwinProduction/gatus/core" "github.com/TwinProduction/gatus/metric" @@ -70,7 +71,7 @@ func monitor(service *core.Service) { result.Duration.Round(time.Millisecond), extra, ) - handleAlerting(service, result) + alerting.Handle(service, result) if cfg.Debug { log.Printf("[watchdog][monitor] Waiting for interval=%s before monitoring serviceName=%s again", service.Interval, service.Name) } @@ -78,96 +79,3 @@ func monitor(service *core.Service) { time.Sleep(service.Interval) } } - -func handleAlerting(service *core.Service, result *core.Result) { - cfg := config.Get() - if cfg.Alerting == nil { - return - } - if result.Success { - if service.NumberOfFailuresInARow > 0 { - for _, alert := range service.Alerts { - if !alert.Enabled || !alert.SendOnResolved || alert.Threshold > service.NumberOfFailuresInARow { - continue - } - var alertProvider *core.CustomAlertProvider - if alert.Type == core.SlackAlert { - if len(cfg.Alerting.Slack) > 0 { - log.Printf("[watchdog][handleAlerting] Sending Slack alert because alert with description=%s has been resolved", alert.Description) - alertProvider = core.CreateSlackCustomAlertProvider(cfg.Alerting.Slack, service, alert, result, true) - } else { - log.Printf("[watchdog][handleAlerting] Not sending Slack alert despite being triggered, because there is no Slack webhook configured") - } - } else if alert.Type == core.TwilioAlert { - if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() { - log.Printf("[watchdog][handleAlerting] Sending Twilio alert because alert with description=%s has been resolved", alert.Description) - alertProvider = core.CreateTwilioCustomAlertProvider(cfg.Alerting.Twilio, fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description)) - } else { - log.Printf("[watchdog][handleAlerting] Not sending Twilio alert despite being resolved, because Twilio isn't configured properly") - } - } else if alert.Type == core.CustomAlert { - if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() { - log.Printf("[watchdog][handleAlerting] Sending custom alert because alert with description=%s has been resolved", alert.Description) - alertProvider = &core.CustomAlertProvider{ - Url: cfg.Alerting.Custom.Url, - Method: cfg.Alerting.Custom.Method, - Body: cfg.Alerting.Custom.Body, - Headers: cfg.Alerting.Custom.Headers, - } - } else { - log.Printf("[watchdog][handleAlerting] Not sending custom alert despite being resolved, because the custom provider isn't configured properly") - } - } - if alertProvider != nil { - err := alertProvider.Send(service.Name, alert.Description, true) - if err != nil { - log.Printf("[watchdog][handleAlerting] Ran into error sending an alert: %s", err.Error()) - } - } - } - } - 
service.NumberOfFailuresInARow = 0 - } else { - service.NumberOfFailuresInARow++ - for _, alert := range service.Alerts { - // If the alert hasn't been triggered, move to the next one - if !alert.Enabled || alert.Threshold != service.NumberOfFailuresInARow { - continue - } - var alertProvider *core.CustomAlertProvider - if alert.Type == core.SlackAlert { - if len(cfg.Alerting.Slack) > 0 { - log.Printf("[watchdog][handleAlerting] Sending Slack alert because alert with description=%s has been triggered", alert.Description) - alertProvider = core.CreateSlackCustomAlertProvider(cfg.Alerting.Slack, service, alert, result, false) - } else { - log.Printf("[watchdog][handleAlerting] Not sending Slack alert despite being triggered, because there is no Slack webhook configured") - } - } else if alert.Type == core.TwilioAlert { - if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() { - log.Printf("[watchdog][handleAlerting] Sending Twilio alert because alert with description=%s has been triggered", alert.Description) - alertProvider = core.CreateTwilioCustomAlertProvider(cfg.Alerting.Twilio, fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description)) - } else { - log.Printf("[watchdog][handleAlerting] Not sending Twilio alert despite being triggered, because Twilio config settings missing") - } - } else if alert.Type == core.CustomAlert { - if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() { - log.Printf("[watchdog][handleAlerting] Sending custom alert because alert with description=%s has been triggered", alert.Description) - alertProvider = &core.CustomAlertProvider{ - Url: cfg.Alerting.Custom.Url, - Method: cfg.Alerting.Custom.Method, - Body: cfg.Alerting.Custom.Body, - Headers: cfg.Alerting.Custom.Headers, - } - } else { - log.Printf("[watchdog][handleAlerting] Not sending custom alert despite being triggered, because there is no custom url configured") - } - } - if alertProvider != nil { - err := alertProvider.Send(service.Name, alert.Description, false) - if err != nil { - log.Printf("[watchdog][handleAlerting] Ran into error sending an alert: %s", err.Error()) - } - } - } - } -}