From a85c5d54869e1e8e45cb8c0b22762b7f0c6b1b38 Mon Sep 17 00:00:00 2001 From: TwinProduction Date: Sat, 15 May 2021 21:31:32 -0400 Subject: [PATCH 1/6] Close #91: Implement default provider alert --- README.md | 58 ++++ alerting/provider/custom/custom.go | 8 + alerting/provider/discord/discord.go | 31 +- alerting/provider/mattermost/mattermost.go | 10 +- alerting/provider/messagebird/messagebird.go | 12 +- alerting/provider/pagerduty/pagerduty.go | 12 +- alerting/provider/provider.go | 25 ++ alerting/provider/provider_test.go | 153 ++++++++ alerting/provider/slack/slack.go | 10 +- alerting/provider/telegram/telegram.go | 12 +- alerting/provider/twilio/twilio.go | 12 +- config/config.go | 17 + config/config_test.go | 345 ++++++++++++++++++- core/alert.go | 41 ++- core/alert_test.go | 36 ++ core/service.go | 2 +- core/service_test.go | 5 +- watchdog/alerting.go | 18 +- 18 files changed, 765 insertions(+), 42 deletions(-) create mode 100644 alerting/provider/provider_test.go create mode 100644 core/alert_test.go diff --git a/README.md b/README.md index f25bf46f..b30799c7 100644 --- a/README.md +++ b/README.md @@ -250,6 +250,11 @@ ignored. | `alerting.custom.insecure` | Whether to skip verifying the server's certificate chain and host name | `false` | | `alerting.custom.body` | Custom alerting request body. | `""` | | `alerting.custom.headers` | Custom alerting request headers | `{}` | +| `alerting.*.default-alert.enabled` | Whether to enable the alert | N/A | +| `alerting.*.default-alert.failure-threshold` | Number of failures in a row needed before triggering the alert | N/A | +| `alerting.*.default-alert.success-threshold` | Number of successes in a row before an ongoing incident is marked as resolved | N/A | +| `alerting.*.default-alert.send-on-resolved` | Whether to send a notification once a triggered alert is marked as resolved | N/A | +| `alerting.*.default-alert.description` | Description of the alert. 
Will be included in the alert sent | N/A | #### Configuring Slack alerts @@ -503,6 +508,59 @@ As a result, the `[ALERT_TRIGGERED_OR_RESOLVED]` in the body of first example of `partial_outage` when an alert is triggered and `operational` when an alert is resolved. +#### Setting a default provider alert + +While you can specify the alert configuration directly in the service definition, it's tedious and may lead to a very +long configuration file. + +To avoid such a problem, you can use the `default-alert` parameter present in each provider configuration: +```yaml +alerting: + slack: + webhook-url: "https://hooks.slack.com/services/**********/**********/**********" + default-alert: + enabled: true + description: "healthcheck failed" + send-on-resolved: true + failure-threshold: 5 + success-threshold: 5 +``` + +As a result, your service configuration looks a lot tidier: +```yaml +services: + - name: example + url: "https://example.org" + alerts: + - type: slack + conditions: + - "[STATUS] == 200" + + - name: other-example + url: "https://example.com" + alerts: + - type: slack + conditions: + - "[STATUS] == 200" +``` + +It also allows you to do things like this: +```yaml +services: + - name: twinnation + url: "https://twinnation.org/health" + alerts: + - type: slack + failure-threshold: 5 + - type: slack + failure-threshold: 10 + - type: slack + failure-threshold: 15 + conditions: + - "[STATUS] == 200" +``` + + ### Kubernetes (ALPHA) > **WARNING**: This feature is in ALPHA. 
This means that it is very likely to change in the near future, which means that diff --git a/alerting/provider/custom/custom.go b/alerting/provider/custom/custom.go index 7c390506..06c63bcf 100644 --- a/alerting/provider/custom/custom.go +++ b/alerting/provider/custom/custom.go @@ -22,6 +22,9 @@ type AlertProvider struct { Body string `yaml:"body,omitempty"` Headers map[string]string `yaml:"headers,omitempty"` Placeholders map[string]map[string]string `yaml:"placeholders,omitempty"` + + // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type + DefaultAlert *core.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -112,3 +115,8 @@ func (provider *AlertProvider) Send(serviceName, alertDescription string, resolv } return ioutil.ReadAll(response.Body) } + +// GetDefaultAlert returns the provider's default alert configuration +func (provider AlertProvider) GetDefaultAlert() *core.Alert { + return provider.DefaultAlert +} diff --git a/alerting/provider/discord/discord.go b/alerting/provider/discord/discord.go index 060e7c69..bdf46baa 100644 --- a/alerting/provider/discord/discord.go +++ b/alerting/provider/discord/discord.go @@ -11,8 +11,32 @@ import ( // AlertProvider is the configuration necessary for sending an alert using Discord type AlertProvider struct { WebhookURL string `yaml:"webhook-url"` + + // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type + DefaultAlert *core.Alert `yaml:"default-alert"` } +//func (provider *AlertProvider) ParseWithDefaultAlert(alert *core.Alert) { +// if provider.DefaultAlert == nil { +// return +// } +// if alert.Enabled == nil { +// alert.Enabled = provider.DefaultAlert.Enabled +// } +// if alert.SendOnResolved == nil { +// alert.SendOnResolved = provider.DefaultAlert.SendOnResolved +// } +// if len(alert.Description) == 0 { +// alert.Description = 
provider.DefaultAlert.Description +// } +// if alert.FailureThreshold == 0 { +// alert.FailureThreshold = provider.DefaultAlert.FailureThreshold +// } +// if alert.SuccessThreshold == 0 { +// alert.SuccessThreshold = provider.DefaultAlert.SuccessThreshold +// } +//} + // IsValid returns whether the provider's configuration is valid func (provider *AlertProvider) IsValid() bool { return len(provider.WebhookURL) > 0 @@ -57,7 +81,12 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler ] } ] -}`, message, alert.Description, colorCode, results), +}`, message, alert.GetDescription(), colorCode, results), Headers: map[string]string{"Content-Type": "application/json"}, } } + +// GetDefaultAlert returns the provider's default alert configuration +func (provider AlertProvider) GetDefaultAlert() *core.Alert { + return provider.DefaultAlert +} diff --git a/alerting/provider/mattermost/mattermost.go b/alerting/provider/mattermost/mattermost.go index 11f4e827..35bc4820 100644 --- a/alerting/provider/mattermost/mattermost.go +++ b/alerting/provider/mattermost/mattermost.go @@ -12,6 +12,9 @@ import ( type AlertProvider struct { WebhookURL string `yaml:"webhook-url"` Insecure bool `yaml:"insecure,omitempty"` + + // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type + DefaultAlert *core.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -69,7 +72,12 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler ] } ] -}`, message, message, alert.Description, color, service.URL, results), +}`, message, message, alert.GetDescription(), color, service.URL, results), Headers: map[string]string{"Content-Type": "application/json"}, } } + +// GetDefaultAlert returns the provider's default alert configuration +func (provider AlertProvider) GetDefaultAlert() *core.Alert { + return provider.DefaultAlert +} diff --git 
a/alerting/provider/messagebird/messagebird.go b/alerting/provider/messagebird/messagebird.go index 739c43e3..1dbe56c9 100644 --- a/alerting/provider/messagebird/messagebird.go +++ b/alerting/provider/messagebird/messagebird.go @@ -17,6 +17,9 @@ type AlertProvider struct { AccessKey string `yaml:"access-key"` Originator string `yaml:"originator"` Recipients string `yaml:"recipients"` + + // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type + DefaultAlert *core.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -29,9 +32,9 @@ func (provider *AlertProvider) IsValid() bool { func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, _ *core.Result, resolved bool) *custom.AlertProvider { var message string if resolved { - message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description) + message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.GetDescription()) } else { - message = fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description) + message = fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.GetDescription()) } return &custom.AlertProvider{ @@ -48,3 +51,8 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler }, } } + +// GetDefaultAlert returns the provider's default alert configuration +func (provider AlertProvider) GetDefaultAlert() *core.Alert { + return provider.DefaultAlert +} diff --git a/alerting/provider/pagerduty/pagerduty.go b/alerting/provider/pagerduty/pagerduty.go index a835bba5..b5b1baec 100644 --- a/alerting/provider/pagerduty/pagerduty.go +++ b/alerting/provider/pagerduty/pagerduty.go @@ -11,6 +11,9 @@ import ( // AlertProvider is the configuration necessary for sending an alert using PagerDuty type AlertProvider struct { IntegrationKey string `yaml:"integration-key"` + + // DefaultAlert is the default alert configuration to use for services with an 
alert of the appropriate type + DefaultAlert *core.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -24,11 +27,11 @@ func (provider *AlertProvider) IsValid() bool { func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, _ *core.Result, resolved bool) *custom.AlertProvider { var message, eventAction, resolveKey string if resolved { - message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description) + message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.GetDescription()) eventAction = "resolve" resolveKey = alert.ResolveKey } else { - message = fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description) + message = fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.GetDescription()) eventAction = "trigger" resolveKey = "" } @@ -50,3 +53,8 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler }, } } + +// GetDefaultAlert returns the provider's default alert configuration +func (provider AlertProvider) GetDefaultAlert() *core.Alert { + return provider.DefaultAlert +} diff --git a/alerting/provider/provider.go b/alerting/provider/provider.go index 81b97b85..1c381356 100644 --- a/alerting/provider/provider.go +++ b/alerting/provider/provider.go @@ -19,6 +19,31 @@ type AlertProvider interface { // ToCustomAlertProvider converts the provider into a custom.AlertProvider ToCustomAlertProvider(service *core.Service, alert *core.Alert, result *core.Result, resolved bool) *custom.AlertProvider + + // GetDefaultAlert returns the provider's default alert configuration + GetDefaultAlert() *core.Alert +} + +// ParseWithDefaultAlert parses a service alert by using the provider's default alert as a baseline +func ParseWithDefaultAlert(providerDefaultAlert, serviceAlert *core.Alert) { + if providerDefaultAlert == nil || serviceAlert == nil { + return + } + if serviceAlert.Enabled == nil { + serviceAlert.Enabled = 
providerDefaultAlert.Enabled + } + if serviceAlert.SendOnResolved == nil { + serviceAlert.SendOnResolved = providerDefaultAlert.SendOnResolved + } + if serviceAlert.Description == nil { + serviceAlert.Description = providerDefaultAlert.Description + } + if serviceAlert.FailureThreshold == 0 { + serviceAlert.FailureThreshold = providerDefaultAlert.FailureThreshold + } + if serviceAlert.SuccessThreshold == 0 { + serviceAlert.SuccessThreshold = providerDefaultAlert.SuccessThreshold + } } var ( diff --git a/alerting/provider/provider_test.go b/alerting/provider/provider_test.go new file mode 100644 index 00000000..7bda7baf --- /dev/null +++ b/alerting/provider/provider_test.go @@ -0,0 +1,153 @@ +package provider + +import ( + "testing" + + "github.com/TwinProduction/gatus/core" +) + +func TestParseWithDefaultAlert(t *testing.T) { + type Scenario struct { + Name string + DefaultAlert, ServiceAlert, ExpectedOutputAlert *core.Alert + } + enabled := true + disabled := false + firstDescription := "description-1" + secondDescription := "description-2" + scenarios := []Scenario{ + { + Name: "service-alert-type-only", + DefaultAlert: &core.Alert{ + Enabled: &enabled, + SendOnResolved: &enabled, + Description: &firstDescription, + FailureThreshold: 5, + SuccessThreshold: 10, + }, + ServiceAlert: &core.Alert{ + Type: core.DiscordAlert, + }, + ExpectedOutputAlert: &core.Alert{ + Type: core.DiscordAlert, + Enabled: &enabled, + SendOnResolved: &enabled, + Description: &firstDescription, + FailureThreshold: 5, + SuccessThreshold: 10, + }, + }, + { + Name: "service-alert-overwrites-default-alert", + DefaultAlert: &core.Alert{ + Enabled: &disabled, + SendOnResolved: &disabled, + Description: &firstDescription, + FailureThreshold: 5, + SuccessThreshold: 10, + }, + ServiceAlert: &core.Alert{ + Type: core.DiscordAlert, + Enabled: &enabled, + SendOnResolved: &enabled, + Description: &secondDescription, + FailureThreshold: 6, + SuccessThreshold: 11, + }, + ExpectedOutputAlert: &core.Alert{ 
+ Type: core.DiscordAlert, + Enabled: &enabled, + SendOnResolved: &enabled, + Description: &secondDescription, + FailureThreshold: 6, + SuccessThreshold: 11, + }, + }, + { + Name: "service-alert-partially-overwrites-default-alert", + DefaultAlert: &core.Alert{ + Enabled: &enabled, + SendOnResolved: &enabled, + Description: &firstDescription, + FailureThreshold: 5, + SuccessThreshold: 10, + }, + ServiceAlert: &core.Alert{ + Type: core.DiscordAlert, + Enabled: nil, + SendOnResolved: nil, + FailureThreshold: 6, + SuccessThreshold: 11, + }, + ExpectedOutputAlert: &core.Alert{ + Type: core.DiscordAlert, + Enabled: &enabled, + SendOnResolved: &enabled, + Description: &firstDescription, + FailureThreshold: 6, + SuccessThreshold: 11, + }, + }, + { + Name: "default-alert-type-should-be-ignored", + DefaultAlert: &core.Alert{ + Type: core.TelegramAlert, + Enabled: &enabled, + SendOnResolved: &enabled, + Description: &firstDescription, + FailureThreshold: 5, + SuccessThreshold: 10, + }, + ServiceAlert: &core.Alert{ + Type: core.DiscordAlert, + }, + ExpectedOutputAlert: &core.Alert{ + Type: core.DiscordAlert, + Enabled: &enabled, + SendOnResolved: &enabled, + Description: &firstDescription, + FailureThreshold: 5, + SuccessThreshold: 10, + }, + }, + { + Name: "no-default-alert", + DefaultAlert: &core.Alert{ + Type: core.DiscordAlert, + Enabled: nil, + SendOnResolved: nil, + Description: &firstDescription, + FailureThreshold: 2, + SuccessThreshold: 5, + }, + ServiceAlert: nil, + ExpectedOutputAlert: nil, + }, + } + for _, scenario := range scenarios { + t.Run(scenario.Name, func(t *testing.T) { + ParseWithDefaultAlert(scenario.DefaultAlert, scenario.ServiceAlert) + if scenario.ExpectedOutputAlert == nil { + if scenario.ServiceAlert != nil { + t.Fail() + } + return + } + if scenario.ServiceAlert.IsEnabled() != scenario.ExpectedOutputAlert.IsEnabled() { + t.Errorf("expected ServiceAlert.IsEnabled() to be %v, got %v", scenario.ExpectedOutputAlert.IsEnabled(), 
scenario.ServiceAlert.IsEnabled()) + } + if scenario.ServiceAlert.IsSendingOnResolved() != scenario.ExpectedOutputAlert.IsSendingOnResolved() { + t.Errorf("expected ServiceAlert.IsSendingOnResolved() to be %v, got %v", scenario.ExpectedOutputAlert.IsSendingOnResolved(), scenario.ServiceAlert.IsSendingOnResolved()) + } + if scenario.ServiceAlert.GetDescription() != scenario.ExpectedOutputAlert.GetDescription() { + t.Errorf("expected ServiceAlert.GetDescription() to be %v, got %v", scenario.ExpectedOutputAlert.GetDescription(), scenario.ServiceAlert.GetDescription()) + } + if scenario.ServiceAlert.FailureThreshold != scenario.ExpectedOutputAlert.FailureThreshold { + t.Errorf("expected ServiceAlert.FailureThreshold to be %v, got %v", scenario.ExpectedOutputAlert.FailureThreshold, scenario.ServiceAlert.FailureThreshold) + } + if scenario.ServiceAlert.SuccessThreshold != scenario.ExpectedOutputAlert.SuccessThreshold { + t.Errorf("expected ServiceAlert.SuccessThreshold to be %v, got %v", scenario.ExpectedOutputAlert.SuccessThreshold, scenario.ServiceAlert.SuccessThreshold) + } + }) + } +} diff --git a/alerting/provider/slack/slack.go b/alerting/provider/slack/slack.go index 4524ff5b..ed5eb825 100644 --- a/alerting/provider/slack/slack.go +++ b/alerting/provider/slack/slack.go @@ -11,6 +11,9 @@ import ( // AlertProvider is the configuration necessary for sending an alert using Slack type AlertProvider struct { WebhookURL string `yaml:"webhook-url"` // Slack webhook URL + + // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type + DefaultAlert *core.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -57,7 +60,12 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler ] } ] -}`, message, alert.Description, color, results), +}`, message, alert.GetDescription(), color, results), Headers: map[string]string{"Content-Type": "application/json"}, } } + 
+// GetDefaultAlert returns the provider's default alert configuration +func (provider AlertProvider) GetDefaultAlert() *core.Alert { + return provider.DefaultAlert +} diff --git a/alerting/provider/telegram/telegram.go b/alerting/provider/telegram/telegram.go index c2ebafa2..58608c56 100644 --- a/alerting/provider/telegram/telegram.go +++ b/alerting/provider/telegram/telegram.go @@ -12,6 +12,9 @@ import ( type AlertProvider struct { Token string `yaml:"token"` ID string `yaml:"id"` + + // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type + DefaultAlert *core.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -37,8 +40,8 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler results += fmt.Sprintf("%s - `%s`\\n", prefix, conditionResult.Condition) } var text string - if len(alert.Description) > 0 { - text = fmt.Sprintf("⛑ *Gatus* \\n%s \\n*Description* \\n_%s_ \\n\\n*Condition results*\\n%s", message, alert.Description, results) + if len(alert.GetDescription()) > 0 { + text = fmt.Sprintf("⛑ *Gatus* \\n%s \\n*Description* \\n_%s_ \\n\\n*Condition results*\\n%s", message, alert.GetDescription(), results) } else { text = fmt.Sprintf("⛑ *Gatus* \\n%s \\n*Condition results*\\n%s", message, results) } @@ -49,3 +52,8 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler Headers: map[string]string{"Content-Type": "application/json"}, } } + +// GetDefaultAlert returns the provider's default alert configuration +func (provider AlertProvider) GetDefaultAlert() *core.Alert { + return provider.DefaultAlert +} diff --git a/alerting/provider/twilio/twilio.go b/alerting/provider/twilio/twilio.go index d1e9fc1b..5439a779 100644 --- a/alerting/provider/twilio/twilio.go +++ b/alerting/provider/twilio/twilio.go @@ -16,6 +16,9 @@ type AlertProvider struct { Token string `yaml:"token"` From string `yaml:"from"` To string 
`yaml:"to"` + + // DefaultAlert is the default alert configuration to use for services with an alert of the appropriate type + DefaultAlert *core.Alert `yaml:"default-alert"` } // IsValid returns whether the provider's configuration is valid @@ -27,9 +30,9 @@ func (provider *AlertProvider) IsValid() bool { func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, alert *core.Alert, _ *core.Result, resolved bool) *custom.AlertProvider { var message string if resolved { - message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.Description) + message = fmt.Sprintf("RESOLVED: %s - %s", service.Name, alert.GetDescription()) } else { - message = fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.Description) + message = fmt.Sprintf("TRIGGERED: %s - %s", service.Name, alert.GetDescription()) } return &custom.AlertProvider{ URL: fmt.Sprintf("https://api.twilio.com/2010-04-01/Accounts/%s/Messages.json", provider.SID), @@ -45,3 +48,8 @@ func (provider *AlertProvider) ToCustomAlertProvider(service *core.Service, aler }, } } + +// GetDefaultAlert returns the provider's default alert configuration +func (provider AlertProvider) GetDefaultAlert() *core.Alert { + return provider.DefaultAlert +} diff --git a/config/config.go b/config/config.go index 31e6c06d..ba8d5272 100644 --- a/config/config.go +++ b/config/config.go @@ -222,6 +222,10 @@ func validateSecurityConfig(config *Config) { } } +// validateAlertingConfig validates the alerting configuration +// Note that the alerting configuration has to be validated before the service configuration, because the default alert +// returned by provider.AlertProvider.GetDefaultAlert() must be parsed before core.Service.ValidateAndSetDefaults() +// sets the default alert values when none are set. 
func validateAlertingConfig(config *Config) { if config.Alerting == nil { log.Printf("[config][validateAlertingConfig] Alerting is not configured") @@ -242,6 +246,19 @@ func validateAlertingConfig(config *Config) { alertProvider := GetAlertingProviderByAlertType(config, alertType) if alertProvider != nil { if alertProvider.IsValid() { + // Parse alerts with the provider's default alert + if alertProvider.GetDefaultAlert() != nil { + for _, service := range config.Services { + for alertIndex, alert := range service.Alerts { + if alertType == alert.Type { + if config.Debug { + log.Printf("[config][validateAlertingConfig] Parsing alert %d with provider's default alert for provider=%s in service=%s", alertIndex, alertType, service.Name) + } + provider.ParseWithDefaultAlert(alertProvider.GetDefaultAlert(), alert) + } + } + } + } validProviders = append(validProviders, alertType) } else { log.Printf("[config][validateAlertingConfig] Ignoring provider=%s because configuration is invalid", alertType) diff --git a/config/config_test.go b/config/config_test.go index 6912a801..279794c8 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -351,6 +351,8 @@ alerting: webhook-url: "http://example.org" pagerduty: integration-key: "00000000000000000000000000000000" + mattermost: + webhook-url: "http://example.com" messagebird: access-key: "1" originator: "31619191918" @@ -358,6 +360,12 @@ alerting: telegram: token: 123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11 id: 0123456789 + twilio: + sid: "1234" + token: "5678" + from: "+1-234-567-8901" + to: "+1-234-567-8901" + services: - name: twinnation url: https://twinnation.org/health @@ -369,14 +377,194 @@ services: failure-threshold: 7 success-threshold: 5 description: "Healthcheck failed 7 times in a row" + - type: mattermost + enabled: true - type: messagebird - type: discord enabled: true failure-threshold: 10 - type: telegram enabled: true + - type: twilio + enabled: true + failure-threshold: 12 + success-threshold: 15 
conditions: - "[STATUS] == 200" +`)) + if err != nil { + t.Error("expected no error, got", err.Error()) + } + if config == nil { + t.Fatal("Config shouldn't have been nil") + } + // Alerting providers + if config.Alerting == nil { + t.Fatal("config.Alerting shouldn't have been nil") + } + if config.Alerting.Slack == nil || !config.Alerting.Slack.IsValid() { + t.Fatal("Slack alerting config should've been valid") + } + // Services + if len(config.Services) != 1 { + t.Error("There should've been 1 service") + } + if config.Services[0].URL != "https://twinnation.org/health" { + t.Errorf("URL should have been %s", "https://twinnation.org/health") + } + if config.Services[0].Interval != 60*time.Second { + t.Errorf("Interval should have been %s, because it is the default value", 60*time.Second) + } + if len(config.Services[0].Alerts) != 7 { + t.Fatal("There should've been 7 alerts configured") + } + + if config.Services[0].Alerts[0].Type != core.SlackAlert { + t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[0].Type) + } + if !config.Services[0].Alerts[0].IsEnabled() { + t.Error("The alert should've been enabled") + } + if config.Services[0].Alerts[0].FailureThreshold != 3 { + t.Errorf("The default failure threshold of the alert should've been %d, but it was %d", 3, config.Services[0].Alerts[0].FailureThreshold) + } + if config.Services[0].Alerts[0].SuccessThreshold != 2 { + t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[0].SuccessThreshold) + } + + if config.Services[0].Alerts[1].Type != core.PagerDutyAlert { + t.Errorf("The type of the alert should've been %s, but it was %s", core.PagerDutyAlert, config.Services[0].Alerts[1].Type) + } + if config.Services[0].Alerts[1].GetDescription() != "Healthcheck failed 7 times in a row" { + t.Errorf("The description of the alert should've been %s, but it was %s", "Healthcheck failed 7 times in a row", 
config.Services[0].Alerts[1].GetDescription()) + } + if config.Services[0].Alerts[1].FailureThreshold != 7 { + t.Errorf("The failure threshold of the alert should've been %d, but it was %d", 7, config.Services[0].Alerts[1].FailureThreshold) + } + if config.Services[0].Alerts[1].SuccessThreshold != 5 { + t.Errorf("The success threshold of the alert should've been %d, but it was %d", 5, config.Services[0].Alerts[1].SuccessThreshold) + } + + if config.Services[0].Alerts[2].Type != core.MattermostAlert { + t.Errorf("The type of the alert should've been %s, but it was %s", core.MattermostAlert, config.Services[0].Alerts[2].Type) + } + if !config.Services[0].Alerts[2].IsEnabled() { + t.Error("The alert should've been enabled") + } + if config.Services[0].Alerts[2].FailureThreshold != 3 { + t.Errorf("The default failure threshold of the alert should've been %d, but it was %d", 3, config.Services[0].Alerts[2].FailureThreshold) + } + if config.Services[0].Alerts[2].SuccessThreshold != 2 { + t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[2].SuccessThreshold) + } + + if config.Services[0].Alerts[3].Type != core.MessagebirdAlert { + t.Errorf("The type of the alert should've been %s, but it was %s", core.MessagebirdAlert, config.Services[0].Alerts[3].Type) + } + if config.Services[0].Alerts[3].IsEnabled() { + t.Error("The alert should've been disabled") + } + + if config.Services[0].Alerts[4].Type != core.DiscordAlert { + t.Errorf("The type of the alert should've been %s, but it was %s", core.DiscordAlert, config.Services[0].Alerts[4].Type) + } + if !config.Services[0].Alerts[4].IsEnabled() { + t.Error("The alert should've been enabled") + } + if config.Services[0].Alerts[4].FailureThreshold != 10 { + t.Errorf("The failure threshold of the alert should've been %d, but it was %d", 10, config.Services[0].Alerts[4].FailureThreshold) + } + if config.Services[0].Alerts[4].SuccessThreshold != 2 { + t.Errorf("The 
default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[4].SuccessThreshold) + } + + if config.Services[0].Alerts[5].Type != core.TelegramAlert { + t.Errorf("The type of the alert should've been %s, but it was %s", core.TelegramAlert, config.Services[0].Alerts[5].Type) + } + if !config.Services[0].Alerts[5].IsEnabled() { + t.Error("The alert should've been enabled") + } + if config.Services[0].Alerts[5].FailureThreshold != 3 { + t.Errorf("The default failure threshold of the alert should've been %d, but it was %d", 3, config.Services[0].Alerts[5].FailureThreshold) + } + if config.Services[0].Alerts[5].SuccessThreshold != 2 { + t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[5].SuccessThreshold) + } + + if config.Services[0].Alerts[6].Type != core.TwilioAlert { + t.Errorf("The type of the alert should've been %s, but it was %s", core.TwilioAlert, config.Services[0].Alerts[6].Type) + } + if !config.Services[0].Alerts[6].IsEnabled() { + t.Error("The alert should've been enabled") + } + if config.Services[0].Alerts[6].FailureThreshold != 12 { + t.Errorf("The default failure threshold of the alert should've been %d, but it was %d", 12, config.Services[0].Alerts[6].FailureThreshold) + } + if config.Services[0].Alerts[6].SuccessThreshold != 15 { + t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 15, config.Services[0].Alerts[6].SuccessThreshold) + } +} + +func TestParseAndValidateConfigBytesWithAlertingAndDefaultAlert(t *testing.T) { + config, err := parseAndValidateConfigBytes([]byte(` +alerting: + slack: + webhook-url: "http://example.com" + default-alert: + enabled: true + discord: + webhook-url: "http://example.org" + default-alert: + enabled: true + failure-threshold: 10 + success-threshold: 1 + pagerduty: + integration-key: "00000000000000000000000000000000" + default-alert: + enabled: true + description: default 
description + failure-threshold: 7 + success-threshold: 5 + mattermost: + webhook-url: "http://example.com" + default-alert: + enabled: true + messagebird: + access-key: "1" + originator: "31619191918" + recipients: "31619191919" + default-alert: + enabled: false + send-on-resolved: true + telegram: + token: 123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11 + id: 0123456789 + default-alert: + enabled: true + twilio: + sid: "1234" + token: "5678" + from: "+1-234-567-8901" + to: "+1-234-567-8901" + default-alert: + enabled: true + failure-threshold: 12 + success-threshold: 15 + +services: + - name: twinnation + url: https://twinnation.org/health + alerts: + - type: slack + - type: pagerduty + - type: mattermost + - type: messagebird + - type: discord + success-threshold: 2 # test service alert override + - type: telegram + - type: twilio + conditions: + - "[STATUS] == 200" `)) if err != nil { t.Error("expected no error, got", err.Error()) @@ -443,14 +631,14 @@ services: if config.Services[0].Interval != 60*time.Second { t.Errorf("Interval should have been %s, because it is the default value", 60*time.Second) } - if len(config.Services[0].Alerts) != 5 { - t.Fatal("There should've been 5 alerts configured") + if len(config.Services[0].Alerts) != 7 { + t.Fatal("There should've been 7 alerts configured") } if config.Services[0].Alerts[0].Type != core.SlackAlert { t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[0].Type) } - if !config.Services[0].Alerts[0].Enabled { + if !config.Services[0].Alerts[0].IsEnabled() { t.Error("The alert should've been enabled") } if config.Services[0].Alerts[0].FailureThreshold != 3 { @@ -463,28 +651,155 @@ services: if config.Services[0].Alerts[1].Type != core.PagerDutyAlert { t.Errorf("The type of the alert should've been %s, but it was %s", core.PagerDutyAlert, config.Services[0].Alerts[1].Type) } - if config.Services[0].Alerts[1].Description != "Healthcheck failed 7 times in a row" { - 
t.Errorf("The description of the alert should've been %s, but it was %s", "Healthcheck failed 7 times in a row", config.Services[0].Alerts[1].Description) + if config.Services[0].Alerts[1].GetDescription() != "default description" { + t.Errorf("The description of the alert should've been %s, but it was %s", "default description", config.Services[0].Alerts[1].GetDescription()) + } + if config.Services[0].Alerts[1].FailureThreshold != 7 { + t.Errorf("The failure threshold of the alert should've been %d, but it was %d", 7, config.Services[0].Alerts[1].FailureThreshold) } if config.Services[0].Alerts[1].SuccessThreshold != 5 { t.Errorf("The success threshold of the alert should've been %d, but it was %d", 5, config.Services[0].Alerts[1].SuccessThreshold) } - if config.Services[0].Alerts[2].Type != core.MessagebirdAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.MessagebirdAlert, config.Services[0].Alerts[2].Type) + if config.Services[0].Alerts[2].Type != core.MattermostAlert { + t.Errorf("The type of the alert should've been %s, but it was %s", core.MattermostAlert, config.Services[0].Alerts[2].Type) } - if config.Services[0].Alerts[2].Enabled { - t.Error("The alert should've been disabled") + if !config.Services[0].Alerts[2].IsEnabled() { + t.Error("The alert should've been enabled") + } + if config.Services[0].Alerts[2].FailureThreshold != 3 { + t.Errorf("The default failure threshold of the alert should've been %d, but it was %d", 3, config.Services[0].Alerts[2].FailureThreshold) + } + if config.Services[0].Alerts[2].SuccessThreshold != 2 { + t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[2].SuccessThreshold) } - if config.Services[0].Alerts[3].Type != core.DiscordAlert { - t.Errorf("The type of the alert should've been %s, but it was %s", core.DiscordAlert, config.Services[0].Alerts[3].Type) + if config.Services[0].Alerts[3].Type != core.MessagebirdAlert { + 
t.Errorf("The type of the alert should've been %s, but it was %s", core.MessagebirdAlert, config.Services[0].Alerts[3].Type) } - if config.Services[0].Alerts[3].FailureThreshold != 10 { - t.Errorf("The failure threshold of the alert should've been %d, but it was %d", 10, config.Services[0].Alerts[3].FailureThreshold) + if config.Services[0].Alerts[3].IsEnabled() { + t.Error("The alert should've been disabled") } - if config.Services[0].Alerts[3].SuccessThreshold != 2 { - t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[3].SuccessThreshold) + if !config.Services[0].Alerts[3].IsSendingOnResolved() { + t.Error("The alert should be sending on resolve") + } + + if config.Services[0].Alerts[4].Type != core.DiscordAlert { + t.Errorf("The type of the alert should've been %s, but it was %s", core.DiscordAlert, config.Services[0].Alerts[4].Type) + } + if !config.Services[0].Alerts[4].IsEnabled() { + t.Error("The alert should've been enabled") + } + if config.Services[0].Alerts[4].FailureThreshold != 10 { + t.Errorf("The failure threshold of the alert should've been %d, but it was %d", 10, config.Services[0].Alerts[4].FailureThreshold) + } + if config.Services[0].Alerts[4].SuccessThreshold != 2 { + t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[4].SuccessThreshold) + } + + if config.Services[0].Alerts[5].Type != core.TelegramAlert { + t.Errorf("The type of the alert should've been %s, but it was %s", core.TelegramAlert, config.Services[0].Alerts[5].Type) + } + if !config.Services[0].Alerts[5].IsEnabled() { + t.Error("The alert should've been enabled") + } + if config.Services[0].Alerts[5].FailureThreshold != 3 { + t.Errorf("The default failure threshold of the alert should've been %d, but it was %d", 3, config.Services[0].Alerts[5].FailureThreshold) + } + if config.Services[0].Alerts[5].SuccessThreshold != 2 { + t.Errorf("The default 
success threshold of the alert should've been %d, but it was %d", 2, config.Services[0].Alerts[5].SuccessThreshold) + } + + if config.Services[0].Alerts[6].Type != core.TwilioAlert { + t.Errorf("The type of the alert should've been %s, but it was %s", core.TwilioAlert, config.Services[0].Alerts[6].Type) + } + if !config.Services[0].Alerts[6].IsEnabled() { + t.Error("The alert should've been enabled") + } + if config.Services[0].Alerts[6].FailureThreshold != 12 { + t.Errorf("The default failure threshold of the alert should've been %d, but it was %d", 12, config.Services[0].Alerts[6].FailureThreshold) + } + if config.Services[0].Alerts[6].SuccessThreshold != 15 { + t.Errorf("The default success threshold of the alert should've been %d, but it was %d", 15, config.Services[0].Alerts[6].SuccessThreshold) + } +} + +func TestParseAndValidateConfigBytesWithAlertingAndDefaultAlertAndMultipleAlertsOfSameTypeWithOverriddenParameters(t *testing.T) { + config, err := parseAndValidateConfigBytes([]byte(` +alerting: + slack: + webhook-url: "http://example.com" + default-alert: + enabled: true + description: "description" + +services: + - name: twinnation + url: https://twinnation.org/health + alerts: + - type: slack + failure-threshold: 10 + - type: slack + failure-threshold: 20 + description: "wow" + - type: slack + enabled: false + failure-threshold: 30 + conditions: + - "[STATUS] == 200" +`)) + if err != nil { + t.Error("expected no error, got", err.Error()) + } + if config == nil { + t.Fatal("Config shouldn't have been nil") + } + // Alerting providers + if config.Alerting == nil { + t.Fatal("config.Alerting shouldn't have been nil") + } + if config.Alerting.Slack == nil || !config.Alerting.Slack.IsValid() { + t.Fatal("Slack alerting config should've been valid") + } + // Services + if len(config.Services) != 1 { + t.Error("There should've been 1 service") + } + if config.Services[0].Alerts[0].Type != core.SlackAlert { + t.Errorf("The type of the alert should've been %s, 
but it was %s", core.SlackAlert, config.Services[0].Alerts[0].Type) + } + if config.Services[0].Alerts[1].Type != core.SlackAlert { + t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[1].Type) + } + if config.Services[0].Alerts[2].Type != core.SlackAlert { + t.Errorf("The type of the alert should've been %s, but it was %s", core.SlackAlert, config.Services[0].Alerts[2].Type) + } + if !config.Services[0].Alerts[0].IsEnabled() { + t.Error("The alert should've been enabled") + } + if !config.Services[0].Alerts[1].IsEnabled() { + t.Error("The alert should've been enabled") + } + if config.Services[0].Alerts[2].IsEnabled() { + t.Error("The alert should've been disabled") + } + if config.Services[0].Alerts[0].GetDescription() != "description" { + t.Errorf("The description of the alert should've been %s, but it was %s", "description", config.Services[0].Alerts[0].GetDescription()) + } + if config.Services[0].Alerts[1].GetDescription() != "wow" { + t.Errorf("The description of the alert should've been %s, but it was %s", "wow", config.Services[0].Alerts[1].GetDescription()) + } + if config.Services[0].Alerts[2].GetDescription() != "description" { + t.Errorf("The description of the alert should've been %s, but it was %s", "description", config.Services[0].Alerts[2].GetDescription()) + } + if config.Services[0].Alerts[0].FailureThreshold != 10 { + t.Errorf("The failure threshold of the alert should've been %d, but it was %d", 10, config.Services[0].Alerts[0].FailureThreshold) + } + if config.Services[0].Alerts[1].FailureThreshold != 20 { + t.Errorf("The failure threshold of the alert should've been %d, but it was %d", 20, config.Services[0].Alerts[1].FailureThreshold) + } + if config.Services[0].Alerts[2].FailureThreshold != 30 { + t.Errorf("The failure threshold of the alert should've been %d, but it was %d", 30, config.Services[0].Alerts[2].FailureThreshold) } } diff --git a/core/alert.go b/core/alert.go 
index e81e02e4..18dee849 100644 --- a/core/alert.go +++ b/core/alert.go @@ -2,20 +2,29 @@ package core // Alert is the service's alert configuration type Alert struct { - // Type of alert + // Type of alert (required) Type AlertType `yaml:"type"` // Enabled defines whether or not the alert is enabled - Enabled bool `yaml:"enabled"` + // + // This is a pointer, because it is populated by YAML and we need to know whether it was explicitly set to a value + // or not for provider.ParseWithDefaultAlert to work. + Enabled *bool `yaml:"enabled"` // FailureThreshold is the number of failures in a row needed before triggering the alert FailureThreshold int `yaml:"failure-threshold"` // Description of the alert. Will be included in the alert sent. - Description string `yaml:"description"` + // + // This is a pointer, because it is populated by YAML and we need to know whether it was explicitly set to a value + // or not for provider.ParseWithDefaultAlert to work. + Description *string `yaml:"description"` // SendOnResolved defines whether to send a second notification when the issue has been resolved - SendOnResolved bool `yaml:"send-on-resolved"` + // + // This is a pointer, because it is populated by YAML and we need to know whether it was explicitly set to a value + // or not for provider.ParseWithDefaultAlert to work. 
Use Alert.IsSendingOnResolved() for a non-pointer + SendOnResolved *bool `yaml:"send-on-resolved"` // SuccessThreshold defines how many successful executions must happen in a row before an ongoing incident is marked as resolved SuccessThreshold int `yaml:"success-threshold"` @@ -35,6 +44,30 @@ type Alert struct { Triggered bool } +// GetDescription retrieves the description of the alert +func (alert Alert) GetDescription() string { + if alert.Description == nil { + return "" + } + return *alert.Description +} + +// IsEnabled returns whether an alert is enabled or not +func (alert Alert) IsEnabled() bool { + if alert.Enabled == nil { + return false + } + return *alert.Enabled +} + +// IsSendingOnResolved returns whether an alert is sending on resolve or not +func (alert Alert) IsSendingOnResolved() bool { + if alert.SendOnResolved == nil { + return false + } + return *alert.SendOnResolved +} + // AlertType is the type of the alert. // The value will generally be the name of the alert provider type AlertType string diff --git a/core/alert_test.go b/core/alert_test.go new file mode 100644 index 00000000..6c11891f --- /dev/null +++ b/core/alert_test.go @@ -0,0 +1,36 @@ +package core + +import "testing" + +func TestAlert_IsEnabled(t *testing.T) { + if (Alert{Enabled: nil}).IsEnabled() { + t.Error("alert.IsEnabled() should've returned false, because Enabled was set to nil") + } + if value := false; (Alert{Enabled: &value}).IsEnabled() { + t.Error("alert.IsEnabled() should've returned false, because Enabled was set to false") + } + if value := true; !(Alert{Enabled: &value}).IsEnabled() { + t.Error("alert.IsEnabled() should've returned true, because Enabled was set to true") + } +} + +func TestAlert_GetDescription(t *testing.T) { + if (Alert{Description: nil}).GetDescription() != "" { + t.Error("alert.GetDescription() should've returned an empty string, because Description was set to nil") + } + if value := "description"; (Alert{Description: &value}).GetDescription() != 
value { + t.Error("alert.GetDescription() should've returned 'description', because Description was set to 'description'") + } +} + +func TestAlert_IsSendingOnResolved(t *testing.T) { + if (Alert{SendOnResolved: nil}).IsSendingOnResolved() { + t.Error("alert.IsSendingOnResolved() should've returned false, because SendOnResolved was set to nil") + } + if value := false; (Alert{SendOnResolved: &value}).IsSendingOnResolved() { + t.Error("alert.IsSendingOnResolved() should've returned false, because SendOnResolved was set to false") + } + if value := true; !(Alert{SendOnResolved: &value}).IsSendingOnResolved() { + t.Error("alert.IsSendingOnResolved() should've returned true, because SendOnResolved was set to true") + } +} diff --git a/core/service.go b/core/service.go index 4c24df92..6db7290a 100644 --- a/core/service.go +++ b/core/service.go @@ -161,7 +161,7 @@ func (service *Service) GetAlertsTriggered() []Alert { return alerts } for _, alert := range service.Alerts { - if alert.Enabled && alert.FailureThreshold == service.NumberOfFailuresInARow { + if alert.IsEnabled() && alert.FailureThreshold == service.NumberOfFailuresInARow { alerts = append(alerts, *alert) continue } diff --git a/core/service_test.go b/core/service_test.go index 8eb55557..65dbcff5 100644 --- a/core/service_test.go +++ b/core/service_test.go @@ -28,7 +28,7 @@ func TestService_ValidateAndSetDefaults(t *testing.T) { if len(service.Alerts) != 1 { t.Error("Service should've had 1 alert") } - if service.Alerts[0].Enabled { + if service.Alerts[0].IsEnabled() { t.Error("Service alert should've defaulted to disabled") } if service.Alerts[0].SuccessThreshold != 2 { @@ -93,11 +93,12 @@ func TestService_ValidateAndSetDefaultsWithDNS(t *testing.T) { func TestService_GetAlertsTriggered(t *testing.T) { condition := Condition("[STATUS] == 200") + enabled := true service := Service{ Name: "twinnation-health", URL: "https://twinnation.org/health", Conditions: []*Condition{&condition}, - Alerts: []*Alert{{Type: 
PagerDutyAlert, Enabled: true}}, + Alerts: []*Alert{{Type: PagerDutyAlert, Enabled: &enabled}}, } service.ValidateAndSetDefaults() if service.NumberOfFailuresInARow != 0 { diff --git a/watchdog/alerting.go b/watchdog/alerting.go index 85df6fae..addbd2d2 100644 --- a/watchdog/alerting.go +++ b/watchdog/alerting.go @@ -26,25 +26,25 @@ func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *conf service.NumberOfFailuresInARow++ for _, alert := range service.Alerts { // If the alert hasn't been triggered, move to the next one - if !alert.Enabled || alert.FailureThreshold > service.NumberOfFailuresInARow { + if !alert.IsEnabled() || alert.FailureThreshold > service.NumberOfFailuresInARow { continue } if alert.Triggered { if cfg.Debug { - log.Printf("[watchdog][handleAlertsToTrigger] Alert for service=%s with description='%s' has already been TRIGGERED, skipping", service.Name, alert.Description) + log.Printf("[watchdog][handleAlertsToTrigger] Alert for service=%s with description='%s' has already been TRIGGERED, skipping", service.Name, alert.GetDescription()) } continue } alertProvider := config.GetAlertingProviderByAlertType(cfg, alert.Type) if alertProvider != nil && alertProvider.IsValid() { - log.Printf("[watchdog][handleAlertsToTrigger] Sending %s alert because alert for service=%s with description='%s' has been TRIGGERED", alert.Type, service.Name, alert.Description) + log.Printf("[watchdog][handleAlertsToTrigger] Sending %s alert because alert for service=%s with description='%s' has been TRIGGERED", alert.Type, service.Name, alert.GetDescription()) customAlertProvider := alertProvider.ToCustomAlertProvider(service, alert, result, false) // TODO: retry on error var err error // We need to extract the DedupKey from PagerDuty's response if alert.Type == core.PagerDutyAlert { var body []byte - if body, err = customAlertProvider.Send(service.Name, alert.Description, false); err == nil { + if body, err = customAlertProvider.Send(service.Name, 
alert.GetDescription(), false); err == nil { var response pagerDutyResponse if err = json.Unmarshal(body, &response); err != nil { log.Printf("[watchdog][handleAlertsToTrigger] Ran into error unmarshaling pagerduty response: %s", err.Error()) @@ -54,7 +54,7 @@ func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *conf } } else { // All other alert types don't need to extract anything from the body, so we can just send the request right away - _, err = customAlertProvider.Send(service.Name, alert.Description, false) + _, err = customAlertProvider.Send(service.Name, alert.GetDescription(), false) } if err != nil { log.Printf("[watchdog][handleAlertsToTrigger] Failed to send an alert for service=%s: %s", service.Name, err.Error()) @@ -70,21 +70,21 @@ func handleAlertsToTrigger(service *core.Service, result *core.Result, cfg *conf func handleAlertsToResolve(service *core.Service, result *core.Result, cfg *config.Config) { service.NumberOfSuccessesInARow++ for _, alert := range service.Alerts { - if !alert.Enabled || !alert.Triggered || alert.SuccessThreshold > service.NumberOfSuccessesInARow { + if !alert.IsEnabled() || !alert.Triggered || alert.SuccessThreshold > service.NumberOfSuccessesInARow { continue } // Even if the alert provider returns an error, we still set the alert's Triggered variable to false. // Further explanation can be found on Alert's Triggered field. 
alert.Triggered = false - if !alert.SendOnResolved { + if !alert.IsSendingOnResolved() { continue } alertProvider := config.GetAlertingProviderByAlertType(cfg, alert.Type) if alertProvider != nil && alertProvider.IsValid() { - log.Printf("[watchdog][handleAlertsToResolve] Sending %s alert because alert for service=%s with description='%s' has been RESOLVED", alert.Type, service.Name, alert.Description) + log.Printf("[watchdog][handleAlertsToResolve] Sending %s alert because alert for service=%s with description='%s' has been RESOLVED", alert.Type, service.Name, alert.GetDescription()) customAlertProvider := alertProvider.ToCustomAlertProvider(service, alert, result, true) // TODO: retry on error - _, err := customAlertProvider.Send(service.Name, alert.Description, true) + _, err := customAlertProvider.Send(service.Name, alert.GetDescription(), true) if err != nil { log.Printf("[watchdog][handleAlertsToResolve] Failed to send an alert for service=%s: %s", service.Name, err.Error()) } else { From 77de4c474273a6dce9939e10ae9051e81bc7531d Mon Sep 17 00:00:00 2001 From: TwinProduction Date: Sat, 15 May 2021 21:54:23 -0400 Subject: [PATCH 2/6] Minor fixes --- alerting/provider/telegram/telegram_test.go | 3 +- alerting/provider/twilio/twilio_test.go | 6 ++-- watchdog/alerting_test.go | 36 +++++++++++++-------- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/alerting/provider/telegram/telegram_test.go b/alerting/provider/telegram/telegram_test.go index c4e7631b..7a56df60 100644 --- a/alerting/provider/telegram/telegram_test.go +++ b/alerting/provider/telegram/telegram_test.go @@ -46,7 +46,8 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) { func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) { provider := AlertProvider{Token: "123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11", ID: "0123456789"} - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{Description: "Healthcheck 
Successful"}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false) + description := "Healthcheck Successful" + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{}, &core.Alert{Description: &description}, &core.Result{ConditionResults: []*core.ConditionResult{{Condition: "UNSUCCESSFUL_CONDITION", Success: false}}}, false) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } diff --git a/alerting/provider/twilio/twilio_test.go b/alerting/provider/twilio/twilio_test.go index 46dcfc49..4b89a393 100644 --- a/alerting/provider/twilio/twilio_test.go +++ b/alerting/provider/twilio/twilio_test.go @@ -31,7 +31,8 @@ func TestAlertProvider_ToCustomAlertProviderWithResolvedAlert(t *testing.T) { From: "3", To: "4", } - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{Name: "service-name"}, &core.Alert{Description: "alert-description"}, &core.Result{}, true) + description := "alert-description" + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{Name: "service-name"}, &core.Alert{Description: &description}, &core.Result{}, true) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } @@ -56,7 +57,8 @@ func TestAlertProvider_ToCustomAlertProviderWithTriggeredAlert(t *testing.T) { From: "2", To: "1", } - customAlertProvider := provider.ToCustomAlertProvider(&core.Service{Name: "service-name"}, &core.Alert{Description: "alert-description"}, &core.Result{}, false) + description := "alert-description" + customAlertProvider := provider.ToCustomAlertProvider(&core.Service{Name: "service-name"}, &core.Alert{Description: &description}, &core.Result{}, false) if customAlertProvider == nil { t.Fatal("customAlertProvider shouldn't have been nil") } diff --git a/watchdog/alerting_test.go b/watchdog/alerting_test.go index e1973af3..acf056b2 100644 --- a/watchdog/alerting_test.go +++ 
b/watchdog/alerting_test.go @@ -25,15 +25,16 @@ func TestHandleAlerting(t *testing.T) { }, } config.Set(cfg) + enabled := true service := &core.Service{ URL: "http://example.com", Alerts: []*core.Alert{ { Type: core.CustomAlert, - Enabled: true, + Enabled: &enabled, FailureThreshold: 2, SuccessThreshold: 3, - SendOnResolved: true, + SendOnResolved: &enabled, Triggered: false, }, }, @@ -78,15 +79,16 @@ func TestHandleAlertingWithBadAlertProvider(t *testing.T) { Alerting: &alerting.Config{}, } config.Set(cfg) + enabled := true service := &core.Service{ URL: "http://example.com", Alerts: []*core.Alert{ { Type: core.CustomAlert, - Enabled: true, + Enabled: &enabled, FailureThreshold: 1, SuccessThreshold: 1, - SendOnResolved: true, + SendOnResolved: &enabled, Triggered: false, }, }, @@ -113,15 +115,16 @@ func TestHandleAlertingWhenTriggeredAlertIsAlmostResolvedButServiceStartFailingA }, } config.Set(cfg) + enabled := true service := &core.Service{ URL: "http://example.com", Alerts: []*core.Alert{ { Type: core.CustomAlert, - Enabled: true, + Enabled: &enabled, FailureThreshold: 2, SuccessThreshold: 3, - SendOnResolved: true, + SendOnResolved: &enabled, Triggered: true, }, }, @@ -147,15 +150,17 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedButSendOnResolvedIsFalse(t *t }, } config.Set(cfg) + enabled := true + disabled := false service := &core.Service{ URL: "http://example.com", Alerts: []*core.Alert{ { Type: core.CustomAlert, - Enabled: true, + Enabled: &enabled, FailureThreshold: 1, SuccessThreshold: 1, - SendOnResolved: false, + SendOnResolved: &disabled, Triggered: true, }, }, @@ -179,15 +184,16 @@ func TestHandleAlertingWhenTriggeredAlertIsResolvedPagerDuty(t *testing.T) { }, } config.Set(cfg) + enabled := true service := &core.Service{ URL: "http://example.com", Alerts: []*core.Alert{ { Type: core.PagerDutyAlert, - Enabled: true, + Enabled: &enabled, FailureThreshold: 1, SuccessThreshold: 1, - SendOnResolved: true, + SendOnResolved: &enabled, Triggered: 
false, }, }, @@ -215,15 +221,16 @@ func TestHandleAlertingWithProviderThatReturnsAnError(t *testing.T) { }, } config.Set(cfg) + enabled := true service := &core.Service{ URL: "http://example.com", Alerts: []*core.Alert{ { Type: core.CustomAlert, - Enabled: true, + Enabled: &enabled, FailureThreshold: 2, SuccessThreshold: 2, - SendOnResolved: true, + SendOnResolved: &enabled, Triggered: false, }, }, @@ -273,15 +280,16 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) { }, } config.Set(cfg) + enabled := true service := &core.Service{ URL: "http://example.com", Alerts: []*core.Alert{ { Type: core.CustomAlert, - Enabled: true, + Enabled: &enabled, FailureThreshold: 1, SuccessThreshold: 1, - SendOnResolved: true, + SendOnResolved: &enabled, Triggered: false, }, }, From 758428b312f08975caac93d91166e3ea78b5f34f Mon Sep 17 00:00:00 2001 From: TwinProduction Date: Sat, 15 May 2021 22:09:58 -0400 Subject: [PATCH 3/6] Improve test coverage --- alerting/provider/discord/discord.go | 21 --------------- alerting/provider/provider_test.go | 4 +-- config/config_test.go | 39 ++++++++++++++++++++++++++-- 3 files changed, 39 insertions(+), 25 deletions(-) diff --git a/alerting/provider/discord/discord.go b/alerting/provider/discord/discord.go index bdf46baa..348a5299 100644 --- a/alerting/provider/discord/discord.go +++ b/alerting/provider/discord/discord.go @@ -16,27 +16,6 @@ type AlertProvider struct { DefaultAlert *core.Alert `yaml:"default-alert"` } -//func (provider *AlertProvider) ParseWithDefaultAlert(alert *core.Alert) { -// if provider.DefaultAlert == nil { -// return -// } -// if alert.Enabled == nil { -// alert.Enabled = provider.DefaultAlert.Enabled -// } -// if alert.SendOnResolved == nil { -// alert.SendOnResolved = provider.DefaultAlert.SendOnResolved -// } -// if len(alert.Description) == 0 { -// alert.Description = provider.DefaultAlert.Description -// } -// if alert.FailureThreshold == 0 { -// alert.FailureThreshold = 
provider.DefaultAlert.FailureThreshold -// } -// if alert.SuccessThreshold == 0 { -// alert.SuccessThreshold = provider.DefaultAlert.SuccessThreshold -// } -//} - // IsValid returns whether the provider's configuration is valid func (provider *AlertProvider) IsValid() bool { return len(provider.WebhookURL) > 0 diff --git a/alerting/provider/provider_test.go b/alerting/provider/provider_test.go index 7bda7baf..8b98b1ed 100644 --- a/alerting/provider/provider_test.go +++ b/alerting/provider/provider_test.go @@ -47,7 +47,7 @@ func TestParseWithDefaultAlert(t *testing.T) { SuccessThreshold: 10, }, ServiceAlert: &core.Alert{ - Type: core.DiscordAlert, + Type: core.TelegramAlert, Enabled: &enabled, SendOnResolved: &enabled, Description: &secondDescription, @@ -55,7 +55,7 @@ func TestParseWithDefaultAlert(t *testing.T) { SuccessThreshold: 11, }, ExpectedOutputAlert: &core.Alert{ - Type: core.DiscordAlert, + Type: core.TelegramAlert, Enabled: &enabled, SendOnResolved: &enabled, Description: &secondDescription, diff --git a/config/config_test.go b/config/config_test.go index 279794c8..eb1551a9 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -507,6 +507,8 @@ services: func TestParseAndValidateConfigBytesWithAlertingAndDefaultAlert(t *testing.T) { config, err := parseAndValidateConfigBytes([]byte(` +debug: true + alerting: slack: webhook-url: "http://example.com" @@ -582,19 +584,36 @@ services: if config.Alerting.Slack == nil || !config.Alerting.Slack.IsValid() { t.Fatal("Slack alerting config should've been valid") } + if config.Alerting.Slack.GetDefaultAlert() == nil { + t.Fatal("Slack.GetDefaultAlert() shouldn't have returned nil") + } if config.Alerting.Slack.WebhookURL != "http://example.com" { t.Errorf("Slack webhook should've been %s, but was %s", "http://example.com", config.Alerting.Slack.WebhookURL) } + if config.Alerting.PagerDuty == nil || !config.Alerting.PagerDuty.IsValid() { t.Fatal("PagerDuty alerting config should've been valid") } + if 
config.Alerting.PagerDuty.GetDefaultAlert() == nil { + t.Fatal("PagerDuty.GetDefaultAlert() shouldn't have returned nil") + } if config.Alerting.PagerDuty.IntegrationKey != "00000000000000000000000000000000" { t.Errorf("PagerDuty integration key should've been %s, but was %s", "00000000000000000000000000000000", config.Alerting.PagerDuty.IntegrationKey) } + if config.Alerting.Mattermost == nil || !config.Alerting.Mattermost.IsValid() { + t.Fatal("Mattermost alerting config should've been valid") + } + if config.Alerting.Mattermost.GetDefaultAlert() == nil { + t.Fatal("Mattermost.GetDefaultAlert() shouldn't have returned nil") + } + if config.Alerting.Messagebird == nil || !config.Alerting.Messagebird.IsValid() { t.Fatal("Messagebird alerting config should've been valid") } + if config.Alerting.Messagebird.GetDefaultAlert() == nil { + t.Fatal("Messagebird.GetDefaultAlert() shouldn't have returned nil") + } if config.Alerting.Messagebird.AccessKey != "1" { t.Errorf("Messagebird access key should've been %s, but was %s", "1", config.Alerting.Messagebird.AccessKey) } @@ -608,18 +627,34 @@ services: if config.Alerting.Discord == nil || !config.Alerting.Discord.IsValid() { t.Fatal("Discord alerting config should've been valid") } + if config.Alerting.Discord.GetDefaultAlert() == nil { + t.Fatal("Discord.GetDefaultAlert() shouldn't have returned nil") + } if config.Alerting.Discord.WebhookURL != "http://example.org" { t.Errorf("Discord webhook should've been %s, but was %s", "http://example.org", config.Alerting.Discord.WebhookURL) } + if GetAlertingProviderByAlertType(config, core.DiscordAlert) != config.Alerting.Discord { + t.Error("expected discord configuration") + } + if config.Alerting.Telegram == nil || !config.Alerting.Telegram.IsValid() { + t.Fatal("Telegram alerting config should've been valid") + } + if config.Alerting.Telegram.GetDefaultAlert() == nil { + t.Fatal("Telegram.GetDefaultAlert() shouldn't have returned nil") + } if config.Alerting.Telegram.Token != 
"123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11" { t.Errorf("Telegram token should've been %s, but was %s", "123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11", config.Alerting.Telegram.Token) } if config.Alerting.Telegram.ID != "0123456789" { t.Errorf("Telegram ID should've been %s, but was %s", "012345689", config.Alerting.Telegram.ID) } - if GetAlertingProviderByAlertType(config, core.DiscordAlert) != config.Alerting.Discord { - t.Error("expected discord configuration") + + if config.Alerting.Twilio == nil || !config.Alerting.Twilio.IsValid() { + t.Fatal("Twilio alerting config should've been valid") + } + if config.Alerting.Twilio.GetDefaultAlert() == nil { + t.Fatal("Twilio.GetDefaultAlert() shouldn't have returned nil") } // Services if len(config.Services) != 1 { From 8106832d69ec530caa005c4d8bdf7ac534b46827 Mon Sep 17 00:00:00 2001 From: TwinProduction Date: Sat, 15 May 2021 22:24:13 -0400 Subject: [PATCH 4/6] Improve test coverage --- config/config_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config_test.go b/config/config_test.go index eb1551a9..f9d5b788 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -1061,7 +1061,7 @@ services: func TestParseAndValidateConfigBytesWithValidSecurityConfig(t *testing.T) { const expectedUsername = "admin" const expectedPasswordHash = "6b97ed68d14eb3f1aa959ce5d49c7dc612e1eb1dafd73b1e705847483fd6a6c809f2ceb4e8df6ff9984c6298ff0285cace6614bf8daa9f0070101b6c89899e22" - config, err := parseAndValidateConfigBytes([]byte(fmt.Sprintf(` + config, err := parseAndValidateConfigBytes([]byte(fmt.Sprintf(`debug: true security: basic: username: "%s" From e3038f0e8003e8c06011f32b71143f409ead9819 Mon Sep 17 00:00:00 2001 From: TwinProduction Date: Sat, 15 May 2021 22:26:51 -0400 Subject: [PATCH 5/6] Add TestEvalWithInvalidData --- jsonpath/jsonpath_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/jsonpath/jsonpath_test.go b/jsonpath/jsonpath_test.go index dd3517fa..3e9063be 100644 
--- a/jsonpath/jsonpath_test.go +++ b/jsonpath/jsonpath_test.go @@ -20,6 +20,15 @@ func TestEval(t *testing.T) { } } +func TestEvalWithInvalidData(t *testing.T) { + path := "simple" + data := `invalid data` + _, _, err := Eval(path, []byte(data)) + if err == nil { + t.Error("expected an error") + } +} + func TestEvalWithInvalidPath(t *testing.T) { path := "errors" data := `{}` From 57ef931d389baf9b5a5d6b3b05cf46af083ad5d8 Mon Sep 17 00:00:00 2001 From: TwinProduction Date: Sat, 15 May 2021 22:38:13 -0400 Subject: [PATCH 6/6] Add TestEvalWithArrayOfValuesAndInvalidIndex --- jsonpath/jsonpath.go | 2 +- jsonpath/jsonpath_test.go | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/jsonpath/jsonpath.go b/jsonpath/jsonpath.go index 2bbf00fa..dedf306f 100644 --- a/jsonpath/jsonpath.go +++ b/jsonpath/jsonpath.go @@ -41,7 +41,7 @@ func extractValue(currentKey string, value interface{}) interface{} { tmp := strings.SplitN(currentKey, "[", 3) arrayIndex, err := strconv.Atoi(strings.Replace(tmp[1], "]", "", 1)) if err != nil { - return value + return nil } currentKey := tmp[0] // if currentKey contains only an index (i.e. [0] or 0) diff --git a/jsonpath/jsonpath_test.go b/jsonpath/jsonpath_test.go index 3e9063be..ccadf78f 100644 --- a/jsonpath/jsonpath_test.go +++ b/jsonpath/jsonpath_test.go @@ -84,6 +84,16 @@ func TestEvalWithArrayOfValues(t *testing.T) { } } +func TestEvalWithArrayOfValuesAndInvalidIndex(t *testing.T) { + path := "ids[wat]" + data := `{"ids": [1, 2]}` + + _, _, err := Eval(path, []byte(data)) + if err == nil { + t.Error("Expected an error") + } +} + func TestEvalWithRootArrayOfValues(t *testing.T) { path := "[1]" data := `[1, 2]`