Start working on notifications when service is back to healthy (#9)
This commit is contained in:
		@ -13,6 +13,9 @@ type Alert struct {
 | 
			
		||||
 | 
			
		||||
	// Description of the alert. Will be included in the alert sent.
 | 
			
		||||
	Description string `yaml:"description"`
 | 
			
		||||
 | 
			
		||||
	// SendOnResolved defines whether to send a second notification when the issue has been resolved
 | 
			
		||||
	SendOnResolved bool `yaml:"send-on-resolved"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type AlertType string
 | 
			
		||||
 | 
			
		||||
@ -21,6 +21,10 @@ type TwilioAlertProvider struct {
 | 
			
		||||
	To    string `yaml:"to"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (provider *TwilioAlertProvider) IsValid() bool {
 | 
			
		||||
	return len(provider.Token) > 0 && len(provider.SID) > 0 && len(provider.From) > 0 && len(provider.To) > 0
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type CustomAlertProvider struct {
 | 
			
		||||
	Url     string            `yaml:"url"`
 | 
			
		||||
	Method  string            `yaml:"method,omitempty"`
 | 
			
		||||
@ -28,6 +32,10 @@ type CustomAlertProvider struct {
 | 
			
		||||
	Headers map[string]string `yaml:"headers,omitempty"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (provider *CustomAlertProvider) IsValid() bool {
 | 
			
		||||
	return len(provider.Url) > 0
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (provider *CustomAlertProvider) buildRequest(serviceName, alertDescription string) *http.Request {
 | 
			
		||||
	body := provider.Body
 | 
			
		||||
	url := provider.Url
 | 
			
		||||
 | 
			
		||||
@ -46,7 +46,7 @@ type Service struct {
 | 
			
		||||
	// Alerts is the alerting configuration for the service in case of failure
 | 
			
		||||
	Alerts []*Alert `yaml:"alerts"`
 | 
			
		||||
 | 
			
		||||
	numberOfFailuresInARow int
 | 
			
		||||
	NumberOfFailuresInARow int
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (service *Service) Validate() {
 | 
			
		||||
@ -94,22 +94,16 @@ func (service *Service) EvaluateConditions() *Result {
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	result.Timestamp = time.Now()
 | 
			
		||||
	if result.Success {
 | 
			
		||||
		service.numberOfFailuresInARow = 0
 | 
			
		||||
		// TODO: Send notification that alert has been resolved?
 | 
			
		||||
	} else {
 | 
			
		||||
		service.numberOfFailuresInARow++
 | 
			
		||||
	}
 | 
			
		||||
	return result
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (service *Service) GetAlertsTriggered() []Alert {
 | 
			
		||||
	var alerts []Alert
 | 
			
		||||
	if service.numberOfFailuresInARow == 0 {
 | 
			
		||||
	if service.NumberOfFailuresInARow == 0 {
 | 
			
		||||
		return alerts
 | 
			
		||||
	}
 | 
			
		||||
	for _, alert := range service.Alerts {
 | 
			
		||||
		if alert.Enabled && alert.Threshold == service.numberOfFailuresInARow {
 | 
			
		||||
		if alert.Enabled && alert.Threshold == service.NumberOfFailuresInARow {
 | 
			
		||||
			alerts = append(alerts, *alert)
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										1
									
								
								go.sum
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								go.sum
									
									
									
									
									
								
							@ -18,6 +18,7 @@ github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/me
 | 
			
		||||
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
 | 
			
		||||
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 | 
			
		||||
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 | 
			
		||||
github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
 | 
			
		||||
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 | 
			
		||||
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 | 
			
		||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 | 
			
		||||
 | 
			
		||||
@ -57,35 +57,58 @@ func monitor(service *core.Service) {
 | 
			
		||||
			extra,
 | 
			
		||||
		)
 | 
			
		||||
 | 
			
		||||
		handleAlerting(service, result)
 | 
			
		||||
 | 
			
		||||
		log.Printf("[watchdog][monitor] Waiting for interval=%s before monitoring serviceName=%s", service.Interval, service.Name)
 | 
			
		||||
		time.Sleep(service.Interval)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func handleAlerting(service *core.Service, result *core.Result) {
 | 
			
		||||
	cfg := config.Get()
 | 
			
		||||
		if cfg.Alerting != nil {
 | 
			
		||||
			for _, alertTriggered := range service.GetAlertsTriggered() {
 | 
			
		||||
	if cfg.Alerting == nil {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
	if result.Success {
 | 
			
		||||
		if service.NumberOfFailuresInARow > 0 {
 | 
			
		||||
			for _, alert := range service.Alerts {
 | 
			
		||||
				if !alert.Enabled || !alert.SendOnResolved || alert.Threshold < service.NumberOfFailuresInARow {
 | 
			
		||||
					continue
 | 
			
		||||
				}
 | 
			
		||||
				// TODO
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		service.NumberOfFailuresInARow = 0
 | 
			
		||||
	} else {
 | 
			
		||||
		service.NumberOfFailuresInARow++
 | 
			
		||||
		for _, alert := range service.Alerts {
 | 
			
		||||
			// If the alert hasn't been triggered, move to the next one
 | 
			
		||||
			if !alert.Enabled || alert.Threshold != service.NumberOfFailuresInARow {
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
			var alertProvider *core.CustomAlertProvider
 | 
			
		||||
				if alertTriggered.Type == core.SlackAlert {
 | 
			
		||||
			if alert.Type == core.SlackAlert {
 | 
			
		||||
				if len(cfg.Alerting.Slack) > 0 {
 | 
			
		||||
						log.Printf("[watchdog][monitor] Sending Slack alert because alert with description=%s has been triggered", alertTriggered.Description)
 | 
			
		||||
					log.Printf("[watchdog][monitor] Sending Slack alert because alert with description=%s has been triggered", alert.Description)
 | 
			
		||||
					alertProvider = &core.CustomAlertProvider{
 | 
			
		||||
						Url:     cfg.Alerting.Slack,
 | 
			
		||||
						Method:  "POST",
 | 
			
		||||
							Body:    fmt.Sprintf(`{"text":"*[Gatus]*\n*service:* %s\n*description:* %s"}`, service.Name, alertTriggered.Description),
 | 
			
		||||
						Body:    fmt.Sprintf(`{"text":"*[Gatus]*\n*service:* %s\n*description:* %s"}`, service.Name, alert.Description),
 | 
			
		||||
						Headers: map[string]string{"Content-Type": "application/json"},
 | 
			
		||||
					}
 | 
			
		||||
				} else {
 | 
			
		||||
					log.Printf("[watchdog][monitor] Not sending Slack alert despite being triggered, because there is no Slack webhook configured")
 | 
			
		||||
				}
 | 
			
		||||
				} else if alertTriggered.Type == core.TwilioAlert {
 | 
			
		||||
					if len(cfg.Alerting.Twilio.Token) > 0 &&
 | 
			
		||||
						len(cfg.Alerting.Twilio.SID) > 0 &&
 | 
			
		||||
						len(cfg.Alerting.Twilio.From) > 0 &&
 | 
			
		||||
						len(cfg.Alerting.Twilio.To) > 0 {
 | 
			
		||||
						log.Printf("[watchdog][monitor] Sending Twilio alert because alert with description=%s has been triggered", alertTriggered.Description)
 | 
			
		||||
			} else if alert.Type == core.TwilioAlert {
 | 
			
		||||
				if cfg.Alerting.Twilio != nil && cfg.Alerting.Twilio.IsValid() {
 | 
			
		||||
					log.Printf("[watchdog][monitor] Sending Twilio alert because alert with description=%s has been triggered", alert.Description)
 | 
			
		||||
					alertProvider = &core.CustomAlertProvider{
 | 
			
		||||
						Url:    fmt.Sprintf("https://api.twilio.com/2010-04-01/Accounts/%s/Messages.json", cfg.Alerting.Twilio.SID),
 | 
			
		||||
						Method: "POST",
 | 
			
		||||
						Body: url.Values{
 | 
			
		||||
							"To":   {cfg.Alerting.Twilio.To},
 | 
			
		||||
							"From": {cfg.Alerting.Twilio.From},
 | 
			
		||||
								"Body": {fmt.Sprintf("%s - %s", service.Name, alertTriggered.Description)},
 | 
			
		||||
							"Body": {fmt.Sprintf("%s - %s", service.Name, alert.Description)},
 | 
			
		||||
						}.Encode(),
 | 
			
		||||
						Headers: map[string]string{
 | 
			
		||||
							"Content-Type":  "application/x-www-form-urlencoded",
 | 
			
		||||
@ -95,9 +118,9 @@ func monitor(service *core.Service) {
 | 
			
		||||
				} else {
 | 
			
		||||
					log.Printf("[watchdog][monitor] Not sending Twilio alert despite being triggered, because twilio config settings missing")
 | 
			
		||||
				}
 | 
			
		||||
				} else if alertTriggered.Type == core.CustomAlert {
 | 
			
		||||
					if cfg.Alerting.Custom != nil && len(cfg.Alerting.Custom.Url) > 0 {
 | 
			
		||||
						log.Printf("[watchdog][monitor] Sending custom alert because alert with description=%s has been triggered", alertTriggered.Description)
 | 
			
		||||
			} else if alert.Type == core.CustomAlert {
 | 
			
		||||
				if cfg.Alerting.Custom != nil && cfg.Alerting.Custom.IsValid() {
 | 
			
		||||
					log.Printf("[watchdog][monitor] Sending custom alert because alert with description=%s has been triggered", alert.Description)
 | 
			
		||||
					alertProvider = &core.CustomAlertProvider{
 | 
			
		||||
						Url:     cfg.Alerting.Custom.Url,
 | 
			
		||||
						Method:  cfg.Alerting.Custom.Method,
 | 
			
		||||
@ -109,15 +132,11 @@ func monitor(service *core.Service) {
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
			if alertProvider != nil {
 | 
			
		||||
					err := alertProvider.Send(service.Name, alertTriggered.Description)
 | 
			
		||||
				err := alertProvider.Send(service.Name, alert.Description)
 | 
			
		||||
				if err != nil {
 | 
			
		||||
					log.Printf("[watchdog][monitor] Ran into error sending an alert: %s", err.Error())
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
		log.Printf("[watchdog][monitor] Waiting for interval=%s before monitoring serviceName=%s", service.Interval, service.Name)
 | 
			
		||||
		time.Sleep(service.Interval)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user