refactor: Clean up code and change log format (#719)
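The change replaces the old "[package][function]" log prefix with "[package.function]" throughout the watchdog package. A minimal, self-contained sketch of the before/after convention (the group and endpoint values below are made up for illustration and are not part of the commit):

package main

import "log"

func main() {
	group, endpoint := "core", "frontend" // hypothetical values, for illustration only
	// Old prefix format, removed by this commit:
	log.Printf("[watchdog][execute] Monitoring group=%s; endpoint=%s", group, endpoint)
	// New prefix format, introduced by this commit:
	log.Printf("[watchdog.execute] Monitoring group=%s; endpoint=%s", group, endpoint)
}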
@@ -31,13 +31,13 @@ func handleAlertsToTrigger(endpoint *core.Endpoint, result *core.Result, alertin
 		}
 		if endpointAlert.Triggered {
 			if debug {
-				log.Printf("[watchdog][handleAlertsToTrigger] Alert for endpoint=%s with description='%s' has already been TRIGGERED, skipping", endpoint.Name, endpointAlert.GetDescription())
+				log.Printf("[watchdog.handleAlertsToTrigger] Alert for endpoint=%s with description='%s' has already been TRIGGERED, skipping", endpoint.Name, endpointAlert.GetDescription())
 			}
 			continue
 		}
 		alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
 		if alertProvider != nil {
-			log.Printf("[watchdog][handleAlertsToTrigger] Sending %s alert because alert for endpoint=%s with description='%s' has been TRIGGERED", endpointAlert.Type, endpoint.Name, endpointAlert.GetDescription())
+			log.Printf("[watchdog.handleAlertsToTrigger] Sending %s alert because alert for endpoint=%s with description='%s' has been TRIGGERED", endpointAlert.Type, endpoint.Name, endpointAlert.GetDescription())
 			var err error
 			if os.Getenv("MOCK_ALERT_PROVIDER") == "true" {
 				if os.Getenv("MOCK_ALERT_PROVIDER_ERROR") == "true" {
@@ -47,12 +47,12 @@ func handleAlertsToTrigger(endpoint *core.Endpoint, result *core.Result, alertin
 				err = alertProvider.Send(endpoint, endpointAlert, result, false)
 			}
 			if err != nil {
-				log.Printf("[watchdog][handleAlertsToTrigger] Failed to send an alert for endpoint=%s: %s", endpoint.Name, err.Error())
+				log.Printf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint=%s: %s", endpoint.Name, err.Error())
 			} else {
 				endpointAlert.Triggered = true
 			}
 		} else {
-			log.Printf("[watchdog][handleAlertsToResolve] Not sending alert of type=%s despite being TRIGGERED, because the provider wasn't configured properly", endpointAlert.Type)
+			log.Printf("[watchdog.handleAlertsToResolve] Not sending alert of type=%s despite being TRIGGERED, because the provider wasn't configured properly", endpointAlert.Type)
 		}
 	}
 }
@@ -71,13 +71,13 @@ func handleAlertsToResolve(endpoint *core.Endpoint, result *core.Result, alertin
 		}
 		alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
 		if alertProvider != nil {
-			log.Printf("[watchdog][handleAlertsToResolve] Sending %s alert because alert for endpoint=%s with description='%s' has been RESOLVED", endpointAlert.Type, endpoint.Name, endpointAlert.GetDescription())
+			log.Printf("[watchdog.handleAlertsToResolve] Sending %s alert because alert for endpoint=%s with description='%s' has been RESOLVED", endpointAlert.Type, endpoint.Name, endpointAlert.GetDescription())
 			err := alertProvider.Send(endpoint, endpointAlert, result, true)
 			if err != nil {
-				log.Printf("[watchdog][handleAlertsToResolve] Failed to send an alert for endpoint=%s: %s", endpoint.Name, err.Error())
+				log.Printf("[watchdog.handleAlertsToResolve] Failed to send an alert for endpoint=%s: %s", endpoint.Name, err.Error())
 			}
 		} else {
-			log.Printf("[watchdog][handleAlertsToResolve] Not sending alert of type=%s despite being RESOLVED, because the provider wasn't configured properly", endpointAlert.Type)
+			log.Printf("[watchdog.handleAlertsToResolve] Not sending alert of type=%s despite being RESOLVED, because the provider wasn't configured properly", endpointAlert.Type)
 		}
 	}
 	endpoint.NumberOfFailuresInARow = 0

@@ -44,7 +44,7 @@ func monitor(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenan
 	for {
 		select {
 		case <-ctx.Done():
-			log.Printf("[watchdog][monitor] Canceling current execution of group=%s; endpoint=%s", endpoint.Group, endpoint.Name)
+			log.Printf("[watchdog.monitor] Canceling current execution of group=%s; endpoint=%s", endpoint.Group, endpoint.Name)
 			return
 		case <-time.After(endpoint.Interval):
 			execute(endpoint, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics, debug)
@@ -61,11 +61,11 @@ func execute(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenan
 	}
 	// If there's a connectivity checker configured, check if Gatus has internet connectivity
 	if connectivityConfig != nil && connectivityConfig.Checker != nil && !connectivityConfig.Checker.IsConnected() {
-		log.Println("[watchdog][execute] No connectivity; skipping execution")
+		log.Println("[watchdog.execute] No connectivity; skipping execution")
 		return
 	}
 	if debug {
-		log.Printf("[watchdog][execute] Monitoring group=%s; endpoint=%s", endpoint.Group, endpoint.Name)
+		log.Printf("[watchdog.execute] Monitoring group=%s; endpoint=%s", endpoint.Group, endpoint.Name)
 	}
 	result := endpoint.EvaluateHealth()
 	if enabledMetrics {
@@ -73,25 +73,25 @@ func execute(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenan
 	}
 	UpdateEndpointStatuses(endpoint, result)
 	if debug && !result.Success {
-		log.Printf("[watchdog][execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s; body=%s", endpoint.Group, endpoint.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond), result.Body)
+		log.Printf("[watchdog.execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s; body=%s", endpoint.Group, endpoint.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond), result.Body)
 	} else {
-		log.Printf("[watchdog][execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s", endpoint.Group, endpoint.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond))
+		log.Printf("[watchdog.execute] Monitored group=%s; endpoint=%s; success=%v; errors=%d; duration=%s", endpoint.Group, endpoint.Name, result.Success, len(result.Errors), result.Duration.Round(time.Millisecond))
 	}
 	if !maintenanceConfig.IsUnderMaintenance() {
 		// TODO: Consider moving this after the monitoring lock is unlocked? I mean, how much noise can a single alerting provider cause...
 		HandleAlerting(endpoint, result, alertingConfig, debug)
 	} else if debug {
-		log.Println("[watchdog][execute] Not handling alerting because currently in the maintenance window")
+		log.Println("[watchdog.execute] Not handling alerting because currently in the maintenance window")
 	}
 	if debug {
-		log.Printf("[watchdog][execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", endpoint.Interval, endpoint.Group, endpoint.Name)
+		log.Printf("[watchdog.execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", endpoint.Interval, endpoint.Group, endpoint.Name)
 	}
 }
 
 // UpdateEndpointStatuses updates the slice of endpoint statuses
 func UpdateEndpointStatuses(endpoint *core.Endpoint, result *core.Result) {
 	if err := store.Get().Insert(endpoint, result); err != nil {
-		log.Println("[watchdog][UpdateEndpointStatuses] Failed to insert data in storage:", err.Error())
+		log.Println("[watchdog.UpdateEndpointStatuses] Failed to insert data in storage:", err.Error())
 	}
 }
 
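For context, the first hunk also touches the mock-provider escape hatch that handleAlertsToTrigger checks via environment variables. Below is a hedged, self-contained sketch of that branching logic only; the helper name mockableSend is hypothetical and not part of Gatus:

package main

import (
	"errors"
	"log"
	"os"
)

// mockableSend mirrors the branching shown in the first hunk: when
// MOCK_ALERT_PROVIDER=true the real provider is never called, and
// MOCK_ALERT_PROVIDER_ERROR=true simulates a failed send.
func mockableSend(send func() error) error {
	if os.Getenv("MOCK_ALERT_PROVIDER") == "true" {
		if os.Getenv("MOCK_ALERT_PROVIDER_ERROR") == "true" {
			return errors.New("mock error")
		}
		return nil
	}
	return send()
}

func main() {
	os.Setenv("MOCK_ALERT_PROVIDER", "true")
	err := mockableSend(func() error { return errors.New("real provider should not be reached") })
	log.Printf("[watchdog.example] err=%v", err) // err is nil because the mock path was taken
}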