Reset
watchdog/alerting.go (new file, 97 lines added)
@@ -0,0 +1,97 @@
package watchdog

import (
	"errors"
	"os"

	"github.com/TwiN/gatus/v5/alerting"
	"github.com/TwiN/gatus/v5/config/endpoint"
	"github.com/TwiN/gatus/v5/storage/store"
	"github.com/TwiN/logr"
)

// HandleAlerting takes care of alerts to resolve and alerts to trigger based on result success or failure
func HandleAlerting(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) {
	if alertingConfig == nil {
		return
	}
	if result.Success {
		handleAlertsToResolve(ep, result, alertingConfig)
	} else {
		handleAlertsToTrigger(ep, result, alertingConfig)
	}
}

func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) {
	ep.NumberOfSuccessesInARow = 0
	ep.NumberOfFailuresInARow++
	for _, endpointAlert := range ep.Alerts {
		// If the alert is disabled or the failure threshold hasn't been reached yet, move to the next alert
		if !endpointAlert.IsEnabled() || endpointAlert.FailureThreshold > ep.NumberOfFailuresInARow {
			continue
		}
		if endpointAlert.Triggered {
			logr.Debugf("[watchdog.handleAlertsToTrigger] Alert for endpoint with key=%s with description='%s' has already been TRIGGERED, skipping", ep.Key(), endpointAlert.GetDescription())
			continue
		}
		alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
		if alertProvider != nil {
			logr.Infof("[watchdog.handleAlertsToTrigger] Sending %s alert because alert for endpoint with key=%s with description='%s' has been TRIGGERED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
			var err error
			if os.Getenv("MOCK_ALERT_PROVIDER") == "true" {
				if os.Getenv("MOCK_ALERT_PROVIDER_ERROR") == "true" {
					err = errors.New("error")
				}
			} else {
				err = alertProvider.Send(ep, endpointAlert, result, false)
			}
			if err != nil {
				logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
			} else {
				endpointAlert.Triggered = true
				if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
					logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to persist triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
				}
			}
		} else {
			logr.Warnf("[watchdog.handleAlertsToTrigger] Not sending alert of type=%s for endpoint with key=%s despite being TRIGGERED, because the provider wasn't configured properly", endpointAlert.Type, ep.Key())
		}
	}
}

func handleAlertsToResolve(ep *endpoint.Endpoint, result *endpoint.Result, alertingConfig *alerting.Config) {
	ep.NumberOfSuccessesInARow++
	for _, endpointAlert := range ep.Alerts {
		isStillBelowSuccessThreshold := endpointAlert.SuccessThreshold > ep.NumberOfSuccessesInARow
		if isStillBelowSuccessThreshold && endpointAlert.IsEnabled() && endpointAlert.Triggered {
			// Persist NumberOfSuccessesInARow
			if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
				logr.Errorf("[watchdog.handleAlertsToResolve] Failed to update triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
			}
		}
		if !endpointAlert.IsEnabled() || !endpointAlert.Triggered || isStillBelowSuccessThreshold {
			continue
		}
		// Even if the alert provider returns an error, we still set the alert's Triggered variable to false.
		// Further explanation can be found on Alert's Triggered field.
		endpointAlert.Triggered = false
		if err := store.Get().DeleteTriggeredEndpointAlert(ep, endpointAlert); err != nil {
			logr.Errorf("[watchdog.handleAlertsToResolve] Failed to delete persisted triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
		}
		if !endpointAlert.IsSendingOnResolved() {
			logr.Debugf("[watchdog.handleAlertsToResolve] Not sending request to provider of alert with type=%s for endpoint with key=%s despite being RESOLVED, because send-on-resolved is set to false", endpointAlert.Type, ep.Key())
			continue
		}
		alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
		if alertProvider != nil {
			logr.Infof("[watchdog.handleAlertsToResolve] Sending %s alert because alert for endpoint with key=%s with description='%s' has been RESOLVED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
			err := alertProvider.Send(ep, endpointAlert, result, true)
			if err != nil {
				logr.Errorf("[watchdog.handleAlertsToResolve] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
			}
		} else {
			logr.Warnf("[watchdog.handleAlertsToResolve] Not sending alert of type=%s for endpoint with key=%s despite being RESOLVED, because the provider wasn't configured properly", endpointAlert.Type, ep.Key())
		}
	}
	ep.NumberOfFailuresInARow = 0
}
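For readers skimming the diff: the trigger/resolve logic above reduces to two consecutive-result counters compared against two thresholds. A minimal, dependency-free sketch of that state machine (plain Go; the names are illustrative, not part of the commit), with persistence and provider calls omitted:

package main

import "fmt"

// alertState mirrors the fields HandleAlerting relies on: consecutive failure/success
// counters on the endpoint and a Triggered flag on the alert.
type alertState struct {
	failureThreshold, successThreshold int
	failuresInARow, successesInARow    int
	triggered                          bool
}

// observe applies the same rules as handleAlertsToTrigger/handleAlertsToResolve,
// minus persistence and provider calls.
func (s *alertState) observe(success bool) {
	if success {
		s.failuresInARow = 0
		s.successesInARow++
		if s.triggered && s.successesInARow >= s.successThreshold {
			s.triggered = false // resolved
		}
	} else {
		s.successesInARow = 0
		s.failuresInARow++
		if !s.triggered && s.failuresInARow >= s.failureThreshold {
			s.triggered = true // triggered
		}
	}
}

func main() {
	s := &alertState{failureThreshold: 2, successThreshold: 3}
	for _, success := range []bool{false, false, true, true, true} {
		s.observe(success)
		fmt.Printf("success=%v -> triggered=%v\n", success, s.triggered)
	}
	// Triggers after the 2nd consecutive failure, resolves after the 3rd consecutive success.
}

Running it with a failure threshold of 2 and a success threshold of 3 reproduces the sequence exercised by TestHandleAlerting below.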
watchdog/alerting_test.go (new file, 538 lines added)
@@ -0,0 +1,538 @@
package watchdog

import (
	"os"
	"testing"

	"github.com/TwiN/gatus/v5/alerting"
	"github.com/TwiN/gatus/v5/alerting/alert"
	"github.com/TwiN/gatus/v5/alerting/provider/custom"
	"github.com/TwiN/gatus/v5/alerting/provider/discord"
	"github.com/TwiN/gatus/v5/alerting/provider/email"
	"github.com/TwiN/gatus/v5/alerting/provider/jetbrainsspace"
	"github.com/TwiN/gatus/v5/alerting/provider/matrix"
	"github.com/TwiN/gatus/v5/alerting/provider/mattermost"
	"github.com/TwiN/gatus/v5/alerting/provider/messagebird"
	"github.com/TwiN/gatus/v5/alerting/provider/pagerduty"
	"github.com/TwiN/gatus/v5/alerting/provider/pushover"
	"github.com/TwiN/gatus/v5/alerting/provider/slack"
	"github.com/TwiN/gatus/v5/alerting/provider/teams"
	"github.com/TwiN/gatus/v5/alerting/provider/telegram"
	"github.com/TwiN/gatus/v5/alerting/provider/twilio"
	"github.com/TwiN/gatus/v5/config"
	"github.com/TwiN/gatus/v5/config/endpoint"
)

func TestHandleAlerting(t *testing.T) {
	_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
	defer os.Clearenv()

	cfg := &config.Config{
		Alerting: &alerting.Config{
			Custom: &custom.AlertProvider{
				DefaultConfig: custom.Config{
					URL:    "https://twin.sh/health",
					Method: "GET",
				},
			},
		},
	}
	enabled := true
	ep := &endpoint.Endpoint{
		URL: "https://example.com",
		Alerts: []*alert.Alert{
			{
				Type:             alert.TypeCustom,
				Enabled:          &enabled,
				FailureThreshold: 2,
				SuccessThreshold: 3,
				SendOnResolved:   &enabled,
				Triggered:        false,
			},
		},
	}

	verify(t, ep, 0, 0, false, "The alert shouldn't start triggered")
	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
	verify(t, ep, 1, 0, false, "The alert shouldn't have triggered")
	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
	verify(t, ep, 2, 0, true, "The alert should've triggered")
	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
	verify(t, ep, 3, 0, true, "The alert should still be triggered")
	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
	verify(t, ep, 4, 0, true, "The alert should still be triggered")
	HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
	verify(t, ep, 0, 1, true, "The alert should still be triggered (because endpoint.Alerts[0].SuccessThreshold is 3)")
	HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
	verify(t, ep, 0, 2, true, "The alert should still be triggered (because endpoint.Alerts[0].SuccessThreshold is 3)")
	HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
	verify(t, ep, 0, 3, false, "The alert should've been resolved")
	HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
	verify(t, ep, 0, 4, false, "The alert should no longer be triggered")
}

func TestHandleAlertingWhenAlertingConfigIsNil(t *testing.T) {
	_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
	defer os.Clearenv()
	HandleAlerting(nil, nil, nil)
}

func TestHandleAlertingWithBadAlertProvider(t *testing.T) {
	_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
	defer os.Clearenv()

	enabled := true
	ep := &endpoint.Endpoint{
		URL: "http://example.com",
		Alerts: []*alert.Alert{
			{
				Type:             alert.TypeCustom,
				Enabled:          &enabled,
				FailureThreshold: 1,
				SuccessThreshold: 1,
				SendOnResolved:   &enabled,
				Triggered:        false,
			},
		},
	}

	verify(t, ep, 0, 0, false, "The alert shouldn't start triggered")
	HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{})
	verify(t, ep, 1, 0, false, "The alert shouldn't have triggered")
	HandleAlerting(ep, &endpoint.Result{Success: false}, &alerting.Config{})
	verify(t, ep, 2, 0, false, "The alert shouldn't have triggered, because the provider wasn't configured properly")
}

func TestHandleAlertingWhenTriggeredAlertIsAlmostResolvedButendpointStartFailingAgain(t *testing.T) {
	_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
	defer os.Clearenv()

	cfg := &config.Config{
		Alerting: &alerting.Config{
			Custom: &custom.AlertProvider{
				DefaultConfig: custom.Config{
					URL:    "https://twin.sh/health",
					Method: "GET",
				},
			},
		},
	}
	enabled := true
	ep := &endpoint.Endpoint{
		URL: "https://example.com",
		Alerts: []*alert.Alert{
			{
				Type:             alert.TypeCustom,
				Enabled:          &enabled,
				FailureThreshold: 2,
				SuccessThreshold: 3,
				SendOnResolved:   &enabled,
				Triggered:        true,
			},
		},
		NumberOfFailuresInARow: 1,
	}

	// This test simulates an alert that was already triggered
	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
	verify(t, ep, 2, 0, true, "The alert was already triggered at the beginning of this test")
}

func TestHandleAlertingWhenTriggeredAlertIsResolvedButSendOnResolvedIsFalse(t *testing.T) {
	_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
	defer os.Clearenv()

	cfg := &config.Config{
		Alerting: &alerting.Config{
			Custom: &custom.AlertProvider{
				DefaultConfig: custom.Config{
					URL:    "https://twin.sh/health",
					Method: "GET",
				},
			},
		},
	}
	enabled := true
	disabled := false
	ep := &endpoint.Endpoint{
		URL: "https://example.com",
		Alerts: []*alert.Alert{
			{
				Type:             alert.TypeCustom,
				Enabled:          &enabled,
				FailureThreshold: 1,
				SuccessThreshold: 1,
				SendOnResolved:   &disabled,
				Triggered:        true,
			},
		},
		NumberOfFailuresInARow: 1,
	}

	HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
	verify(t, ep, 0, 1, false, "The alert should've been resolved")
}

func TestHandleAlertingWhenTriggeredAlertIsResolvedPagerDuty(t *testing.T) {
	_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
	defer os.Clearenv()

	cfg := &config.Config{
		Alerting: &alerting.Config{
			PagerDuty: &pagerduty.AlertProvider{
				DefaultConfig: pagerduty.Config{
					IntegrationKey: "00000000000000000000000000000000",
				},
			},
		},
	}
	enabled := true
	ep := &endpoint.Endpoint{
		URL: "https://example.com",
		Alerts: []*alert.Alert{
			{
				Type:             alert.TypePagerDuty,
				Enabled:          &enabled,
				FailureThreshold: 1,
				SuccessThreshold: 1,
				SendOnResolved:   &enabled,
				Triggered:        false,
			},
		},
		NumberOfFailuresInARow: 0,
	}

	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
	verify(t, ep, 1, 0, true, "")

	HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
	verify(t, ep, 0, 1, false, "The alert should've been resolved")
}

func TestHandleAlertingWhenTriggeredAlertIsResolvedPushover(t *testing.T) {
	_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
	defer os.Clearenv()

	cfg := &config.Config{
		Alerting: &alerting.Config{
			Pushover: &pushover.AlertProvider{
				DefaultConfig: pushover.Config{
					ApplicationToken: "000000000000000000000000000000",
					UserKey:          "000000000000000000000000000000",
				},
			},
		},
	}
	enabled := true
	ep := &endpoint.Endpoint{
		URL: "https://example.com",
		Alerts: []*alert.Alert{
			{
				Type:             alert.TypePushover,
				Enabled:          &enabled,
				FailureThreshold: 1,
				SuccessThreshold: 1,
				SendOnResolved:   &enabled,
				Triggered:        false,
			},
		},
		NumberOfFailuresInARow: 0,
	}

	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
	verify(t, ep, 1, 0, true, "")

	HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
	verify(t, ep, 0, 1, false, "The alert should've been resolved")
}

func TestHandleAlertingWithProviderThatReturnsAnError(t *testing.T) {
	_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
	defer os.Clearenv()
	enabled := true
	scenarios := []struct {
		Name           string
		AlertingConfig *alerting.Config
		AlertType      alert.Type
	}{
		{
			Name:      "custom",
			AlertType: alert.TypeCustom,
			AlertingConfig: &alerting.Config{
				Custom: &custom.AlertProvider{
					DefaultConfig: custom.Config{
						URL:    "https://twin.sh/health",
						Method: "GET",
					},
				},
			},
		},
		{
			Name:      "discord",
			AlertType: alert.TypeDiscord,
			AlertingConfig: &alerting.Config{
				Discord: &discord.AlertProvider{
					DefaultConfig: discord.Config{
						WebhookURL: "https://example.com",
					},
				},
			},
		},
		{
			Name:      "email",
			AlertType: alert.TypeEmail,
			AlertingConfig: &alerting.Config{
				Email: &email.AlertProvider{
					DefaultConfig: email.Config{
						From:     "from@example.com",
						Password: "hunter2",
						Host:     "mail.example.com",
						Port:     587,
						To:       "to@example.com",
					},
				},
			},
		},
		{
			Name:      "jetbrainsspace",
			AlertType: alert.TypeJetBrainsSpace,
			AlertingConfig: &alerting.Config{
				JetBrainsSpace: &jetbrainsspace.AlertProvider{
					DefaultConfig: jetbrainsspace.Config{
						Project:   "foo",
						ChannelID: "bar",
						Token:     "baz",
					},
				},
			},
		},
		{
			Name:      "mattermost",
			AlertType: alert.TypeMattermost,
			AlertingConfig: &alerting.Config{
				Mattermost: &mattermost.AlertProvider{
					DefaultConfig: mattermost.Config{
						WebhookURL: "https://example.com",
					},
				},
			},
		},
		{
			Name:      "messagebird",
			AlertType: alert.TypeMessagebird,
			AlertingConfig: &alerting.Config{
				Messagebird: &messagebird.AlertProvider{
					DefaultConfig: messagebird.Config{
						AccessKey:  "1",
						Originator: "2",
						Recipients: "3",
					},
				},
			},
		},
		{
			Name:      "pagerduty",
			AlertType: alert.TypePagerDuty,
			AlertingConfig: &alerting.Config{
				PagerDuty: &pagerduty.AlertProvider{
					DefaultConfig: pagerduty.Config{
						IntegrationKey: "00000000000000000000000000000000",
					},
				},
			},
		},
		{
			Name:      "pushover",
			AlertType: alert.TypePushover,
			AlertingConfig: &alerting.Config{
				Pushover: &pushover.AlertProvider{
					DefaultConfig: pushover.Config{
						ApplicationToken: "000000000000000000000000000000",
						UserKey:          "000000000000000000000000000000",
					},
				},
			},
		},
		{
			Name:      "slack",
			AlertType: alert.TypeSlack,
			AlertingConfig: &alerting.Config{
				Slack: &slack.AlertProvider{
					DefaultConfig: slack.Config{
						WebhookURL: "https://example.com",
					},
				},
			},
		},
		{
			Name:      "teams",
			AlertType: alert.TypeTeams,
			AlertingConfig: &alerting.Config{
				Teams: &teams.AlertProvider{
					DefaultConfig: teams.Config{
						WebhookURL: "https://example.com",
					},
				},
			},
		},
		{
			Name:      "telegram",
			AlertType: alert.TypeTelegram,
			AlertingConfig: &alerting.Config{
				Telegram: &telegram.AlertProvider{
					DefaultConfig: telegram.Config{
						Token: "1",
						ID:    "2",
					},
				},
			},
		},
		{
			Name:      "twilio",
			AlertType: alert.TypeTwilio,
			AlertingConfig: &alerting.Config{
				Twilio: &twilio.AlertProvider{
					DefaultConfig: twilio.Config{
						SID:   "1",
						Token: "2",
						From:  "3",
						To:    "4",
					},
				},
			},
		},
		{
			Name:      "matrix",
			AlertType: alert.TypeMatrix,
			AlertingConfig: &alerting.Config{
				Matrix: &matrix.AlertProvider{
					DefaultConfig: matrix.Config{
						ServerURL:      "https://example.com",
						AccessToken:    "1",
						InternalRoomID: "!a:example.com",
					},
				},
			},
		},
	}

	for _, scenario := range scenarios {
		t.Run(scenario.Name, func(t *testing.T) {
			ep := &endpoint.Endpoint{
				URL: "https://example.com",
				Alerts: []*alert.Alert{
					{
						Type:             scenario.AlertType,
						Enabled:          &enabled,
						FailureThreshold: 2,
						SuccessThreshold: 2,
						SendOnResolved:   &enabled,
						Triggered:        false,
					},
				},
			}
			_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
			HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
			verify(t, ep, 1, 0, false, "")
			HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
			verify(t, ep, 2, 0, false, "The alert should have failed to trigger, because the alert provider is returning an error")
			HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
			verify(t, ep, 3, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error")
			HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
			verify(t, ep, 4, 0, false, "The alert should still not be triggered, because the alert provider is still returning an error")
			_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
			HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
			verify(t, ep, 5, 0, true, "The alert should've been triggered because the alert provider is no longer returning an error")
			HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
			verify(t, ep, 0, 1, true, "The alert should've still been triggered")
			_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
			HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
			verify(t, ep, 0, 2, false, "The alert should've been resolved DESPITE THE ALERT PROVIDER RETURNING AN ERROR. See Alert.Triggered for further explanation.")
			_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")

			// Make sure that everything's working as expected after a rough patch
			HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
			verify(t, ep, 1, 0, false, "")
			HandleAlerting(ep, &endpoint.Result{Success: false}, scenario.AlertingConfig)
			verify(t, ep, 2, 0, true, "The alert should have triggered")
			HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
			verify(t, ep, 0, 1, true, "The alert should still be triggered")
			HandleAlerting(ep, &endpoint.Result{Success: true}, scenario.AlertingConfig)
			verify(t, ep, 0, 2, false, "The alert should have been resolved")
		})
	}
}

func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) {
	_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
	defer os.Clearenv()

	cfg := &config.Config{
		Alerting: &alerting.Config{
			Custom: &custom.AlertProvider{
				DefaultConfig: custom.Config{
					URL:    "https://twin.sh/health",
					Method: "GET",
				},
			},
		},
	}
	enabled := true
	ep := &endpoint.Endpoint{
		URL: "https://example.com",
		Alerts: []*alert.Alert{
			{
				Type:             alert.TypeCustom,
				Enabled:          &enabled,
				FailureThreshold: 1,
				SuccessThreshold: 1,
				SendOnResolved:   &enabled,
				Triggered:        false,
			},
		},
	}

	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
	verify(t, ep, 1, 0, true, "")
	_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
	HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
	verify(t, ep, 0, 1, false, "")
	_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")
	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
	verify(t, ep, 1, 0, true, "")
	_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "true")
	HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
	verify(t, ep, 0, 1, false, "")
	_ = os.Setenv("MOCK_ALERT_PROVIDER_ERROR", "false")

	// Make sure that everything's working as expected after a rough patch
	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
	verify(t, ep, 1, 0, true, "")
	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
	verify(t, ep, 2, 0, true, "")
	HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
	verify(t, ep, 0, 1, false, "")
	HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
	verify(t, ep, 0, 2, false, "")
}

func verify(t *testing.T, ep *endpoint.Endpoint, expectedNumberOfFailuresInARow, expectedNumberOfSuccessInARow int, expectedTriggered bool, expectedTriggeredReason string) {
	if ep.NumberOfFailuresInARow != expectedNumberOfFailuresInARow {
		t.Errorf("endpoint.NumberOfFailuresInARow should've been %d, got %d", expectedNumberOfFailuresInARow, ep.NumberOfFailuresInARow)
	}
	if ep.NumberOfSuccessesInARow != expectedNumberOfSuccessInARow {
		t.Errorf("endpoint.NumberOfSuccessesInARow should've been %d, got %d", expectedNumberOfSuccessInARow, ep.NumberOfSuccessesInARow)
	}
	if ep.Alerts[0].Triggered != expectedTriggered {
		if len(expectedTriggeredReason) != 0 {
			t.Error(expectedTriggeredReason)
		} else {
			if expectedTriggered {
				t.Error("The alert should've been triggered")
			} else {
				t.Error("The alert shouldn't have been triggered")
			}
		}
	}
}
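These tests don't stub the providers directly; they flip the MOCK_ALERT_PROVIDER and MOCK_ALERT_PROVIDER_ERROR environment variables that handleAlertsToTrigger checks. The same hook pattern in isolation, using hypothetical flag names, looks roughly like this:

package main

import (
	"errors"
	"fmt"
	"os"
)

// notify stands in for a provider's Send call; MOCK_NOTIFIER and MOCK_NOTIFIER_ERROR
// are hypothetical stand-ins for MOCK_ALERT_PROVIDER and MOCK_ALERT_PROVIDER_ERROR.
func notify() error {
	if os.Getenv("MOCK_NOTIFIER") == "true" {
		if os.Getenv("MOCK_NOTIFIER_ERROR") == "true" {
			return errors.New("mocked provider failure")
		}
		return nil // pretend the provider accepted the alert
	}
	// A real implementation would perform the HTTP call to the provider here.
	return errors.New("no real provider wired up in this sketch")
}

func main() {
	os.Setenv("MOCK_NOTIFIER", "true")
	fmt.Println("mock, no error:", notify())
	os.Setenv("MOCK_NOTIFIER_ERROR", "true")
	fmt.Println("mock, forced error:", notify())
}

The trade-off of this approach is that the mock branch lives in production code; a fake behind a provider interface would avoid that, at the cost of more test plumbing.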
watchdog/watchdog.go (new file, 113 lines added)
@@ -0,0 +1,113 @@
package watchdog

import (
	"context"
	"sync"
	"time"

	"github.com/TwiN/gatus/v5/alerting"
	"github.com/TwiN/gatus/v5/config"
	"github.com/TwiN/gatus/v5/config/connectivity"
	"github.com/TwiN/gatus/v5/config/endpoint"
	"github.com/TwiN/gatus/v5/config/maintenance"
	"github.com/TwiN/gatus/v5/metrics"
	"github.com/TwiN/gatus/v5/storage/store"
	"github.com/TwiN/logr"
)

var (
	// monitoringMutex is used to prevent multiple endpoints from being evaluated at the same time.
	// Without this, conditions using response time may become inaccurate.
	monitoringMutex sync.Mutex

	ctx        context.Context
	cancelFunc context.CancelFunc
)

// Monitor loops over each endpoint and starts a goroutine to monitor each endpoint separately
func Monitor(cfg *config.Config) {
	ctx, cancelFunc = context.WithCancel(context.Background())
	for _, endpoint := range cfg.Endpoints {
		if endpoint.IsEnabled() {
			// To prevent multiple requests from running at the same time, we'll wait a little before each iteration
			time.Sleep(777 * time.Millisecond)
			go monitor(endpoint, cfg.Alerting, cfg.Maintenance, cfg.Connectivity, cfg.DisableMonitoringLock, cfg.Metrics, ctx)
		}
	}
}

// monitor a single endpoint in a loop
func monitor(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool, ctx context.Context) {
	// Run it immediately on start
	execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics)
	// Loop for the next executions
	ticker := time.NewTicker(ep.Interval)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			logr.Warnf("[watchdog.monitor] Canceling current execution of group=%s; endpoint=%s; key=%s", ep.Group, ep.Name, ep.Key())
			return
		case <-ticker.C:
			execute(ep, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics)
		}
	}
	// Just in case somebody wandered all the way to here and wonders, "what about ExternalEndpoints?"
	// Alerting is checked every time an external endpoint is pushed to Gatus, so they're not monitored
	// periodically like they are for normal endpoints.
}

func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock bool, enabledMetrics bool) {
	if !disableMonitoringLock {
		// By placing the lock here, we prevent multiple endpoints from being monitored at the exact same time, which
		// could cause performance issues and return inaccurate results
		monitoringMutex.Lock()
		defer monitoringMutex.Unlock()
	}
	// If there's a connectivity checker configured, check if Gatus has internet connectivity
	if connectivityConfig != nil && connectivityConfig.Checker != nil && !connectivityConfig.Checker.IsConnected() {
		logr.Infof("[watchdog.execute] No connectivity; skipping execution")
		return
	}
	logr.Debugf("[watchdog.execute] Monitoring group=%s; endpoint=%s; key=%s", ep.Group, ep.Name, ep.Key())
	result := ep.EvaluateHealth()
	if enabledMetrics {
		metrics.PublishMetricsForEndpoint(ep, result)
	}
	UpdateEndpointStatuses(ep, result)
	if logr.GetThreshold() == logr.LevelDebug && !result.Success {
		logr.Debugf("[watchdog.execute] Monitored group=%s; endpoint=%s; key=%s; success=%v; errors=%d; duration=%s; body=%s", ep.Group, ep.Name, ep.Key(), result.Success, len(result.Errors), result.Duration.Round(time.Millisecond), result.Body)
	} else {
		logr.Infof("[watchdog.execute] Monitored group=%s; endpoint=%s; key=%s; success=%v; errors=%d; duration=%s", ep.Group, ep.Name, ep.Key(), result.Success, len(result.Errors), result.Duration.Round(time.Millisecond))
	}
	inEndpointMaintenanceWindow := false
	for _, maintenanceWindow := range ep.MaintenanceWindows {
		if maintenanceWindow.IsUnderMaintenance() {
			logr.Debug("[watchdog.execute] Under endpoint maintenance window")
			inEndpointMaintenanceWindow = true
		}
	}
	if !maintenanceConfig.IsUnderMaintenance() && !inEndpointMaintenanceWindow {
		// TODO: Consider moving this after the monitoring lock is unlocked? I mean, how much noise can a single alerting provider cause...
		HandleAlerting(ep, result, alertingConfig)
	} else {
		logr.Debug("[watchdog.execute] Not handling alerting because currently in the maintenance window")
	}
	logr.Debugf("[watchdog.execute] Waiting for interval=%s before monitoring group=%s endpoint=%s (key=%s) again", ep.Interval, ep.Group, ep.Name, ep.Key())
}

// UpdateEndpointStatuses updates the slice of endpoint statuses
func UpdateEndpointStatuses(ep *endpoint.Endpoint, result *endpoint.Result) {
	if err := store.Get().Insert(ep, result); err != nil {
		logr.Errorf("[watchdog.UpdateEndpointStatuses] Failed to insert result in storage: %s", err.Error())
	}
}

// Shutdown stops monitoring all endpoints
func Shutdown(cfg *config.Config) {
	// Disable all the old HTTP connections
	for _, ep := range cfg.Endpoints {
		ep.Close()
	}
	cancelFunc()
}
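The monitor loop above is the standard run-once-then-tick pattern with context-based cancellation. Stripped of the gatus-specific parameters, it amounts to roughly this sketch (doWork and the interval are placeholders, not part of the commit):

package main

import (
	"context"
	"fmt"
	"time"
)

// run executes doWork immediately, then on every tick, until ctx is canceled;
// the same shape as monitor() above, without the gatus-specific arguments.
func run(ctx context.Context, interval time.Duration, doWork func()) {
	doWork() // run once on start
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			fmt.Println("canceled, stopping loop")
			return
		case <-ticker.C:
			doWork()
		}
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	go run(ctx, 200*time.Millisecond, func() { fmt.Println("checking endpoint...") })
	time.Sleep(700 * time.Millisecond)
	cancel() // the equivalent of what Shutdown() does via cancelFunc
	time.Sleep(100 * time.Millisecond)
}

Shutdown achieves the same effect as cancel() here: every per-endpoint goroutine observes ctx.Done() on its next select and returns.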