feat(connectivity): Allow internet connection validation prior to endpoint execution (#461)
This commit is contained in:
		
							
								
								
									
										23
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										23
									
								
								README.md
									
									
									
									
									
								
							| @ -71,6 +71,7 @@ Have any feedback or questions? [Create a discussion](https://github.com/TwiN/ga | |||||||
|     - [OIDC](#oidc) |     - [OIDC](#oidc) | ||||||
|   - [TLS Encryption](#tls-encryption) |   - [TLS Encryption](#tls-encryption) | ||||||
|   - [Metrics](#metrics) |   - [Metrics](#metrics) | ||||||
|  |   - [Connectivity](#connectivity) | ||||||
|   - [Remote instances (EXPERIMENTAL)](#remote-instances-experimental) |   - [Remote instances (EXPERIMENTAL)](#remote-instances-experimental) | ||||||
| - [Deployment](#deployment) | - [Deployment](#deployment) | ||||||
|   - [Docker](#docker) |   - [Docker](#docker) | ||||||
| @ -1256,6 +1257,28 @@ endpoint on the same port your application is configured to run on (`web.port`). | |||||||
| See [examples/docker-compose-grafana-prometheus](.examples/docker-compose-grafana-prometheus) for further documentation as well as an example. | See [examples/docker-compose-grafana-prometheus](.examples/docker-compose-grafana-prometheus) for further documentation as well as an example. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ### Connectivity | ||||||
|  | | Parameter                       | Description                                                  | Default       | | ||||||
|  | |:--------------------------------|:-------------------------------------------------------------|:--------------| | ||||||
|  | | `connectivity`                  | Connectivity configuration                                   | `{}`          | | ||||||
|  | | `connectivity.checker`          | Connectivity checker configuration                           | Required `{}` | | ||||||
|  | | `connectivity.checker.target`   | Host to use for validating connectivity, suffixed with `:53` | Required `""` | | ||||||
|  | | `connectivity.checker.interval` | Interval at which to validate connectivity (minimum `5s`)    | `1m`          | | ||||||
|  |  | ||||||
|  | While Gatus is used to monitor other services, it is possible for Gatus itself to lose connectivity to the internet. | ||||||
|  | In order to prevent Gatus from reporting endpoints as unhealthy when Gatus itself is unhealthy, you may configure  | ||||||
|  | Gatus to periodically check for internet connectivity. | ||||||
|  |  | ||||||
|  | All endpoint executions are skipped while the connectivity checker deems connectivity to be down. | ||||||
|  |  | ||||||
|  | ```yaml | ||||||
|  | connectivity: | ||||||
|  |   checker: | ||||||
|  |     target: 1.1.1.1:53 | ||||||
|  |     interval: 60s | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Remote instances (EXPERIMENTAL) | ### Remote instances (EXPERIMENTAL) | ||||||
| This feature allows you to retrieve endpoint statuses from a remote Gatus instance. | This feature allows you to retrieve endpoint statuses from a remote Gatus instance. | ||||||
|  |  | ||||||
|  | |||||||
| @ -191,6 +191,9 @@ func TestCanCreateTCPConnection(t *testing.T) { | |||||||
| 	if CanCreateTCPConnection("127.0.0.1", &Config{Timeout: 5 * time.Second}) { | 	if CanCreateTCPConnection("127.0.0.1", &Config{Timeout: 5 * time.Second}) { | ||||||
| 		t.Error("should've failed, because there's no port in the address") | 		t.Error("should've failed, because there's no port in the address") | ||||||
| 	} | 	} | ||||||
|  | 	if !CanCreateTCPConnection("1.1.1.1:53", &Config{Timeout: 5 * time.Second}) { | ||||||
|  | 		t.Error("should've succeeded, because that IP should always™ be up") | ||||||
|  | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| // This test checks if a HTTP client configured with `configureOAuth2()` automatically | // This test checks if a HTTP client configured with `configureOAuth2()` automatically | ||||||
|  | |||||||
| @ -14,6 +14,7 @@ import ( | |||||||
| 	"github.com/TwiN/gatus/v5/alerting" | 	"github.com/TwiN/gatus/v5/alerting" | ||||||
| 	"github.com/TwiN/gatus/v5/alerting/alert" | 	"github.com/TwiN/gatus/v5/alerting/alert" | ||||||
| 	"github.com/TwiN/gatus/v5/alerting/provider" | 	"github.com/TwiN/gatus/v5/alerting/provider" | ||||||
|  | 	"github.com/TwiN/gatus/v5/config/connectivity" | ||||||
| 	"github.com/TwiN/gatus/v5/config/maintenance" | 	"github.com/TwiN/gatus/v5/config/maintenance" | ||||||
| 	"github.com/TwiN/gatus/v5/config/remote" | 	"github.com/TwiN/gatus/v5/config/remote" | ||||||
| 	"github.com/TwiN/gatus/v5/config/ui" | 	"github.com/TwiN/gatus/v5/config/ui" | ||||||
| @ -91,6 +92,9 @@ type Config struct { | |||||||
| 	// WARNING: This is in ALPHA and may change or be completely removed in the future | 	// WARNING: This is in ALPHA and may change or be completely removed in the future | ||||||
| 	Remote *remote.Config `yaml:"remote,omitempty"` | 	Remote *remote.Config `yaml:"remote,omitempty"` | ||||||
|  |  | ||||||
|  | 	// Connectivity is the configuration for connectivity | ||||||
|  | 	Connectivity *connectivity.Config `yaml:"connectivity,omitempty"` | ||||||
|  |  | ||||||
| 	configPath      string    // path to the file or directory from which config was loaded | 	configPath      string    // path to the file or directory from which config was loaded | ||||||
| 	lastFileModTime time.Time // last modification time | 	lastFileModTime time.Time // last modification time | ||||||
| } | } | ||||||
| @ -252,10 +256,20 @@ func parseAndValidateConfigBytes(yamlBytes []byte) (config *Config, err error) { | |||||||
| 		if err := validateRemoteConfig(config); err != nil { | 		if err := validateRemoteConfig(config); err != nil { | ||||||
| 			return nil, err | 			return nil, err | ||||||
| 		} | 		} | ||||||
|  | 		if err := validateConnectivityConfig(config); err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 	return | 	return | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // validateConnectivityConfig validates the connectivity configuration and applies its | ||||||
|  | // defaults. A configuration without a connectivity section is valid as-is. | ||||||
|  | func validateConnectivityConfig(config *Config) error { | ||||||
|  | 	if config.Connectivity == nil { | ||||||
|  | 		// No connectivity section was provided, so there is nothing to validate | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  | 	return config.Connectivity.ValidateAndSetDefaults() | ||||||
|  | } | ||||||
|  |  | ||||||
| func validateRemoteConfig(config *Config) error { | func validateRemoteConfig(config *Config) error { | ||||||
| 	if config.Remote != nil { | 	if config.Remote != nil { | ||||||
| 		if err := config.Remote.ValidateAndSetDefaults(); err != nil { | 		if err := config.Remote.ValidateAndSetDefaults(); err != nil { | ||||||
|  | |||||||
							
								
								
									
										53
									
								
								config/connectivity/connectivity.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								config/connectivity/connectivity.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,53 @@ | |||||||
|  | package connectivity | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"errors" | ||||||
|  | 	"strings" | ||||||
|  | 	"sync" | ||||||
|  | 	"time" | ||||||
|  |  | ||||||
|  | 	"github.com/TwiN/gatus/v5/client" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | var ( | ||||||
|  | 	ErrInvalidInterval  = errors.New("connectivity.checker.interval must be 5s or higher") | ||||||
|  | 	ErrInvalidDNSTarget = errors.New("connectivity.checker.target must be suffixed with :53") | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | // Config is the configuration for connectivity validation. | ||||||
|  | type Config struct { | ||||||
|  | 	// Checker is the optional connectivity checker; when nil, nothing is validated. | ||||||
|  | 	Checker *Checker `yaml:"checker,omitempty"` | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // ValidateAndSetDefaults validates the checker configuration, if any, and applies | ||||||
|  | // the default interval of 60 seconds when no interval was specified. | ||||||
|  | // | ||||||
|  | // It returns ErrInvalidInterval if the interval is non-zero but lower than 5 seconds, | ||||||
|  | // and ErrInvalidDNSTarget if the target does not end with ":53". | ||||||
|  | func (c *Config) ValidateAndSetDefaults() error { | ||||||
|  | 	checker := c.Checker | ||||||
|  | 	if checker == nil { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  | 	// The interval is handled before the target, so an invalid interval takes precedence | ||||||
|  | 	switch { | ||||||
|  | 	case checker.Interval == 0: | ||||||
|  | 		checker.Interval = 60 * time.Second | ||||||
|  | 	case checker.Interval < 5*time.Second: | ||||||
|  | 		return ErrInvalidInterval | ||||||
|  | 	} | ||||||
|  | 	if strings.HasSuffix(checker.Target, ":53") { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  | 	return ErrInvalidDNSTarget | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Checker is the configuration for making sure Gatus has access to the internet. | ||||||
|  | // | ||||||
|  | // The outcome of the last check is cached and reused until Interval has elapsed. | ||||||
|  | // The cache is guarded by a mutex because IsConnected may be called from several | ||||||
|  | // endpoint goroutines at the same time when the monitoring lock is disabled. | ||||||
|  | // A Checker must not be copied after first use. | ||||||
|  | type Checker struct { | ||||||
|  | 	Target   string        `yaml:"target"` // e.g. 1.1.1.1:53 | ||||||
|  | 	Interval time.Duration `yaml:"interval,omitempty"` | ||||||
|  |  | ||||||
|  | 	mutex       sync.Mutex // guards isConnected and lastCheck | ||||||
|  | 	isConnected bool       // result of the last call to Check made by IsConnected | ||||||
|  | 	lastCheck   time.Time  // time at which isConnected was last refreshed | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Check reports whether a TCP connection to Target can be created, passing a | ||||||
|  | // 5 second timeout to the client. It does not read or update the cached result. | ||||||
|  | func (c *Checker) Check() bool { | ||||||
|  | 	return client.CanCreateTCPConnection(c.Target, &client.Config{Timeout: 5 * time.Second}) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // IsConnected returns the cached connectivity status, refreshing it through Check | ||||||
|  | // when more than Interval has elapsed since the last refresh. | ||||||
|  | // | ||||||
|  | // The mutex is held while the check runs, so concurrent callers wait for the | ||||||
|  | // refreshed result instead of each performing their own check. | ||||||
|  | func (c *Checker) IsConnected() bool { | ||||||
|  | 	c.mutex.Lock() | ||||||
|  | 	defer c.mutex.Unlock() | ||||||
|  | 	if now := time.Now(); now.After(c.lastCheck.Add(c.Interval)) { | ||||||
|  | 		c.lastCheck, c.isConnected = now, c.Check() | ||||||
|  | 	} | ||||||
|  | 	return c.isConnected | ||||||
|  | } | ||||||
							
								
								
									
										62
									
								
								config/connectivity/connectivity_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								config/connectivity/connectivity_test.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,62 @@ | |||||||
|  | package connectivity | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"fmt" | ||||||
|  | 	"testing" | ||||||
|  | 	"time" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | func TestConfig(t *testing.T) { | ||||||
|  | 	scenarios := []struct { | ||||||
|  | 		name             string | ||||||
|  | 		cfg              *Config | ||||||
|  | 		expectedErr      error | ||||||
|  | 		expectedInterval time.Duration | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			name:             "good-config", | ||||||
|  | 			cfg:              &Config{Checker: &Checker{Target: "1.1.1.1:53", Interval: 10 * time.Second}}, | ||||||
|  | 			expectedInterval: 10 * time.Second, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:             "good-config-with-default-interval", | ||||||
|  | 			cfg:              &Config{Checker: &Checker{Target: "8.8.8.8:53", Interval: 0}}, | ||||||
|  | 			expectedInterval: 60 * time.Second, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:        "config-with-interval-too-low", | ||||||
|  | 			cfg:         &Config{Checker: &Checker{Target: "1.1.1.1:53", Interval: 4 * time.Second}}, | ||||||
|  | 			expectedErr: ErrInvalidInterval, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:        "config-with-invalid-target-due-to-missing-port", | ||||||
|  | 			cfg:         &Config{Checker: &Checker{Target: "1.1.1.1", Interval: 15 * time.Second}}, | ||||||
|  | 			expectedErr: ErrInvalidDNSTarget, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:        "config-with-invalid-target-due-to-invalid-dns-port", | ||||||
|  | 			cfg:         &Config{Checker: &Checker{Target: "1.1.1.1:52", Interval: 15 * time.Second}}, | ||||||
|  | 			expectedErr: ErrInvalidDNSTarget, | ||||||
|  | 		}, | ||||||
|  | 	} | ||||||
|  | 	for _, scenario := range scenarios { | ||||||
|  | 		t.Run(scenario.name, func(t *testing.T) { | ||||||
|  | 			err := scenario.cfg.ValidateAndSetDefaults() | ||||||
|  | 			if fmt.Sprintf("%s", err) != fmt.Sprintf("%s", scenario.expectedErr) { | ||||||
|  | 				t.Errorf("expected error %v, got %v", scenario.expectedErr, err) | ||||||
|  | 			} | ||||||
|  | 			if err == nil && scenario.expectedErr == nil { | ||||||
|  | 				if scenario.cfg.Checker.Interval != scenario.expectedInterval { | ||||||
|  | 					t.Errorf("expected interval %v, got %v", scenario.expectedInterval, scenario.cfg.Checker.Interval) | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		}) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // TestChecker_IsConnected verifies that a checker targeting 1.1.1.1:53 reports being connected. | ||||||
|  | // NOTE(review): this appears to need outbound network access to pass; confirm for CI. | ||||||
|  | func TestChecker_IsConnected(t *testing.T) { | ||||||
|  | 	checker := &Checker{Target: "1.1.1.1:53", Interval: 10 * time.Second} | ||||||
|  | 	if connected := checker.IsConnected(); !connected { | ||||||
|  | 		t.Error("expected checker.IsConnected() to be true") | ||||||
|  | 	} | ||||||
|  | } | ||||||
| @ -8,6 +8,7 @@ import ( | |||||||
|  |  | ||||||
| 	"github.com/TwiN/gatus/v5/alerting" | 	"github.com/TwiN/gatus/v5/alerting" | ||||||
| 	"github.com/TwiN/gatus/v5/config" | 	"github.com/TwiN/gatus/v5/config" | ||||||
|  | 	"github.com/TwiN/gatus/v5/config/connectivity" | ||||||
| 	"github.com/TwiN/gatus/v5/config/maintenance" | 	"github.com/TwiN/gatus/v5/config/maintenance" | ||||||
| 	"github.com/TwiN/gatus/v5/core" | 	"github.com/TwiN/gatus/v5/core" | ||||||
| 	"github.com/TwiN/gatus/v5/metrics" | 	"github.com/TwiN/gatus/v5/metrics" | ||||||
| @ -30,15 +31,15 @@ func Monitor(cfg *config.Config) { | |||||||
| 		if endpoint.IsEnabled() { | 		if endpoint.IsEnabled() { | ||||||
| 			// To prevent multiple requests from running at the same time, we'll wait for a little before each iteration | 			// To prevent multiple requests from running at the same time, we'll wait for a little before each iteration | ||||||
| 			time.Sleep(777 * time.Millisecond) | 			time.Sleep(777 * time.Millisecond) | ||||||
| 			go monitor(endpoint, cfg.Alerting, cfg.Maintenance, cfg.DisableMonitoringLock, cfg.Metrics, cfg.Debug, ctx) | 			go monitor(endpoint, cfg.Alerting, cfg.Maintenance, cfg.Connectivity, cfg.DisableMonitoringLock, cfg.Metrics, cfg.Debug, ctx) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| // monitor a single endpoint in a loop | // monitor a single endpoint in a loop | ||||||
| func monitor(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, disableMonitoringLock, enabledMetrics, debug bool, ctx context.Context) { | func monitor(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock, enabledMetrics, debug bool, ctx context.Context) { | ||||||
| 	// Run it immediately on start | 	// Run it immediately on start | ||||||
| 	execute(endpoint, alertingConfig, maintenanceConfig, disableMonitoringLock, enabledMetrics, debug) | 	execute(endpoint, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics, debug) | ||||||
| 	// Loop for the next executions | 	// Loop for the next executions | ||||||
| 	for { | 	for { | ||||||
| 		select { | 		select { | ||||||
| @ -46,16 +47,22 @@ func monitor(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenan | |||||||
| 			log.Printf("[watchdog][monitor] Canceling current execution of group=%s; endpoint=%s", endpoint.Group, endpoint.Name) | 			log.Printf("[watchdog][monitor] Canceling current execution of group=%s; endpoint=%s", endpoint.Group, endpoint.Name) | ||||||
| 			return | 			return | ||||||
| 		case <-time.After(endpoint.Interval): | 		case <-time.After(endpoint.Interval): | ||||||
| 			execute(endpoint, alertingConfig, maintenanceConfig, disableMonitoringLock, enabledMetrics, debug) | 			execute(endpoint, alertingConfig, maintenanceConfig, connectivityConfig, disableMonitoringLock, enabledMetrics, debug) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| func execute(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, disableMonitoringLock, enabledMetrics, debug bool) { | func execute(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenanceConfig *maintenance.Config, connectivityConfig *connectivity.Config, disableMonitoringLock, enabledMetrics, debug bool) { | ||||||
| 	if !disableMonitoringLock { | 	if !disableMonitoringLock { | ||||||
| 		// By placing the lock here, we prevent multiple endpoints from being monitored at the exact same time, which | 		// By placing the lock here, we prevent multiple endpoints from being monitored at the exact same time, which | ||||||
| 		// could cause performance issues and return inaccurate results | 		// could cause performance issues and return inaccurate results | ||||||
| 		monitoringMutex.Lock() | 		monitoringMutex.Lock() | ||||||
|  | 		defer monitoringMutex.Unlock() | ||||||
|  | 	} | ||||||
|  | 	// If there's a connectivity checker configured, check if Gatus has internet connectivity | ||||||
|  | 	if connectivityConfig != nil && connectivityConfig.Checker != nil && !connectivityConfig.Checker.IsConnected() { | ||||||
|  | 		log.Println("[watchdog][execute] No connectivity; skipping execution") | ||||||
|  | 		return | ||||||
| 	} | 	} | ||||||
| 	if debug { | 	if debug { | ||||||
| 		log.Printf("[watchdog][execute] Monitoring group=%s; endpoint=%s", endpoint.Group, endpoint.Name) | 		log.Printf("[watchdog][execute] Monitoring group=%s; endpoint=%s", endpoint.Group, endpoint.Name) | ||||||
| @ -79,9 +86,6 @@ func execute(endpoint *core.Endpoint, alertingConfig *alerting.Config, maintenan | |||||||
| 	if debug { | 	if debug { | ||||||
| 		log.Printf("[watchdog][execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", endpoint.Interval, endpoint.Group, endpoint.Name) | 		log.Printf("[watchdog][execute] Waiting for interval=%s before monitoring group=%s endpoint=%s again", endpoint.Interval, endpoint.Group, endpoint.Name) | ||||||
| 	} | 	} | ||||||
| 	if !disableMonitoringLock { |  | ||||||
| 		monitoringMutex.Unlock() |  | ||||||
| 	} |  | ||||||
| } | } | ||||||
|  |  | ||||||
| // UpdateEndpointStatuses updates the slice of endpoint statuses | // UpdateEndpointStatuses updates the slice of endpoint statuses | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user