feat(maintenance): Per-endpoint maintenance configuration (#982)
* feat: add endpoint.maintenance-windows array for per-endpoint maintenance configuration
* doc: initial entry for maintenance windows in endpoint config
* doc: example documentation for per-endpoint configuration of maintenance windows
* chore: var => :=
* test: add checks for maintenance window defaults in endpoint configuration
* chore: clean up new-lines

---------

Co-authored-by: TwiN <twin@linux.com>
This commit is contained in:
parent
7e122a9fd9
commit
a1f7bd7b73
14
README.md
14
README.md
@ -273,6 +273,7 @@ You can then configure alerts to be triggered when an endpoint is unhealthy once
|
||||
| `endpoints[].ssh.username` | SSH username (e.g. example). | Required `""` |
|
||||
| `endpoints[].ssh.password` | SSH password (e.g. password). | Required `""` |
|
||||
| `endpoints[].alerts` | List of all alerts for a given endpoint. <br />See [Alerting](#alerting). | `[]` |
|
||||
| `endpoints[].maintenance-windows` | List of all maintenance windows for a given endpoint. <br />See [Maintenance](#maintenance). | `[]` |
|
||||
| `endpoints[].client` | [Client configuration](#client-configuration). | `{}` |
|
||||
| `endpoints[].ui` | UI configuration at the endpoint level. | `{}` |
|
||||
| `endpoints[].ui.hide-conditions` | Whether to hide conditions from the results. Note that this only hides conditions from results evaluated from the moment this was enabled. | `false` |
|
||||
@ -1710,6 +1711,19 @@ maintenance:
|
||||
- Monday
|
||||
- Thursday
|
||||
```
|
||||
You can also specify maintenance windows on a per-endpoint basis:
|
||||
```yaml
|
||||
endpoints:
|
||||
- name: endpoint-1
|
||||
url: "https://example.org"
|
||||
maintenance-windows:
|
||||
- start: "07:30"
|
||||
duration: 40m
|
||||
timezone: "Europe/Berlin"
|
||||
- start: "14:30"
|
||||
duration: 1h
|
||||
timezone: "Europe/Berlin"
|
||||
```
|
||||
|
||||
|
||||
### Security
|
||||
|
@ -13,6 +13,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/TwiN/gatus/v5/config/maintenance"
|
||||
"github.com/TwiN/gatus/v5/alerting/alert"
|
||||
"github.com/TwiN/gatus/v5/client"
|
||||
"github.com/TwiN/gatus/v5/config/endpoint/dns"
|
||||
@ -104,6 +105,9 @@ type Endpoint struct {
|
||||
// Alerts is the alerting configuration for the endpoint in case of failure
|
||||
Alerts []*alert.Alert `yaml:"alerts,omitempty"`
|
||||
|
||||
// MaintenanceWindows is the configuration for per-endpoint maintenance windows
|
||||
MaintenanceWindows []*maintenance.Config `yaml:"maintenance-windows,omitempty"`
|
||||
|
||||
// DNSConfig is the configuration for DNS monitoring
|
||||
DNSConfig *dns.Config `yaml:"dns,omitempty"`
|
||||
|
||||
@ -219,6 +223,11 @@ func (e *Endpoint) ValidateAndSetDefaults() error {
|
||||
if e.Type() == TypeUNKNOWN {
|
||||
return ErrUnknownEndpointType
|
||||
}
|
||||
for _, maintenanceWindow := range e.MaintenanceWindows {
|
||||
if err := maintenanceWindow.ValidateAndSetDefaults(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// Make sure that the request can be created
|
||||
_, err := http.NewRequest(e.Method, e.URL, bytes.NewBuffer([]byte(e.Body)))
|
||||
if err != nil {
|
||||
|
@ -16,6 +16,7 @@ import (
|
||||
"github.com/TwiN/gatus/v5/config/endpoint/dns"
|
||||
"github.com/TwiN/gatus/v5/config/endpoint/ssh"
|
||||
"github.com/TwiN/gatus/v5/config/endpoint/ui"
|
||||
"github.com/TwiN/gatus/v5/config/maintenance"
|
||||
"github.com/TwiN/gatus/v5/test"
|
||||
)
|
||||
|
||||
@ -390,10 +391,11 @@ func TestEndpoint_Type(t *testing.T) {
|
||||
|
||||
func TestEndpoint_ValidateAndSetDefaults(t *testing.T) {
|
||||
endpoint := Endpoint{
|
||||
Name: "website-health",
|
||||
URL: "https://twin.sh/health",
|
||||
Conditions: []Condition{Condition("[STATUS] == 200")},
|
||||
Alerts: []*alert.Alert{{Type: alert.TypePagerDuty}},
|
||||
Name: "website-health",
|
||||
URL: "https://twin.sh/health",
|
||||
Conditions: []Condition{Condition("[STATUS] == 200")},
|
||||
Alerts: []*alert.Alert{{Type: alert.TypePagerDuty}},
|
||||
MaintenanceWindows: []*maintenance.Config{{Start: "03:50", Duration: 4 * time.Hour}},
|
||||
}
|
||||
if err := endpoint.ValidateAndSetDefaults(); err != nil {
|
||||
t.Errorf("Expected no error, got %v", err)
|
||||
@ -432,6 +434,15 @@ func TestEndpoint_ValidateAndSetDefaults(t *testing.T) {
|
||||
if endpoint.Alerts[0].FailureThreshold != 3 {
|
||||
t.Error("Endpoint alert should've defaulted to a failure threshold of 3")
|
||||
}
|
||||
if len(endpoint.MaintenanceWindows) != 1 {
|
||||
t.Error("Endpoint should've had 1 maintenance window")
|
||||
}
|
||||
if !endpoint.MaintenanceWindows[0].IsEnabled() {
|
||||
t.Error("Endpoint maintenance should've defaulted to true")
|
||||
}
|
||||
if endpoint.MaintenanceWindows[0].Timezone != "UTC" {
|
||||
t.Error("Endpoint maintenance should've defaulted to UTC")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEndpoint_ValidateAndSetDefaultsWithInvalidCondition(t *testing.T) {
|
||||
|
@ -80,7 +80,14 @@ func execute(ep *endpoint.Endpoint, alertingConfig *alerting.Config, maintenance
|
||||
} else {
|
||||
logr.Infof("[watchdog.execute] Monitored group=%s; endpoint=%s; key=%s; success=%v; errors=%d; duration=%s", ep.Group, ep.Name, ep.Key(), result.Success, len(result.Errors), result.Duration.Round(time.Millisecond))
|
||||
}
|
||||
if !maintenanceConfig.IsUnderMaintenance() {
|
||||
inEndpointMaintenanceWindow := false
|
||||
for _, maintenanceWindow := range ep.MaintenanceWindows {
|
||||
if maintenanceWindow.IsUnderMaintenance() {
|
||||
logr.Debug("[watchdog.execute] Under endpoint maintenance window")
|
||||
inEndpointMaintenanceWindow = true
|
||||
}
|
||||
}
|
||||
if !maintenanceConfig.IsUnderMaintenance() && !inEndpointMaintenanceWindow {
|
||||
// TODO: Consider moving this after the monitoring lock is unlocked? I mean, how much noise can a single alerting provider cause...
|
||||
HandleAlerting(ep, result, alertingConfig)
|
||||
} else {
|
||||
|
Loading…
x
Reference in New Issue
Block a user