diff --git a/README.md b/README.md index 6964e98f..4f300543 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ You can specify a custom path by setting the `GATUS_CONFIG_FILE` environment var metrics: true # Whether to expose metrics at /metrics services: - name: twinnation # Name of your service, can be anything - url: https://twinnation.org/actuator/health + url: https://twinnation.org/health interval: 15s # Duration to wait between every status check (opt. default: 10s) conditions: - "[STATUS] == 200" diff --git a/config.yaml b/config.yaml index 2a18b403..a84f258d 100644 --- a/config.yaml +++ b/config.yaml @@ -1,7 +1,7 @@ metrics: true services: - name: Twinnation - url: https://twinnation.org/actuator/health + url: https://twinnation.org/health interval: 30s conditions: - "[STATUS] == 200" diff --git a/core/types.go b/core/types.go index 1940b450..b5c39468 100644 --- a/core/types.go +++ b/core/types.go @@ -76,7 +76,7 @@ func (service *Service) EvaluateConditions() *Result { result.Success = false } for _, condition := range service.Conditions { - success := condition.Evaluate(result) + success := condition.evaluate(result) if !success { result.Success = false } @@ -93,7 +93,7 @@ type ConditionResult struct { type Condition string -func (c *Condition) Evaluate(result *Result) bool { +func (c *Condition) evaluate(result *Result) bool { condition := string(*c) if strings.Contains(condition, "==") { parts := sanitizeAndResolve(strings.Split(condition, "=="), result) diff --git a/core/types_test.go b/core/types_test.go index 9ad1f61b..4a55dde2 100644 --- a/core/types_test.go +++ b/core/types_test.go @@ -7,7 +7,7 @@ import ( func TestEvaluateWithIp(t *testing.T) { condition := Condition("[IP] == 127.0.0.1") result := &Result{Ip: "127.0.0.1"} - condition.Evaluate(result) + condition.evaluate(result) if !result.ConditionResults[0].Success { t.Errorf("Condition '%s' should have been a success", condition) } @@ -16,7 +16,7 @@ func TestEvaluateWithIp(t *testing.T) { func TestEvaluateWithStatus(t *testing.T) { condition := Condition("[STATUS] == 201") result := &Result{HttpStatus: 201} - condition.Evaluate(result) + condition.evaluate(result) if !result.ConditionResults[0].Success { t.Errorf("Condition '%s' should have been a success", condition) } @@ -25,7 +25,7 @@ func TestEvaluateWithStatus(t *testing.T) { func TestEvaluateWithFailure(t *testing.T) { condition := Condition("[STATUS] == 200") result := &Result{HttpStatus: 500} - condition.Evaluate(result) + condition.evaluate(result) if result.ConditionResults[0].Success { t.Errorf("Condition '%s' should have been a failure", condition) } diff --git a/example/docker-compose-grafana-prometheus/config.yaml b/example/docker-compose-grafana-prometheus/config.yaml index 4f43281b..dda12429 100644 --- a/example/docker-compose-grafana-prometheus/config.yaml +++ b/example/docker-compose-grafana-prometheus/config.yaml @@ -1,7 +1,7 @@ metrics: true services: - name: TwiNNatioN - url: https://twinnation.org/actuator/health + url: https://twinnation.org/health interval: 10s conditions: - "[STATUS] == 200" diff --git a/example/kubernetes/gatus.yaml b/example/kubernetes/gatus.yaml index ff8cbc66..3c2534fd 100644 --- a/example/kubernetes/gatus.yaml +++ b/example/kubernetes/gatus.yaml @@ -4,7 +4,7 @@ data: metrics: true services: - name: TwiNNatioN - url: https://twinnation.org/actuator/health + url: https://twinnation.org/health interval: 1m conditions: - "[STATUS] == 200" diff --git a/watchdog/watchdog.go b/watchdog/watchdog.go index c3a58e5b..1374c2e3 100644 --- a/watchdog/watchdog.go +++ b/watchdog/watchdog.go @@ -20,28 +20,32 @@ func GetServiceResults() *map[string][]*core.Result { func Monitor(cfg *config.Config) { for _, service := range cfg.Services { - go func(service *core.Service) { - for { - log.Printf("[watchdog][Monitor] Monitoring serviceName=%s", service.Name) - result := service.EvaluateConditions() - metric.PublishMetricsForService(service, result) - rwLock.Lock() - serviceResults[service.Name] = append(serviceResults[service.Name], result) - if len(serviceResults[service.Name]) > 20 { - serviceResults[service.Name] = serviceResults[service.Name][1:] - } - rwLock.Unlock() - log.Printf( - "[watchdog][Monitor] Finished monitoring serviceName=%s; errors=%d; requestDuration=%s", - service.Name, - len(result.Errors), - result.Duration.Round(time.Millisecond), - ) - log.Printf("[watchdog][Monitor] Waiting interval=%s before monitoring serviceName=%s", service.Interval, service.Name) - time.Sleep(service.Interval) - } - }(service) - // To prevent multiple requests from running exactly at the same time - time.Sleep(100 * time.Millisecond) + go monitor(service) + // To prevent multiple requests from running at the same time + time.Sleep(500 * time.Millisecond) + } +} + +func monitor(service *core.Service) { + for { + // By placing the lock here, we prevent multiple services from being monitored at the exact same time, which + // could cause performance issues and return inaccurate results + rwLock.Lock() + log.Printf("[watchdog][Monitor] Monitoring serviceName=%s", service.Name) + result := service.EvaluateConditions() + metric.PublishMetricsForService(service, result) + serviceResults[service.Name] = append(serviceResults[service.Name], result) + if len(serviceResults[service.Name]) > 20 { + serviceResults[service.Name] = serviceResults[service.Name][1:] + } + rwLock.Unlock() + log.Printf( + "[watchdog][Monitor] Finished monitoring serviceName=%s; errors=%d; requestDuration=%s", + service.Name, + len(result.Errors), + result.Duration.Round(time.Millisecond), + ) + log.Printf("[watchdog][Monitor] Waiting interval=%s before monitoring serviceName=%s", service.Interval, service.Name) + time.Sleep(service.Interval) } }