fix(logging): Replace log-level parameter by GATUS_LOG_LEVEL env var (#895)

* fix(logging): Replace log-level parameter by GATUS_LOG_LEVEL env var

* Improve log message if GATUS_LOG_LEVEL isn't set
This commit is contained in:
TwiN
2024-11-13 23:54:00 -05:00
committed by GitHub
parent 8060a77b1f
commit 01131755bc
29 changed files with 239 additions and 256 deletions

View File

@ -4,7 +4,6 @@ import (
"database/sql"
"errors"
"fmt"
"log"
"strconv"
"strings"
"time"
@ -14,6 +13,7 @@ import (
"github.com/TwiN/gatus/v5/storage/store/common"
"github.com/TwiN/gatus/v5/storage/store/common/paging"
"github.com/TwiN/gocache/v2"
"github.com/TwiN/logr"
_ "github.com/lib/pq"
_ "modernc.org/sqlite"
)
@ -237,12 +237,12 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
// Endpoint doesn't exist in the database, insert it
if endpointID, err = s.insertEndpoint(tx, ep); err != nil {
_ = tx.Rollback()
log.Printf("[sql.Insert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
return err
}
} else {
_ = tx.Rollback()
log.Printf("[sql.Insert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
return err
}
}
@ -253,12 +253,12 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
// of type EventStart, in which case we will have to create a new event of type EventHealthy or EventUnhealthy
// based on result.Success.
// 2. The lastResult.Success != result.Success. This implies that the endpoint went from healthy to unhealthy or
// vice-versa, in which case we will have to create a new event of type EventHealthy or EventUnhealthy
// vice versa, in which case we will have to create a new event of type EventHealthy or EventUnhealthy
// based on result.Success.
numberOfEvents, err := s.getNumberOfEventsByEndpointID(tx, endpointID)
if err != nil {
// Silently fail
log.Printf("[sql.Insert] Failed to retrieve total number of events for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to retrieve total number of events for endpoint with key=%s: %s", ep.Key(), err.Error())
}
if numberOfEvents == 0 {
// There's no events yet, which means we need to add the EventStart and the first healthy/unhealthy event
@ -268,18 +268,18 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
})
if err != nil {
// Silently fail
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", endpoint.EventStart, ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", endpoint.EventStart, ep.Key(), err.Error())
}
event := endpoint.NewEventFromResult(result)
if err = s.insertEndpointEvent(tx, endpointID, event); err != nil {
// Silently fail
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
}
} else {
// Get the success value of the previous result
var lastResultSuccess bool
if lastResultSuccess, err = s.getLastEndpointResultSuccessValue(tx, endpointID); err != nil {
log.Printf("[sql.Insert] Failed to retrieve outcome of previous result for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to retrieve outcome of previous result for endpoint with key=%s: %s", ep.Key(), err.Error())
} else {
// If we managed to retrieve the outcome of the previous result, we'll compare it with the new result.
// If the final outcome (success or failure) of the previous and the new result aren't the same, it means
@ -289,7 +289,7 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
event := endpoint.NewEventFromResult(result)
if err = s.insertEndpointEvent(tx, endpointID, event); err != nil {
// Silently fail
log.Printf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to insert event=%s for endpoint with key=%s: %s", event.Type, ep.Key(), err.Error())
}
}
}
@ -298,42 +298,42 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
// (since we're only deleting MaximumNumberOfEvents at a time instead of 1)
if numberOfEvents > eventsCleanUpThreshold {
if err = s.deleteOldEndpointEvents(tx, endpointID); err != nil {
log.Printf("[sql.Insert] Failed to delete old events for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to delete old events for endpoint with key=%s: %s", ep.Key(), err.Error())
}
}
}
// Second, we need to insert the result.
if err = s.insertEndpointResult(tx, endpointID, result); err != nil {
log.Printf("[sql.Insert] Failed to insert result for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to insert result for endpoint with key=%s: %s", ep.Key(), err.Error())
_ = tx.Rollback() // If we can't insert the result, we'll rollback now since there's no point continuing
return err
}
// Clean up old results
numberOfResults, err := s.getNumberOfResultsByEndpointID(tx, endpointID)
if err != nil {
log.Printf("[sql.Insert] Failed to retrieve total number of results for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to retrieve total number of results for endpoint with key=%s: %s", ep.Key(), err.Error())
} else {
if numberOfResults > resultsCleanUpThreshold {
if err = s.deleteOldEndpointResults(tx, endpointID); err != nil {
log.Printf("[sql.Insert] Failed to delete old results for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to delete old results for endpoint with key=%s: %s", ep.Key(), err.Error())
}
}
}
// Finally, we need to insert the uptime data.
// Because the uptime data significantly outlives the results, we can't rely on the results for determining the uptime
if err = s.updateEndpointUptime(tx, endpointID, result); err != nil {
log.Printf("[sql.Insert] Failed to update uptime for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to update uptime for endpoint with key=%s: %s", ep.Key(), err.Error())
}
// Merge hourly uptime entries that can be merged into daily entries and clean up old uptime entries
numberOfUptimeEntries, err := s.getNumberOfUptimeEntriesByEndpointID(tx, endpointID)
if err != nil {
log.Printf("[sql.Insert] Failed to retrieve total number of uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to retrieve total number of uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
} else {
// Merge older hourly uptime entries into daily uptime entries if we have more than uptimeTotalEntriesMergeThreshold
if numberOfUptimeEntries >= uptimeTotalEntriesMergeThreshold {
log.Printf("[sql.Insert] Merging hourly uptime entries for endpoint with key=%s; This is a lot of work, it shouldn't happen too often", ep.Key())
logr.Infof("[sql.Insert] Merging hourly uptime entries for endpoint with key=%s; This is a lot of work, it shouldn't happen too often", ep.Key())
if err = s.mergeHourlyUptimeEntriesOlderThanMergeThresholdIntoDailyUptimeEntries(tx, endpointID); err != nil {
log.Printf("[sql.Insert] Failed to merge hourly uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to merge hourly uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
}
}
}
@ -342,11 +342,11 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
// but if Gatus was temporarily shut down, we might have some old entries that need to be cleaned up
ageOfOldestUptimeEntry, err := s.getAgeOfOldestEndpointUptimeEntry(tx, endpointID)
if err != nil {
log.Printf("[sql.Insert] Failed to retrieve oldest endpoint uptime entry for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to retrieve oldest endpoint uptime entry for endpoint with key=%s: %s", ep.Key(), err.Error())
} else {
if ageOfOldestUptimeEntry > uptimeAgeCleanUpThreshold {
if err = s.deleteOldUptimeEntries(tx, endpointID, time.Now().Add(-(uptimeRetention + time.Hour))); err != nil {
log.Printf("[sql.Insert] Failed to delete old uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.Insert] Failed to delete old uptime entries for endpoint with key=%s: %s", ep.Key(), err.Error())
}
}
}
@ -356,7 +356,7 @@ func (s *Store) Insert(ep *endpoint.Endpoint, result *endpoint.Result) error {
s.writeThroughCache.Delete(cacheKey)
endpointKey, params, err := extractKeyAndParamsFromCacheKey(cacheKey)
if err != nil {
log.Printf("[sql.Insert] Silently deleting cache key %s instead of refreshing due to error: %s", cacheKey, err.Error())
logr.Errorf("[sql.Insert] Silently deleting cache key %s instead of refreshing due to error: %s", cacheKey, err.Error())
continue
}
// Retrieve the endpoint status by key, which will in turn refresh the cache
@ -387,7 +387,7 @@ func (s *Store) DeleteAllEndpointStatusesNotInKeys(keys []string) int {
result, err = s.db.Exec(query, args...)
}
if err != nil {
log.Printf("[sql.DeleteAllEndpointStatusesNotInKeys] Failed to delete rows that do not belong to any of keys=%v: %s", keys, err.Error())
logr.Errorf("[sql.DeleteAllEndpointStatusesNotInKeys] Failed to delete rows that do not belong to any of keys=%v: %s", keys, err.Error())
return 0
}
if s.writeThroughCache != nil {
@ -403,7 +403,7 @@ func (s *Store) DeleteAllEndpointStatusesNotInKeys(keys []string) int {
// GetTriggeredEndpointAlert returns whether the triggered alert for the specified endpoint as well as the necessary information to resolve it
func (s *Store) GetTriggeredEndpointAlert(ep *endpoint.Endpoint, alert *alert.Alert) (exists bool, resolveKey string, numberOfSuccessesInARow int, err error) {
//log.Printf("[sql.GetTriggeredEndpointAlert] Getting triggered alert with checksum=%s for endpoint with key=%s", alert.Checksum(), ep.Key())
//logr.Debugf("[sql.GetTriggeredEndpointAlert] Getting triggered alert with checksum=%s for endpoint with key=%s", alert.Checksum(), ep.Key())
err = s.db.QueryRow(
"SELECT resolve_key, number_of_successes_in_a_row FROM endpoint_alerts_triggered WHERE endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $1 LIMIT 1) AND configuration_checksum = $2",
ep.Key(),
@ -421,7 +421,7 @@ func (s *Store) GetTriggeredEndpointAlert(ep *endpoint.Endpoint, alert *alert.Al
// UpsertTriggeredEndpointAlert inserts/updates a triggered alert for an endpoint
// Used for persistence of triggered alerts across application restarts
func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error {
//log.Printf("[sql.UpsertTriggeredEndpointAlert] Upserting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
//logr.Debugf("[sql.UpsertTriggeredEndpointAlert] Upserting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
tx, err := s.db.Begin()
if err != nil {
return err
@ -433,12 +433,12 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
// This shouldn't happen, but we'll handle it anyway
if endpointID, err = s.insertEndpoint(tx, ep); err != nil {
_ = tx.Rollback()
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to create endpoint with key=%s: %s", ep.Key(), err.Error())
return err
}
} else {
_ = tx.Rollback()
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to retrieve id of endpoint with key=%s: %s", ep.Key(), err.Error())
return err
}
}
@ -457,7 +457,7 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
)
if err != nil {
_ = tx.Rollback()
log.Printf("[sql.UpsertTriggeredEndpointAlert] Failed to persist triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error())
logr.Errorf("[sql.UpsertTriggeredEndpointAlert] Failed to persist triggered alert for endpoint with key=%s: %s", ep.Key(), err.Error())
return err
}
if err = tx.Commit(); err != nil {
@ -468,7 +468,7 @@ func (s *Store) UpsertTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
// DeleteTriggeredEndpointAlert deletes a triggered alert for an endpoint
func (s *Store) DeleteTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAlert *alert.Alert) error {
//log.Printf("[sql.DeleteTriggeredEndpointAlert] Deleting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
//logr.Debugf("[sql.DeleteTriggeredEndpointAlert] Deleting triggered alert with checksum=%s for endpoint with key=%s", triggeredAlert.Checksum(), ep.Key())
_, err := s.db.Exec("DELETE FROM endpoint_alerts_triggered WHERE configuration_checksum = $1 AND endpoint_id = (SELECT endpoint_id FROM endpoints WHERE endpoint_key = $2 LIMIT 1)", triggeredAlert.Checksum(), ep.Key())
return err
}
@ -477,7 +477,7 @@ func (s *Store) DeleteTriggeredEndpointAlert(ep *endpoint.Endpoint, triggeredAle
// configurations are not provided in the checksums list.
// This prevents triggered alerts that have been removed or modified from lingering in the database.
func (s *Store) DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep *endpoint.Endpoint, checksums []string) int {
//log.Printf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Deleting triggered alerts for endpoint with key=%s that do not belong to any of checksums=%v", ep.Key(), checksums)
//logr.Debugf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Deleting triggered alerts for endpoint with key=%s that do not belong to any of checksums=%v", ep.Key(), checksums)
var err error
var result sql.Result
if len(checksums) == 0 {
@ -498,7 +498,7 @@ func (s *Store) DeleteAllTriggeredAlertsNotInChecksumsByEndpoint(ep *endpoint.En
result, err = s.db.Exec(query, args...)
}
if err != nil {
log.Printf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Failed to delete rows for endpoint with key=%s that do not belong to any of checksums=%v: %s", ep.Key(), checksums, err.Error())
logr.Errorf("[sql.DeleteAllTriggeredAlertsNotInChecksumsByEndpoint] Failed to delete rows for endpoint with key=%s that do not belong to any of checksums=%v: %s", ep.Key(), checksums, err.Error())
return 0
}
// Return number of rows deleted
@ -530,7 +530,7 @@ func (s *Store) Close() {
// insertEndpoint inserts an endpoint in the store and returns the generated id of said endpoint
func (s *Store) insertEndpoint(tx *sql.Tx, ep *endpoint.Endpoint) (int64, error) {
//log.Printf("[sql.insertEndpoint] Inserting endpoint with group=%s and name=%s", ep.Group, ep.Name)
//logr.Debugf("[sql.insertEndpoint] Inserting endpoint with group=%s and name=%s", ep.Group, ep.Name)
var id int64
err := tx.QueryRow(
"INSERT INTO endpoints (endpoint_key, endpoint_name, endpoint_group) VALUES ($1, $2, $3) RETURNING endpoint_id",
@ -655,12 +655,12 @@ func (s *Store) getEndpointStatusByKey(tx *sql.Tx, key string, parameters *pagin
endpointStatus := endpoint.NewStatus(group, endpointName)
if parameters.EventsPageSize > 0 {
if endpointStatus.Events, err = s.getEndpointEventsByEndpointID(tx, endpointID, parameters.EventsPage, parameters.EventsPageSize); err != nil {
log.Printf("[sql.getEndpointStatusByKey] Failed to retrieve events for key=%s: %s", key, err.Error())
logr.Errorf("[sql.getEndpointStatusByKey] Failed to retrieve events for key=%s: %s", key, err.Error())
}
}
if parameters.ResultsPageSize > 0 {
if endpointStatus.Results, err = s.getEndpointResultsByEndpointID(tx, endpointID, parameters.ResultsPage, parameters.ResultsPageSize); err != nil {
log.Printf("[sql.getEndpointStatusByKey] Failed to retrieve results for key=%s: %s", key, err.Error())
logr.Errorf("[sql.getEndpointStatusByKey] Failed to retrieve results for key=%s: %s", key, err.Error())
}
}
if s.writeThroughCache != nil {
@ -735,7 +735,7 @@ func (s *Store) getEndpointResultsByEndpointID(tx *sql.Tx, endpointID int64, pag
var joinedErrors string
err = rows.Scan(&id, &result.Success, &joinedErrors, &result.Connected, &result.HTTPStatus, &result.DNSRCode, &result.CertificateExpiration, &result.DomainExpiration, &result.Hostname, &result.IP, &result.Duration, &result.Timestamp)
if err != nil {
log.Printf("[sql.getEndpointResultsByEndpointID] Silently failed to retrieve endpoint result for endpointID=%d: %s", endpointID, err.Error())
logr.Errorf("[sql.getEndpointResultsByEndpointID] Silently failed to retrieve endpoint result for endpointID=%d: %s", endpointID, err.Error())
err = nil
}
if len(joinedErrors) != 0 {