Unify uptime hourly metrics under Uptime.HourlyStatistics and add metric for response time
This commit is contained in:
parent
347297a8ea
commit
e91462ce41
@ -1,6 +1,7 @@
|
|||||||
package core
|
package core
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"log"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -10,6 +11,7 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// Uptime is the struct that contains the relevant data for calculating the uptime as well as the uptime itself
|
// Uptime is the struct that contains the relevant data for calculating the uptime as well as the uptime itself
|
||||||
|
// and some other statistics
|
||||||
type Uptime struct {
|
type Uptime struct {
|
||||||
// LastSevenDays is the uptime percentage over the past 7 days
|
// LastSevenDays is the uptime percentage over the past 7 days
|
||||||
LastSevenDays float64 `json:"7d"`
|
LastSevenDays float64 `json:"7d"`
|
||||||
@ -22,43 +24,62 @@ type Uptime struct {
|
|||||||
|
|
||||||
// SuccessfulExecutionsPerHour is a map containing the number of successes (value)
|
// SuccessfulExecutionsPerHour is a map containing the number of successes (value)
|
||||||
// for every hourly unix timestamp (key)
|
// for every hourly unix timestamp (key)
|
||||||
|
// Deprecated: use HourlyStatistics instead.
|
||||||
SuccessfulExecutionsPerHour map[int64]uint64 `json:"-"`
|
SuccessfulExecutionsPerHour map[int64]uint64 `json:"-"`
|
||||||
|
|
||||||
// TotalExecutionsPerHour is a map containing the total number of checks (value)
|
// TotalExecutionsPerHour is a map containing the total number of checks (value)
|
||||||
// for every hourly unix timestamp (key)
|
// for every hourly unix timestamp (key)
|
||||||
|
// Deprecated: use HourlyStatistics instead.
|
||||||
TotalExecutionsPerHour map[int64]uint64 `json:"-"`
|
TotalExecutionsPerHour map[int64]uint64 `json:"-"`
|
||||||
|
|
||||||
|
// HourlyStatistics is a map containing the metrics collected (value) for every hourly unix timestamp (key)
|
||||||
|
HourlyStatistics map[int64]*HourlyUptimeStatistics `json:"-"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// HourlyUptimeStatistics is a struct containing all metrics collected over the course of an hour.
// Uptime.HourlyStatistics stores one instance per hourly unix timestamp.
|
||||||
|
type HourlyUptimeStatistics struct {
|
||||||
|
TotalExecutions uint64 // Total number of checks recorded during the hour
|
||||||
|
SuccessfulExecutions uint64 // Number of those checks that were successful
|
||||||
|
TotalExecutionsResponseTime uint64 // Sum of the response times of all executions, in milliseconds
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewUptime creates a new Uptime
|
// NewUptime creates a new Uptime
|
||||||
func NewUptime() *Uptime {
|
func NewUptime() *Uptime {
|
||||||
return &Uptime{
|
return &Uptime{
|
||||||
SuccessfulExecutionsPerHour: make(map[int64]uint64),
|
HourlyStatistics: make(map[int64]*HourlyUptimeStatistics),
|
||||||
TotalExecutionsPerHour: make(map[int64]uint64),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ProcessResult processes the result by extracting the relevant data from the result and recalculating the uptime
|
// ProcessResult processes the result by extracting the relevant data from the result and recalculating the uptime
|
||||||
// if necessary
|
// if necessary
|
||||||
func (uptime *Uptime) ProcessResult(result *Result) {
|
func (uptime *Uptime) ProcessResult(result *Result) {
|
||||||
if uptime.SuccessfulExecutionsPerHour == nil || uptime.TotalExecutionsPerHour == nil {
|
// XXX: Remove this on v3.0.0
|
||||||
uptime.SuccessfulExecutionsPerHour = make(map[int64]uint64)
|
if len(uptime.SuccessfulExecutionsPerHour) != 0 || len(uptime.TotalExecutionsPerHour) != 0 {
|
||||||
uptime.TotalExecutionsPerHour = make(map[int64]uint64)
|
uptime.migrateToHourlyStatistics()
|
||||||
|
}
|
||||||
|
if uptime.HourlyStatistics == nil {
|
||||||
|
uptime.HourlyStatistics = make(map[int64]*HourlyUptimeStatistics)
|
||||||
}
|
}
|
||||||
unixTimestampFlooredAtHour := result.Timestamp.Unix() - (result.Timestamp.Unix() % 3600)
|
unixTimestampFlooredAtHour := result.Timestamp.Unix() - (result.Timestamp.Unix() % 3600)
|
||||||
if result.Success {
|
hourlyStats, _ := uptime.HourlyStatistics[unixTimestampFlooredAtHour]
|
||||||
uptime.SuccessfulExecutionsPerHour[unixTimestampFlooredAtHour]++
|
if hourlyStats == nil {
|
||||||
|
hourlyStats = &HourlyUptimeStatistics{}
|
||||||
|
uptime.HourlyStatistics[unixTimestampFlooredAtHour] = hourlyStats
|
||||||
}
|
}
|
||||||
uptime.TotalExecutionsPerHour[unixTimestampFlooredAtHour]++
|
if result.Success {
|
||||||
|
hourlyStats.SuccessfulExecutions++
|
||||||
|
}
|
||||||
|
hourlyStats.TotalExecutions++
|
||||||
|
hourlyStats.TotalExecutionsResponseTime += uint64(result.Duration.Milliseconds())
|
||||||
// Clean up only when we're starting to have too many useless keys
|
// Clean up only when we're starting to have too many useless keys
|
||||||
// Note that this is only triggered when there are more entries than there should be after
|
// Note that this is only triggered when there are more entries than there should be after
|
||||||
// 10 days, despite the fact that we are deleting everything that's older than 7 days.
|
// 10 days, despite the fact that we are deleting everything that's older than 7 days.
|
||||||
// This is to prevent re-iterating on every `ProcessResult` as soon as the uptime has been logged for 7 days.
|
// This is to prevent re-iterating on every `ProcessResult` as soon as the uptime has been logged for 7 days.
|
||||||
if len(uptime.TotalExecutionsPerHour) > numberOfHoursInTenDays {
|
if len(uptime.HourlyStatistics) > numberOfHoursInTenDays {
|
||||||
sevenDaysAgo := time.Now().Add(-(sevenDays + time.Hour)).Unix()
|
sevenDaysAgo := time.Now().Add(-(sevenDays + time.Hour)).Unix()
|
||||||
for hourlyUnixTimestamp := range uptime.TotalExecutionsPerHour {
|
for hourlyUnixTimestamp := range uptime.HourlyStatistics {
|
||||||
if sevenDaysAgo > hourlyUnixTimestamp {
|
if sevenDaysAgo > hourlyUnixTimestamp {
|
||||||
delete(uptime.TotalExecutionsPerHour, hourlyUnixTimestamp)
|
delete(uptime.HourlyStatistics, hourlyUnixTimestamp)
|
||||||
delete(uptime.SuccessfulExecutionsPerHour, hourlyUnixTimestamp)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -77,6 +98,16 @@ func (uptime *Uptime) ProcessResult(result *Result) {
|
|||||||
uptime.recalculate()
|
uptime.recalculate()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// cute print
|
||||||
|
//b, _ := json.MarshalIndent(uptime.TotalExecutionsPerHour, "", " ")
|
||||||
|
//fmt.Println("TotalExecutionsPerHour:", string(b))
|
||||||
|
//b, _ = json.MarshalIndent(uptime.SuccessfulExecutionsPerHour, "", " ")
|
||||||
|
//fmt.Println("SuccessfulExecutionsPerHour:", string(b))
|
||||||
|
//b, _ = json.MarshalIndent(uptime.TotalRequestResponseTimePerHour, "", " ")
|
||||||
|
//fmt.Println("TotalRequestResponseTimePerHour:", string(b))
|
||||||
|
//for unixTimestamp, executions := range uptime.TotalExecutionsPerHour {
|
||||||
|
// fmt.Printf("average for %d was %d\n", unixTimestamp, uptime.TotalRequestResponseTimePerHour[unixTimestamp]/executions)
|
||||||
|
//}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (uptime *Uptime) recalculate() {
|
func (uptime *Uptime) recalculate() {
|
||||||
@ -86,17 +117,20 @@ func (uptime *Uptime) recalculate() {
|
|||||||
timestamp := now.Add(-sevenDays)
|
timestamp := now.Add(-sevenDays)
|
||||||
for now.Sub(timestamp) >= 0 {
|
for now.Sub(timestamp) >= 0 {
|
||||||
hourlyUnixTimestamp := timestamp.Unix() - (timestamp.Unix() % 3600)
|
hourlyUnixTimestamp := timestamp.Unix() - (timestamp.Unix() % 3600)
|
||||||
successCountForTimestamp := uptime.SuccessfulExecutionsPerHour[hourlyUnixTimestamp]
|
hourlyStats := uptime.HourlyStatistics[hourlyUnixTimestamp]
|
||||||
totalCountForTimestamp := uptime.TotalExecutionsPerHour[hourlyUnixTimestamp]
|
if hourlyStats == nil || hourlyStats.TotalExecutions == 0 {
|
||||||
uptimeBrackets["7d_success"] += successCountForTimestamp
|
timestamp = timestamp.Add(time.Hour)
|
||||||
uptimeBrackets["7d_total"] += totalCountForTimestamp
|
continue
|
||||||
|
}
|
||||||
|
uptimeBrackets["7d_success"] += hourlyStats.SuccessfulExecutions
|
||||||
|
uptimeBrackets["7d_total"] += hourlyStats.TotalExecutions
|
||||||
if now.Sub(timestamp) <= 24*time.Hour {
|
if now.Sub(timestamp) <= 24*time.Hour {
|
||||||
uptimeBrackets["24h_success"] += successCountForTimestamp
|
uptimeBrackets["24h_success"] += hourlyStats.SuccessfulExecutions
|
||||||
uptimeBrackets["24h_total"] += totalCountForTimestamp
|
uptimeBrackets["24h_total"] += hourlyStats.TotalExecutions
|
||||||
}
|
}
|
||||||
if now.Sub(timestamp) <= time.Hour {
|
if now.Sub(timestamp) <= time.Hour {
|
||||||
uptimeBrackets["1h_success"] += successCountForTimestamp
|
uptimeBrackets["1h_success"] += hourlyStats.SuccessfulExecutions
|
||||||
uptimeBrackets["1h_total"] += totalCountForTimestamp
|
uptimeBrackets["1h_total"] += hourlyStats.TotalExecutions
|
||||||
}
|
}
|
||||||
timestamp = timestamp.Add(time.Hour)
|
timestamp = timestamp.Add(time.Hour)
|
||||||
}
|
}
|
||||||
@ -110,3 +144,22 @@ func (uptime *Uptime) recalculate() {
|
|||||||
uptime.LastHour = float64(uptimeBrackets["1h_success"]) / float64(uptimeBrackets["1h_total"])
|
uptime.LastHour = float64(uptimeBrackets["1h_success"]) / float64(uptimeBrackets["1h_total"])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// migrateToHourlyStatistics rebuilds uptime.HourlyStatistics from the deprecated
// SuccessfulExecutionsPerHour and TotalExecutionsPerHour maps, then sets both
// deprecated maps to nil.
//
// Any existing content of uptime.HourlyStatistics is replaced by the migrated data.
// Only keys present in TotalExecutionsPerHour are migrated, and hours with zero
// total executions are skipped.
func (uptime *Uptime) migrateToHourlyStatistics() {
|
||||||
|
log.Println("[migrateToHourlyStatistics] Got", len(uptime.SuccessfulExecutionsPerHour), "entries for successful executions and", len(uptime.TotalExecutionsPerHour), "entries for total executions")
|
||||||
|
// Start from a fresh map; whatever HourlyStatistics held before is discarded.
uptime.HourlyStatistics = make(map[int64]*HourlyUptimeStatistics)
|
||||||
|
for hourlyUnixTimestamp, totalExecutions := range uptime.TotalExecutionsPerHour {
|
||||||
|
if totalExecutions == 0 {
|
||||||
|
log.Println("[migrateToHourlyStatistics] Skipping entry at", hourlyUnixTimestamp, "because total number of executions is 0")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
uptime.HourlyStatistics[hourlyUnixTimestamp] = &HourlyUptimeStatistics{
|
||||||
|
TotalExecutions: totalExecutions,
|
||||||
|
// A missing key in SuccessfulExecutionsPerHour yields 0 successes for that hour.
SuccessfulExecutions: uptime.SuccessfulExecutionsPerHour[hourlyUnixTimestamp],
|
||||||
|
// The deprecated maps only hold execution counts, so no response time can be migrated.
TotalExecutionsResponseTime: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.Println("[migrateToHourlyStatistics] Migrated", len(uptime.HourlyStatistics), "entries")
|
||||||
|
// Drop the deprecated maps so their length is 0 from now on.
uptime.SuccessfulExecutionsPerHour = nil
|
||||||
|
uptime.TotalExecutionsPerHour = nil
|
||||||
|
}
|
||||||
|
24
core/uptime_bench_test.go
Normal file
24
core/uptime_bench_test.go
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
package core
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// BenchmarkUptime_ProcessResult measures Uptime.ProcessResult by feeding it b.N
// results spaced 3 minutes apart, starting 12000 days before the current hour.
// Every 15th result is marked successful and each result has an 18ms duration.
func BenchmarkUptime_ProcessResult(b *testing.B) {
|
||||||
|
uptime := NewUptime()
|
||||||
|
now := time.Now()
|
||||||
|
// Truncate to the start of the current hour (in the local time zone)
now = time.Date(now.Year(), now.Month(), now.Day(), now.Hour(), 0, 0, 0, now.Location())
|
||||||
|
// Start 12000 days ago
|
||||||
|
timestamp := now.Add(-12000 * 24 * time.Hour)
|
||||||
|
for n := 0; n < b.N; n++ {
|
||||||
|
uptime.ProcessResult(&Result{
|
||||||
|
Duration: 18 * time.Millisecond,
|
||||||
|
Success: n%15 == 0,
|
||||||
|
Timestamp: timestamp,
|
||||||
|
})
|
||||||
|
// Simulate service with an interval of 3 minutes
|
||||||
|
timestamp = timestamp.Add(3 * time.Minute)
|
||||||
|
}
|
||||||
|
// Enable allocation statistics for this benchmark.
// NOTE(review): this is conventionally called before the loop - confirm placement is intentional.
b.ReportAllocs()
|
||||||
|
}
|
@ -12,33 +12,39 @@ func TestUptime_ProcessResult(t *testing.T) {
|
|||||||
|
|
||||||
checkUptimes(t, serviceStatus, 0.00, 0.00, 0.00)
|
checkUptimes(t, serviceStatus, 0.00, 0.00, 0.00)
|
||||||
|
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-7 * 24 * time.Hour), Success: true})
|
now := time.Now()
|
||||||
|
now = time.Date(now.Year(), now.Month(), now.Day(), now.Hour(), 0, 0, 0, now.Location())
|
||||||
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-7 * 24 * time.Hour), Success: true})
|
||||||
checkUptimes(t, serviceStatus, 1.00, 0.00, 0.00)
|
checkUptimes(t, serviceStatus, 1.00, 0.00, 0.00)
|
||||||
|
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-6 * 24 * time.Hour), Success: false})
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-6 * 24 * time.Hour), Success: false})
|
||||||
checkUptimes(t, serviceStatus, 0.50, 0.00, 0.00)
|
checkUptimes(t, serviceStatus, 0.50, 0.00, 0.00)
|
||||||
|
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-8 * 24 * time.Hour), Success: true})
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-8 * 24 * time.Hour), Success: true})
|
||||||
checkUptimes(t, serviceStatus, 0.50, 0.00, 0.00)
|
checkUptimes(t, serviceStatus, 0.50, 0.00, 0.00)
|
||||||
|
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-24 * time.Hour), Success: true})
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-24 * time.Hour), Success: true})
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-12 * time.Hour), Success: true})
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-12 * time.Hour), Success: true})
|
||||||
checkUptimes(t, serviceStatus, 0.75, 1.00, 0.00)
|
checkUptimes(t, serviceStatus, 0.75, 1.00, 0.00)
|
||||||
|
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-1 * time.Hour), Success: true})
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-1 * time.Hour), Success: true, Duration: 10 * time.Millisecond})
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-30 * time.Minute), Success: false})
|
checkHourlyStatistics(t, uptime.HourlyStatistics[now.Unix()-now.Unix()%3600-3600], 10, 1, 1)
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-15 * time.Minute), Success: false})
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-30 * time.Minute), Success: false, Duration: 500 * time.Millisecond})
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-10 * time.Minute), Success: false})
|
checkHourlyStatistics(t, uptime.HourlyStatistics[now.Unix()-now.Unix()%3600-3600], 510, 2, 1)
|
||||||
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-15 * time.Minute), Success: false, Duration: 25 * time.Millisecond})
|
||||||
|
checkHourlyStatistics(t, uptime.HourlyStatistics[now.Unix()-now.Unix()%3600-3600], 535, 3, 1)
|
||||||
|
|
||||||
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-10 * time.Minute), Success: false})
|
||||||
checkUptimes(t, serviceStatus, 0.50, 0.50, 0.25)
|
checkUptimes(t, serviceStatus, 0.50, 0.50, 0.25)
|
||||||
|
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-120 * time.Hour), Success: true})
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-120 * time.Hour), Success: true})
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-119 * time.Hour), Success: true})
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-119 * time.Hour), Success: true})
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-118 * time.Hour), Success: true})
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-118 * time.Hour), Success: true})
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-117 * time.Hour), Success: true})
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-117 * time.Hour), Success: true})
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-10 * time.Hour), Success: true})
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-10 * time.Hour), Success: true})
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-8 * time.Hour), Success: true})
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-8 * time.Hour), Success: true})
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-30 * time.Minute), Success: true})
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-30 * time.Minute), Success: true})
|
||||||
uptime.ProcessResult(&Result{Timestamp: time.Now().Add(-25 * time.Minute), Success: true})
|
uptime.ProcessResult(&Result{Timestamp: now.Add(-25 * time.Minute), Success: true})
|
||||||
checkUptimes(t, serviceStatus, 0.75, 0.70, 0.50)
|
checkUptimes(t, serviceStatus, 0.75, 0.70, 0.50)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -51,8 +57,8 @@ func TestServiceStatus_AddResultUptimeIsCleaningUpAfterItself(t *testing.T) {
|
|||||||
timestamp := now.Add(-12 * 24 * time.Hour)
|
timestamp := now.Add(-12 * 24 * time.Hour)
|
||||||
for timestamp.Unix() <= now.Unix() {
|
for timestamp.Unix() <= now.Unix() {
|
||||||
serviceStatus.AddResult(&Result{Timestamp: timestamp, Success: true})
|
serviceStatus.AddResult(&Result{Timestamp: timestamp, Success: true})
|
||||||
if len(serviceStatus.Uptime.SuccessfulExecutionsPerHour) > numberOfHoursInTenDays {
|
if len(serviceStatus.Uptime.HourlyStatistics) > numberOfHoursInTenDays {
|
||||||
t.Errorf("At no point in time should there be more than %d entries in serviceStatus.SuccessfulExecutionsPerHour, but there are %d", numberOfHoursInTenDays, len(serviceStatus.Uptime.SuccessfulExecutionsPerHour))
|
t.Errorf("At no point in time should there be more than %d entries in serviceStatus.SuccessfulExecutionsPerHour, but there are %d", numberOfHoursInTenDays, len(serviceStatus.Uptime.HourlyStatistics))
|
||||||
}
|
}
|
||||||
if now.Sub(timestamp) > time.Hour && serviceStatus.Uptime.LastHour != 0 {
|
if now.Sub(timestamp) > time.Hour && serviceStatus.Uptime.LastHour != 0 {
|
||||||
t.Error("most recent timestamp > 1h ago, expected serviceStatus.Uptime.LastHour to be 0, got", serviceStatus.Uptime.LastHour)
|
t.Error("most recent timestamp > 1h ago, expected serviceStatus.Uptime.LastHour to be 0, got", serviceStatus.Uptime.LastHour)
|
||||||
@ -76,3 +82,15 @@ func checkUptimes(t *testing.T, status *ServiceStatus, expectedUptimeDuringLastS
|
|||||||
t.Errorf("expected status.Uptime.LastHour to be %f, got %f", expectedUptimeDuringLastHour, status.Uptime.LastHour)
|
t.Errorf("expected status.Uptime.LastHour to be %f, got %f", expectedUptimeDuringLastHour, status.Uptime.LastHour)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// checkHourlyStatistics compares the three fields of hourlyUptimeStatistics against
// the expected values and reports one t.Error per field that differs.
//
// NOTE(review): hourlyUptimeStatistics is dereferenced without a nil check, so passing
// the result of a map lookup on a missing key would panic rather than fail the test.
func checkHourlyStatistics(t *testing.T, hourlyUptimeStatistics *HourlyUptimeStatistics, expectedTotalExecutionsResponseTime uint64, expectedTotalExecutions uint64, expectedSuccessfulExecutions uint64) {
|
||||||
|
if hourlyUptimeStatistics.TotalExecutionsResponseTime != expectedTotalExecutionsResponseTime {
|
||||||
|
t.Error("TotalExecutionsResponseTime should've been", expectedTotalExecutionsResponseTime, "got", hourlyUptimeStatistics.TotalExecutionsResponseTime)
|
||||||
|
}
|
||||||
|
if hourlyUptimeStatistics.TotalExecutions != expectedTotalExecutions {
|
||||||
|
t.Error("TotalExecutions should've been", expectedTotalExecutions, "got", hourlyUptimeStatistics.TotalExecutions)
|
||||||
|
}
|
||||||
|
if hourlyUptimeStatistics.SuccessfulExecutions != expectedSuccessfulExecutions {
|
||||||
|
t.Error("SuccessfulExecutions should've been", expectedSuccessfulExecutions, "got", hourlyUptimeStatistics.SuccessfulExecutions)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user