diff --git a/model/status.go b/model/status.go
index f032d673..04dab481 100644
--- a/model/status.go
+++ b/model/status.go
@@ -13,9 +13,10 @@ type Status struct {
 }
 
 type GridStatus struct {
-	TotalNodeCount     int `json:"totalNodeCount"`
-	UnhealthyNodeCount int `json:"unhealthyNodeCount"`
-	Eps                int `json:"eps"`
+	TotalNodeCount          int `json:"totalNodeCount"`
+	UnhealthyNodeCount      int `json:"unhealthyNodeCount"`
+	AwaitingRebootNodeCount int `json:"awaitingRebootNodeCount"`
+	Eps                     int `json:"eps"`
 }
 
 type AlertsStatus struct {
@@ -37,11 +38,20 @@ type EngineState struct {
 	SyncFailure      bool `json:"syncFailure"`
 }
 
+// IsFailureState reports whether IntegrityFailure, MigrationFailure, or SyncFailure is set.
+func (state *EngineState) IsFailureState() bool {
+	return state.IntegrityFailure || state.MigrationFailure || state.SyncFailure
+}
+
 func NewStatus() *Status {
 	newStatus := &Status{
-		Grid:       &GridStatus{},
-		Alerts:     &AlertsStatus{},
-		Detections: &DetectionsStatus{},
+		Grid:   &GridStatus{},
+		Alerts: &AlertsStatus{},
+		Detections: &DetectionsStatus{
+			ElastAlert: &EngineState{},
+			Strelka:    &EngineState{},
+			Suricata:   &EngineState{},
+		},
 	}
 	return newStatus
 }
diff --git a/server/modules/sostatus/sostatus.go b/server/modules/sostatus/sostatus.go
index d8135a3e..28805bd7 100644
--- a/server/modules/sostatus/sostatus.go
+++ b/server/modules/sostatus/sostatus.go
@@ -96,6 +96,7 @@ func (status *SoStatus) Refresh(ctx context.Context) {
 func (status *SoStatus) refreshGrid(ctx context.Context) {
 	unhealthyNodes := 0
 	nonCriticalNodes := 0
+	awaitingRebootCount := 0
 
 	nodes := status.server.Datastore.GetNodes(ctx)
 	for _, node := range nodes {
@@ -134,16 +135,73 @@ func (status *SoStatus) refreshGrid(ctx context.Context) {
 		if node.NonCriticalNode {
 			nonCriticalNodes++
 		}
+
+		if node.OsNeedsRestart == 1 {
+			awaitingRebootCount++
+		}
 	}
 	status.currentStatus.Grid.TotalNodeCount = len(nodes)
+	if status.currentStatus.Grid.UnhealthyNodeCount == 0 && unhealthyNodes > 0 {
+		log.WithFields(log.Fields{
+			"unhealthyNodes": unhealthyNodes,
+			"totalNodes":     len(nodes),
+		}).Warn("Grid has entered an unhealthy state")
+	} else if status.currentStatus.Grid.UnhealthyNodeCount > 0 && unhealthyNodes == 0 {
+		log.WithFields(log.Fields{
+			"unhealthyNodes": unhealthyNodes,
+			"totalNodes":     len(nodes),
+		}).Info("Grid has returned to a healthy state")
+	}
 	status.currentStatus.Grid.UnhealthyNodeCount = unhealthyNodes
 	status.currentStatus.Grid.Eps = status.server.Metrics.GetGridEps(ctx)
+	if status.currentStatus.Grid.AwaitingRebootNodeCount == 0 && awaitingRebootCount > 0 {
+		log.WithFields(log.Fields{
+			"awaitingRebootCount": awaitingRebootCount,
+			"totalNodes":          len(nodes),
+		}).Info("Grid nodes are awaiting reboot")
+	}
+	status.currentStatus.Grid.AwaitingRebootNodeCount = awaitingRebootCount
 
 	licensing.ValidateNodeCount(status.currentStatus.Grid.TotalNodeCount - nonCriticalNodes)
 }
 
 func (status *SoStatus) refreshDetections(ctx context.Context) {
-	status.currentStatus.Detections.ElastAlert = status.server.DetectionEngines[model.EngineNameElastAlert].GetState()
-	status.currentStatus.Detections.Suricata = status.server.DetectionEngines[model.EngineNameSuricata].GetState()
-	status.currentStatus.Detections.Strelka = status.server.DetectionEngines[model.EngineNameStrelka].GetState()
+	status.currentStatus.Detections.ElastAlert = status.checkDetectionEngineStatus("ElastAlert2",
+		status.currentStatus.Detections.ElastAlert,
+		status.server.DetectionEngines[model.EngineNameElastAlert].GetState())
+	status.currentStatus.Detections.Suricata = status.checkDetectionEngineStatus("Suricata",
+		status.currentStatus.Detections.Suricata,
+		status.server.DetectionEngines[model.EngineNameSuricata].GetState())
+	status.currentStatus.Detections.Strelka = status.checkDetectionEngineStatus("Strelka",
+		status.currentStatus.Detections.Strelka,
+		status.server.DetectionEngines[model.EngineNameStrelka].GetState())
+}
+
+// checkDetectionEngineStatus logs a message when the named engine moves into
+// or out of a failure state, then returns newState for the caller to store.
+func (status *SoStatus) checkDetectionEngineStatus(engineName string, oldState *model.EngineState, newState *model.EngineState) *model.EngineState {
+	wasFailing := oldState.IsFailureState()
+	isFailing := newState.IsFailureState()
+	if wasFailing == isFailing {
+		// No transition, so there is nothing to report.
+		return newState
+	}
+
+	entry := log.WithFields(log.Fields{
+		"currentStateIntegrityFailure": oldState.IntegrityFailure,
+		"currentStateMigrationFailure": oldState.MigrationFailure,
+		"currentStateSyncFailure":      oldState.SyncFailure,
+		"newStateIntegrityFailure":     newState.IntegrityFailure,
+		"newStateMigrationFailure":     newState.MigrationFailure,
+		"newStateSyncFailure":          newState.SyncFailure,
+		"engineName":                   engineName,
+	})
+	if isFailing {
+		entry.Warn("Detection engine has entered a failure state")
+	} else {
+		// Recovery is good news; log it at Info like the grid recovery message.
+		entry.Info("Detection engine has returned to a healthy state")
+	}
+
+	return newState
 }
diff --git a/server/modules/sostatus/sostatus_test.go b/server/modules/sostatus/sostatus_test.go
index c8fbd172..2202567a 100644
--- a/server/modules/sostatus/sostatus_test.go
+++ b/server/modules/sostatus/sostatus_test.go
@@ -11,6 +11,7 @@ import (
 	"testing"
 
 	"github.com/security-onion-solutions/securityonion-soc/licensing"
+	"github.com/security-onion-solutions/securityonion-soc/model"
 	"github.com/security-onion-solutions/securityonion-soc/server"
 	"github.com/stretchr/testify/assert"
 )
@@ -52,5 +53,18 @@ func TestRefreshGrid(tester *testing.T) {
 	status.refreshGrid(context.Background())
 	assert.Equal(tester, 2, status.currentStatus.Grid.UnhealthyNodeCount)
 	assert.Equal(tester, 3, status.currentStatus.Grid.TotalNodeCount)
+	assert.Equal(tester, 0, status.currentStatus.Grid.AwaitingRebootNodeCount)
 	assert.Equal(tester, 12, status.currentStatus.Grid.Eps)
 }
+
+func TestCheckDetectionEngineStatus(tester *testing.T) {
+	status, _ := NewTestStatus()
+	bad := &model.EngineState{
+		SyncFailure: true,
+	}
+	good := &model.EngineState{}
+	// Entering a failure state: the new state is returned unchanged.
+	assert.Equal(tester, bad, status.checkDetectionEngineStatus("foo", good, bad))
+	// Recovering from a failure state: the new state is still returned as-is.
+	assert.Equal(tester, good, status.checkDetectionEngineStatus("foo", bad, good))
+}