Skip to content

Commit

Permalink
Merge pull request #633 from Security-Onion-Solutions/jertel/gsl
Browse files Browse the repository at this point in the history
add more grid state logging
  • Loading branch information
jertel authored Sep 9, 2024
2 parents 439c8b8 + 07c27e0 commit 59479ff
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 9 deletions.
21 changes: 15 additions & 6 deletions model/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ type Status struct {
}

type GridStatus struct {
TotalNodeCount int `json:"totalNodeCount"`
UnhealthyNodeCount int `json:"unhealthyNodeCount"`
Eps int `json:"eps"`
TotalNodeCount int `json:"totalNodeCount"`
UnhealthyNodeCount int `json:"unhealthyNodeCount"`
AwaitingRebootNodeCount int `json:"awaitingRebootNodeCount"`
Eps int `json:"eps"`
}

type AlertsStatus struct {
Expand All @@ -37,11 +38,19 @@ type EngineState struct {
SyncFailure bool `json:"syncFailure"`
}

// IsFailureState reports whether any of the engine's failure flags
// (IntegrityFailure, MigrationFailure, SyncFailure) is set.
//
// A nil receiver is reported as "not in a failure state" instead of
// panicking, so callers that compare a previous state with a current one do
// not crash when one of them has not been populated yet.
func (state *EngineState) IsFailureState() bool {
	if state == nil {
		return false
	}
	return state.IntegrityFailure || state.MigrationFailure || state.SyncFailure
}

func NewStatus() *Status {
newStatus := &Status{
Grid: &GridStatus{},
Alerts: &AlertsStatus{},
Detections: &DetectionsStatus{},
Grid: &GridStatus{},
Alerts: &AlertsStatus{},
Detections: &DetectionsStatus{
ElastAlert: &EngineState{},
Strelka: &EngineState{},
Suricata: &EngineState{},
},
}
return newStatus
}
61 changes: 58 additions & 3 deletions server/modules/sostatus/sostatus.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ func (status *SoStatus) Refresh(ctx context.Context) {
func (status *SoStatus) refreshGrid(ctx context.Context) {
unhealthyNodes := 0
nonCriticalNodes := 0
awaitingRebootCount := 0

nodes := status.server.Datastore.GetNodes(ctx)
for _, node := range nodes {
Expand Down Expand Up @@ -134,16 +135,70 @@ func (status *SoStatus) refreshGrid(ctx context.Context) {
if node.NonCriticalNode {
nonCriticalNodes++
}

if node.OsNeedsRestart == 1 {
awaitingRebootCount++
}
}
status.currentStatus.Grid.TotalNodeCount = len(nodes)
if status.currentStatus.Grid.UnhealthyNodeCount == 0 && unhealthyNodes > 0 {
log.WithFields(log.Fields{
"unhealthyNodes": unhealthyNodes,
"totalNodes": len(nodes),
}).Warn("Grid has entered an unhealthy state")
} else if status.currentStatus.Grid.UnhealthyNodeCount > 0 && unhealthyNodes == 0 {
log.WithFields(log.Fields{
"unhealthyNodes": unhealthyNodes,
"totalNodes": len(nodes),
}).Info("Grid has returned to a healthy state")
}
status.currentStatus.Grid.UnhealthyNodeCount = unhealthyNodes
status.currentStatus.Grid.Eps = status.server.Metrics.GetGridEps(ctx)
if status.currentStatus.Grid.AwaitingRebootNodeCount == 0 && awaitingRebootCount > 0 {
log.WithFields(log.Fields{
"awaitingRebootCount": awaitingRebootCount,
"totalNodes": len(nodes),
}).Info("Grid nodes are awaiting reboot")
}
status.currentStatus.Grid.AwaitingRebootNodeCount = awaitingRebootCount

licensing.ValidateNodeCount(status.currentStatus.Grid.TotalNodeCount - nonCriticalNodes)
}

func (status *SoStatus) refreshDetections(ctx context.Context) {
status.currentStatus.Detections.ElastAlert = status.server.DetectionEngines[model.EngineNameElastAlert].GetState()
status.currentStatus.Detections.Suricata = status.server.DetectionEngines[model.EngineNameSuricata].GetState()
status.currentStatus.Detections.Strelka = status.server.DetectionEngines[model.EngineNameStrelka].GetState()
status.currentStatus.Detections.ElastAlert = status.checkDetectionEngineStatus("ElastAlert2",
status.currentStatus.Detections.ElastAlert,
status.server.DetectionEngines[model.EngineNameElastAlert].GetState())
status.currentStatus.Detections.Suricata = status.checkDetectionEngineStatus("Suricata",
status.currentStatus.Detections.Suricata,
status.server.DetectionEngines[model.EngineNameSuricata].GetState())
status.currentStatus.Detections.Strelka = status.checkDetectionEngineStatus("Strelka",
status.currentStatus.Detections.Strelka,
status.server.DetectionEngines[model.EngineNameStrelka].GetState())
}

// checkDetectionEngineStatus compares the previously recorded state of a
// detection engine with its newly reported state and logs a message when the
// engine crosses the healthy/failure boundary:
//
//   - healthy -> failing is logged at Warn level
//   - failing -> healthy is logged at Info level, matching the level used for
//     the grid's "returned to a healthy state" message
//
// No message is logged when the failure status is unchanged. The newState
// argument is always returned as-is so the caller can store it as the
// engine's current state.
func (status *SoStatus) checkDetectionEngineStatus(engineName string, oldState *model.EngineState, newState *model.EngineState) *model.EngineState {
	// A missing state is compared and logged as an empty (healthy) state so a
	// nil pointer on either side cannot panic here. The caller still receives
	// the original newState value.
	previous, current := oldState, newState
	if previous == nil {
		previous = &model.EngineState{}
	}
	if current == nil {
		current = &model.EngineState{}
	}

	wasFailing := previous.IsFailureState()
	isFailing := current.IsFailureState()
	if wasFailing == isFailing {
		return newState
	}

	// The "currentState*" fields carry the values recorded before this
	// refresh; the "newState*" fields carry the values just reported.
	entry := log.WithFields(log.Fields{
		"currentStateIntegrityFailure": previous.IntegrityFailure,
		"currentStateMigrationFailure": previous.MigrationFailure,
		"currentStateSyncFailure":      previous.SyncFailure,
		"newStateIntegrityFailure":     current.IntegrityFailure,
		"newStateMigrationFailure":     current.MigrationFailure,
		"newStateSyncFailure":          current.SyncFailure,
		"engineName":                   engineName,
	})

	if isFailing {
		entry.Warn("Detection engine has entered a failure state")
	} else {
		entry.Info("Detection engine has returned to a healthy state")
	}

	return newState
}
11 changes: 11 additions & 0 deletions server/modules/sostatus/sostatus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"testing"

"github.com/security-onion-solutions/securityonion-soc/licensing"
"github.com/security-onion-solutions/securityonion-soc/model"
"github.com/security-onion-solutions/securityonion-soc/server"
"github.com/stretchr/testify/assert"
)
Expand Down Expand Up @@ -52,5 +53,15 @@ func TestRefreshGrid(tester *testing.T) {
status.refreshGrid(context.Background())
assert.Equal(tester, 2, status.currentStatus.Grid.UnhealthyNodeCount)
assert.Equal(tester, 3, status.currentStatus.Grid.TotalNodeCount)
assert.Equal(tester, 0, status.currentStatus.Grid.AwaitingRebootNodeCount)
assert.Equal(tester, 12, status.currentStatus.Grid.Eps)
}

// TestCheckDetectionEngineStatus verifies that checkDetectionEngineStatus
// returns the new state for every combination of healthy and failing
// old/new states, not only for the healthy -> failing transition.
func TestCheckDetectionEngineStatus(tester *testing.T) {
	status, _ := NewTestStatus()

	healthy := &model.EngineState{}
	syncFailed := &model.EngineState{
		SyncFailure: true,
	}
	integrityFailed := &model.EngineState{
		IntegrityFailure: true,
	}

	transitions := []struct {
		name     string
		oldState *model.EngineState
		newState *model.EngineState
	}{
		{name: "healthy to failing", oldState: healthy, newState: syncFailed},
		{name: "failing to healthy", oldState: syncFailed, newState: healthy},
		{name: "healthy to healthy", oldState: healthy, newState: &model.EngineState{}},
		{name: "failing to failing", oldState: syncFailed, newState: integrityFailed},
	}

	for _, transition := range transitions {
		actual := status.checkDetectionEngineStatus("foo", transition.oldState, transition.newState)
		assert.Equal(tester, transition.newState, actual, transition.name)
	}
}

0 comments on commit 59479ff

Please sign in to comment.