Skip to content

Commit

Permalink
NETOBSERV-1625: Add ebpf alerting for flows drop (#632)
Browse files Browse the repository at this point in the history
* NETOBSERV-1625: Add ebpf alerting for flows drop

Signed-off-by: Mohamed Mahmoud <mmahmoud@redhat.com>

* update agent alert description

Signed-off-by: Mohamed Mahmoud <mmahmoud@redhat.com>

---------

Signed-off-by: Mohamed Mahmoud <mmahmoud@redhat.com>
  • Loading branch information
msherif1234 authored Apr 30, 2024
1 parent a057c25 commit 13b2a78
Show file tree
Hide file tree
Showing 12 changed files with 186 additions and 0 deletions.
16 changes: 16 additions & 0 deletions apis/flowcollector/v1beta1/flowcollector_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,16 @@ const (
FlowRTT AgentFeature = "FlowRTT"
)

// Name of an eBPF agent alert.
// Possible values are:<br>
// - `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
// +kubebuilder:validation:Enum:="NetObservDroppedFlows"
type EBPFAgentAlert string

const (
	// AlertDroppedFlows names the alert that is triggered when the eBPF agent
	// hashmap table is full. Its value must be identical to the value listed in
	// the kubebuilder Enum marker on EBPFAgentAlert: that marker defines the only
	// string accepted for this type, so any other value here could never
	// be matched by a validated list of EBPFAgentAlert entries.
	AlertDroppedFlows EBPFAgentAlert = "NetObservDroppedFlows"
)

// `EBPFMetrics` defines the desired eBPF agent configuration regarding metrics
type EBPFMetrics struct {
// Metrics server endpoint configuration for Prometheus scraper
Expand All @@ -165,6 +175,12 @@ type EBPFMetrics struct {

// Set `enable` to `true` to enable eBPF agent metrics collection.
Enable *bool `json:"enable,omitempty"`

// `disableAlerts` is a list of alerts that should be disabled.
// Possible values are:<br>
// `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
// +optional
DisableAlerts []EBPFAgentAlert `json:"disableAlerts"`
}

// `EBPFFlowFilter` defines the desired eBPF agent configuration regarding flow filtering
Expand Down
2 changes: 2 additions & 0 deletions apis/flowcollector/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions apis/flowcollector/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions apis/flowcollector/v1beta2/flowcollector_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,16 @@ const (
FlowRTT AgentFeature = "FlowRTT"
)

// Name of an eBPF agent alert.
// Possible values are:<br>
// - `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
// +kubebuilder:validation:Enum:="NetObservDroppedFlows"
type EBPFAgentAlert string

const (
	// AlertDroppedFlows names the alert that is triggered when the eBPF agent
	// hashmap table is full. Its value must be identical to the value listed in
	// the kubebuilder Enum marker on EBPFAgentAlert: that marker defines the only
	// string accepted for this type, so any other value here could never
	// be matched by a validated list of EBPFAgentAlert entries.
	AlertDroppedFlows EBPFAgentAlert = "NetObservDroppedFlows"
)

// `EBPFMetrics` defines the desired eBPF agent configuration regarding metrics
type EBPFMetrics struct {
// Metrics server endpoint configuration for Prometheus scraper
Expand All @@ -172,6 +182,12 @@ type EBPFMetrics struct {

// Set `enable` to `true` to enable eBPF agent metrics collection.
Enable *bool `json:"enable,omitempty"`

// `disableAlerts` is a list of alerts that should be disabled.
// Possible values are:<br>
// `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
// +optional
DisableAlerts []EBPFAgentAlert `json:"disableAlerts"`
}

// `EBPFFlowFilter` defines the desired eBPF agent configuration regarding flow filtering
Expand Down
5 changes: 5 additions & 0 deletions apis/flowcollector/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions bundle/manifests/flows.netobserv.io_flowcollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,20 @@ spec:
description: '`metrics` defines the eBPF agent configuration
regarding metrics'
properties:
disableAlerts:
description: |-
`disableAlerts` is a list of alerts that should be disabled.
Possible values are:<br>
`NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
items:
description: |-
Name of a ebpf agent alert.
Possible values are:<br>
- `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
enum:
- NetObservDroppedFlows
type: string
type: array
enable:
description: Set `enable` to `true` to enable eBPF agent
metrics collection.
Expand Down Expand Up @@ -3637,6 +3651,20 @@ spec:
description: '`metrics` defines the eBPF agent configuration
regarding metrics'
properties:
disableAlerts:
description: |-
`disableAlerts` is a list of alerts that should be disabled.
Possible values are:<br>
`NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
items:
description: |-
Name of a ebpf agent alert.
Possible values are:<br>
- `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
enum:
- NetObservDroppedFlows
type: string
type: array
enable:
description: Set `enable` to `true` to enable eBPF agent
metrics collection.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,8 @@ spec:
path: agent.ebpf.interfaces
- displayName: Metrics
path: agent.ebpf.metrics
- displayName: Disable alerts
path: agent.ebpf.metrics.disableAlerts
- displayName: Enable
path: agent.ebpf.metrics.enable
- displayName: Server
Expand Down
28 changes: 28 additions & 0 deletions config/crd/bases/flows.netobserv.io_flowcollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,20 @@ spec:
metrics:
description: '`metrics` defines the eBPF agent configuration regarding metrics'
properties:
disableAlerts:
description: |-
`disableAlerts` is a list of alerts that should be disabled.
Possible values are:<br>
`NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
items:
description: |-
Name of a ebpf agent alert.
Possible values are:<br>
- `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
enum:
- NetObservDroppedFlows
type: string
type: array
enable:
description: Set `enable` to `true` to enable eBPF agent metrics collection.
type: boolean
Expand Down Expand Up @@ -3349,6 +3363,20 @@ spec:
metrics:
description: '`metrics` defines the eBPF agent configuration regarding metrics'
properties:
disableAlerts:
description: |-
`disableAlerts` is a list of alerts that should be disabled.
Possible values are:<br>
`NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
items:
description: |-
Name of a ebpf agent alert.
Possible values are:<br>
- `NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br>
enum:
- NetObservDroppedFlows
type: string
type: array
enable:
description: Set `enable` to `true` to enable eBPF agent metrics collection.
type: boolean
Expand Down
1 change: 1 addition & 0 deletions controllers/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const (
EBPFAgentName = "netobserv-ebpf-agent"
EBPFAgentMetricsSvcName = "ebpf-agent-svc-prom"
EBPFAgentMetricsSvcMonitoringName = "ebpf-agent-svc-monitor"
EBPFAgentPromoAlertRule = "ebpf-agent-prom-alert"
EBPFPrivilegedNSSuffix = "-privileged"
EBPFServiceAccount = EBPFAgentName
EBPFSecurityContext = EBPFAgentName
Expand Down
61 changes: 61 additions & 0 deletions controllers/ebpf/agent-metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ func (c *AgentController) reconcileMetricsService(ctx context.Context, target *f
if c.AvailableAPIs.HasSvcMonitor() {
c.Managed.TryDelete(ctx, c.serviceMonitor)
}
if c.AvailableAPIs.HasPromRule() {
c.Managed.TryDelete(ctx, c.prometheusRule)
}
return nil
}

Expand All @@ -37,6 +40,13 @@ func (c *AgentController) reconcileMetricsService(ctx context.Context, target *f
return err
}
}

if c.AvailableAPIs.HasPromRule() {
promRules := c.agentPrometheusRule(target)
if err := reconcilers.GenericReconcile(ctx, c.Managed, &c.Client, c.prometheusRule, promRules, &report, helper.PrometheusRuleChanged); err != nil {
return err
}
}
return nil
}

Expand Down Expand Up @@ -103,3 +113,54 @@ func (c *AgentController) promServiceMonitoring(target *flowslatest.FlowCollecto
},
}
}

// agentPrometheusRule builds the PrometheusRule object that carries the eBPF
// agent alerts.
//
// The returned object is named constants.EBPFAgentPromoAlertRule, labelled with
// the agent name, and placed in the namespace returned by
// c.PrivilegedNamespace(). It always contains a single rule group,
// "NetobservEBPFAgentAlerts"; the group's rule list is empty (but non-nil) when
// every alert is listed in target.Metrics.DisableAlerts.
func (c *AgentController) agentPrometheusRule(target *flowslatest.FlowCollectorEBPF) *monitoringv1.PrometheusRule {
	// Start from an empty, non-nil slice so the group always has a rules list.
	alertRules := []monitoringv1.Rule{}

	// Dropped-flows alert: the eBPF hashmap table is full and new flows are not
	// being received. Skipped when listed in the disabled alerts.
	if shouldAddAlert(flowslatest.AlertDroppedFlows, target.Metrics.DisableAlerts) {
		pendingFor := monitoringv1.Duration("10m")
		annotations := map[string]string{
			"description": "NetObserv eBPF agent is not able to process new flows as it's hashmap is full. Hashmap table size can be increased by increasing cacheMaxFlows value in Flowcollector resource.",
			"summary":     "NetObserv eBPF is not able to process any new flows",
		}
		ruleLabels := map[string]string{
			"severity": "warning",
			"app":      "netobserv",
		}
		droppedFlows := monitoringv1.Rule{
			Alert:       string(flowslatest.AlertDroppedFlows),
			Annotations: annotations,
			Expr:        intstr.FromString("sum(rate(netobserv_agent_dropped_flows_total[1m])) > 0"),
			For:         &pendingFor,
			Labels:      ruleLabels,
		}
		alertRules = append(alertRules, droppedFlows)
	}

	meta := metav1.ObjectMeta{
		Name: constants.EBPFAgentPromoAlertRule,
		Labels: map[string]string{
			"app": constants.EBPFAgentName,
		},
		Namespace: c.PrivilegedNamespace(),
	}
	group := monitoringv1.RuleGroup{
		Name:  "NetobservEBPFAgentAlerts",
		Rules: alertRules,
	}

	return &monitoringv1.PrometheusRule{
		ObjectMeta: meta,
		Spec: monitoringv1.PrometheusRuleSpec{
			Groups: []monitoringv1.RuleGroup{group},
		},
	}
}

// shouldAddAlert reports whether the alert called name should be created.
// It returns false as soon as name is found in disabledList, and true when the
// list is empty, nil, or does not contain name.
func shouldAddAlert(name flowslatest.EBPFAgentAlert, disabledList []flowslatest.EBPFAgentAlert) bool {
	for i := range disabledList {
		if disabledList[i] == name {
			// Explicitly disabled by the user: do not add it.
			return false
		}
	}
	return true
}
4 changes: 4 additions & 0 deletions controllers/ebpf/agent_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ type AgentController struct {
volumes volumes.Builder
promSvc *corev1.Service
serviceMonitor *monitoringv1.ServiceMonitor
prometheusRule *monitoringv1.PrometheusRule
}

func NewAgentController(common *reconcilers.Instance) *AgentController {
Expand All @@ -117,6 +118,9 @@ func NewAgentController(common *reconcilers.Instance) *AgentController {
if common.AvailableAPIs.HasSvcMonitor() {
agent.serviceMonitor = common.Managed.NewServiceMonitor(constants.EBPFAgentMetricsSvcMonitoringName)
}
if common.AvailableAPIs.HasPromRule() {
agent.prometheusRule = common.Managed.NewPrometheusRule(constants.EBPFAgentPromoAlertRule)
}
return &agent
}

Expand Down
18 changes: 18 additions & 0 deletions docs/FlowCollector.md
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,15 @@ To filter a range of ports, use a "start-end" range, string format. For example
</tr>
</thead>
<tbody><tr>
<td><b>disableAlerts</b></td>
<td>[]enum</td>
<td>
`disableAlerts` is a list of alerts that should be disabled.
Possible values are:<br>
`NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br><br/>
</td>
<td>false</td>
</tr><tr>
<td><b>enable</b></td>
<td>boolean</td>
<td>
Expand Down Expand Up @@ -7606,6 +7615,15 @@ To filter a range of ports, use a "start-end" range, string format. For example
</tr>
</thead>
<tbody><tr>
<td><b>disableAlerts</b></td>
<td>[]enum</td>
<td>
`disableAlerts` is a list of alerts that should be disabled.
Possible values are:<br>
`NetObservDroppedFlows`, which is triggered when eBPF agent hashmap table is full.<br><br/>
</td>
<td>false</td>
</tr><tr>
<td><b>enable</b></td>
<td>boolean</td>
<td>
Expand Down

0 comments on commit 13b2a78

Please sign in to comment.