Skip to content

Commit

Permalink
Merge pull request #1857 from pkudikyala/NR-89144-Apache-Hadoop
Browse files Browse the repository at this point in the history
NR-89144 Apache Hadoop quickstart
  • Loading branch information
mdumpati authored Jul 19, 2023
2 parents 361d846 + 003e8a6 commit ae5627f
Show file tree
Hide file tree
Showing 16 changed files with 1,694 additions and 0 deletions.
32 changes: 32 additions & 0 deletions alert-policies/apache-hadoop/NameNodeDeadDataNodes.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Alert condition name
name: Dead DataNodes

# Human-readable summary of what the condition detects
description: |+
  This alert is triggered if the dead DataNodes exceeds 1 for 5 minutes.
# Condition type
type: STATIC

# NRQL that produces the value evaluated against the terms below
nrql:
  query: "SELECT latest(NumDeadDataNodes) AS 'Dead Data Nodes' FROM HadoopNameNodeSample"

# Aggregation applied to the NRQL result before it is compared with
# terms.threshold (Default: SINGLE_VALUE)
valueFunction: SINGLE_VALUE

# Critical and Warning thresholds for the condition
terms:
  - priority: CRITICAL
    # Comparison made between the query value and the threshold
    operator: ABOVE
    # Threshold value that triggers a violation
    threshold: 1
    # Evaluation window in seconds; allowed range 120 - 3600
    thresholdDuration: 300
    # Number of data points in the window that must be in violation
    thresholdOccurrences: ALL

# Seconds after which an open violation closes automatically;
# allowed range 300 - 2592000 (Default: 86400 [1 day])
violationTimeLimitSeconds: 86400
41 changes: 41 additions & 0 deletions alert-policies/apache-hadoop/NameNodeMissingBlocks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Alert condition name
name: Missing Blocks in HDFS

# Human-readable summary of what the condition detects
description: |+
  This alert is triggered if the missing blocks exceeds 3 for 5 minutes.
# Condition type
type: STATIC

# NRQL that produces the value evaluated against the terms below
nrql:
  query: "SELECT latest(NumberOfMissingBlocks) AS 'Missing Blocks' FROM HadoopNameNodeSample"

# Aggregation applied to the NRQL result before it is compared with
# terms.threshold (Default: SINGLE_VALUE)
valueFunction: SINGLE_VALUE

# Critical and Warning thresholds for the condition
terms:
  - priority: CRITICAL
    # Comparison made between the query value and the threshold
    operator: ABOVE
    # Threshold value that triggers a violation
    threshold: 3
    # Evaluation window in seconds; allowed range 120 - 3600
    thresholdDuration: 300
    # Number of data points in the window that must be in violation
    thresholdOccurrences: ALL
  - priority: WARNING
    # Comparison made between the query value and the threshold
    operator: ABOVE
    # Threshold value that triggers a violation
    threshold: 1
    # Evaluation window in seconds; allowed range 120 - 3600
    # (Baseline conditions additionally require a multiple of 60)
    thresholdDuration: 300
    # Number of data points in the window that must be in violation
    thresholdOccurrences: ALL

# Seconds after which an open violation closes automatically;
# allowed range 300 - 2592000 (Default: 86400 [1 day])
violationTimeLimitSeconds: 86400
41 changes: 41 additions & 0 deletions alert-policies/apache-hadoop/NameNodeVolumeFailures.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Alert condition name
name: NameNode Volume Failures

# Human-readable summary of what the condition detects
description: |+
  This alert is triggered if the volume failures exceeds 2 for 5 minutes.
# Condition type
type: STATIC

# NRQL that produces the value evaluated against the terms below
nrql:
  query: "SELECT latest(VolumeFailuresTotal) AS 'Volume Failures' FROM HadoopNameNodeSample"

# Aggregation applied to the NRQL result before it is compared with
# terms.threshold (Default: SINGLE_VALUE)
valueFunction: SINGLE_VALUE

# Critical and Warning thresholds for the condition
terms:
  - priority: CRITICAL
    # Comparison made between the query value and the threshold
    operator: ABOVE
    # Threshold value that triggers a violation
    threshold: 2
    # Evaluation window in seconds; allowed range 120 - 3600
    thresholdDuration: 300
    # Number of data points in the window that must be in violation
    thresholdOccurrences: ALL
  - priority: WARNING
    # Comparison made between the query value and the threshold
    operator: ABOVE
    # Threshold value that triggers a violation
    threshold: 1
    # Evaluation window in seconds; allowed range 120 - 3600
    # (Baseline conditions additionally require a multiple of 60)
    thresholdDuration: 300
    # Number of data points in the window that must be in violation
    thresholdOccurrences: ALL

# Seconds after which an open violation closes automatically;
# allowed range 300 - 2592000 (Default: 86400 [1 day])
violationTimeLimitSeconds: 86400
41 changes: 41 additions & 0 deletions alert-policies/apache-hadoop/UsedDiskPercentage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Name of the alert
name: Used Disk Percent

# Description and details
description: |+
  This alert is triggered when used disk space exceeds 90% for at least 5 minutes.
# Type of alert
type: STATIC

# NRQL query whose result is compared against the terms below
nrql:
  # NOTE(review): the other apache-hadoop conditions query HadoopNameNodeSample;
  # confirm HadoopNameNodeSampleMetrics is the intended event type here.
  query: "SELECT latest(PercentUsed) FROM HadoopNameNodeSampleMetrics"

# Function used to aggregate the NRQL query value(s) for comparison to the
# terms.threshold (Default: SINGLE_VALUE)
valueFunction: SINGLE_VALUE

# List of Critical and Warning thresholds for the condition
terms:
  - priority: CRITICAL
    # Operator used to compare against the threshold. Must be ABOVE: the
    # condition is meant to fire when usage exceeds the threshold, as the
    # description states; BELOW would violate on healthy disks instead.
    operator: ABOVE
    # Value (percent used) that triggers a violation
    threshold: 90
    # Time in seconds; 120 - 3600
    thresholdDuration: 300
    # How many data points must be in violation for the duration
    thresholdOccurrences: ALL
  - priority: WARNING
    # Same direction as the critical term, at a lower usage level
    operator: ABOVE
    # Value (percent used) that triggers a violation
    threshold: 85
    # Time in seconds; 120 - 3600, must be a multiple of 60 for Baseline conditions
    thresholdDuration: 300
    # How many data points must be in violation for the duration
    thresholdOccurrences: ALL

# Duration after which a violation automatically closes
# Time in seconds; 300 - 2592000 (Default: 86400 [1 day])
violationTimeLimitSeconds: 86400
Binary file added dashboards/apache-hadoop/apache-hadoop-01.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added dashboards/apache-hadoop/apache-hadoop-02.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added dashboards/apache-hadoop/apache-hadoop-03.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added dashboards/apache-hadoop/apache-hadoop-04.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added dashboards/apache-hadoop/apache-hadoop-05.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added dashboards/apache-hadoop/apache-hadoop-06.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added dashboards/apache-hadoop/apache-hadoop-07.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit ae5627f

Please sign in to comment.