Skip to content
This repository has been archived by the owner on Jun 1, 2023. It is now read-only.

Commit

Permalink
Add profiles, move puppetdb metric defaults
Browse files Browse the repository at this point in the history
  • Loading branch information
suckatrash committed Jun 14, 2019
1 parent 91186d7 commit 375b898
Show file tree
Hide file tree
Showing 16 changed files with 478 additions and 313 deletions.
3 changes: 3 additions & 0 deletions .fixtures.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,7 @@ fixtures:
yumrepo:
repo: https://github.com/puppetlabs/puppetlabs-yumrepo_core.git
ref: 1.0.2
telegraf:
repo: https://github.com/voxpupuli/puppet-telegraf.git
ref: v2.1.0
forge_modules:
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ group :development do
gem "json", '= 1.8.1', require: false if Gem::Version.new(RUBY_VERSION.dup) == Gem::Version.new('2.1.9')
gem "json", '= 2.0.4', require: false if Gem::Requirement.create('~> 2.4.2').satisfied_by?(Gem::Version.new(RUBY_VERSION.dup))
gem "json", '= 2.1.0', require: false if Gem::Requirement.create(['>= 2.5.0', '< 2.7.0']).satisfied_by?(Gem::Version.new(RUBY_VERSION.dup))
gem "toml-rb", '= 1.1.2', require: false, platforms: [:ruby]
gem "puppet-module-posix-default-r#{minor_version}", '~> 0.3', require: false, platforms: [:ruby]
gem "puppet-module-posix-dev-r#{minor_version}", '~> 0.3', require: false, platforms: [:ruby]
gem "puppet-module-win-default-r#{minor_version}", '~> 0.3', require: false, platforms: [:mswin, :mingw, :x64_mingw]
Expand Down
23 changes: 14 additions & 9 deletions manifests/init.pp
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,19 @@
# Defaults to `[$trusted['certname']]`
#
# @param influxdb_urls
# The string for telegraf's config defining where influxdb is
# An array for telegraf's config defining where influxdb instances are
#
# @param telegraf_db_name
# The database in influxdb where telegraf metrics are stored
#
# @param telegraf_agent_interval
# How often the telegraf agent queries for metrics
# How often the telegraf agent queries for metrics. Defaults to "5s"
#
# @param http_response_timeout
# How long to wait for the queries by telegraf to finish before giving up
# How long to wait for the queries by telegraf to finish before giving up. Defaults to "5s"
#
# @param pg_query_interval
# How often postgres queries will run when monitoring a postgres host. Defaults to "10m"
#
# @param overwrite_dashboards
# Whether to overwrite the example Grafana dashboards.
Expand Down Expand Up @@ -203,13 +206,15 @@
Boolean $enable_telegraf = $puppet_metrics_dashboard::params::enable_telegraf,
Boolean $configure_telegraf = $puppet_metrics_dashboard::params::configure_telegraf,
Boolean $consume_graphite = $puppet_metrics_dashboard::params::consume_graphite,
Puppet_metrics_dashboard::HostList $master_list = $puppet_metrics_dashboard::params::master_list,
Puppet_metrics_dashboard::HostList $puppetdb_list = $puppet_metrics_dashboard::params::puppetdb_list,
Puppet_metrics_dashboard::HostList $postgres_host_list = $puppet_metrics_dashboard::params::postgres_host_list,
String $influxdb_urls = $puppet_metrics_dashboard::params::influxdb_urls,
Puppet_metrics_dashboard::HostList $master_list = $puppet_metrics_dashboard::params::master_list,
Puppet_metrics_dashboard::HostList $puppetdb_list = $puppet_metrics_dashboard::params::puppetdb_list,
Puppet_metrics_dashboard::HostList $postgres_host_list = $puppet_metrics_dashboard::params::postgres_host_list,
Puppet_metrics_dashboard::Puppetdb_metric $puppetdb_metrics = $puppet_metrics_dashboard::params::puppetdb_metrics,
Array[String] $influxdb_urls = $puppet_metrics_dashboard::params::influxdb_urls,
String $telegraf_db_name = $puppet_metrics_dashboard::params::telegraf_db_name,
Integer[1] $telegraf_agent_interval = $puppet_metrics_dashboard::params::telegraf_agent_interval,
Integer[1] $http_response_timeout = $puppet_metrics_dashboard::params::http_response_timeout,
String[2] $telegraf_agent_interval = $puppet_metrics_dashboard::params::telegraf_agent_interval,
String[2] $http_response_timeout = $puppet_metrics_dashboard::params::http_response_timeout,
String[2] $pg_query_interval = $puppet_metrics_dashboard::params::pg_query_interval,
) inherits puppet_metrics_dashboard::params {
if $manage_repos {
contain puppet_metrics_dashboard::repos
Expand Down
174 changes: 171 additions & 3 deletions manifests/params.pp
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,181 @@
$master_list = [$trusted['certname']]
$puppetdb_list = [$trusted['certname']]
$postgres_host_list = [$trusted['certname']]
$influxdb_urls = "['http://localhost:8086']"
$influxdb_urls = ['http://localhost:8086']
$telegraf_db_name = 'telegraf'
$telegraf_agent_interval = 5
$http_response_timeout = 5 # this is the default value for the HTTP JSON Input
$telegraf_agent_interval = '5s'
$http_response_timeout = '5s' # this is the default value for the HTTP JSON Input
$pg_query_interval = '10m'

$overwrite_dashboards_file = '/opt/puppetlabs/puppet/cache/state/overwrite_dashboards_disabled'

# Single ActiveMQ broker queue metric for the puppetlabs.puppetdb.commands
# destination. Each entry is a hash with a short 'name' and the MBean 'url'.
$activemq_metrics = [
  {
    'name' => 'amq_metrics',
    'url'  => 'org.apache.activemq:type=Broker,brokerName=localhost,destinationType=Queue,destinationName=puppetlabs.puppetdb.commands',
  },
]

# Global PuppetDB mq metrics. Every entry follows the same pattern, so the
# list is generated from the metric suffixes:
#   name => "global_<suffix>"
#   url  => "puppetlabs.puppetdb.mq:name=global.<suffix>"
$base_metrics = [
  'command-parse-time',
  'discarded',
  'fatal',
  'message-persistence-time',
  'retried',
  'retry-counts',
  'seen',
  'processed',
  'processing-time',
].map |$metric| {
  {
    'name' => "global_${metric}",
    'url'  => "puppetlabs.puppetdb.mq:name=global.${metric}",
  }
}

# Additional global mq metrics, kept separate from $base_metrics so they can
# be included selectively. Same name/url pattern as $base_metrics:
#   name => "global_<suffix>"
#   url  => "puppetlabs.puppetdb.mq:name=global.<suffix>"
$base_metrics_through_4_2 = [
  'generate-retry-message-time',
  'retry-persistence-time',
].map |$metric| {
  {
    'name' => "global_${metric}",
    'url'  => "puppetlabs.puppetdb.mq:name=global.${metric}",
  }
}

# PuppetDB storage metrics, generated from the metric suffixes:
#   name => "storage_<suffix>"
#   url  => "puppetlabs.puppetdb.storage:name=<suffix>"
$storage_metrics = [
  'add-edges',
  'add-resources',
  'catalog-hash',
  'catalog-hash-match-time',
  'catalog-hash-miss-time',
  'gc-catalogs-time',
  'gc-environments-time',
  'gc-fact-paths',
  'gc-params-time',
  'gc-report-statuses',
  'gc-time',
  'new-catalog-time',
  'new-catalogs',
  'replace-catalog-time',
  'replace-facts-time',
  'resource-hashes',
  'store-report-time',
].map |$metric| {
  {
    'name' => "storage_${metric}",
    'url'  => "puppetlabs.puppetdb.storage:name=${metric}",
  }
}

#TODO: Track these on a less frequent cadence because they are slow to run
# Storage metrics generated with the same name/url pattern as the other
# storage metrics ("storage_<suffix>" / "puppetlabs.puppetdb.storage:name=<suffix>").
# NOTE(review): 'catalog-volitilty' is reproduced exactly as written; it
# presumably mirrors the metric name exposed by PuppetDB - verify before
# "correcting" the spelling.
$storage_metrics_db_queries = [
  'catalog-volitilty',
  'duplicate-catalogs',
  'duplicate-pct',
].map |$metric| {
  {
    'name' => "storage_${metric}",
    'url'  => "puppetlabs.puppetdb.storage:name=${metric}",
  }
}

# Per-command version numbers that are embedded in the version-specific
# PuppetDB mq metric names, selected from the pe_server_version fact.
# The dots in the version patterns are escaped so they only match a literal
# '.', not any character. When the fact is absent or matches no pattern,
# the default (newest known) numbers are used.
$numbers = $facts['pe_server_version'] ? {
  /^2015\.2/    => {'catalogs' => 6, 'facts' => 4, 'reports' => 6},
  /^2015\.3/    => {'catalogs' => 7, 'facts' => 4, 'reports' => 6},
  /^2016\.[12]/ => {'catalogs' => 8, 'facts' => 4, 'reports' => 7},
  /^2016\.[45]/ => {'catalogs' => 9, 'facts' => 5, 'reports' => 8},
  /^2017\.[12]/ => {'catalogs' => 9, 'facts' => 5, 'reports' => 8},
  default       => {'catalogs' => 9, 'facts' => 5, 'reports' => 8},
}

# Per-command mq metrics whose MBean names embed a command version number.
# The numbers come from $numbers ('catalogs', 'facts', 'reports' keys).
# The 'name' values are reproduced exactly as-is, including the
# 'mq_store_reports_retry-counts' spelling, since they are runtime strings.
$version_specific_metrics = [
  {
    'name' => 'mq_replace_catalog_retried',
    'url'  => "puppetlabs.puppetdb.mq:name=replace catalog.${numbers['catalogs']}.retried",
  },
  {
    'name' => 'mq_replace_catalog_retry-counts',
    'url'  => "puppetlabs.puppetdb.mq:name=replace catalog.${numbers['catalogs']}.retry-counts",
  },
  {
    'name' => 'mq_replace_facts_retried',
    'url'  => "puppetlabs.puppetdb.mq:name=replace facts.${numbers['facts']}.retried",
  },
  {
    'name' => 'mq_replace_facts_retry-counts',
    'url'  => "puppetlabs.puppetdb.mq:name=replace facts.${numbers['facts']}.retry-counts",
  },
  {
    'name' => 'mq_store_report_retried',
    'url'  => "puppetlabs.puppetdb.mq:name=store report.${numbers['reports']}.retried",
  },
  {
    'name' => 'mq_store_reports_retry-counts',
    'url'  => "puppetlabs.puppetdb.mq:name=store report.${numbers['reports']}.retry-counts",
  },
]

# Connection pool metrics for the PuppetDB read and write pools.
# For each pool (read first, then write) the same six gauges are emitted:
#   name => "<pool>_pool_<gauge>"
#   url  => "puppetlabs.puppetdb.database:name=<pool>.pool.<gauge>"
$connection_pool_metrics = ['PDBReadPool', 'PDBWritePool'].reduce([]) |$collected, $pool| {
  $collected + ([
    'ActiveConnections',
    'IdleConnections',
    'PendingConnections',
    'TotalConnections',
    'Usage',
    'Wait',
  ].map |$gauge| {
    {
      'name' => "${pool}_pool_${gauge}",
      'url'  => "puppetlabs.puppetdb.database:name=${pool}.pool.${gauge}",
    }
  })
}

# PuppetDB HA sync metrics, generated from the metric suffixes:
#   name => "ha_<suffix>"
#   url  => "puppetlabs.puppetdb.ha:name=<suffix>"
$ha_sync_metrics = [
  'last-sync-succeeded',
  'seconds-since-last-successful-sync',
  'failed-request-counter',
  'sync-duration',
  'catalogs-sync-duration',
  'reports-sync-duration',
  'factsets-sync-duration',
  'nodes-sync-duration',
  'record-transfer-duration',
].map |$metric| {
  {
    'name' => "ha_${metric}",
    'url'  => "puppetlabs.puppetdb.ha:name=${metric}",
  }
}

# Default set of PuppetDB metrics to collect, chosen from the
# pe_server_version fact. Cases are evaluated in order, so the more specific
# 2016.4/2016.5 pattern must stay ahead of the generic 2016 pattern.
# All dots are escaped so they match a literal '.' only.
# When the fact is absent or matches nothing, the default set (no ActiveMQ,
# no HA sync metrics) is used.
# lint:ignore:140chars
$puppetdb_metrics = $facts['pe_server_version'] ? {
  /^2015\./       =>
    $activemq_metrics,
  /^2016\.[45]\./ =>
    $activemq_metrics + $base_metrics + $base_metrics_through_4_2 + $storage_metrics + $connection_pool_metrics + $version_specific_metrics + $ha_sync_metrics,
  /^2016\./       =>
    $activemq_metrics + $base_metrics + $base_metrics_through_4_2 + $storage_metrics + $connection_pool_metrics + $version_specific_metrics,
  /^201[78]\./    =>
    $activemq_metrics + $base_metrics + $storage_metrics + $connection_pool_metrics + $version_specific_metrics + $ha_sync_metrics,
  default         =>
    $base_metrics + $storage_metrics + $connection_pool_metrics + $version_specific_metrics,
}
# lint:endignore

case $facts['os']['family'] {
'RedHat': {
$influx_db_service_name = 'influxdb'
Expand Down
41 changes: 41 additions & 0 deletions manifests/profile/compiler.pp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# @summary Apply this defined type to a master or compiler to collect puppetserver metrics
#
# Declares two telegraf inputs: one polling the puppetserver status endpoint
# and one polling the file-sync-client-service status endpoint.
#
# @param timeout
#   Default timeout of http calls. Defaults to the module's
#   http_response_timeout setting.
#
# @param compiler
#   The FQDN of the compiler / master, or a `[host, port]` tuple. Defaults to
#   the FQDN of the server where the profile is applied. When a tuple is
#   given, its port takes precedence over the `port` parameter.
#
# @param port
#   The port the status API is queried on. Defaults to 8140.
#
# @param interval
#   How often telegraf polls these inputs. Defaults to "5s".
#
define puppet_metrics_dashboard::profile::compiler (
  String[2]                              $timeout  = $puppet_metrics_dashboard::params::http_response_timeout,
  Variant[String,Tuple[String, Integer]] $compiler = $facts['networking']['fqdn'],
  Integer[1]                             $port     = 8140,
  String[2]                              $interval = '5s',
){

  # $compiler may be a [host, port] tuple. Interpolating the tuple directly
  # would put its array representation into resource titles and URLs, so
  # split it into its host and port parts first.
  $compiler_host = $compiler ? {
    Tuple[String, Integer] => $compiler[0],
    default                => $compiler,
  }
  $compiler_port = $compiler ? {
    Tuple[String, Integer] => $compiler[1],
    default                => $port,
  }

  telegraf::input { "puppetserver_metrics_${compiler_host}":
    plugin_type => 'httpjson',
    options     => [{
      'name'                 => 'puppet_stats',
      'servers'              => [ "https://${compiler_host}:${compiler_port}/status/v1/services?level=debug" ],
      'method'               => 'GET',
      'insecure_skip_verify' => true,
      'response_timeout'     => $timeout,
      # Previously accepted but ignored; now passed through to telegraf.
      'interval'             => $interval,
    }],
    notify      => Service['telegraf'],
    require     => Package['telegraf'],
  }

  telegraf::input { "pe_last_file_sync_${compiler_host}":
    plugin_type => 'http',
    options     => [{
      'urls'                 => [ "https://${compiler_host}:${compiler_port}/status/v1/services/file-sync-client-service?level=debug" ],
      'insecure_skip_verify' => true,
      'data_format'          => 'json',
      'json_string_fields'   => ['status_repos_puppet-code_latest_commit_date'],
      'timeout'              => $timeout,
      'interval'             => $interval,
    }],
    notify      => Service['telegraf'],
    require     => Package['telegraf'],
  }
}
54 changes: 54 additions & 0 deletions manifests/profile/master/postgres.pp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# @summary Apply this defined type to an agent running pe-postgresql to collect postgres metrics
#
# @param query_interval
#   How often to run the queries, as a telegraf duration string (for example
#   "10m"). Defaults to the module's pg_query_interval setting.
#
# @param postgres_host
#   The FQDN of the postgres host, or a `[host, port]` tuple. Defaults to the
#   FQDN of the server where the profile is applied. When a tuple is given,
#   its port takes precedence over the `port` parameter.
#
# @param port
#   The port postgres is reached on. Defaults to 5432.
#
define puppet_metrics_dashboard::profile::master::postgres (
  Variant[String,Tuple[String, Integer]] $postgres_host  = $facts['networking']['fqdn'],
  String[2]                              $query_interval = $puppet_metrics_dashboard::params::pg_query_interval,
  Integer[1]                             $port           = 5432,
){

  # $postgres_host may be a [host, port] tuple. Interpolating the tuple
  # directly would put its array representation into the resource title and
  # connection string, so split it into its host and port parts first.
  $pg_host = $postgres_host ? {
    Tuple[String, Integer] => $postgres_host[0],
    default                => $postgres_host,
  }
  $pg_port = $postgres_host ? {
    Tuple[String, Integer] => $postgres_host[1],
    default                => $port,
  }

  # Declare the telegraf certs resource only once, even when this profile is
  # declared for several hosts.
  # NOTE(review): presumably this creates the key/cert/ca files under
  # /etc/telegraf that the connection string below points at - confirm.
  if ! defined(Puppet_metrics_dashboard::Certs['telegraf']) {
    puppet_metrics_dashboard::certs{'telegraf':
      notify  => Service['telegraf'],
      require => Package['telegraf'],
      before  => Service['telegraf'],
    }
  }

  telegraf::input { "pe_postgres_${pg_host}":
    plugin_type => 'postgresql_extensible',
    options     => [{
      'interval'      => $query_interval,
      'address'       => "postgres://telegraf@${pg_host}:${pg_port}/pe-puppetdb?sslmode=require&sslkey=/etc/telegraf/${trusted['certname']}_key.pem&sslcert=/etc/telegraf/${trusted['certname']}_cert.pem&sslrootcert=/etc/telegraf/ca.pem",
      # NOTE(review): this is the FQDN of the node the profile is applied to,
      # not $postgres_host; confirm that is intended when monitoring a
      # remote postgres host.
      'outputaddress' => $facts['networking']['fqdn'],
      'databases'     => ['pe-puppetdb','pe-rbac','pe-activity','pe-classifier'],
      'query'         => [{
        'sqlquery'   => 'SELECT * FROM pg_stat_database',
        'version'    => 901,
        'withdbname' => false,
      },{
        'sqlquery'   => 'SELECT relname as s_table, pg_relation_size(relid) as size FROM pg_catalog.pg_statio_user_tables ORDER BY pg_total_relation_size(relid) DESC',
        'version'    => 901,
        'withdbname' => false,
        'tagvalue'   => 's_table',
      },{
        'sqlquery'   => 'SELECT relname as v_table, autovacuum_count, vacuum_count, n_live_tup, n_dead_tup FROM pg_stat_user_tables',
        'version'    => 901,
        'withdbname' => false,
        'tagvalue'   => 'v_table',
      },{
        'sqlquery'   => 'SELECT relname as io_table, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables',
        'version'    => 901,
        'withdbname' => false,
        'tagvalue'   => 'io_table',
      }]
    }],
    notify      => Service['telegraf'],
    require     => Package['telegraf'],
  }
}
Loading

0 comments on commit 375b898

Please sign in to comment.