diff --git a/.travis.yml b/.travis.yml index fc29780c4b..538f7f849c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -64,6 +64,7 @@ env: - TRAVIS_FLAVOR=etcd - TRAVIS_FLAVOR=pgbouncer - TRAVIS_FLAVOR=supervisord + - TRAVIS_FLAVOR=phpfpm # Override travis defaults with empty jobs before_install: echo "OVERRIDING TRAVIS STEPS" diff --git a/Rakefile b/Rakefile index a33623d0fd..2963db7e7e 100755 --- a/Rakefile +++ b/Rakefile @@ -19,6 +19,7 @@ require './ci/mongo' require './ci/mysql' require './ci/nginx' require './ci/pgbouncer' +require './ci/phpfpm' require './ci/postgres' require './ci/rabbitmq' require './ci/redis' diff --git a/checks.d/php.py b/checks.d/php.py deleted file mode 100755 index 6b65d07e20..0000000000 --- a/checks.d/php.py +++ /dev/null @@ -1,123 +0,0 @@ -# stdlib -import re -import urllib2 -import urlparse - -# project -from util import headers -from checks import AgentCheck -from checks.utils import add_basic_auth - -class Php(AgentCheck): - """Tracks basic php-fpm metrics via the status module - * accepted conn - * listen queue - * max listen queue - * listen queue len - * idle processes - * active processes - * total processes - * max active processes - * max children reached - * slow requests - - Requires php-fpm pools to have the status option. 
- See http://www.php.net/manual/de/install.fpm.configuration.php#pm.status-path for more details - - """ - - def check(self, instance): - if 'php_status_url' not in instance: - raise Exception('php instance missing "php_status_url" value.') - tags = instance.get('tags', []) - - response, content_type = self._get_data(instance) - metrics = self.parse_text(response, tags) - - funcs = { - 'gauge': self.gauge, - 'rate': self.rate, - 'increment': self.increment - } - for row in metrics: - try: - name, value, tags, metric_type = row - func = funcs[metric_type] - func(name, value, tags) - except Exception: - self.log.error(u'Could not submit metric: %s' % repr(row)) - - def _get_data(self, instance): - url = instance.get('php_status_url') - req = urllib2.Request(url, None, headers(self.agentConfig)) - if 'php_status_user' in instance and 'php_status_password' in instance: - add_basic_auth(req, instance['php_status_user'], instance['php_status_password']) - - # Submit a service check for status page availability. 
- parsed_url = urlparse.urlparse(url) - php_ping_host = parsed_url.hostname - php_ping_port = parsed_url.port or 80 - service_check_name = 'php_status.can_connect' - service_check_tags = ['host:%s' % php_ping_host, 'port:%s' % php_ping_port] - try: - response = urllib2.urlopen(req) - except Exception: - self.service_check(service_check_name, AgentCheck.CRITICAL) - raise - else: - self.service_check(service_check_name, AgentCheck.OK) - - body = response.read() - resp_headers = response.info() - return body, resp_headers.get('Content-Type', 'text/plain') - - @classmethod - def parse_text(cls, response, tags): - - GAUGES = { - 'listen queue': 'php.listen_queue', - 'max listen queue': 'php.max_listen_queue', - 'listen queue len': 'php.listen_queue_len', - 'idle processes': 'php.idle_processes', - 'active processes': 'php.active_processes', - 'total processes': 'php.total_processes', - 'max active processes': 'php.max_active_processes', - 'max children reached': 'php.max_children_reached' - } - - RATES = { - - } - - INCREMENTS = { - 'accepted conn': 'php.accepted_conn', - 'slow requests': 'php.slow_requests' - } - - output = [] - # Loop through and extract the numerical values - for line in response.split('\n'): - values = line.split(': ') - if len(values) == 2: # match - metric, value = values - try: - value = float(value) - except ValueError: - continue - - # Send metric as a gauge, if applicable - if metric in GAUGES: - metric_name = GAUGES[metric] - output.append((metric_name, value, tags, 'gauge')) - - # Send metric as a rate, if applicable - if metric in RATES: - metric_name = RATES[metric] - output.append((metric_name, value, tags, 'rate')) - - # Send metric as a increment, if applicable - if metric in INCREMENTS: - metric_name = INCREMENTS[metric] - output.append((metric_name, value, tags, 'increment')) - - return output \ No newline at end of file diff --git a/checks.d/php_fpm.py b/checks.d/php_fpm.py new file mode 100644 index 0000000000..ac103000c7 --- 
/dev/null
+++ b/checks.d/php_fpm.py
@@ -0,0 +1,143 @@
+# 3p
+import requests
+
+# project
+from checks import AgentCheck
+from util import headers
+
+
+class PHPFPMCheck(AgentCheck):
+    """
+    Tracks basic php-fpm metrics via the status module
+    Requires php-fpm pools to have the status option.
+    See http://www.php.net/manual/de/install.fpm.configuration.php#pm.status-path for more details
+
+    Instance options read by this check: status_url, ping_url, user,
+    password, tags and timeout (seconds, defaults to DEFAULT_TIMEOUT).
+    """
+
+    SERVICE_CHECK_NAME = 'php_fpm.can_ping'
+
+    # Upper bound, in seconds, on every HTTP request: requests never times
+    # out on its own, so a hung web server would block the check forever.
+    DEFAULT_TIMEOUT = 20
+
+    GAUGES = {
+        'listen queue': 'php_fpm.listen_queue.size',
+        'idle processes': 'php_fpm.processes.idle',
+        'active processes': 'php_fpm.processes.active',
+        'total processes': 'php_fpm.processes.total',
+    }
+
+    RATES = {
+        'max children reached': 'php_fpm.processes.max_reached'
+    }
+
+    COUNTERS = {
+        'accepted conn': 'php_fpm.requests.accepted',
+        'slow requests': 'php_fpm.requests.slow'
+    }
+
+    def check(self, instance):
+        """
+        Collect the status metrics and/or emit the ping service check.
+
+        Raises an Exception when neither URL is configured, and re-raises
+        any status page failure once the ping has been processed.
+        """
+        status_url = instance.get('status_url')
+        ping_url = instance.get('ping_url')
+
+        auth = None
+        user = instance.get('user')
+        password = instance.get('password')
+
+        # An empty tags/timeout key in the YAML yields None, not the default
+        tags = instance.get('tags') or []
+        timeout = float(instance.get('timeout') or self.DEFAULT_TIMEOUT)
+
+        if user and password:
+            auth = (user, password)
+
+        if status_url is None and ping_url is None:
+            raise Exception("No status_url or ping_url specified for this instance")
+
+        pool = None
+        status_exception = None
+        if status_url is not None:
+            try:
+                pool = self._process_status(status_url, auth, tags, timeout)
+            except Exception as e:
+                # Keep going: the ping service check must still be sent
+                status_exception = e
+
+        if ping_url is not None:
+            self._process_ping(ping_url, auth, tags, pool, timeout)
+
+        if status_exception is not None:
+            raise status_exception
+
+    def _process_status(self, status_url, auth, tags, timeout=DEFAULT_TIMEOUT):
+        """
+        Fetch the JSON status page, submit its metrics, return the pool name.
+
+        A failed request or invalid JSON body is logged, then re-raised.
+        """
+        try:
+            # TODO: adding the 'full' parameter gets you per-process detailed
+            # information, which could be nice to parse and output as metrics
+            resp = requests.get(status_url, auth=auth,
+                                headers=headers(self.agentConfig),
+                                params={'json': True}, timeout=timeout)
+            resp.raise_for_status()
+
+            data = resp.json()
+        except Exception as e:
+            self.log.error("Failed to get metrics from {0}.\nError {1}".format(status_url, e))
+            raise
+
+        pool_name = data.get('pool', 'default')
+        metric_tags = tags + ["pool:{0}".format(pool_name)]
+
+        # One pass per metric type; the label only feeds the warning below
+        submitters = (
+            ('Gauge', self.GAUGES, self.gauge),
+            ('Rate', self.RATES, self.rate),
+            ('Counter', self.COUNTERS, self.increment),
+        )
+        for kind, mapping, submit in submitters:
+            for key, mname in mapping.iteritems():
+                if key not in data:
+                    self.log.warn("{0} metric {1} is missing from FPM status".format(kind, key))
+                    continue
+                submit(mname, int(data[key]), tags=metric_tags)
+
+        # return pool, to tag the service check with it if we have one
+        return pool_name
+
+    def _process_ping(self, ping_url, auth, tags, pool_name, timeout=DEFAULT_TIMEOUT):
+        """
+        Request the ping page and report the outcome as a service check.
+
+        Request failures are logged and reported as CRITICAL, not raised.
+        """
+        sc_tags = tags[:]
+        if pool_name is not None:
+            sc_tags.append("pool:{0}".format(pool_name))
+
+        try:
+            resp = requests.get(ping_url, auth=auth,
+                                headers=headers(self.agentConfig),
+                                timeout=timeout)
+            resp.raise_for_status()
+
+            if 'pong' not in resp.text:
+                raise Exception("Received unexpected reply to ping {0}".format(resp.text))
+
+        except Exception as e:
+            self.log.error("Failed to ping FPM pool {0} on URL {1}."
+                           "\nError {2}".format(pool_name, ping_url, e))
+            self.service_check(self.SERVICE_CHECK_NAME,
+                               AgentCheck.CRITICAL, tags=sc_tags, message=str(e))
+        else:
+            self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.OK, tags=sc_tags)
diff --git a/checks.d/php_ping.py b/checks.d/php_ping.py
deleted file mode 100755
index 81300ea110..0000000000
--- a/checks.d/php_ping.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# stdlib
-import re
-import urllib2
-import urlparse
-
-# project
-from util import headers
-from checks import AgentCheck
-from checks.utils import add_basic_auth
-
-class PhpPing(AgentCheck):
-    """Monitors php-fpm status via ping-url
-
-    Requires php-fpm pools to have the status option.
-    See http://www.php.net/manual/de/install.fpm.configuration.php#ping.path for more details
-
-    """
-
-    def check(self, instance):
-        if 'php_ping_url' not in instance:
-            raise Exception('php instance missing "php_ping_url" value.')
-        tags = instance.get('tags', [])
-
-        response, content_type = self._get_data(instance)
-        metrics = self.parse_status(response, tags)
-
-        funcs = {
-            'gauge': self.gauge,
-            'rate': self.rate
-        }
-        for row in metrics:
-            try:
-                name, value, tags, metric_type = row
-                func = funcs[metric_type]
-                func(name, value, tags)
-            except Exception:
-                self.log.error(u'Could not submit metric: %s' % repr(row))
-
-    def _get_data(self, instance):
-        url = instance.get('php_ping_url')
-        req = urllib2.Request(url, None, headers(self.agentConfig))
-        if 'php_ping_user' in instance and 'php_ping_password' in instance:
-            add_basic_auth(req, instance['php_ping_user'], instance['php_ping_password'])
-
-        # Submit a service check for status page availability.
-        parsed_url = urlparse.urlparse(url)
-        php_ping_host = parsed_url.hostname
-        php_ping_port = parsed_url.port or 80
-        service_check_name = 'php_ping.can_connect'
-        service_check_tags = ['host:%s' % php_ping_host, 'port:%s' % php_ping_port]
-        try:
-            response = urllib2.urlopen(req)
-        except Exception:
-            self.service_check(service_check_name, AgentCheck.CRITICAL)
-            raise
-        else:
-            self.service_check(service_check_name, AgentCheck.OK)
-
-        body = response.read()
-        resp_headers = response.info()
-        return body, resp_headers.get('Content-Type', 'text/plain')
-
-    @classmethod
-    def parse_status(cls, raw, tags):
-        output = []
-        parsed = re.search(r'pong', raw)
-        if parsed:
-            output.append(('php.ping', 1, tags, 'gauge'))
-
-        return output
diff --git a/ci/phpfpm.rb b/ci/phpfpm.rb
new file mode 100644
index 0000000000..29d150db2d
--- /dev/null
+++ b/ci/phpfpm.rb
@@ -0,0 +1,92 @@
+require './ci/common'
+
+# nginx release to download and build (override with NGINX_VERSION)
+def nginx_version
+  ENV['NGINX_VERSION'] || '1.7.9'
+end
+
+# PHP release to download and build with FPM enabled (override with PHP_VERSION)
+def php_version
+  ENV['PHP_VERSION'] || '5.6.6'
+end
+
+# Install prefix used for both the nginx and PHP builds
+def phpfpm_rootdir
+  "#{ENV['INTEGRATIONS_DIR']}/phpfpm_#{php_version}"
+end
+
+namespace :ci do
+  namespace :phpfpm do |flavor|
+    task :before_install => ['ci:common:before_install']
+
+    task :install => ['ci:common:install'] do
+      # Both builds are skipped when the install prefix already exists
+      unless Dir.exist? File.expand_path(phpfpm_rootdir)
+        sh %(curl -s -L\
+             -o $VOLATILE_DIR/nginx-#{nginx_version}.tar.gz\
+             http://nginx.org/download/nginx-#{nginx_version}.tar.gz)
+        sh %(mkdir -p #{phpfpm_rootdir})
+        sh %(mkdir -p $VOLATILE_DIR/nginx)
+        sh %(tar zxf $VOLATILE_DIR/nginx-#{nginx_version}.tar.gz\
+             -C $VOLATILE_DIR/nginx --strip-components=1)
+        sh %(cd $VOLATILE_DIR/nginx\
+             && ./configure --prefix=#{phpfpm_rootdir} --with-http_stub_status_module\
+             && make -j $CONCURRENCY\
+             && make install)
+        sh %(curl -s -L\
+             -o $VOLATILE_DIR/php-#{php_version}.tar.bz2\
+             http://us1.php.net/get/php-#{php_version}.tar.bz2/from/this/mirror)
+        sh %(mkdir -p $VOLATILE_DIR/php)
+        sh %(tar jxf $VOLATILE_DIR/php-#{php_version}.tar.bz2\
+             -C $VOLATILE_DIR/php --strip-components=1)
+        sh %(cd $VOLATILE_DIR/php\
+             && ./configure --prefix=#{phpfpm_rootdir} --enable-fpm\
+             && make -j $CONCURRENCY\
+             && make install)
+      end
+    end
+
+    # Copy the test configs into the prefix, then start nginx and php-fpm
+    task :before_script => ['ci:common:before_script'] do
+      sh %(cp $TRAVIS_BUILD_DIR/ci/resources/phpfpm/nginx.conf\
+           #{phpfpm_rootdir}/conf/nginx.conf)
+      sh %(cp $TRAVIS_BUILD_DIR/ci/resources/phpfpm/php-fpm.conf\
+           #{phpfpm_rootdir}/etc/php-fpm.conf)
+      sh %(#{phpfpm_rootdir}/sbin/nginx -g "pid #{ENV['VOLATILE_DIR']}/nginx.pid;")
+      sh %(#{phpfpm_rootdir}/sbin/php-fpm -g #{ENV['VOLATILE_DIR']}/php-fpm.pid)
+    end
+
+    task :script => ['ci:common:script'] do
+      this_provides = [
+        'phpfpm'
+      ]
+      Rake::Task['ci:common:run_tests'].invoke(this_provides)
+    end
+
+    # Stop both daemons through the pid files written by before_script
+    task :cleanup => ['ci:common:cleanup'] do
+      sh %(kill `cat $VOLATILE_DIR/nginx.pid`)
+      sh %(kill `cat $VOLATILE_DIR/php-fpm.pid`)
+    end
+
+    # Run every step, clean up unless SKIP_CLEANUP is set, then re-raise the failure
+    task :execute do
+      exception = nil
+      begin
+        %w(before_install install before_script script).each do |t|
+          Rake::Task["#{flavor.scope.path}:#{t}"].invoke
+        end
+      rescue => e
+        exception = e
+        puts "Failed task: #{e.class} #{e.message}".red
+      end
+      if ENV['SKIP_CLEANUP']
+        puts 'Skipping cleanup, disposable environments are great'.yellow
+      else
+        puts 'Cleaning up'
+        Rake::Task["#{flavor.scope.path}:cleanup"].invoke
+      end
+      fail exception if exception
+    end
+  end
+end
diff --git a/ci/resources/phpfpm/nginx.conf b/ci/resources/phpfpm/nginx.conf
new file mode 100644
index 0000000000..49b994bd95
--- /dev/null
+++ b/ci/resources/phpfpm/nginx.conf
@@ -0,0 +1,21 @@
+worker_processes 1;
+events {
+    worker_connections 1024;
+}
+http {
+    include mime.types;
+    default_type application/octet-stream;
+    sendfile on;
+    keepalive_timeout 65;
+    server {
+        listen 42424;
+        server_name localhost;
+        location ~ /(status|ping|.*\.php)$ {
+            root html;
+            fastcgi_pass 127.0.0.1:9000;
+            fastcgi_index index.php;
+            fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name;
+            include fastcgi_params;
+        }
+    }
+}
diff --git a/ci/resources/phpfpm/php-fpm.conf b/ci/resources/phpfpm/php-fpm.conf
new file mode 100644
index 0000000000..f9573d37f3
--- /dev/null
+++ b/ci/resources/phpfpm/php-fpm.conf
@@ -0,0 +1,11 @@
+[www]
+user = nobody
+group = nobody
+listen = 127.0.0.1:9000
+pm = dynamic
+pm.max_children = 5
+pm.start_servers = 2
+pm.min_spare_servers = 1
+pm.max_spare_servers = 3
+pm.status_path = /status
+ping.path = /ping
diff --git a/conf.d/php.yaml.example b/conf.d/php.yaml.example
deleted file mode 100755
index 9ef53ad525..0000000000
--- a/conf.d/php.yaml.example
+++ /dev/null
@@ -1,8 +0,0 @@
-init_config:
-
-instances:
-  - php_status_url: http://example.com/php_status
-    # php_status_user: example_user
-    # php_status_password: example_password
-    tags:
-      - instance:foo
diff --git a/conf.d/php_fpm.yaml.example b/conf.d/php_fpm.yaml.example
new file mode 100755
index 0000000000..ad2cdcffd6
--- /dev/null
+++ b/conf.d/php_fpm.yaml.example
@@ -0,0 +1,24 @@
+init_config:
+
+instances:
+  - # Get metrics from your FPM pool with this URL
+    status_url: http://localhost/status
+    # Get a reliable service check of your FPM pool with this one
+    ping_url: http://localhost/ping
+    # These 2 URLs should follow the options from your FPM pool
+    # See
http://php.net/manual/en/install.fpm.configuration.php
+    # * pm.status_path
+    # * ping.path
+    # You should configure your fastcgi passthru (nginx/apache) to
+    # catch these URLs and redirect them through the FPM pool target
+    # you want to monitor (FPM `listen` directive in the config, usually
+    # a UNIX socket or TCP socket).
+    #
+    # Use this if you have basic authentication on these pages
+    # user: bits
+    # password: D4T4D0G
+    #
+    # Array of custom tags
+    # By default metrics and service check will be tagged by pool and host
+    # tags:
+    #   - instance:foo
diff --git a/tests/test_php_fpm.py b/tests/test_php_fpm.py
new file mode 100644
index 0000000000..fa613030f5
--- /dev/null
+++ b/tests/test_php_fpm.py
@@ -0,0 +1,90 @@
+# stdlib
+import time
+
+# 3p
+from nose.plugins.attrib import attr
+
+# project
+from checks import AgentCheck
+from tests.common import AgentCheckTest
+
+# Illustrative sample of the JSON document served by /status?json
+# {
+#    "accepted conn": 350,
+#    "active processes": 1,
+#    "idle processes": 2,
+#    "listen queue": 0,
+#    "listen queue len": 0,
+#    "max active processes": 2,
+#    "max children reached": 0,
+#    "max listen queue": 0,
+#    "pool": "www",
+#    "process manager": "dynamic",
+#    "slow requests": 0,
+#    "start since": 4758,
+#    "start time": 1426601833,
+#    "total processes": 3
+# }
+
+
+@attr(requires='phpfpm')
+class PHPFPMCheckTest(AgentCheckTest):
+    CHECK_NAME = 'php_fpm'
+
+    def test_bad_status(self):
+        instance = {
+            'status_url': 'http://localhost:9001/status',
+            'tags': ['expectedbroken']
+        }
+
+        self.assertRaises(Exception, self.run_check, {'instances': [instance]})
+
+    def test_bad_ping(self):
+        instance = {
+            'ping_url': 'http://localhost:9001/status',
+            'tags': ['expectedbroken']
+        }
+
+        self.run_check({'instances': [instance]})
+        self.assertServiceCheck(
+            'php_fpm.can_ping',
+            status=AgentCheck.CRITICAL,
+            tags=['expectedbroken'],
+            count=1
+        )
+
+        self.coverage_report()
+
+    def test_status(self):
+        instance = {
+            'status_url': 'http://localhost:42424/status',
+            'ping_url': 'http://localhost:42424/ping',
+            'tags': ['cluster:forums']
+        }
+
+        self.run_check({'instances': [instance]})
+
+        metrics = [
+            'php_fpm.listen_queue.size',
+            'php_fpm.processes.idle',
+            'php_fpm.processes.active',
+            'php_fpm.processes.total',
+            'php_fpm.requests.slow',
+            'php_fpm.requests.accepted',
+        ]
+
+        expected_tags = ['cluster:forums', 'pool:www']
+
+        for mname in metrics:
+            self.assertMetric(mname, count=1, tags=expected_tags)
+
+        self.assertMetric('php_fpm.processes.idle', count=1, value=1)
+        self.assertMetric('php_fpm.processes.total', count=1, value=2)
+
+        self.assertServiceCheck('php_fpm.can_ping', status=AgentCheck.OK,
+                                count=1)
+        time.sleep(1)
+
+        # Run the check a second time to get the rate
+        self.run_check({'instances': [instance]})
+        self.assertMetric('php_fpm.processes.max_reached', count=1)