diff --git a/docs/Configuration.md b/docs/Configuration.md index fe590cb73..a4778c51c 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -68,17 +68,19 @@ Each exporter additionally takes a unique set of environment variables to furthe The job of the commit time exporter is to find relevant builds in OpenShift and associate a commit from the build's source code repository with a container image built from that commit. We capture a timestamp for the commit, and the resulting image hash, so that the Deploy Time Exporter can later associate that image with a production deployment. -In order for proper collection, we require that all builds associated with a particular application be labelled with the same `app.kubernetes.io/name=` label. +In order for proper collection, we require that all builds associated with a particular application be labelled with the same `app.kubernetes.io/name=` label. + +Currently we only support GitHub. Open an issue or a pull request to request support for additional Git providers. (BitBucket and GitLab are on our roadmap.) #### Suggested Secrets -Create a secret containing your GitHub token. +Create a secret containing your Git username and token. - oc create secret generic github-secret --from-literal=GITHUB_USER= --from-literal=GITHUB_TOKEN= -n pelorus + oc create secret generic github-secret --from-literal=GIT_USER= --from-literal=GIT_TOKEN= -n pelorus -Create a secret containing your GitHub token and GitHub Enterprise API. An API example is `github.mycompany.com/api/v3` +Create a secret containing your Git username, token, and API. 
An API example is `github.mycompany.com/api/v3` - oc create secret generic github-secret --from-literal=GITHUB_USER= --from-literal=GITHUB_TOKEN= --from-literal=GITHUB_API= -n pelorus + oc create secret generic github-secret --from-literal=GIT_USER= --from-literal=GIT_TOKEN= --from-literal=GIT_API= -n pelorus #### Sample Values @@ -98,16 +100,21 @@ exporters: #### Environment Variables -This exporter supports several configuration options, passed via environment variables +This exporter provides several configuration options, passed via environment variables. + | Variable | Required | Explanation | Default Value | |---|---|---|---| +| `GIT_USER` | yes | User's github username | unset | +| `GIT_TOKEN` | yes | User's Github API Token | unset | +| `GIT_API` | no | Github API FQDN. This allows the override for Github Enterprise users. | `api.github.com` | | `LOG_LEVEL` | no | Set the log level. One of `DEBUG`, `INFO`, `WARNING`, `ERROR` | `INFO` | | `APP_LABEL` | no | Changes the label key used to identify applications | `app.kubernetes.io/name` | | `NAMESPACES` | no | Restricts the set of namespaces from which metrics will be collected. ex: `myapp-ns-dev,otherapp-ci` | unset; scans all namespaces | -| `GITHUB_USER` | yes | User's github username | unset | -| `GITHUB_TOKEN` | yes | User's Github API Token | unset | -| `GITHUB_API` | yes | Github API FQDN. This allows the override for Github Enterprise users. | `api.github.com` | +| DEPRECATED `GITHUB_USER` | no | User's github username | unset | +| DEPRECATED `GITHUB_TOKEN` | no | User's Github API Token | unset | +| DEPRECATED `GITHUB_API` | no | Github API FQDN. This allows the override for Github Enterprise users. 
| `api.github.com` | + ### Deploy Time Exporter @@ -118,7 +125,7 @@ In order for proper collection, we require that all deployments associated with #### Environment Variables -This exporter supports several configuration options, passed via environment variables +This exporter provides several configuration options, passed via environment variables | Variable | Required | Explanation | Default Value | |---|---|---|---| @@ -144,7 +151,7 @@ Create a secret containing your Jira information. #### Environment Variables -This exporter supports several configuration options, passed via environment variables +This exporter provides several configuration options, passed via environment variables | Variable | Required | Explanation | Default Value | |---|---|---|---| diff --git a/exporters/committime/app.py b/exporters/committime/app.py index 5c1b25e26..ee624ff50 100755 --- a/exporters/committime/app.py +++ b/exporters/committime/app.py @@ -1,20 +1,16 @@ #!/usr/bin/python3 -import json -import logging +from collector_bitbucket import BitbucketCommitCollector +from collector_gitlab import GitLabCommitCollector +from collector_github import GitHubCommitCollector import os import pelorus -import requests -import re import time -from datetime import datetime -from jsonpath_ng import parse from kubernetes import client - from openshift.dynamic import DynamicClient from prometheus_client import start_http_server -from prometheus_client.core import GaugeMetricFamily, REGISTRY +from prometheus_client.core import REGISTRY -REQUIRED_CONFIG = ['GITHUB_USER', 'GITHUB_TOKEN'] +REQUIRED_CONFIG = ['GIT_USER', 'GIT_TOKEN'] pelorus.load_kube_config() k8s_config = client.Configuration() @@ -22,233 +18,33 @@ dyn_client = DynamicClient(k8s_client) -class CommitCollector(object): - - def __init__(self, username, token, namespaces, apps, git_api=None): - self._username = username - self._token = token - self._namespaces = namespaces - self._apps = apps - self._git_api = git_api - self.commit_dict 
= {} - - def collect(self): - commit_metric = GaugeMetricFamily('github_commit_timestamp', - 'Commit timestamp', labels=['namespace', 'app', 'image_sha']) - commit_metrics = self.generate_commit_metrics_list(self._namespaces) - for my_metric in commit_metrics: - logging.info("Namespace: %s, App: %s, Build: %s, Timestamp: %s" - % ( - my_metric.namespace, - my_metric.name, - my_metric.build_name, - str(float(my_metric.commit_timestamp)) - ) - ) - commit_metric.add_metric([my_metric.namespace, my_metric.name, my_metric.image_hash], - my_metric.commit_timestamp) - yield commit_metric - - def match_image_id(self, replicationController, image_hash): - for container in replicationController.spec.template.spec.containers: - con_image = container.image - image_tokens = con_image.split('@') - if len(image_tokens) < 2: - return False - con_image_hash = image_tokens[1] - if con_image_hash == image_hash: - return True - return False - - def convert_date_time_offset_to_timestamp(self, date_time): - date_time = date_time[:-4] - timestamp = datetime.strptime(date_time, '%Y-%m-%d %H:%M:%S %z') - unixformattime = timestamp.timestamp() - return unixformattime - - def convert_timestamp_to_date_time(self, timestamp): - date_time = datetime(timestamp) - return date_time.strftime('%Y-%m-%dT%H:%M:%SZ') - - def generate_commit_metrics_list(self, namespaces): - - if not namespaces: - logging.info("No namespaces specified, watching all namespaces") - v1_namespaces = dyn_client.resources.get(api_version='v1', kind='Namespace') - namespaces = [namespace.metadata.name for namespace in v1_namespaces.get().items] - else: - logging.info("Watching namespaces: %s" % (namespaces)) - - # Initialize metrics list - metrics = [] - for namespace in namespaces: - # Initialized variables - builds = [] - apps = [] - builds_by_app = {} - app_label = pelorus.get_app_label() - logging.info("Searching for builds with label: %s in namespace: %s" % (app_label, namespace)) - - v1_builds = 
dyn_client.resources.get(api_version='build.openshift.io/v1', kind='Build') - # only use builds that have the app label - builds = v1_builds.get(namespace=namespace, label_selector=app_label) - - # use a jsonpath expression to find all values for the app label - jsonpath_str = "$['items'][*]['metadata']['labels']['" + str(app_label) + "']" - jsonpath_expr = parse(jsonpath_str) - - found = jsonpath_expr.find(builds) - - apps = [match.value for match in found] - - if not apps: - continue - # remove duplicates - apps = list(dict.fromkeys(apps)) - builds_by_app = {} - - for app in apps: - builds_by_app[app] = list(filter(lambda b: b.metadata.labels[app_label] == app, builds.items)) - - metrics += self.get_metrics_from_apps(builds_by_app, namespace) - - return metrics - - # get_metrics_from_apps - expects a sorted array of build data sorted by app label - def get_metrics_from_apps(self, apps, namespace): - metrics = [] - for app in apps: - - builds = apps[app] - - jenkins_builds = list(filter(lambda b: b.spec.strategy.type == 'JenkinsPipeline', builds)) - code_builds = list(filter(lambda b: b.spec.strategy.type in ['Source', 'Binary'], builds)) - - # assume for now that there will only be one repo/branch per app - # For jenkins pipelines, we need to grab the repo data - # then find associated s2i/docker builds from which to pull commit & image data - repo_url = None - if jenkins_builds: - # we will default to the repo listed in '.spec.source.git' - repo_url = jenkins_builds[0].spec.source.git.uri - - # however, in cases where the Jenkinsfile and source code are separate, we look for params - git_repo_regex = re.compile(r"(\w+://)(.+@)*([\w\d\.]+)(:[\d]+){0,1}/*(.*)") - for env in jenkins_builds[0].spec.strategy.jenkinsPipelineStrategy.env: - result = git_repo_regex.match(env.value) - if result: - repo_url = env.value - - for build in code_builds: - try: - metric = self.get_metric_from_build(build, app, namespace, repo_url) - except Exception: - logging.error("Cannot 
collect metrics from build: %s" % (build.metadata.name)) - - if metric: - metrics.append(metric) - return metrics - - def get_metric_from_build(self, build, app, namespace, repo_url): - try: - - metric = CommitTimeMetric(app, self._git_api) - - if build.spec.source.git: - repo_url = build.spec.source.git.uri - - metric.repo_url = repo_url - commit_sha = build.spec.revision.git.commit - metric.build_name = build.metadata.name - metric.build_config_name = build.metadata.labels.buildconfig - metric.namespace = build.metadata.namespace - labels = build.metadata.labels - metric.labels = json.loads(str(labels).replace("\'", "\"")) - - metric.commit_hash = commit_sha - metric.name = app + '-' + commit_sha - metric.commiter = build.spec.revision.git.author.name - metric.image_location = build.status.outputDockerImageReference - metric.image_hash = build.status.output.to.imageDigest - # Check the cache for the commit_time, if not call the API - metric_ts = self.commit_dict.get(commit_sha) - if metric_ts is None: - logging.debug("sha: %s, commit_timestamp not found in cache, executing API call." % (commit_sha)) - metric.getCommitTime() - # If commit time is None, then we could not get the value from the API - # This is due to calling a non Github API. - if metric.commit_time is None: - return None - # Add the timestamp to the cache - self.commit_dict[metric.commit_hash] = metric.commit_timestamp - else: - metric.commit_timestamp = self.commit_dict[commit_sha] - logging.debug("Returning sha: %s, commit_timestamp: %s, from cache." % ( - commit_sha, metric.commit_timestamp)) - - return metric - - except Exception as e: - logging.warning("Build %s/%s in app %s is missing required attributes to collect data. Skipping." 
- % (namespace, build.metadata.name, app)) - logging.debug(e, exc_info=True) - return None - - -class CommitTimeMetric: - _prefix_pattern = "https://%s/repos/" - _defaultapi = "api.github.com" - _prefix = _prefix_pattern % _defaultapi - _suffix = "/commits/" - - def __init__(self, app_name, _gitapi=None): - self.name = app_name - self.labels = None - self.repo_url = None - self.commiter = None - self.commit_hash = None - self.commit_time = None - self.commit_timestamp = None - self.build_name = None - self.build_config_name = None - self.image_location = None - self.image_name = None - self.image_tag = None - self.image_hash = None - if _gitapi is not None and len(_gitapi) > 0: - logging.info("Using non-default GitHub API: %s" % (_gitapi)) - self._prefix = self._prefix_pattern % _gitapi - - def getCommitTime(self): - myurl = self.repo_url - url_tokens = myurl.split("/") - url = self._prefix + url_tokens[3] + "/" + url_tokens[4].split(".")[0] + self._suffix + self.commit_hash - response = requests.get(url, auth=(username, token)) - if response.status_code != 200: - # This will occur when trying to make an API call to non-Github - logging.warning("Unable to retrieve commit time for build: %s, hash: %s, url: %s. 
Got http code: %s" % ( - self.build_name, self.commit_hash, url_tokens[2], str(response.status_code))) - else: - commit = response.json() - try: - self.commit_time = commit['commit']['committer']['date'] - self.commit_timestamp = pelorus.convert_date_time_to_timestamp(self.commit_time) - except Exception: - logging.error("Failed processing commit time for build %s" % self.build_name, exc_info=True) - logging.debug(commit) - raise +class GitFactory: + @staticmethod + def getCollector(username, token, namespaces, apps, git_api, git_provider): + if git_provider == "gitlab": + return GitLabCommitCollector("", "", "", "") + if git_provider == "github": + return GitHubCommitCollector(username, token, namespaces, apps, git_api) + if git_provider == "bitbucket": + return BitbucketCommitCollector("", "", "", "") if __name__ == "__main__": + pelorus.check_legacy_vars() pelorus.check_required_config(REQUIRED_CONFIG) - username = os.environ.get('GITHUB_USER') - token = os.environ.get('GITHUB_TOKEN') - git_api = os.environ.get('GITHUB_API') + + username = os.environ.get('GIT_USER') + token = os.environ.get('GIT_TOKEN') + git_api = os.environ.get('GIT_API') + git_provider = os.environ.get('GIT_PROVIDER', pelorus.DEFAULT_GIT) namespaces = None if os.environ.get('NAMESPACES') is not None: namespaces = [proj.strip() for proj in os.environ.get('NAMESPACES').split(",")] apps = None start_http_server(8080) - REGISTRY.register(CommitCollector(username, token, namespaces, apps, git_api)) + + collector = GitFactory.getCollector(username, token, namespaces, apps, git_api, git_provider) + REGISTRY.register(collector) + while True: time.sleep(1) diff --git a/exporters/committime/collector_base.py b/exporters/committime/collector_base.py new file mode 100644 index 000000000..ca0356314 --- /dev/null +++ b/exporters/committime/collector_base.py @@ -0,0 +1,193 @@ +from abc import abstractmethod +import json +import logging +import pelorus +import re +from jsonpath_ng import parse +from 
prometheus_client.core import GaugeMetricFamily +from kubernetes import client +from openshift.dynamic import DynamicClient + +pelorus.load_kube_config() +k8s_config = client.Configuration() +k8s_client = client.api_client.ApiClient(configuration=k8s_config) +dyn_client = DynamicClient(k8s_client) + + +class AbstractCommitCollector(pelorus.AbstractPelorusExporter): + """ + Base class for a CommitCollector. + This class should be extended for the system which contains the commit information. + """ + + def __init__(self, username, token, namespaces, apps, git_api=None): + """Constructor""" + self._username = username + self._token = token + self._namespaces = namespaces + self._apps = apps + self._git_api = git_api + self._commit_dict = {} + + def collect(self): + commit_metric = GaugeMetricFamily('github_commit_timestamp', + 'Commit timestamp', labels=['namespace', 'app', 'image_sha']) + commit_metrics = self.generate_metrics() + for my_metric in commit_metrics: + logging.info("Namespace: %s, App: %s, Build: %s, Timestamp: %s" + % ( + my_metric.namespace, + my_metric.name, + my_metric.build_name, + str(float(my_metric.commit_timestamp)) + ) + ) + commit_metric.add_metric([my_metric.namespace, my_metric.name, my_metric.image_hash], + my_metric.commit_timestamp) + yield commit_metric + + def generate_metrics(self): + """Method called by the collect to create a list of metrics to publish""" + # This will loop and look at OCP builds (calls get_git_commit_time) + if not self._namespaces: + logging.info("No namespaces specified, watching all namespaces") + v1_namespaces = dyn_client.resources.get(api_version='v1', kind='Namespace') + self._namespaces = [namespace.metadata.name for namespace in v1_namespaces.get().items] + else: + logging.info("Watching namespaces: %s" % (self._namespaces)) + + # Initialize metrics list + metrics = [] + for namespace in self._namespaces: + # Initialized variables + builds = [] + apps = [] + builds_by_app = {} + app_label = 
pelorus.get_app_label() + logging.info("Searching for builds with label: %s in namespace: %s" % (app_label, namespace)) + + v1_builds = dyn_client.resources.get(api_version='build.openshift.io/v1', kind='Build') + # only use builds that have the app label + builds = v1_builds.get(namespace=namespace, label_selector=app_label) + + # use a jsonpath expression to find all values for the app label + jsonpath_str = "$['items'][*]['metadata']['labels']['" + str(app_label) + "']" + jsonpath_expr = parse(jsonpath_str) + + found = jsonpath_expr.find(builds) + + apps = [match.value for match in found] + + if not apps: + continue + # remove duplicates + apps = list(dict.fromkeys(apps)) + builds_by_app = {} + + for app in apps: + builds_by_app[app] = list(filter(lambda b: b.metadata.labels[app_label] == app, builds.items)) + + metrics += self.get_metrics_from_apps(builds_by_app, namespace) + + return metrics + pass + + @abstractmethod + def get_commit_time(self, metric): + # This will perform the API calls and parse out the necessary fields into metrics + pass + + # get_metrics_from_apps - expects a sorted array of build data sorted by app label + def get_metrics_from_apps(self, apps, namespace): + metrics = [] + for app in apps: + + builds = apps[app] + + jenkins_builds = list(filter(lambda b: b.spec.strategy.type == 'JenkinsPipeline', builds)) + code_builds = list(filter(lambda b: b.spec.strategy.type in ['Source', 'Binary'], builds)) + + # assume for now that there will only be one repo/branch per app + # For jenkins pipelines, we need to grab the repo data + # then find associated s2i/docker builds from which to pull commit & image data + repo_url = None + if jenkins_builds: + # we will default to the repo listed in '.spec.source.git' + repo_url = jenkins_builds[0].spec.source.git.uri + + # however, in cases where the Jenkinsfile and source code are separate, we look for params + git_repo_regex = re.compile(r"(\w+://)(.+@)*([\w\d\.]+)(:[\d]+){0,1}/*(.*)") + for env in 
jenkins_builds[0].spec.strategy.jenkinsPipelineStrategy.env: + result = git_repo_regex.match(env.value) + if result: + repo_url = env.value + + for build in code_builds: + try: + metric = self.get_metric_from_build(build, app, namespace, repo_url) + except Exception: + logging.error("Cannot collect metrics from build: %s" % (build.metadata.name)) + + if metric: + metrics.append(metric) + return metrics + + def get_metric_from_build(self, build, app, namespace, repo_url): + try: + + metric = CommitMetric(app, self._git_api) + + if build.spec.source.git: + repo_url = build.spec.source.git.uri + metric.repo_url = repo_url + commit_sha = build.spec.revision.git.commit + metric.build_name = build.metadata.name + metric.build_config_name = build.metadata.labels.buildconfig + metric.namespace = build.metadata.namespace + labels = build.metadata.labels + metric.labels = json.loads(str(labels).replace("\'", "\"")) + + metric.commit_hash = commit_sha + metric.name = app + '-' + commit_sha + metric.commiter = build.spec.revision.git.author.name + metric.image_location = build.status.outputDockerImageReference + metric.image_hash = build.status.output.to.imageDigest + # Check the cache for the commit_time, if not call the API + metric_ts = self._commit_dict.get(commit_sha) + if metric_ts is None: + logging.debug("sha: %s, commit_timestamp not found in cache, executing API call." % (commit_sha)) + metric = self.get_commit_time(metric) + # If commit time is None, then we could not get the value from the API + if metric.commit_time is None: + return None + # Add the timestamp to the cache + self._commit_dict[metric.commit_hash] = metric.commit_timestamp + else: + metric.commit_timestamp = self._commit_dict[commit_sha] + logging.debug("Returning sha: %s, commit_timestamp: %s, from cache." % ( + commit_sha, metric.commit_timestamp)) + + return metric + + except Exception as e: + logging.warning("Build %s/%s in app %s is missing required attributes to collect data. 
Skipping." + % (namespace, build.metadata.name, app)) + logging.debug(e, exc_info=True) + return None + + +class CommitMetric(): + def __init__(self, app_name, _gitapi): + self.name = app_name + self.labels = None + self.repo_url = None + self.commiter = None + self.commit_hash = None + self.commit_time = None + self.commit_timestamp = None + self.build_name = None + self.build_config_name = None + self.image_location = None + self.image_name = None + self.image_tag = None + self.image_hash = None diff --git a/exporters/committime/collector_bitbucket.py b/exporters/committime/collector_bitbucket.py new file mode 100644 index 000000000..64d9600ac --- /dev/null +++ b/exporters/committime/collector_bitbucket.py @@ -0,0 +1,11 @@ +from collector_base import AbstractCommitCollector + + +class BitbucketCommitCollector(AbstractCommitCollector): + + def __init__(self, username, token, namespaces, apps): + super().__init__(username, token, namespaces, apps) + + def get_commit_time(self, metric): + """Method called to collect data and send to Prometheus""" + print("BitBucket is not yet supported") diff --git a/exporters/committime/collector_github.py b/exporters/committime/collector_github.py new file mode 100644 index 000000000..1dd4934c5 --- /dev/null +++ b/exporters/committime/collector_github.py @@ -0,0 +1,40 @@ +from collector_base import AbstractCommitCollector +import logging +import pelorus +import requests + + +class GitHubCommitCollector(AbstractCommitCollector): + _prefix_pattern = "https://%s/repos/" + _defaultapi = "api.github.com" + _prefix = _prefix_pattern % _defaultapi + _suffix = "/commits/" + + def __init__(self, username, token, namespaces, apps, git_api=None): + super().__init__(username, token, namespaces, apps, git_api) + if self._git_api is not None and len(self._git_api) > 0: + logging.info("Using non-default API: %s" % (self._git_api)) + else: + self._git_api = self._defaultapi + self._prefix = self._prefix_pattern % self._git_api + + def 
get_commit_time(self, metric): + """Method called to collect data and send to Prometheus""" + myurl = metric.repo_url + url_tokens = myurl.split("/") + url = self._prefix + url_tokens[3] + "/" + url_tokens[4].split(".")[0] + self._suffix + metric.commit_hash + response = requests.get(url, auth=(self._username, self._token)) + if response.status_code != 200: + # This will occur when trying to make an API call to non-Github + logging.warning("Unable to retrieve commit time for build: %s, hash: %s, url: %s. Got http code: %s" % ( + metric.build_name, metric.commit_hash, url_tokens[2], str(response.status_code))) + else: + commit = response.json() + try: + metric.commit_time = commit['commit']['committer']['date'] + metric.commit_timestamp = pelorus.convert_date_time_to_timestamp(metric.commit_time) + except Exception: + logging.error("Failed processing commit time for build %s" % metric.build_name, exc_info=True) + logging.debug(commit) + raise + return metric diff --git a/exporters/committime/collector_gitlab.py b/exporters/committime/collector_gitlab.py new file mode 100644 index 000000000..724eb5216 --- /dev/null +++ b/exporters/committime/collector_gitlab.py @@ -0,0 +1,11 @@ +from collector_base import AbstractCommitCollector + + +class GitLabCommitCollector(AbstractCommitCollector): + + def __init__(self, username, token, namespaces, apps): + super().__init__(username, token, namespaces, apps) + + def get_commit_time(self, metric): + """Method called to collect data and send to Prometheus""" + print("GitLab is not yet supported") diff --git a/exporters/pelorus.py b/exporters/pelorus.py index bac8cbf50..864ae99a0 100644 --- a/exporters/pelorus.py +++ b/exporters/pelorus.py @@ -1,3 +1,4 @@ +from abc import ABC import logging import os import sys @@ -9,6 +10,7 @@ DEFAULT_LOG_LEVEL = 'INFO' DEFAULT_LOG_FORMAT = '%(asctime)-15s %(levelname)-8s %(message)s' DEFAULT_LOG_DATE_FORMAT = '%m-%d-%Y %H:%M:%S' +DEFAULT_GIT = "github" loglevel = os.getenv('LOG_LEVEL', 
DEFAULT_LOG_LEVEL) numeric_level = getattr(logging, loglevel.upper(), None) @@ -57,3 +59,23 @@ def check_required_config(vars): if missing_configs: logging.error("This program will exit.") sys.exit(1) + + +def check_legacy_vars(): + username = os.environ.get('GITHUB_USER') + token = os.environ.get('GITHUB_TOKEN') + api = os.environ.get('GITHUB_API') + if username is not None: + os.environ['GIT_USER'] = username + if token is not None: + os.environ['GIT_TOKEN'] = token + if api is not None: + os.environ['GIT_API'] = api + + +class AbstractPelorusExporter(ABC): + """ + Base class for PelorusExporter + """ + def __init__(self): + pass