Skip to content

Commit

Permalink
Add reliability calculations support to Mentix (#1649)
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel-WWU-IT authored May 17, 2021
1 parent 8ea3ae0 commit 3387212
Show file tree
Hide file tree
Showing 13 changed files with 447 additions and 1 deletion.
5 changes: 5 additions & 0 deletions changelog/unreleased/mentix-add-reliab.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Enhancement: Add reliability calculations support to Mentix

To make reliability calculations possible, a new exporter has been added to Mentix that reads scheduled downtimes from the GOCDB and exposes them through Prometheus metrics.

https://github.com/cs3org/reva/pull/1649
12 changes: 12 additions & 0 deletions docs/content/en/docs/config/http/services/mentix/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@ update_interval = "15m"
{{< /highlight >}}
{{% /dir %}}

## Services
{{% dir name="critical_types" type="[]string" default="[]" %}}
The service types that are considered as critical/essential.
{{< highlight toml >}}
[http.services.mentix.services]
critical_types = ["REVAD"]
{{< /highlight >}}
{{% /dir %}}

## Connectors
Mentix is decoupled from the actual sources of the mesh data by using so-called _connectors_. A connector is used to gather the data from a certain source, which is then converted into Mentix' own internal format.

Expand Down Expand Up @@ -63,6 +72,9 @@ Mentix exposes its data via an HTTP endpoint using the `webapi` exporter. Data c
- files:
- '/usr/share/prom/sciencemesh_services.json'
```

- **metrics**
The [Metrics](metrics) exporter exposes various site-specific metrics through Prometheus.

## Site Accounts service
Mentix uses the Reva site accounts service to query information about site accounts. The following settings must be configured properly:
Expand Down
19 changes: 19 additions & 0 deletions docs/content/en/docs/config/http/services/mentix/metrics/_index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
title: "metrics"
linkTitle: "metrics"
weight: 10
description: >
Configuration for the Metrics exporter of the Mentix service
---

{{% pageinfo %}}
The Metrics exporter exposes site-specific metrics through Prometheus.
{{% /pageinfo %}}

{{% dir name="enabled_connectors" type="[]string" default="*" %}}
A list of all enabled connectors for the exporter.
{{< highlight toml >}}
[http.services.mentix.exporters.metrics]
enabled_connectors = ["gocdb"]
{{< /highlight >}}
{{% /dir %}}
6 changes: 6 additions & 0 deletions examples/mentix/mentix.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ address = "http://sciencemesh-test.uni-muenster.de"
[http.services.mentix.connectors.localfile]
file = "/usr/share/revad/sites.json"

# Configure the service types that are considered as critical/essential
[http.services.mentix.services]
critical_types = ["REVAD"]

# Enable the WebAPI exporter
[http.services.mentix.exporters.webapi]
endpoint = "/"
Expand All @@ -20,6 +24,8 @@ endpoint = "/"
[http.services.mentix.exporters.siteloc]
# If this setting is omitted, all connectors will be used as data sources
enabled_connectors = ["gocdb"]
# Enable the Metrics exporter
[http.services.mentix.exporters.metrics]

# Enable the site registration importer
[http.services.mentix.importers.sitereg]
Expand Down
1 change: 1 addition & 0 deletions internal/http/services/mentix/mentix.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ func applyDefaultConfig(conf *config.Configuration) {
addDefaultConnector(&conf.Exporters.SiteLocations.EnabledConnectors)

addDefaultConnector(&conf.Exporters.PrometheusSD.EnabledConnectors)
addDefaultConnector(&conf.Exporters.Metrics.EnabledConnectors)
}

// New returns a new Mentix service.
Expand Down
8 changes: 8 additions & 0 deletions pkg/mentix/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ type Configuration struct {

UpdateInterval string `mapstructure:"update_interval"`

Services struct {
CriticalTypes []string `mapstructure:"critical_types"`
} `mapstructure:"services"`

Importers struct {
SiteRegistration struct {
Endpoint string `mapstructure:"endpoint"`
Expand Down Expand Up @@ -68,6 +72,10 @@ type Configuration struct {
BlackboxOutputFile string `mapstructure:"blackbox_output_file"`
EnabledConnectors []string `mapstructure:"enabled_connectors"`
} `mapstructure:"promsd"`

Metrics struct {
EnabledConnectors []string `mapstructure:"enabled_connectors"`
} `mapstructure:"metrics"`
} `mapstructure:"exporters"`

AccountsService struct {
Expand Down
4 changes: 3 additions & 1 deletion pkg/mentix/config/ids.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ const (
ExporterIDCS3API = "cs3api"
// ExporterIDSiteLocations is the identifier for the Site Locations exporter.
ExporterIDSiteLocations = "siteloc"
// ExporterIDPrometheusSD is the identifier for the PrometheusSD SD exporter.
// ExporterIDPrometheusSD is the identifier for the PrometheusSD exporter.
ExporterIDPrometheusSD = "promsd"
// ExporterIDMetrics is the identifier for the Metrics exporter.
ExporterIDMetrics = "metrics"
)
36 changes: 36 additions & 0 deletions pkg/mentix/connectors/gocdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"net/url"
"path"
"strings"
"time"

"github.com/rs/zerolog"

Expand Down Expand Up @@ -73,6 +74,11 @@ func (connector *GOCDBConnector) RetrieveMeshData() (*meshdata.MeshData, error)
if err := connector.queryServices(meshData, site); err != nil {
return nil, fmt.Errorf("could not query services of site '%v': %v", site.Name, err)
}

// Get downtimes scheduled for the current site
if err := connector.queryDowntimes(meshData, site); err != nil {
return nil, fmt.Errorf("could not query downtimes of site '%v': %v", site.Name, err)
}
}

meshData.InferMissingData()
Expand Down Expand Up @@ -152,6 +158,7 @@ func (connector *GOCDBConnector) querySites(meshData *meshdata.MeshData) error {
Latitude: site.Latitude,
Services: nil,
Properties: properties,
Downtimes: meshdata.Downtimes{},
}
meshData.Sites = append(meshData.Sites, meshsite)
}
Expand Down Expand Up @@ -216,6 +223,35 @@ func (connector *GOCDBConnector) queryServices(meshData *meshdata.MeshData, site
return nil
}

// queryDowntimes fetches the downtimes of the given site from the GOCDB and
// stores them in site.Downtimes, replacing any previously stored entries.
//
// The request is restricted to ongoing downtimes ("ongoing_only": "yes"). Of
// the returned entries, only those whose severity equals "outage"
// (case-insensitive) are kept. For every kept downtime, the affected services
// are reduced to those whose type matches one of the configured critical
// service types (case-insensitive). Each affected service is recorded at most
// once, even if the configuration lists its type several times.
//
// An error is returned only if the GOCDB query itself fails.
func (connector *GOCDBConnector) queryDowntimes(meshData *meshdata.MeshData, site *meshdata.Site) error {
	var downtimes gocdb.Downtimes
	params := network.URLParams{"topentity": site.Name, "ongoing_only": "yes"}
	if err := connector.query(&downtimes, "get_downtime_nested_services", false, true, params); err != nil {
		return err
	}

	criticalTypes := connector.conf.Services.CriticalTypes

	// Copy retrieved data into the mesh data
	site.Downtimes.Clear()
	for _, dt := range downtimes.Downtimes {
		if !strings.EqualFold(dt.Severity, "outage") { // Only take real outages into account
			continue
		}

		services := make([]string, 0, len(dt.AffectedServices.Services))
		for _, service := range dt.AffectedServices.Services {
			// Only add critical services to the list of affected services
			for _, svcType := range criticalTypes {
				if strings.EqualFold(svcType, service.Type) {
					services = append(services, service.Type)
					// Stop at the first match so that duplicate (or differently
					// cased) entries in the configuration do not add the same
					// service more than once.
					break
				}
			}
		}

		// The result and error of ScheduleDowntime are discarded here.
		// NOTE(review): presumably a best-effort choice so that a single bad
		// downtime entry does not abort the whole site query - confirm.
		_, _ = site.Downtimes.ScheduleDowntime(time.Unix(dt.StartDate, 0), time.Unix(dt.EndDate, 0), services)
	}

	return nil
}

func (connector *GOCDBConnector) findServiceType(meshData *meshdata.MeshData, name string) *meshdata.ServiceType {
for _, serviceType := range meshData.ServiceTypes {
if strings.EqualFold(serviceType.Name, name) {
Expand Down
24 changes: 24 additions & 0 deletions pkg/mentix/connectors/gocdb/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,27 @@ type Service struct {
type Services struct {
Services []*Service `xml:"SERVICE_ENDPOINT"`
}

// DowntimeService represents a service scheduled for downtime.
// Only the type of the service is captured from the XML data.
type DowntimeService struct {
// Type is decoded from the SERVICE_TYPE element.
Type string `xml:"SERVICE_TYPE"`
}

// DowntimeServices represents a list of DowntimeService objects.
type DowntimeServices struct {
// Services holds one entry per SERVICE child element.
Services []*DowntimeService `xml:"SERVICE"`
}

// Downtime is a scheduled downtime for a site.
type Downtime struct {
// Severity is decoded from the SEVERITY element; the GOCDB connector
// compares it case-insensitively against "outage".
Severity string `xml:"SEVERITY"`
// StartDate is decoded from the START_DATE element; the GOCDB connector
// passes it to time.Unix as a seconds value.
StartDate int64 `xml:"START_DATE"`
// EndDate is decoded from the END_DATE element; the GOCDB connector
// passes it to time.Unix as a seconds value.
EndDate int64 `xml:"END_DATE"`

// AffectedServices is decoded from the SERVICES element and lists the
// services covered by this downtime.
AffectedServices DowntimeServices `xml:"SERVICES"`
}

// Downtimes represents a list of Downtime objects.
type Downtimes struct {
// Downtimes holds one entry per DOWNTIME child element.
Downtimes []*Downtime `xml:"DOWNTIME"`
}
84 changes: 84 additions & 0 deletions pkg/mentix/exchangers/exporters/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Copyright 2018-2020 CERN
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// In applying this license, CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

package exporters

import (
"github.com/cs3org/reva/pkg/mentix/config"
"github.com/cs3org/reva/pkg/mentix/exchangers/exporters/metrics"
"github.com/cs3org/reva/pkg/mentix/meshdata"
"github.com/pkg/errors"
"github.com/rs/zerolog"
)

// MetricsExporter exposes various Prometheus metrics.
// It embeds BaseExporter and forwards the current mesh data to a metrics
// handler whenever Update is called.
type MetricsExporter struct {
BaseExporter

// metrics is the handler created in Activate and fed with mesh data in Update.
metrics *metrics.Metrics
}

// Activate prepares the exporter for use. It first activates the embedded
// BaseExporter, then creates the metrics handler from the given configuration
// and logger, and finally applies the connectors enabled for this exporter.
// Any error from the base activation is returned unchanged; a failure to
// create the metrics handler is returned wrapped.
func (exporter *MetricsExporter) Activate(conf *config.Configuration, log *zerolog.Logger) error {
	err := exporter.BaseExporter.Activate(conf, log)
	if err != nil {
		return err
	}

	// Set up the handler that maintains the actual metrics
	handler, err := metrics.New(conf, log)
	if err != nil {
		return errors.Wrap(err, "unable to create metrics")
	}
	exporter.metrics = handler

	// Apply the exporter-specific connector selection
	enabledConnectors := conf.Exporters.Metrics.EnabledConnectors
	exporter.SetEnabledConnectors(enabledConnectors)

	return nil
}

// Update is invoked whenever the mesh data set has changed. After letting the
// embedded BaseExporter process the new data set, the exporter's mesh data is
// handed to the metrics handler while holding the exporter's read lock.
// An error from the base update is returned unchanged; an error from the
// metrics handler is returned wrapped.
func (exporter *MetricsExporter) Update(meshDataSet meshdata.Map) error {
	if baseErr := exporter.BaseExporter.Update(meshDataSet); baseErr != nil {
		return baseErr
	}

	// The mesh data is only read below, so a read lock is sufficient
	exporter.Locker().RLock()
	defer exporter.Locker().RUnlock()

	updateErr := exporter.metrics.Update(exporter.MeshData())
	if updateErr == nil {
		return nil
	}
	return errors.Wrap(updateErr, "error while updating the metrics")
}

// GetID returns the ID of the exporter, which is the constant
// config.ExporterIDMetrics.
func (exporter *MetricsExporter) GetID() string {
return config.ExporterIDMetrics
}

// GetName returns the display name of the exporter, which is always "Metrics".
func (exporter *MetricsExporter) GetName() string {
return "Metrics"
}

// init passes a zero-value MetricsExporter to registerExporter when the
// package is initialized.
func init() {
registerExporter(&MetricsExporter{})
}
Loading

0 comments on commit 3387212

Please sign in to comment.