Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle Query Updates #27547

Merged
merged 10 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog/27547.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
```release-note:improvement
activity log: Changes how new client counts in the current month are estimated, in order to return more
visibly sensible totals.
```
77 changes: 11 additions & 66 deletions vault/activity_log.go
Original file line number Diff line number Diff line change
Expand Up @@ -1830,54 +1830,29 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T
pq = storedQuery
}

// Calculate the namespace response breakdowns and totals for entities and tokens from the initial
// namespace data.
totalCounts, byNamespaceResponse, err := a.calculateByNamespaceResponseForQuery(ctx, pq.Namespaces)
if err != nil {
return nil, err
}

// If we need to add the current month's client counts into the total, compute the namespace
// breakdown for the current month as well.
var partialByMonth map[int64]*processMonth
var partialByNamespace map[string]*processByNamespace
var byNamespaceResponseCurrent []*ResponseNamespace
var totalCurrentCounts *ResponseCounts
if computePartial {
// Traverse through current month's activitylog data and group clients
// into months and namespaces
a.fragmentLock.RLock()
partialByMonth, partialByNamespace = a.populateNamespaceAndMonthlyBreakdowns()
partialByMonth, _ = a.populateNamespaceAndMonthlyBreakdowns()
a.fragmentLock.RUnlock()

// Convert the byNamespace breakdowns into structs that are
// consumable by the /activity endpoint, so as to reuse code between these two
// endpoints.
byNamespaceComputation := a.transformALNamespaceBreakdowns(partialByNamespace)

// Calculate the namespace response breakdowns and totals for entities
// and tokens from current month namespace data.
totalCurrentCounts, byNamespaceResponseCurrent, err = a.calculateByNamespaceResponseForQuery(ctx, byNamespaceComputation)
// Estimate the current month totals. These record contains is complete with all the
// current month data, grouped by namespace and mounts
currentMonth, err := a.computeCurrentMonthForBillingPeriod(ctx, partialByMonth, startTime, endTime)
if err != nil {
return nil, err
}

// Create a mapping of namespace id to slice index, so that we can efficiently update our results without
// having to traverse the entire namespace response slice every time.
nsrMap := make(map[string]int)
for i, nr := range byNamespaceResponse {
nsrMap[nr.NamespaceID] = i
}
// Combine the existing months precomputed query with the current month data
pq.CombineWithCurrentMonth(currentMonth)
}

// Rather than blindly appending, which will create duplicates, check our existing counts against the current
// month counts, and append or update as necessary. We also want to account for mounts and their counts.
for _, nrc := range byNamespaceResponseCurrent {
if ndx, ok := nsrMap[nrc.NamespaceID]; ok {
byNamespaceResponse[ndx].Add(nrc)
} else {
byNamespaceResponse = append(byNamespaceResponse, nrc)
}
}
// Convert the namespace data into a protobuf format that can be returned in the response
totalCounts, byNamespaceResponse, err := a.calculateByNamespaceResponseForQuery(ctx, pq.Namespaces)
if err != nil {
return nil, err
}

// Sort clients within each namespace
Expand All @@ -1887,34 +1862,6 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T
totalCounts, byNamespaceResponse = a.limitNamespacesInALResponse(byNamespaceResponse, limitNamespaces)
}

distinctEntitiesResponse := totalCounts.EntityClients
if computePartial {
currentMonth, err := a.computeCurrentMonthForBillingPeriod(ctx, partialByMonth, startTime, endTime)
if err != nil {
return nil, err
}

// Add the namespace attribution for the current month to the newly computed current month value. Note
// that transformMonthBreakdowns calculates a superstruct of the required namespace struct due to its
// primary use-case being for precomputedQueryWorker, but we will reuse this code for brevity and extract
// the namespaces from it.
currentMonthNamespaceAttribution := a.transformMonthBreakdowns(partialByMonth)

// Ensure that there is only one element in this list -- if not, warn.
if len(currentMonthNamespaceAttribution) > 1 {
a.logger.Warn("more than one month worth of namespace and mount attribution calculated for "+
"current month values", "number of months", len(currentMonthNamespaceAttribution))
}
if len(currentMonthNamespaceAttribution) == 0 {
a.logger.Warn("no month data found, returning query with no namespace attribution for current month")
} else {
currentMonth.Namespaces = currentMonthNamespaceAttribution[0].Namespaces
currentMonth.NewClients.Namespaces = currentMonthNamespaceAttribution[0].NewClients.Namespaces
}
pq.Months = append(pq.Months, currentMonth)
distinctEntitiesResponse += pq.Months[len(pq.Months)-1].NewClients.Counts.EntityClients
}

// Now populate the response based on breakdowns.
responseData := make(map[string]interface{})
responseData["start_time"] = pq.StartTime.Format(time.RFC3339)
Expand All @@ -1931,8 +1878,6 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T
}

responseData["by_namespace"] = byNamespaceResponse
totalCounts.Add(totalCurrentCounts)
totalCounts.DistinctEntities = distinctEntitiesResponse
responseData["total"] = totalCounts

// Create and populate the month response structs based on the monthly breakdown.
Expand Down
172 changes: 172 additions & 0 deletions vault/external_tests/activity_testonly/activity_testonly_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ package activity_testonly
import (
"context"
"encoding/json"
"fmt"
"math"
"testing"
"time"

"github.com/hashicorp/vault/api"
"github.com/hashicorp/vault/helper/namespace"
"github.com/hashicorp/vault/helper/testhelpers"
"github.com/hashicorp/vault/helper/testhelpers/minimal"
"github.com/hashicorp/vault/helper/timeutil"
Expand Down Expand Up @@ -447,3 +450,172 @@ func Test_ActivityLog_MountDeduplication(t *testing.T) {
"secret/": 1,
}, mountSet)
}

// TestHandleQuery_MultipleMounts creates a cluster with
// two userpass mounts. It then tests verifies that
// the total new counts are calculated within a reasonably level of accuracy for
// various numbers of clients in each mount.
func TestHandleQuery_MultipleMounts(t *testing.T) {
tests := map[string]struct {
twoMonthsAgo [][]int
oneMonthAgo [][]int
currentMonth [][]int
expectedNewClients int
expectedTotalAccuracy float64
}{
"low volume, all mounts": {
twoMonthsAgo: [][]int{
{20, 20},
},
oneMonthAgo: [][]int{
{30, 30},
},
currentMonth: [][]int{
{40, 40},
},
expectedNewClients: 80,
expectedTotalAccuracy: 1,
},
"medium volume, all mounts": {
twoMonthsAgo: [][]int{
{200, 200},
},
oneMonthAgo: [][]int{
{300, 300},
},
currentMonth: [][]int{
{400, 400},
},
expectedNewClients: 800,
expectedTotalAccuracy: 0.98,
},
"higher volume, all mounts": {
twoMonthsAgo: [][]int{
{200, 200},
},
oneMonthAgo: [][]int{
{300, 300},
},
currentMonth: [][]int{
{2000, 5000},
},
expectedNewClients: 7000,
expectedTotalAccuracy: 0.95,
},
"higher volume, no repeats": {
twoMonthsAgo: [][]int{
{200, 200},
},
oneMonthAgo: [][]int{
{300, 300},
},
currentMonth: [][]int{
{4000, 6000},
},
expectedNewClients: 10000,
expectedTotalAccuracy: 0.98,
},
}

for i, tt := range tests {
testname := fmt.Sprintf("%s", i)
t.Run(testname, func(t *testing.T) {
var err error
cluster := minimal.NewTestSoloCluster(t, nil)
client := cluster.Cores[0].Client
_, err = client.Logical().Write("sys/internal/counters/config", map[string]interface{}{
"enabled": "enable",
})
require.NoError(t, err)

// Create two namespaces
namespaces := []string{namespace.RootNamespaceID}
mounts := make(map[string][]string)

// Add two userpass mounts to each namespace
for _, ns := range namespaces {
err = client.WithNamespace(ns).Sys().EnableAuthWithOptions("userpass1", &api.EnableAuthOptions{
Type: "userpass",
})
require.NoError(t, err)
err = client.WithNamespace(ns).Sys().EnableAuthWithOptions("userpass2", &api.EnableAuthOptions{
Type: "userpass",
})
require.NoError(t, err)
mounts[ns] = []string{"auth/userpass1", "auth/userpass2"}
}

activityLogGenerator := clientcountutil.NewActivityLogData(client)

// Write two months ago data
activityLogGenerator = activityLogGenerator.NewPreviousMonthData(2)
for nsIndex, nsId := range namespaces {
for mountIndex, mount := range mounts[nsId] {
activityLogGenerator = activityLogGenerator.
NewClientsSeen(tt.twoMonthsAgo[nsIndex][mountIndex], clientcountutil.WithClientNamespace(nsId), clientcountutil.WithClientMount(mount))
}
}

// Write previous months data
activityLogGenerator = activityLogGenerator.NewPreviousMonthData(1)
for nsIndex, nsId := range namespaces {
for mountIndex, mount := range mounts[nsId] {
activityLogGenerator = activityLogGenerator.
NewClientsSeen(tt.oneMonthAgo[nsIndex][mountIndex], clientcountutil.WithClientNamespace(nsId), clientcountutil.WithClientMount(mount))
}
}

// Write current month data
activityLogGenerator = activityLogGenerator.NewCurrentMonthData()
for nsIndex, nsPath := range namespaces {
for mountIndex, mount := range mounts[nsPath] {
activityLogGenerator = activityLogGenerator.
RepeatedClientSeen(clientcountutil.WithClientNamespace(nsPath), clientcountutil.WithClientMount(mount)).
NewClientsSeen(tt.currentMonth[nsIndex][mountIndex], clientcountutil.WithClientNamespace(nsPath), clientcountutil.WithClientMount(mount))
}
}

// Write all the client count data
_, err = activityLogGenerator.Write(context.Background(), generation.WriteOptions_WRITE_PRECOMPUTED_QUERIES, generation.WriteOptions_WRITE_ENTITIES)
require.NoError(t, err)

endOfCurrentMonth := timeutil.EndOfMonth(time.Now().UTC())

// query activity log
resp, err := client.Logical().ReadWithData("sys/internal/counters/activity", map[string][]string{
"end_time": {endOfCurrentMonth.Format(time.RFC3339)},
"start_time": {timeutil.StartOfMonth(timeutil.MonthsPreviousTo(2, time.Now().UTC())).Format(time.RFC3339)},
})
require.NoError(t, err)

// Ensure that the month response is the same as the totals, because all clients
// are new clients and there will be no approximation in the single month partial
// case
monthsRaw, ok := resp.Data["months"]
if !ok {
t.Fatalf("malformed results. got %v", resp.Data)
}
monthsResponse := make([]*vault.ResponseMonth, 0)
err = mapstructure.Decode(monthsRaw, &monthsResponse)

currentMonthClients := monthsResponse[len(monthsResponse)-1]

// Now verify that the new client totals for ALL namespaces are approximately accurate (there are no namespaces in CE)
newClientsError := math.Abs((float64)(currentMonthClients.NewClients.Counts.Clients - tt.expectedNewClients))
newClientsErrorMargin := newClientsError / (float64)(tt.expectedNewClients)
expectedAccuracyCalc := (1 - tt.expectedTotalAccuracy) * 100 / 100
if newClientsErrorMargin > expectedAccuracyCalc {
t.Fatalf("bad accuracy: expected %+v, found %+v", expectedAccuracyCalc, newClientsErrorMargin)
}

// Verify that the totals for the clients are visibly sensible (that is the total of all the individual new clients per namespace)
total := 0
for _, newClientCounts := range currentMonthClients.NewClients.Namespaces {
total += newClientCounts.Counts.Clients
}
if diff := math.Abs(float64(currentMonthClients.NewClients.Counts.Clients - total)); diff >= 1 {
t.Fatalf("total expected was %d but got %d", currentMonthClients.NewClients.Counts.Clients, total)
}
})
}
}
Loading