Skip to content

Commit

Permalink
Handle Query Updates (#27547)
Browse files Browse the repository at this point in the history
* Simplify handle query to return updated estimation values

* Added changelog

* remove comment

* Revert "Simplify handle query to return updated estimation values"

This reverts commit b67969c.

* temp

* Revert "temp"

This reverts commit 4932979.

* CE files handle query update

* Revert "CE files handle query update"

This reverts commit 8dafa2d.

* CE Changes

* Delete vault/external_tests/upgrade_testing/upgrade_testing_binary/upgrade_test.go
  • Loading branch information
divyaac authored and miagilepner committed Aug 15, 2024
1 parent 0c0caf6 commit 185445b
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 66 deletions.
4 changes: 4 additions & 0 deletions changelog/27547.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
```release-note:improvement
activity log: Changes how new client counts in the current month are estimated, in order to return more
visibly sensible totals.
```
77 changes: 11 additions & 66 deletions vault/activity_log.go
Original file line number Diff line number Diff line change
Expand Up @@ -1831,54 +1831,29 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T
pq = storedQuery
}

// Calculate the namespace response breakdowns and totals for entities and tokens from the initial
// namespace data.
totalCounts, byNamespaceResponse, err := a.calculateByNamespaceResponseForQuery(ctx, pq.Namespaces)
if err != nil {
return nil, err
}

// If we need to add the current month's client counts into the total, compute the namespace
// breakdown for the current month as well.
var partialByMonth map[int64]*processMonth
var partialByNamespace map[string]*processByNamespace
var byNamespaceResponseCurrent []*ResponseNamespace
var totalCurrentCounts *ResponseCounts
if computePartial {
// Traverse through current month's activitylog data and group clients
// into months and namespaces
a.fragmentLock.RLock()
partialByMonth, partialByNamespace = a.populateNamespaceAndMonthlyBreakdowns()
partialByMonth, _ = a.populateNamespaceAndMonthlyBreakdowns()
a.fragmentLock.RUnlock()

// Convert the byNamespace breakdowns into structs that are
// consumable by the /activity endpoint, so as to reuse code between these two
// endpoints.
byNamespaceComputation := a.transformALNamespaceBreakdowns(partialByNamespace)

// Calculate the namespace response breakdowns and totals for entities
// and tokens from current month namespace data.
totalCurrentCounts, byNamespaceResponseCurrent, err = a.calculateByNamespaceResponseForQuery(ctx, byNamespaceComputation)
// Estimate the current month totals. These record contains is complete with all the
// current month data, grouped by namespace and mounts
currentMonth, err := a.computeCurrentMonthForBillingPeriod(ctx, partialByMonth, startTime, endTime)
if err != nil {
return nil, err
}

// Create a mapping of namespace id to slice index, so that we can efficiently update our results without
// having to traverse the entire namespace response slice every time.
nsrMap := make(map[string]int)
for i, nr := range byNamespaceResponse {
nsrMap[nr.NamespaceID] = i
}
// Combine the existing months precomputed query with the current month data
pq.CombineWithCurrentMonth(currentMonth)
}

// Rather than blindly appending, which will create duplicates, check our existing counts against the current
// month counts, and append or update as necessary. We also want to account for mounts and their counts.
for _, nrc := range byNamespaceResponseCurrent {
if ndx, ok := nsrMap[nrc.NamespaceID]; ok {
byNamespaceResponse[ndx].Add(nrc)
} else {
byNamespaceResponse = append(byNamespaceResponse, nrc)
}
}
// Convert the namespace data into a protobuf format that can be returned in the response
totalCounts, byNamespaceResponse, err := a.calculateByNamespaceResponseForQuery(ctx, pq.Namespaces)
if err != nil {
return nil, err
}

// Sort clients within each namespace
Expand All @@ -1888,34 +1863,6 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T
totalCounts, byNamespaceResponse = a.limitNamespacesInALResponse(byNamespaceResponse, limitNamespaces)
}

distinctEntitiesResponse := totalCounts.EntityClients
if computePartial {
currentMonth, err := a.computeCurrentMonthForBillingPeriod(ctx, partialByMonth, startTime, endTime)
if err != nil {
return nil, err
}

// Add the namespace attribution for the current month to the newly computed current month value. Note
// that transformMonthBreakdowns calculates a superstruct of the required namespace struct due to its
// primary use-case being for precomputedQueryWorker, but we will reuse this code for brevity and extract
// the namespaces from it.
currentMonthNamespaceAttribution := a.transformMonthBreakdowns(partialByMonth)

// Ensure that there is only one element in this list -- if not, warn.
if len(currentMonthNamespaceAttribution) > 1 {
a.logger.Warn("more than one month worth of namespace and mount attribution calculated for "+
"current month values", "number of months", len(currentMonthNamespaceAttribution))
}
if len(currentMonthNamespaceAttribution) == 0 {
a.logger.Warn("no month data found, returning query with no namespace attribution for current month")
} else {
currentMonth.Namespaces = currentMonthNamespaceAttribution[0].Namespaces
currentMonth.NewClients.Namespaces = currentMonthNamespaceAttribution[0].NewClients.Namespaces
}
pq.Months = append(pq.Months, currentMonth)
distinctEntitiesResponse += pq.Months[len(pq.Months)-1].NewClients.Counts.EntityClients
}

// Now populate the response based on breakdowns.
responseData := make(map[string]interface{})
responseData["start_time"] = pq.StartTime.Format(time.RFC3339)
Expand All @@ -1932,8 +1879,6 @@ func (a *ActivityLog) handleQuery(ctx context.Context, startTime, endTime time.T
}

responseData["by_namespace"] = byNamespaceResponse
totalCounts.Add(totalCurrentCounts)
totalCounts.DistinctEntities = distinctEntitiesResponse
responseData["total"] = totalCounts

// Create and populate the month response structs based on the monthly breakdown.
Expand Down
172 changes: 172 additions & 0 deletions vault/external_tests/activity_testonly/activity_testonly_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ package activity_testonly
import (
"context"
"encoding/json"
"fmt"
"math"
"testing"
"time"

"github.com/hashicorp/vault/api"
"github.com/hashicorp/vault/helper/namespace"
"github.com/hashicorp/vault/helper/testhelpers"
"github.com/hashicorp/vault/helper/testhelpers/minimal"
"github.com/hashicorp/vault/helper/timeutil"
Expand Down Expand Up @@ -447,3 +450,172 @@ func Test_ActivityLog_MountDeduplication(t *testing.T) {
"secret/": 1,
}, mountSet)
}

// TestHandleQuery_MultipleMounts creates a cluster with
// two userpass mounts. It then tests verifies that
// the total new counts are calculated within a reasonably level of accuracy for
// various numbers of clients in each mount.
func TestHandleQuery_MultipleMounts(t *testing.T) {
tests := map[string]struct {
twoMonthsAgo [][]int
oneMonthAgo [][]int
currentMonth [][]int
expectedNewClients int
expectedTotalAccuracy float64
}{
"low volume, all mounts": {
twoMonthsAgo: [][]int{
{20, 20},
},
oneMonthAgo: [][]int{
{30, 30},
},
currentMonth: [][]int{
{40, 40},
},
expectedNewClients: 80,
expectedTotalAccuracy: 1,
},
"medium volume, all mounts": {
twoMonthsAgo: [][]int{
{200, 200},
},
oneMonthAgo: [][]int{
{300, 300},
},
currentMonth: [][]int{
{400, 400},
},
expectedNewClients: 800,
expectedTotalAccuracy: 0.98,
},
"higher volume, all mounts": {
twoMonthsAgo: [][]int{
{200, 200},
},
oneMonthAgo: [][]int{
{300, 300},
},
currentMonth: [][]int{
{2000, 5000},
},
expectedNewClients: 7000,
expectedTotalAccuracy: 0.95,
},
"higher volume, no repeats": {
twoMonthsAgo: [][]int{
{200, 200},
},
oneMonthAgo: [][]int{
{300, 300},
},
currentMonth: [][]int{
{4000, 6000},
},
expectedNewClients: 10000,
expectedTotalAccuracy: 0.98,
},
}

for i, tt := range tests {
testname := fmt.Sprintf("%s", i)
t.Run(testname, func(t *testing.T) {
var err error
cluster := minimal.NewTestSoloCluster(t, nil)
client := cluster.Cores[0].Client
_, err = client.Logical().Write("sys/internal/counters/config", map[string]interface{}{
"enabled": "enable",
})
require.NoError(t, err)

// Create two namespaces
namespaces := []string{namespace.RootNamespaceID}
mounts := make(map[string][]string)

// Add two userpass mounts to each namespace
for _, ns := range namespaces {
err = client.WithNamespace(ns).Sys().EnableAuthWithOptions("userpass1", &api.EnableAuthOptions{
Type: "userpass",
})
require.NoError(t, err)
err = client.WithNamespace(ns).Sys().EnableAuthWithOptions("userpass2", &api.EnableAuthOptions{
Type: "userpass",
})
require.NoError(t, err)
mounts[ns] = []string{"auth/userpass1", "auth/userpass2"}
}

activityLogGenerator := clientcountutil.NewActivityLogData(client)

// Write two months ago data
activityLogGenerator = activityLogGenerator.NewPreviousMonthData(2)
for nsIndex, nsId := range namespaces {
for mountIndex, mount := range mounts[nsId] {
activityLogGenerator = activityLogGenerator.
NewClientsSeen(tt.twoMonthsAgo[nsIndex][mountIndex], clientcountutil.WithClientNamespace(nsId), clientcountutil.WithClientMount(mount))
}
}

// Write previous months data
activityLogGenerator = activityLogGenerator.NewPreviousMonthData(1)
for nsIndex, nsId := range namespaces {
for mountIndex, mount := range mounts[nsId] {
activityLogGenerator = activityLogGenerator.
NewClientsSeen(tt.oneMonthAgo[nsIndex][mountIndex], clientcountutil.WithClientNamespace(nsId), clientcountutil.WithClientMount(mount))
}
}

// Write current month data
activityLogGenerator = activityLogGenerator.NewCurrentMonthData()
for nsIndex, nsPath := range namespaces {
for mountIndex, mount := range mounts[nsPath] {
activityLogGenerator = activityLogGenerator.
RepeatedClientSeen(clientcountutil.WithClientNamespace(nsPath), clientcountutil.WithClientMount(mount)).
NewClientsSeen(tt.currentMonth[nsIndex][mountIndex], clientcountutil.WithClientNamespace(nsPath), clientcountutil.WithClientMount(mount))
}
}

// Write all the client count data
_, err = activityLogGenerator.Write(context.Background(), generation.WriteOptions_WRITE_PRECOMPUTED_QUERIES, generation.WriteOptions_WRITE_ENTITIES)
require.NoError(t, err)

endOfCurrentMonth := timeutil.EndOfMonth(time.Now().UTC())

// query activity log
resp, err := client.Logical().ReadWithData("sys/internal/counters/activity", map[string][]string{
"end_time": {endOfCurrentMonth.Format(time.RFC3339)},
"start_time": {timeutil.StartOfMonth(timeutil.MonthsPreviousTo(2, time.Now().UTC())).Format(time.RFC3339)},
})
require.NoError(t, err)

// Ensure that the month response is the same as the totals, because all clients
// are new clients and there will be no approximation in the single month partial
// case
monthsRaw, ok := resp.Data["months"]
if !ok {
t.Fatalf("malformed results. got %v", resp.Data)
}
monthsResponse := make([]*vault.ResponseMonth, 0)
err = mapstructure.Decode(monthsRaw, &monthsResponse)

currentMonthClients := monthsResponse[len(monthsResponse)-1]

// Now verify that the new client totals for ALL namespaces are approximately accurate (there are no namespaces in CE)
newClientsError := math.Abs((float64)(currentMonthClients.NewClients.Counts.Clients - tt.expectedNewClients))
newClientsErrorMargin := newClientsError / (float64)(tt.expectedNewClients)
expectedAccuracyCalc := (1 - tt.expectedTotalAccuracy) * 100 / 100
if newClientsErrorMargin > expectedAccuracyCalc {
t.Fatalf("bad accuracy: expected %+v, found %+v", expectedAccuracyCalc, newClientsErrorMargin)
}

// Verify that the totals for the clients are visibly sensible (that is the total of all the individual new clients per namespace)
total := 0
for _, newClientCounts := range currentMonthClients.NewClients.Namespaces {
total += newClientCounts.Counts.Clients
}
if diff := math.Abs(float64(currentMonthClients.NewClients.Counts.Clients - total)); diff >= 1 {
t.Fatalf("total expected was %d but got %d", currentMonthClients.NewClients.Counts.Clients, total)
}
})
}
}

0 comments on commit 185445b

Please sign in to comment.