Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: extract key fields from rules config #1327

Merged
merged 3 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions config/sampler_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,22 @@ type V2SamplerConfig struct {
Samplers map[string]*V2SamplerChoice `json:"samplers" yaml:"Samplers,omitempty" validate:"required"`
}

type GetSamplingFielder interface {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we call this interface SamplerConfig instead as it's implemented by all sampler configs? If we need to introduce other shared functionality in the future, it enables us to add more shared logic without adding more interfaces.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason I chose this name is that it makes the intent of this interface easy to see. If in the future we find ourselves needing to implement more common methods for SamplerConfigs, we can pick a name that reflects those methods. From my perspective, SamplerConfig is a bit too generic to convey what exactly the interface is trying to achieve.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, it's an internal interface, so it isn't that important if we have to change it in the future. I guess it depends on which side of the generic vs. specific interface debate you're on. As a generic interface that all sampler implementations will need to implement, I prefer SamplerConfig. If it were only going to be implemented by a subset of samplers, a more specific name would be better.

Either way, happy to continue with its current name and we can change it later if we need to 😄

GetSamplingFields() []string
}

// Compile-time assertion that *DeterministicSamplerConfig satisfies GetSamplingFielder.
var _ GetSamplingFielder = (*DeterministicSamplerConfig)(nil)

// DeterministicSamplerConfig is a sampler configuration whose only setting is
// SampleRate.
type DeterministicSamplerConfig struct {
// SampleRate is tagged with a default of 1 and is validated as required and >= 1.
SampleRate int `json:"samplerate" yaml:"SampleRate,omitempty" default:"1" validate:"required,gte=1"`
}

// GetSamplingFields implements GetSamplingFielder. It always returns nil,
// because this config has no field list to report.
func (d *DeterministicSamplerConfig) GetSamplingFields() []string {
return nil
}

var _ GetSamplingFielder = (*DynamicSamplerConfig)(nil)

type DynamicSamplerConfig struct {
SampleRate int64 `json:"samplerate" yaml:"SampleRate,omitempty" validate:"required,gte=1"`
ClearFrequency Duration `json:"clearfrequency" yaml:"ClearFrequency,omitempty"`
Expand All @@ -161,6 +173,12 @@ type DynamicSamplerConfig struct {
UseTraceLength bool `json:"usetracelength" yaml:"UseTraceLength,omitempty"`
}

// GetSamplingFields implements GetSamplingFielder by returning the config's
// FieldList. The slice is returned as-is, not copied, so callers share it with
// the config.
func (d *DynamicSamplerConfig) GetSamplingFields() []string {
return d.FieldList
}

var _ GetSamplingFielder = (*EMADynamicSamplerConfig)(nil)

type EMADynamicSamplerConfig struct {
GoalSampleRate int `json:"goalsamplerate" yaml:"GoalSampleRate,omitempty" validate:"gte=1"`
AdjustmentInterval Duration `json:"adjustmentinterval" yaml:"AdjustmentInterval,omitempty"`
Expand All @@ -173,6 +191,12 @@ type EMADynamicSamplerConfig struct {
UseTraceLength bool `json:"usetracelength" yaml:"UseTraceLength,omitempty"`
}

// GetSamplingFields implements GetSamplingFielder by returning the config's
// FieldList. The slice is returned as-is, not copied, so callers share it with
// the config.
func (d *EMADynamicSamplerConfig) GetSamplingFields() []string {
return d.FieldList
}

var _ GetSamplingFielder = (*EMAThroughputSamplerConfig)(nil)

type EMAThroughputSamplerConfig struct {
GoalThroughputPerSec int `json:"goalthroughputpersec" yaml:"GoalThroughputPerSec,omitempty"`
UseClusterSize bool `json:"useclustersize" yaml:"UseClusterSize,omitempty"`
Expand All @@ -187,6 +211,12 @@ type EMAThroughputSamplerConfig struct {
UseTraceLength bool `json:"usetracelength" yaml:"UseTraceLength,omitempty"`
}

// GetSamplingFields implements GetSamplingFielder by returning the config's
// FieldList. The slice is returned as-is, not copied, so callers share it with
// the config.
func (d *EMAThroughputSamplerConfig) GetSamplingFields() []string {
return d.FieldList
}

var _ GetSamplingFielder = (*WindowedThroughputSamplerConfig)(nil)

type WindowedThroughputSamplerConfig struct {
UpdateFrequency Duration `json:"updatefrequency" yaml:"UpdateFrequency,omitempty"`
LookbackFrequency Duration `json:"lookbackfrequency" yaml:"LookbackFrequency,omitempty"`
Expand All @@ -197,6 +227,12 @@ type WindowedThroughputSamplerConfig struct {
UseTraceLength bool `json:"usetracelength" yaml:"UseTraceLength,omitempty"`
}

// GetSamplingFields implements GetSamplingFielder by returning the config's
// FieldList. The slice is returned as-is, not copied, so callers share it with
// the config.
func (d *WindowedThroughputSamplerConfig) GetSamplingFields() []string {
return d.FieldList
}

var _ GetSamplingFielder = (*TotalThroughputSamplerConfig)(nil)

type TotalThroughputSamplerConfig struct {
GoalThroughputPerSec int `json:"goalthroughputpersec" yaml:"GoalThroughputPerSec,omitempty" validate:"gte=1"`
UseClusterSize bool `json:"useclustersize" yaml:"UseClusterSize,omitempty"`
Expand All @@ -206,12 +242,50 @@ type TotalThroughputSamplerConfig struct {
UseTraceLength bool `json:"usetracelength" yaml:"UseTraceLength,omitempty"`
}

// GetSamplingFields implements GetSamplingFielder by returning the config's
// FieldList. The slice is returned as-is, not copied, so callers share it with
// the config.
func (d *TotalThroughputSamplerConfig) GetSamplingFields() []string {
return d.FieldList
}

// Compile-time assertion that *RulesBasedSamplerConfig satisfies GetSamplingFielder.
var _ GetSamplingFielder = (*RulesBasedSamplerConfig)(nil)

// RulesBasedSamplerConfig is a sampler configuration made up of a list of
// rules plus a CheckNestedFields flag.
type RulesBasedSamplerConfig struct {
// Rules has deliberately different names for json and yaml for conversion from old to new format
Rules []*RulesBasedSamplerRule `json:"rule" yaml:"Rules,omitempty"`
// CheckNestedFields is a boolean option; NOTE(review): its effect is not
// visible in this section — presumably it controls matching against nested
// fields; confirm against the rule-matching code.
CheckNestedFields bool `json:"checknestedfields" yaml:"CheckNestedFields,omitempty"`
}

// GetSamplingFields implements GetSamplingFielder. It gathers into a
// generics.Set every field name referenced by the conditions of each rule,
// together with the fields reported by each rule's downstream sampler, and
// returns the set's members.
//
// Nil rules are skipped, as are rules without a downstream sampler. The order
// of the returned slice is whatever generics.Set.Members produces.
func (r *RulesBasedSamplerConfig) GetSamplingFields() []string {
	collected := make(generics.Set[string], 0)

	for _, rule := range r.Rules {
		if rule == nil {
			continue
		}

		for _, cond := range rule.Conditions {
			// Field and Fields are mutually exclusive, so Fields wins whenever
			// it is set and Field is only consulted otherwise.
			switch {
			case cond.Fields != nil:
				collected.Add(cond.Fields...)
			case cond.Field != "":
				collected.Add(cond.Field)
			}
		}

		if downstream := rule.Sampler; downstream != nil {
			collected.Add(downstream.GetSamplingFields()...)
		}
	}

	return collected.Members()
}

var _ GetSamplingFielder = (*RulesBasedDownstreamSampler)(nil)

// RulesBasedDownstreamSampler is a sampler that can be used as a downstream sampler in a rules-based sampler.
// Only one of the fields should be set.
type RulesBasedDownstreamSampler struct {
DynamicSampler *DynamicSamplerConfig `json:"dynamicsampler" yaml:"DynamicSampler,omitempty"`
EMADynamicSampler *EMADynamicSamplerConfig `json:"emadynamicsampler" yaml:"EMADynamicSampler,omitempty"`
Expand All @@ -221,6 +295,35 @@ type RulesBasedDownstreamSampler struct {
DeterministicSampler *DeterministicSamplerConfig `json:"deterministicsampler" yaml:"DeterministicSampler,omitempty"`
}

// GetSamplingFields implements GetSamplingFielder by delegating to the first
// downstream sampler config that is set. The configs are probed in this order:
// deterministic, dynamic, EMA dynamic, EMA throughput, windowed throughput,
// total throughput. When none is set, an empty, non-nil slice is returned.
func (r *RulesBasedDownstreamSampler) GetSamplingFields() []string {
	switch {
	case r.DeterministicSampler != nil:
		return r.DeterministicSampler.GetSamplingFields()
	case r.DynamicSampler != nil:
		return r.DynamicSampler.GetSamplingFields()
	case r.EMADynamicSampler != nil:
		return r.EMADynamicSampler.GetSamplingFields()
	case r.EMAThroughputSampler != nil:
		return r.EMAThroughputSampler.GetSamplingFields()
	case r.WindowedThroughputSampler != nil:
		return r.WindowedThroughputSampler.GetSamplingFields()
	case r.TotalThroughputSampler != nil:
		return r.TotalThroughputSampler.GetSamplingFields()
	default:
		return []string{}
	}
}

type RulesBasedSamplerRule struct {
// Conditions has deliberately different names for json and yaml for conversion from old to new format
Name string `json:"name" yaml:"Name,omitempty"`
Expand Down
4 changes: 4 additions & 0 deletions sample/deterministic.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,7 @@ func (d *DeterministicSampler) GetSampleRate(trace *types.Trace) (rate uint, kee

return uint(d.sampleRate), shouldKeep, "deterministic/chance", ""
}

// GetKeyFields returns whatever d.Config.GetSamplingFields() reports. The
// config is queried on every call; nothing is cached on the sampler.
func (d *DeterministicSampler) GetKeyFields() []string {
return d.Config.GetSamplingFields()
}
8 changes: 7 additions & 1 deletion sample/dynamic.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ type DynamicSampler struct {
prefix string
lastMetrics map[string]int64

key *traceKey
key *traceKey
keyFields []string

dynsampler dynsampler.Sampler
}
Expand All @@ -42,6 +43,7 @@ func (d *DynamicSampler) Start() error {
d.maxKeys = 500
}
d.prefix = "dynamic_"
d.keyFields = d.Config.GetSamplingFields()

// spin up the actual dynamic sampler
d.dynsampler = &dynsampler.AvgSampleRate{
Expand Down Expand Up @@ -96,3 +98,7 @@ func (d *DynamicSampler) GetSampleRate(trace *types.Trace) (rate uint, keep bool
}
return rate, shouldKeep, "dynamic", key
}

// GetKeyFields returns the sampler's keyFields slice, which Start assigns from
// d.Config.GetSamplingFields(). The slice is returned without copying.
func (d *DynamicSampler) GetKeyFields() []string {
return d.keyFields
}
8 changes: 7 additions & 1 deletion sample/dynamic_ema.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ type EMADynamicSampler struct {
prefix string
lastMetrics map[string]int64

key *traceKey
key *traceKey
keyFields []string

dynsampler dynsampler.Sampler
}
Expand All @@ -47,6 +48,7 @@ func (d *EMADynamicSampler) Start() error {
d.maxKeys = 500
}
d.prefix = "emadynamic_"
d.keyFields = d.Config.GetSamplingFields()

// spin up the actual dynamic sampler
d.dynsampler = &dynsampler.EMASampleRate{
Expand Down Expand Up @@ -104,3 +106,7 @@ func (d *EMADynamicSampler) GetSampleRate(trace *types.Trace) (rate uint, keep b
}
return rate, shouldKeep, "emadynamic", key
}

// GetKeyFields returns the sampler's keyFields slice, which Start assigns from
// d.Config.GetSamplingFields(). The slice is returned without copying.
func (d *EMADynamicSampler) GetKeyFields() []string {
return d.keyFields
}
8 changes: 7 additions & 1 deletion sample/ema_throughput.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ type EMAThroughputSampler struct {
prefix string
lastMetrics map[string]int64

key *traceKey
key *traceKey
keyFields []string

dynsampler *dynsampler.EMAThroughput
}
Expand All @@ -56,6 +57,7 @@ func (d *EMAThroughputSampler) Start() error {
}
d.prefix = "emathroughput_"

d.keyFields = d.Config.GetSamplingFields()
// spin up the actual dynamic sampler
d.dynsampler = &dynsampler.EMAThroughput{
GoalThroughputPerSec: d.goalThroughputPerSec / d.clusterSize,
Expand Down Expand Up @@ -121,3 +123,7 @@ func (d *EMAThroughputSampler) GetSampleRate(trace *types.Trace) (rate uint, kee
}
return rate, shouldKeep, "emathroughput", key
}

// GetKeyFields returns the sampler's keyFields slice, which Start assigns from
// d.Config.GetSamplingFields(). The slice is returned without copying.
func (d *EMAThroughputSampler) GetKeyFields() []string {
return d.keyFields
}
16 changes: 11 additions & 5 deletions sample/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ import (
var _ ClusterSizer = (*RulesBasedSampler)(nil)

type RulesBasedSampler struct {
Config *config.RulesBasedSamplerConfig
Logger logger.Logger
Metrics metrics.Metrics
samplers map[string]Sampler
prefix string
Config *config.RulesBasedSamplerConfig
Logger logger.Logger
Metrics metrics.Metrics
samplers map[string]Sampler
prefix string
keyFields []string
}

const RootPrefix = "root."
Expand All @@ -35,6 +36,7 @@ func (s *RulesBasedSampler) Start() error {
s.Metrics.Register(s.prefix+"sample_rate", "histogram")

s.samplers = make(map[string]Sampler)
s.keyFields = s.Config.GetSamplingFields()

for _, rule := range s.Config.Rules {
for _, cond := range rule.Conditions {
Expand Down Expand Up @@ -162,6 +164,10 @@ func (s *RulesBasedSampler) GetSampleRate(trace *types.Trace) (rate uint, keep b
return 1, true, "no rule matched", ""
}

// GetKeyFields returns the sampler's keyFields slice, which Start assigns from
// s.Config.GetSamplingFields(). The slice is returned without copying.
func (s *RulesBasedSampler) GetKeyFields() []string {
return s.keyFields
}

func ruleMatchesTrace(t *types.Trace, rule *config.RulesBasedSamplerRule, checkNestedFields bool) bool {
// We treat a rule with no conditions as a match.
if rule.Conditions == nil {
Expand Down
Loading
Loading