Skip to content

Commit c433503

Browse files
committed
feat: remove the binding hit mechanism between input tokens and total tokens
1 parent 5980541 commit c433503

File tree

3 files changed

+45
-79
lines changed

3 files changed

+45
-79
lines changed

core/llm_token_ratelimit/ratelimit_checker.go

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,11 @@ func (c *PETAChecker) checkLimitKey(ctx *Context, rule *MatchedRule) bool {
129129
return true
130130
}
131131

132+
logging.Info("[LLMTokenRateLimit] check with PETA strategy",
133+
"limitKey", rule.LimitKey,
134+
"requestID", ctx.Get(KeyRequestID),
135+
)
136+
132137
prompts := []string{}
133138
reqInfos := extractRequestInfos(ctx)
134139
if reqInfos != nil {
@@ -165,11 +170,9 @@ func (c *PETAChecker) checkLimitKey(ctx *Context, rule *MatchedRule) bool {
165170
)
166171
return true
167172
}
168-
logging.Info("[LLMTokenRateLimit] withhold infos",
169-
"limitKey", rule.LimitKey,
173+
logging.Info("[LLMTokenRateLimit] withhold completed",
170174
"current_capacity", result[0],
171175
"waiting_time(ms)", result[1],
172-
"estimated_token", result[2],
173176
"difference", result[3],
174177
"tokenization_length", length,
175178
"requestID", ctx.Get(KeyRequestID),

core/llm_token_ratelimit/rule_collector.go

Lines changed: 31 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -40,9 +40,23 @@ func (c *BaseRuleCollector) Collect(ctx *Context, rule *Rule) []*MatchedRule {
4040

4141
reqInfos := extractRequestInfos(ctx) // allow nil for global rate limit
4242

43-
ruleMap := make(map[string]*MatchedRule)
43+
resourceHash := generateHash(rule.Resource)
44+
ruleStrategy := rule.Strategy.String()
45+
46+
estimatedSize := 0
47+
for _, item := range rule.SpecificItems {
48+
if item.KeyItems != nil {
49+
estimatedSize += len(item.KeyItems)
50+
}
51+
}
52+
53+
ruleMap := make(map[string]*MatchedRule, estimatedSize)
4454

4555
for _, specificItem := range rule.SpecificItems {
56+
if specificItem.KeyItems == nil {
57+
continue
58+
}
59+
4660
identifierChecker := globalRuleMatcher.getIdentifierChecker(specificItem.Identifier.Type)
4761
if identifierChecker == nil {
4862
logging.Error(errors.New("unknown identifier.type"),
@@ -52,9 +66,9 @@ func (c *BaseRuleCollector) Collect(ctx *Context, rule *Rule) []*MatchedRule {
5266
)
5367
continue
5468
}
55-
if specificItem.KeyItems == nil {
56-
continue
57-
}
69+
70+
identifierType := specificItem.Identifier.Type.String()
71+
5872
for _, keyItem := range specificItem.KeyItems {
5973
if !identifierChecker.Check(ctx, reqInfos, specificItem.Identifier, keyItem.Key) {
6074
continue
@@ -69,17 +83,22 @@ func (c *BaseRuleCollector) Collect(ctx *Context, rule *Rule) []*MatchedRule {
6983
continue
7084
}
7185

72-
params := &BaseLimitKeyParams{
73-
Resource: generateHash(rule.Resource),
74-
Strategy: rule.Strategy,
75-
IdentifierType: specificItem.Identifier.Type,
76-
TimeWindow: timeWindow,
77-
TokenSize: keyItem.Token.Number,
78-
CountStrategy: keyItem.Token.CountStrategy,
86+
limitKey := fmt.Sprintf(RedisRatelimitKeyFormat,
87+
resourceHash,
88+
ruleStrategy,
89+
identifierType,
90+
timeWindow,
91+
keyItem.Token.CountStrategy.String(),
92+
)
93+
ruleMap[limitKey] = &MatchedRule{
94+
Strategy: rule.Strategy,
95+
LimitKey: limitKey,
96+
TimeWindow: timeWindow,
97+
TokenSize: keyItem.Token.Number,
98+
CountStrategy: keyItem.Token.CountStrategy,
7999
// PETA
80100
Encoding: rule.Encoding,
81101
}
82-
c.addMatchedRule(ctx, params, ruleMap)
83102
}
84103
}
85104

@@ -89,59 +108,3 @@ func (c *BaseRuleCollector) Collect(ctx *Context, rule *Rule) []*MatchedRule {
89108
}
90109
return rules
91110
}
92-
93-
func (c *BaseRuleCollector) addMatchedRule(ctx *Context, params *BaseLimitKeyParams, ruleMap map[string]*MatchedRule) {
94-
if c == nil {
95-
return
96-
}
97-
if params.CountStrategy != TotalTokens {
98-
limitKey, err := c.generateLimitKey(params)
99-
if err != nil {
100-
logging.Error(err, "failed to generate LimitKey in llm_token_ratelimit.BaseRuleCollector.addMatchedRule()",
101-
"params", params,
102-
"requestID", ctx.Get(KeyRequestID),
103-
)
104-
return
105-
}
106-
ruleMap[limitKey] = &MatchedRule{
107-
Strategy: params.Strategy,
108-
LimitKey: limitKey,
109-
TimeWindow: params.TimeWindow,
110-
TokenSize: params.TokenSize,
111-
CountStrategy: params.CountStrategy,
112-
// PETA
113-
Encoding: params.Encoding,
114-
}
115-
}
116-
params.CountStrategy = TotalTokens
117-
limitKey, err := c.generateLimitKey(params)
118-
if err != nil {
119-
logging.Error(err, "failed to generate LimitKey in llm_token_ratelimit.BaseRuleCollector.addMatchedRule()",
120-
"params", params,
121-
"requestID", ctx.Get(KeyRequestID),
122-
)
123-
return
124-
}
125-
ruleMap[limitKey] = &MatchedRule{
126-
Strategy: params.Strategy,
127-
LimitKey: limitKey,
128-
TimeWindow: params.TimeWindow,
129-
TokenSize: params.TokenSize,
130-
CountStrategy: params.CountStrategy,
131-
// PETA
132-
Encoding: params.Encoding,
133-
}
134-
}
135-
136-
func (c *BaseRuleCollector) generateLimitKey(params *BaseLimitKeyParams) (string, error) {
137-
if c == nil {
138-
return "", fmt.Errorf("BaseRuleCollector is nil")
139-
}
140-
return fmt.Sprintf(RedisRatelimitKeyFormat,
141-
params.Resource,
142-
params.Strategy.String(),
143-
params.IdentifierType.String(),
144-
params.TimeWindow,
145-
params.CountStrategy.String(),
146-
), nil
147-
}

core/llm_token_ratelimit/token_updater.go

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -112,12 +112,6 @@ func (u *PETAUpdater) updateLimitKey(ctx *Context, rule *MatchedRule, infos *Use
112112
return
113113
}
114114
actualToken := calculator.Calculate(ctx, infos)
115-
logging.Info("[LLMTokenRateLimit] correct infos",
116-
"limitKey", rule.LimitKey,
117-
"estimated_token", rule.EstimatedToken,
118-
"actual_token", actualToken,
119-
"requestID", ctx.Get(KeyRequestID),
120-
)
121115

122116
slidingWindowKey := fmt.Sprintf(PETASlidingWindowKeyFormat, generateHash(rule.LimitKey), rule.LimitKey)
123117
tokenBucketKey := fmt.Sprintf(PETATokenBucketKeyFormat, generateHash(rule.LimitKey), rule.LimitKey)
@@ -145,10 +139,16 @@ func (u *PETAUpdater) updateLimitKey(ctx *Context, rule *MatchedRule, infos *Use
145139
correctResult := result[0]
146140
if correctResult != PETACorrectOK && correctResult != PETACorrectOverestimateError { // Temporarily unable to handle overestimation cases
147141
logging.Warn("[LLMTokenRateLimit] failed to update the limit key",
148-
"limitKey", rule.LimitKey,
149-
"correctResult", correctResult,
142+
"correct_result", correctResult,
150143
"requestID", ctx.Get(KeyRequestID),
151144
)
152145
return
153146
}
147+
148+
logging.Info("[LLMTokenRateLimit] correct completed",
149+
"estimated_token", rule.EstimatedToken,
150+
"actual_token", actualToken,
151+
"correct_result", correctResult,
152+
"requestID", ctx.Get(KeyRequestID),
153+
)
154154
}

0 commit comments

Comments
 (0)