diff --git a/controller/relay-text.go b/controller/relay-text.go
index 70370de5..f33ebfb8 100644
--- a/controller/relay-text.go
+++ b/controller/relay-text.go
@@ -52,6 +52,7 @@ func relayTextHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode {
 	tokenId := c.GetInt("token_id")
 	userId := c.GetInt("id")
 	group := c.GetString("group")
+	tokenQuota := c.GetInt("token_quota")
 	startTime := time.Now()
 	var textRequest GeneralOpenAIRequest
 
@@ -261,10 +262,20 @@ func relayTextHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode {
 		return errorWrapper(err, "decrease_user_quota_failed", http.StatusInternalServerError)
 	}
 	if userQuota > 100*preConsumedQuota {
-		// in this case, we do not pre-consume quota
-		// because the user has enough quota
-		preConsumedQuota = 0
-		common.LogInfo(c.Request.Context(), fmt.Sprintf("user %d has enough quota %d, trusted and no need to pre-consume", userId, userQuota))
+		// The user's quota is sufficient; now check whether the token's quota is sufficient as well
+		if tokenQuota != -1 {
+			// Token is not unlimited, so check whether its quota is sufficient
+			if tokenQuota > 100*preConsumedQuota {
+				// The token's quota is sufficient too, so trust the token and skip pre-consumption
+				preConsumedQuota = 0
+				common.LogInfo(c.Request.Context(), fmt.Sprintf("user %d quota %d and token %d quota %d are enough, trusted and no need to pre-consume", userId, userQuota, tokenId, tokenQuota))
+			}
+		} else {
+			// in this case, we do not pre-consume quota
+			// because the user has enough quota
+			preConsumedQuota = 0
+			common.LogInfo(c.Request.Context(), fmt.Sprintf("user %d with unlimited token has enough quota %d, trusted and no need to pre-consume", userId, userQuota))
+		}
 	}
 	if preConsumedQuota > 0 {
 		userQuota, err = model.PreConsumeTokenQuota(tokenId, preConsumedQuota)
diff --git a/middleware/auth.go b/middleware/auth.go
index e12b81b2..1b97f9f2 100644
--- a/middleware/auth.go
+++ b/middleware/auth.go
@@ -115,6 +115,11 @@ func TokenAuth() func(c *gin.Context) {
 		c.Set("id", token.UserId)
 		c.Set("token_id", token.Id)
 		c.Set("token_name", token.Name)
+		if !token.UnlimitedQuota {
+			c.Set("token_quota", token.RemainQuota)
+		} else {
+			c.Set("token_quota", -1)
+		}
 		if token.ModelLimitsEnabled {
 			c.Set("token_model_limit_enabled", true)
 			c.Set("token_model_limit", token.GetModelLimitsMap())