Skip to content

Commit

Permalink
🐛 fix: chat model image calculation problem
Browse files Browse the repository at this point in the history
  • Loading branch information
MartialBE committed Jul 20, 2024
1 parent e4096dd commit 69594da
Showing 1 changed file with 77 additions and 21 deletions.
98 changes: 77 additions & 21 deletions common/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,19 +120,21 @@ func CountTokenMessages(messages []types.ChatCompletionMessage, model string, pr
continue
}
imageUrl, ok := m["image_url"].(map[string]any)
if ok {
url := imageUrl["url"].(string)
detail := ""
if imageUrl["detail"] != nil {
detail = imageUrl["detail"].(string)
}
imageTokens, err := countImageTokens(url, detail)
if err != nil {
//Due to the excessive length of the error information, only extract and record the most critical part.
logger.SysError("error counting image tokens: " + err.Error())
} else {
tokenNum += imageTokens
}
if !ok {
continue
}
url := imageUrl["url"].(string)
detail := ""
if imageUrl["detail"] != nil {
detail = imageUrl["detail"].(string)
}
countImageTokens := getCountImageFun(model)
imageTokens, err := countImageTokens(url, detail, model)
if err != nil {
//Due to the excessive length of the error information, only extract and record the most critical part.
logger.SysError("error counting image tokens: " + err.Error())
} else {
tokenNum += imageTokens
}
}
}
Expand All @@ -147,17 +149,54 @@ func CountTokenMessages(messages []types.ChatCompletionMessage, model string, pr
return tokenNum
}

// Token costs applied when counting the tokens consumed by an image.
const (
// lowDetailCost is the flat token count returned when detail is "low".
lowDetailCost = 85
// highDetailCostPerTile is multiplied by the number of 512x512 squares
// covering the image when detail is "high".
highDetailCostPerTile = 170
// additionalCost is added once on top of the per-tile total when detail is
// "high".
additionalCost = 85
)
// getCountImageFun picks the image token counter registered in
// CountImageFunMap under a prefix of model. When no registered prefix
// matches, the counter stored under "gpt-" is returned.
func getCountImageFun(model string) CountImageFun {
	for key := range CountImageFunMap {
		if !strings.HasPrefix(model, key) {
			continue
		}
		return CountImageFunMap[key]
	}

	// No prefix matched: use the entry registered for "gpt-".
	return CountImageFunMap["gpt-"]
}

// CountImageFun is the signature shared by all image token counters. It
// receives the image URL, the requested detail level and the model name, and
// returns the number of tokens to charge for the image or an error.
type CountImageFun func(url string, detail string, modelName string) (int, error)

// CountImageFunMap maps a model-name prefix to the function used to count the
// tokens of an image sent to models whose name starts with that prefix.
var CountImageFunMap = map[string]CountImageFun{
	"claude-": countClaudeImageTokens,
	"gemini-": countGeminiImageTokens,
	"glm-":    countGlmImageTokens,
	"gpt-":    countOpenaiImageTokens,
}

// OpenAIImageCost holds the token prices used when counting the tokens of an
// image for an OpenAI model.
//
// Low is the flat cost returned for detail "low". For detail "high" the cost
// is High per 512x512 square plus Additional once.
type OpenAIImageCost struct {
	Low, High, Additional int
}

// OpenAIImageCostMap lists the image token prices per OpenAI model family.
// The "gpt-4o-mini" entry is used for model names starting with
// "gpt-4o-mini"; every other model uses the "general" entry.
var OpenAIImageCostMap = map[string]*OpenAIImageCost{
	"general":     {Low: 85, High: 170, Additional: 85},
	"gpt-4o-mini": {Low: 2833, High: 5667, Additional: 2833},
}

// https://platform.openai.com/docs/guides/vision/calculating-costs
// https://github.com/openai/openai-cookbook/blob/05e3f9be4c7a2ae7ecf029a7c32065b024730ebe/examples/How_to_count_tokens_with_tiktoken.ipynb
func countImageTokens(url string, detail string) (_ int, err error) {
func countOpenaiImageTokens(url, detail, modelName string) (_ int, err error) {
// var fetchSize = true
var width, height int
var openAIImageCost *OpenAIImageCost
if strings.HasPrefix(modelName, "gpt-4o-mini") {
openAIImageCost = OpenAIImageCostMap["gpt-4o-mini"]
} else {
openAIImageCost = OpenAIImageCostMap["general"]
}
// Reference: https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding
// How detail == "auto" works is undocumented; the guide only says that the model looks at the image input size and decides whether to use the low or high setting.
// According to the official guide, "low" disable the high-res model,
Expand Down Expand Up @@ -189,7 +228,7 @@ func countImageTokens(url string, detail string) (_ int, err error) {
}
switch detail {
case "low":
return lowDetailCost, nil
return openAIImageCost.Low, nil
case "high":
width, height, err = image.GetImageSize(url)
if err != nil {
Expand All @@ -206,13 +245,30 @@ func countImageTokens(url string, detail string) (_ int, err error) {
height = int(float64(height) * ratio)
}
numSquares := int(math.Ceil(float64(width)/512) * math.Ceil(float64(height)/512))
result := numSquares*highDetailCostPerTile + additionalCost
result := numSquares*openAIImageCost.High + openAIImageCost.Additional
return result, nil
default:
return 0, errors.New("invalid detail option")
}
}

// countGeminiImageTokens returns the token count charged for one image on
// Gemini models. All three arguments are ignored: the result is always 258
// tokens and the error is always nil.
func countGeminiImageTokens(_, _, _ string) (int, error) {
	const tokensPerImage = 258
	return tokensPerImage, nil
}

// countClaudeImageTokens returns the token count charged for one image on
// Claude models: the pixel area (width * height) divided by 750, rounded up.
// The detail and model name arguments are ignored.
//
// An error from image.GetImageSize is returned unchanged together with a
// zero count.
func countClaudeImageTokens(url, _, _ string) (int, error) {
	width, height, err := image.GetImageSize(url)
	if err != nil {
		return 0, err
	}

	// Multiply in float64 rather than int so that the pixel area cannot
	// overflow on platforms where int is 32 bits wide.
	area := float64(width) * float64(height)
	return int(math.Ceil(area / 750)), nil
}

// countGlmImageTokens returns the token count charged for one image on GLM
// models. All three arguments are ignored: the result is always 1047 tokens
// and the error is always nil.
func countGlmImageTokens(_, _, _ string) (int, error) {
	const tokensPerImage = 1047
	return tokensPerImage, nil
}

func CountTokenInput(input any, model string) int {
switch v := input.(type) {
case string:
Expand Down

0 comments on commit 69594da

Please sign in to comment.