Fix OCM usage counting & Update pricing
This commit is contained in:
@@ -425,6 +425,8 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
|
||||
usage.OutputTokens,
|
||||
usage.CacheReadInputTokens,
|
||||
usage.CacheCreationInputTokens,
|
||||
usage.CacheCreation.Ephemeral5mInputTokens,
|
||||
usage.CacheCreation.Ephemeral1hInputTokens,
|
||||
username,
|
||||
)
|
||||
}
|
||||
@@ -485,6 +487,8 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
|
||||
accumulatedUsage.InputTokens = messageStart.Message.Usage.InputTokens
|
||||
accumulatedUsage.CacheReadInputTokens = messageStart.Message.Usage.CacheReadInputTokens
|
||||
accumulatedUsage.CacheCreationInputTokens = messageStart.Message.Usage.CacheCreationInputTokens
|
||||
accumulatedUsage.CacheCreation.Ephemeral5mInputTokens = messageStart.Message.Usage.CacheCreation.Ephemeral5mInputTokens
|
||||
accumulatedUsage.CacheCreation.Ephemeral1hInputTokens = messageStart.Message.Usage.CacheCreation.Ephemeral1hInputTokens
|
||||
}
|
||||
case "message_delta":
|
||||
messageDelta := event.AsMessageDelta()
|
||||
@@ -519,6 +523,8 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
|
||||
accumulatedUsage.OutputTokens,
|
||||
accumulatedUsage.CacheReadInputTokens,
|
||||
accumulatedUsage.CacheCreationInputTokens,
|
||||
accumulatedUsage.CacheCreation.Ephemeral5mInputTokens,
|
||||
accumulatedUsage.CacheCreation.Ephemeral1hInputTokens,
|
||||
username,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -13,12 +13,14 @@ import (
|
||||
)
|
||||
|
||||
// UsageStats accumulates request and token counters for one bucket of usage
// (a combination total or a single user within a combination).
//
// The two TTL-specific cache-creation counters are omitted from the JSON
// encoding when zero, so data written before they existed round-trips
// unchanged.
type UsageStats struct {
	// RequestCount is the number of recorded requests.
	RequestCount int `json:"request_count"`
	// MessagesCount is the sum of the per-request message counts.
	MessagesCount int `json:"messages_count"`
	// InputTokens is the total of reported input tokens.
	InputTokens int64 `json:"input_tokens"`
	// OutputTokens is the total of reported output tokens.
	OutputTokens int64 `json:"output_tokens"`
	// CacheReadInputTokens is the total of input tokens served from cache.
	CacheReadInputTokens int64 `json:"cache_read_input_tokens"`
	// CacheCreationInputTokens is the total of input tokens written to cache,
	// regardless of cache TTL.
	CacheCreationInputTokens int64 `json:"cache_creation_input_tokens"`
	// CacheCreation5MinuteInputTokens is the portion of cache-creation tokens
	// reported for the 5-minute cache TTL.
	CacheCreation5MinuteInputTokens int64 `json:"cache_creation_5m_input_tokens,omitempty"`
	// CacheCreation1HourInputTokens is the portion of cache-creation tokens
	// reported for the 1-hour cache TTL.
	CacheCreation1HourInputTokens int64 `json:"cache_creation_1h_input_tokens,omitempty"`
}
|
||||
|
||||
type CostCombination struct {
|
||||
@@ -41,13 +43,15 @@ type AggregatedUsage struct {
|
||||
}
|
||||
|
||||
// UsageStatsJSON is the serialized form of UsageStats with the computed cost
// attached.
//
// The TTL-specific cache-creation counters are omitted when zero; CostUSD is
// always emitted.
type UsageStatsJSON struct {
	// RequestCount is the number of recorded requests.
	RequestCount int `json:"request_count"`
	// MessagesCount is the sum of the per-request message counts.
	MessagesCount int `json:"messages_count"`
	// InputTokens is the total of reported input tokens.
	InputTokens int64 `json:"input_tokens"`
	// OutputTokens is the total of reported output tokens.
	OutputTokens int64 `json:"output_tokens"`
	// CacheReadInputTokens is the total of input tokens served from cache.
	CacheReadInputTokens int64 `json:"cache_read_input_tokens"`
	// CacheCreationInputTokens is the total of input tokens written to cache,
	// regardless of cache TTL.
	CacheCreationInputTokens int64 `json:"cache_creation_input_tokens"`
	// CacheCreation5MinuteInputTokens is the portion of cache-creation tokens
	// reported for the 5-minute cache TTL.
	CacheCreation5MinuteInputTokens int64 `json:"cache_creation_5m_input_tokens,omitempty"`
	// CacheCreation1HourInputTokens is the portion of cache-creation tokens
	// reported for the 1-hour cache TTL.
	CacheCreation1HourInputTokens int64 `json:"cache_creation_1h_input_tokens,omitempty"`
	// CostUSD is the cost computed for the counters above.
	CostUSD float64 `json:"cost_usd"`
}
|
||||
|
||||
type CostCombinationJSON struct {
|
||||
@@ -69,10 +73,11 @@ type AggregatedUsageJSON struct {
|
||||
}
|
||||
|
||||
// ModelPricing holds the rates used to price one model's token usage.
//
// calculateCost multiplies each rate by the matching token counter and divides
// the sum by 1,000,000, so every rate is a price per million tokens.
type ModelPricing struct {
	// InputPrice is the rate applied to input tokens.
	InputPrice float64
	// OutputPrice is the rate applied to output tokens.
	OutputPrice float64
	// CacheReadPrice is the rate applied to input tokens read from cache.
	CacheReadPrice float64
	// CacheWritePrice5Minute is the rate applied to cache-creation tokens with
	// the 5-minute TTL. It is also the rate used for cache-creation tokens that
	// carry no TTL breakdown.
	CacheWritePrice5Minute float64
	// CacheWritePrice1Hour is the rate applied to cache-creation tokens with
	// the 1-hour TTL.
	CacheWritePrice1Hour float64
}
|
||||
|
||||
type modelFamily struct {
|
||||
@@ -82,143 +87,205 @@ type modelFamily struct {
|
||||
}
|
||||
|
||||
var (
|
||||
opus4Pricing = ModelPricing{
|
||||
InputPrice: 15.0,
|
||||
OutputPrice: 75.0,
|
||||
CacheReadPrice: 1.5,
|
||||
CacheWritePrice: 18.75,
|
||||
opus46StandardPricing = ModelPricing{
|
||||
InputPrice: 5.0,
|
||||
OutputPrice: 25.0,
|
||||
CacheReadPrice: 0.5,
|
||||
CacheWritePrice5Minute: 6.25,
|
||||
CacheWritePrice1Hour: 10.0,
|
||||
}
|
||||
|
||||
sonnet4StandardPricing = ModelPricing{
|
||||
InputPrice: 3.0,
|
||||
OutputPrice: 15.0,
|
||||
CacheReadPrice: 0.3,
|
||||
CacheWritePrice: 3.75,
|
||||
}
|
||||
|
||||
sonnet4PremiumPricing = ModelPricing{
|
||||
InputPrice: 6.0,
|
||||
OutputPrice: 22.5,
|
||||
CacheReadPrice: 0.6,
|
||||
CacheWritePrice: 7.5,
|
||||
}
|
||||
|
||||
haiku4Pricing = ModelPricing{
|
||||
InputPrice: 1.0,
|
||||
OutputPrice: 5.0,
|
||||
CacheReadPrice: 0.1,
|
||||
CacheWritePrice: 1.25,
|
||||
}
|
||||
|
||||
haiku35Pricing = ModelPricing{
|
||||
InputPrice: 0.8,
|
||||
OutputPrice: 4.0,
|
||||
CacheReadPrice: 0.08,
|
||||
CacheWritePrice: 1.0,
|
||||
}
|
||||
|
||||
sonnet35Pricing = ModelPricing{
|
||||
InputPrice: 3.0,
|
||||
OutputPrice: 15.0,
|
||||
CacheReadPrice: 0.3,
|
||||
CacheWritePrice: 3.75,
|
||||
opus46PremiumPricing = ModelPricing{
|
||||
InputPrice: 10.0,
|
||||
OutputPrice: 37.5,
|
||||
CacheReadPrice: 1.0,
|
||||
CacheWritePrice5Minute: 12.5,
|
||||
CacheWritePrice1Hour: 20.0,
|
||||
}
|
||||
|
||||
opus45Pricing = ModelPricing{
|
||||
InputPrice: 5.0,
|
||||
OutputPrice: 25.0,
|
||||
CacheReadPrice: 0.5,
|
||||
CacheWritePrice: 6.25,
|
||||
InputPrice: 5.0,
|
||||
OutputPrice: 25.0,
|
||||
CacheReadPrice: 0.5,
|
||||
CacheWritePrice5Minute: 6.25,
|
||||
CacheWritePrice1Hour: 10.0,
|
||||
}
|
||||
|
||||
opus4Pricing = ModelPricing{
|
||||
InputPrice: 15.0,
|
||||
OutputPrice: 75.0,
|
||||
CacheReadPrice: 1.5,
|
||||
CacheWritePrice5Minute: 18.75,
|
||||
CacheWritePrice1Hour: 30.0,
|
||||
}
|
||||
|
||||
sonnet46StandardPricing = ModelPricing{
|
||||
InputPrice: 3.0,
|
||||
OutputPrice: 15.0,
|
||||
CacheReadPrice: 0.3,
|
||||
CacheWritePrice5Minute: 3.75,
|
||||
CacheWritePrice1Hour: 6.0,
|
||||
}
|
||||
|
||||
sonnet46PremiumPricing = ModelPricing{
|
||||
InputPrice: 6.0,
|
||||
OutputPrice: 22.5,
|
||||
CacheReadPrice: 0.6,
|
||||
CacheWritePrice5Minute: 7.5,
|
||||
CacheWritePrice1Hour: 12.0,
|
||||
}
|
||||
|
||||
sonnet45StandardPricing = ModelPricing{
|
||||
InputPrice: 3.0,
|
||||
OutputPrice: 15.0,
|
||||
CacheReadPrice: 0.3,
|
||||
CacheWritePrice: 3.75,
|
||||
InputPrice: 3.0,
|
||||
OutputPrice: 15.0,
|
||||
CacheReadPrice: 0.3,
|
||||
CacheWritePrice5Minute: 3.75,
|
||||
CacheWritePrice1Hour: 6.0,
|
||||
}
|
||||
|
||||
sonnet45PremiumPricing = ModelPricing{
|
||||
InputPrice: 6.0,
|
||||
OutputPrice: 22.5,
|
||||
CacheReadPrice: 0.6,
|
||||
CacheWritePrice: 7.5,
|
||||
InputPrice: 6.0,
|
||||
OutputPrice: 22.5,
|
||||
CacheReadPrice: 0.6,
|
||||
CacheWritePrice5Minute: 7.5,
|
||||
CacheWritePrice1Hour: 12.0,
|
||||
}
|
||||
|
||||
sonnet4StandardPricing = ModelPricing{
|
||||
InputPrice: 3.0,
|
||||
OutputPrice: 15.0,
|
||||
CacheReadPrice: 0.3,
|
||||
CacheWritePrice5Minute: 3.75,
|
||||
CacheWritePrice1Hour: 6.0,
|
||||
}
|
||||
|
||||
sonnet4PremiumPricing = ModelPricing{
|
||||
InputPrice: 6.0,
|
||||
OutputPrice: 22.5,
|
||||
CacheReadPrice: 0.6,
|
||||
CacheWritePrice5Minute: 7.5,
|
||||
CacheWritePrice1Hour: 12.0,
|
||||
}
|
||||
|
||||
sonnet37Pricing = ModelPricing{
|
||||
InputPrice: 3.0,
|
||||
OutputPrice: 15.0,
|
||||
CacheReadPrice: 0.3,
|
||||
CacheWritePrice5Minute: 3.75,
|
||||
CacheWritePrice1Hour: 6.0,
|
||||
}
|
||||
|
||||
sonnet35Pricing = ModelPricing{
|
||||
InputPrice: 3.0,
|
||||
OutputPrice: 15.0,
|
||||
CacheReadPrice: 0.3,
|
||||
CacheWritePrice5Minute: 3.75,
|
||||
CacheWritePrice1Hour: 6.0,
|
||||
}
|
||||
|
||||
haiku45Pricing = ModelPricing{
|
||||
InputPrice: 1.0,
|
||||
OutputPrice: 5.0,
|
||||
CacheReadPrice: 0.1,
|
||||
CacheWritePrice: 1.25,
|
||||
InputPrice: 1.0,
|
||||
OutputPrice: 5.0,
|
||||
CacheReadPrice: 0.1,
|
||||
CacheWritePrice5Minute: 1.25,
|
||||
CacheWritePrice1Hour: 2.0,
|
||||
}
|
||||
|
||||
haiku4Pricing = ModelPricing{
|
||||
InputPrice: 1.0,
|
||||
OutputPrice: 5.0,
|
||||
CacheReadPrice: 0.1,
|
||||
CacheWritePrice5Minute: 1.25,
|
||||
CacheWritePrice1Hour: 2.0,
|
||||
}
|
||||
|
||||
haiku35Pricing = ModelPricing{
|
||||
InputPrice: 0.8,
|
||||
OutputPrice: 4.0,
|
||||
CacheReadPrice: 0.08,
|
||||
CacheWritePrice5Minute: 1.0,
|
||||
CacheWritePrice1Hour: 1.6,
|
||||
}
|
||||
|
||||
haiku3Pricing = ModelPricing{
|
||||
InputPrice: 0.25,
|
||||
OutputPrice: 1.25,
|
||||
CacheReadPrice: 0.03,
|
||||
CacheWritePrice: 0.3,
|
||||
InputPrice: 0.25,
|
||||
OutputPrice: 1.25,
|
||||
CacheReadPrice: 0.03,
|
||||
CacheWritePrice5Minute: 0.3,
|
||||
CacheWritePrice1Hour: 0.5,
|
||||
}
|
||||
|
||||
opus3Pricing = ModelPricing{
|
||||
InputPrice: 15.0,
|
||||
OutputPrice: 75.0,
|
||||
CacheReadPrice: 1.5,
|
||||
CacheWritePrice: 18.75,
|
||||
InputPrice: 15.0,
|
||||
OutputPrice: 75.0,
|
||||
CacheReadPrice: 1.5,
|
||||
CacheWritePrice5Minute: 18.75,
|
||||
CacheWritePrice1Hour: 30.0,
|
||||
}
|
||||
|
||||
modelFamilies = []modelFamily{
|
||||
{
|
||||
pattern: regexp.MustCompile(`^claude-opus-4-5-`),
|
||||
pattern: regexp.MustCompile(`^claude-opus-4-6(?:-|$)`),
|
||||
standardPricing: opus46StandardPricing,
|
||||
premiumPricing: &opus46PremiumPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^claude-opus-4-5(?:-|$)`),
|
||||
standardPricing: opus45Pricing,
|
||||
premiumPricing: nil,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^claude-(?:opus-4-|4-opus-|opus-4-1-)`),
|
||||
pattern: regexp.MustCompile(`^claude-(?:opus-4(?:-|$)|4-opus-)`),
|
||||
standardPricing: opus4Pricing,
|
||||
premiumPricing: nil,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^claude-(?:opus-3-|3-opus-)`),
|
||||
pattern: regexp.MustCompile(`^claude-(?:opus-3(?:-|$)|3-opus-)`),
|
||||
standardPricing: opus3Pricing,
|
||||
premiumPricing: nil,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^claude-(?:sonnet-4-5-|4-5-sonnet-)`),
|
||||
pattern: regexp.MustCompile(`^claude-(?:sonnet-4-6(?:-|$)|4-6-sonnet-)`),
|
||||
standardPricing: sonnet46StandardPricing,
|
||||
premiumPricing: &sonnet46PremiumPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^claude-(?:sonnet-4-5(?:-|$)|4-5-sonnet-)`),
|
||||
standardPricing: sonnet45StandardPricing,
|
||||
premiumPricing: &sonnet45PremiumPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^claude-3-7-sonnet-`),
|
||||
pattern: regexp.MustCompile(`^claude-(?:sonnet-4(?:-|$)|4-sonnet-)`),
|
||||
standardPricing: sonnet4StandardPricing,
|
||||
premiumPricing: &sonnet4PremiumPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^claude-(?:sonnet-4-|4-sonnet-)`),
|
||||
standardPricing: sonnet4StandardPricing,
|
||||
premiumPricing: &sonnet4PremiumPricing,
|
||||
pattern: regexp.MustCompile(`^claude-3-7-sonnet(?:-|$)`),
|
||||
standardPricing: sonnet37Pricing,
|
||||
premiumPricing: nil,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^claude-3-5-sonnet-`),
|
||||
pattern: regexp.MustCompile(`^claude-3-5-sonnet(?:-|$)`),
|
||||
standardPricing: sonnet35Pricing,
|
||||
premiumPricing: nil,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^claude-(?:haiku-4-5-|4-5-haiku-)`),
|
||||
pattern: regexp.MustCompile(`^claude-(?:haiku-4-5(?:-|$)|4-5-haiku-)`),
|
||||
standardPricing: haiku45Pricing,
|
||||
premiumPricing: nil,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^claude-haiku-4-`),
|
||||
pattern: regexp.MustCompile(`^claude-haiku-4(?:-|$)`),
|
||||
standardPricing: haiku4Pricing,
|
||||
premiumPricing: nil,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^claude-3-5-haiku-`),
|
||||
pattern: regexp.MustCompile(`^claude-3-5-haiku(?:-|$)`),
|
||||
standardPricing: haiku35Pricing,
|
||||
premiumPricing: nil,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^claude-3-haiku-`),
|
||||
pattern: regexp.MustCompile(`^claude-3-haiku(?:-|$)`),
|
||||
standardPricing: haiku3Pricing,
|
||||
premiumPricing: nil,
|
||||
},
|
||||
@@ -243,10 +310,20 @@ func getPricing(model string, contextWindow int) ModelPricing {
|
||||
func calculateCost(stats UsageStats, model string, contextWindow int) float64 {
|
||||
pricing := getPricing(model, contextWindow)
|
||||
|
||||
cacheCreationCost := 0.0
|
||||
if stats.CacheCreation5MinuteInputTokens > 0 || stats.CacheCreation1HourInputTokens > 0 {
|
||||
cacheCreationCost =
|
||||
float64(stats.CacheCreation5MinuteInputTokens)*pricing.CacheWritePrice5Minute +
|
||||
float64(stats.CacheCreation1HourInputTokens)*pricing.CacheWritePrice1Hour
|
||||
} else {
|
||||
// Backward compatibility for usage files generated before TTL split tracking.
|
||||
cacheCreationCost = float64(stats.CacheCreationInputTokens) * pricing.CacheWritePrice5Minute
|
||||
}
|
||||
|
||||
cost := (float64(stats.InputTokens)*pricing.InputPrice +
|
||||
float64(stats.OutputTokens)*pricing.OutputPrice +
|
||||
float64(stats.CacheReadInputTokens)*pricing.CacheReadPrice +
|
||||
float64(stats.CacheCreationInputTokens)*pricing.CacheWritePrice) / 1_000_000
|
||||
cacheCreationCost) / 1_000_000
|
||||
|
||||
return math.Round(cost*100) / 100
|
||||
}
|
||||
@@ -273,13 +350,15 @@ func (u *AggregatedUsage) ToJSON() *AggregatedUsageJSON {
|
||||
Model: combo.Model,
|
||||
ContextWindow: combo.ContextWindow,
|
||||
Total: UsageStatsJSON{
|
||||
RequestCount: combo.Total.RequestCount,
|
||||
MessagesCount: combo.Total.MessagesCount,
|
||||
InputTokens: combo.Total.InputTokens,
|
||||
OutputTokens: combo.Total.OutputTokens,
|
||||
CacheReadInputTokens: combo.Total.CacheReadInputTokens,
|
||||
CacheCreationInputTokens: combo.Total.CacheCreationInputTokens,
|
||||
CostUSD: totalCost,
|
||||
RequestCount: combo.Total.RequestCount,
|
||||
MessagesCount: combo.Total.MessagesCount,
|
||||
InputTokens: combo.Total.InputTokens,
|
||||
OutputTokens: combo.Total.OutputTokens,
|
||||
CacheReadInputTokens: combo.Total.CacheReadInputTokens,
|
||||
CacheCreationInputTokens: combo.Total.CacheCreationInputTokens,
|
||||
CacheCreation5MinuteInputTokens: combo.Total.CacheCreation5MinuteInputTokens,
|
||||
CacheCreation1HourInputTokens: combo.Total.CacheCreation1HourInputTokens,
|
||||
CostUSD: totalCost,
|
||||
},
|
||||
ByUser: make(map[string]UsageStatsJSON),
|
||||
}
|
||||
@@ -289,13 +368,15 @@ func (u *AggregatedUsage) ToJSON() *AggregatedUsageJSON {
|
||||
result.Costs.ByUser[user] += userCost
|
||||
|
||||
comboJSON.ByUser[user] = UsageStatsJSON{
|
||||
RequestCount: userStats.RequestCount,
|
||||
MessagesCount: userStats.MessagesCount,
|
||||
InputTokens: userStats.InputTokens,
|
||||
OutputTokens: userStats.OutputTokens,
|
||||
CacheReadInputTokens: userStats.CacheReadInputTokens,
|
||||
CacheCreationInputTokens: userStats.CacheCreationInputTokens,
|
||||
CostUSD: userCost,
|
||||
RequestCount: userStats.RequestCount,
|
||||
MessagesCount: userStats.MessagesCount,
|
||||
InputTokens: userStats.InputTokens,
|
||||
OutputTokens: userStats.OutputTokens,
|
||||
CacheReadInputTokens: userStats.CacheReadInputTokens,
|
||||
CacheCreationInputTokens: userStats.CacheCreationInputTokens,
|
||||
CacheCreation5MinuteInputTokens: userStats.CacheCreation5MinuteInputTokens,
|
||||
CacheCreation1HourInputTokens: userStats.CacheCreation1HourInputTokens,
|
||||
CostUSD: userCost,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -367,7 +448,13 @@ func (u *AggregatedUsage) Save() error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (u *AggregatedUsage) AddUsage(model string, contextWindow int, messagesCount int, inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens int64, user string) error {
|
||||
func (u *AggregatedUsage) AddUsage(
|
||||
model string,
|
||||
contextWindow int,
|
||||
messagesCount int,
|
||||
inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens, cacheCreation5MinuteTokens, cacheCreation1HourTokens int64,
|
||||
user string,
|
||||
) error {
|
||||
if model == "" {
|
||||
return E.New("model cannot be empty")
|
||||
}
|
||||
@@ -400,6 +487,10 @@ func (u *AggregatedUsage) AddUsage(model string, contextWindow int, messagesCoun
|
||||
combo = &u.Combinations[len(u.Combinations)-1]
|
||||
}
|
||||
|
||||
if cacheCreationTokens == 0 {
|
||||
cacheCreationTokens = cacheCreation5MinuteTokens + cacheCreation1HourTokens
|
||||
}
|
||||
|
||||
// Update total stats
|
||||
combo.Total.RequestCount++
|
||||
combo.Total.MessagesCount += messagesCount
|
||||
@@ -407,6 +498,8 @@ func (u *AggregatedUsage) AddUsage(model string, contextWindow int, messagesCoun
|
||||
combo.Total.OutputTokens += outputTokens
|
||||
combo.Total.CacheReadInputTokens += cacheReadTokens
|
||||
combo.Total.CacheCreationInputTokens += cacheCreationTokens
|
||||
combo.Total.CacheCreation5MinuteInputTokens += cacheCreation5MinuteTokens
|
||||
combo.Total.CacheCreation1HourInputTokens += cacheCreation1HourTokens
|
||||
|
||||
// Update per-user stats if user is specified
|
||||
if user != "" {
|
||||
@@ -417,6 +510,8 @@ func (u *AggregatedUsage) AddUsage(model string, contextWindow int, messagesCoun
|
||||
userStats.OutputTokens += outputTokens
|
||||
userStats.CacheReadInputTokens += cacheReadTokens
|
||||
userStats.CacheCreationInputTokens += cacheCreationTokens
|
||||
userStats.CacheCreation5MinuteInputTokens += cacheCreation5MinuteTokens
|
||||
userStats.CacheCreation1HourInputTokens += cacheCreation1HourTokens
|
||||
combo.ByUser[user] = userStats
|
||||
}
|
||||
|
||||
|
||||
@@ -406,7 +406,9 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
|
||||
isChatCompletions := path == "/v1/chat/completions"
|
||||
mediaType, _, err := mime.ParseMediaType(response.Header.Get("Content-Type"))
|
||||
isStreaming := err == nil && mediaType == "text/event-stream"
|
||||
|
||||
if !isStreaming && !isChatCompletions && response.Header.Get("Content-Type") == "" {
|
||||
isStreaming = true
|
||||
}
|
||||
if !isStreaming {
|
||||
bodyBytes, err := io.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
@@ -414,13 +416,14 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
|
||||
return
|
||||
}
|
||||
|
||||
var responseModel string
|
||||
var responseModel, serviceTier string
|
||||
var inputTokens, outputTokens, cachedTokens int64
|
||||
|
||||
if isChatCompletions {
|
||||
var chatCompletion openai.ChatCompletion
|
||||
if json.Unmarshal(bodyBytes, &chatCompletion) == nil {
|
||||
responseModel = chatCompletion.Model
|
||||
serviceTier = string(chatCompletion.ServiceTier)
|
||||
inputTokens = chatCompletion.Usage.PromptTokens
|
||||
outputTokens = chatCompletion.Usage.CompletionTokens
|
||||
cachedTokens = chatCompletion.Usage.PromptTokensDetails.CachedTokens
|
||||
@@ -429,6 +432,7 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
|
||||
var responsesResponse responses.Response
|
||||
if json.Unmarshal(bodyBytes, &responsesResponse) == nil {
|
||||
responseModel = string(responsesResponse.Model)
|
||||
serviceTier = string(responsesResponse.ServiceTier)
|
||||
inputTokens = responsesResponse.Usage.InputTokens
|
||||
outputTokens = responsesResponse.Usage.OutputTokens
|
||||
cachedTokens = responsesResponse.Usage.InputTokensDetails.CachedTokens
|
||||
@@ -440,7 +444,7 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
|
||||
responseModel = requestModel
|
||||
}
|
||||
if responseModel != "" {
|
||||
s.usageTracker.AddUsage(responseModel, inputTokens, outputTokens, cachedTokens, username)
|
||||
s.usageTracker.AddUsage(responseModel, inputTokens, outputTokens, cachedTokens, serviceTier, username)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -455,7 +459,7 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
|
||||
}
|
||||
|
||||
var inputTokens, outputTokens, cachedTokens int64
|
||||
var responseModel string
|
||||
var responseModel, serviceTier string
|
||||
buffer := make([]byte, buf.BufferSize)
|
||||
var leftover []byte
|
||||
|
||||
@@ -490,6 +494,9 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
|
||||
if chatChunk.Model != "" {
|
||||
responseModel = chatChunk.Model
|
||||
}
|
||||
if chatChunk.ServiceTier != "" {
|
||||
serviceTier = string(chatChunk.ServiceTier)
|
||||
}
|
||||
if chatChunk.Usage.PromptTokens > 0 {
|
||||
inputTokens = chatChunk.Usage.PromptTokens
|
||||
cachedTokens = chatChunk.Usage.PromptTokensDetails.CachedTokens
|
||||
@@ -506,6 +513,9 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
|
||||
if string(completedEvent.Response.Model) != "" {
|
||||
responseModel = string(completedEvent.Response.Model)
|
||||
}
|
||||
if completedEvent.Response.ServiceTier != "" {
|
||||
serviceTier = string(completedEvent.Response.ServiceTier)
|
||||
}
|
||||
if completedEvent.Response.Usage.InputTokens > 0 {
|
||||
inputTokens = completedEvent.Response.Usage.InputTokens
|
||||
cachedTokens = completedEvent.Response.Usage.InputTokensDetails.CachedTokens
|
||||
@@ -534,7 +544,7 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
|
||||
|
||||
if inputTokens > 0 || outputTokens > 0 {
|
||||
if responseModel != "" {
|
||||
s.usageTracker.AddUsage(responseModel, inputTokens, outputTokens, cachedTokens, username)
|
||||
s.usageTracker.AddUsage(responseModel, inputTokens, outputTokens, cachedTokens, serviceTier, username)
|
||||
}
|
||||
}
|
||||
return
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"math"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -42,9 +43,10 @@ func (u *UsageStats) UnmarshalJSON(data []byte) error {
|
||||
}
|
||||
|
||||
type CostCombination struct {
|
||||
Model string `json:"model"`
|
||||
Total UsageStats `json:"total"`
|
||||
ByUser map[string]UsageStats `json:"by_user"`
|
||||
Model string `json:"model"`
|
||||
ServiceTier string `json:"service_tier,omitempty"`
|
||||
Total UsageStats `json:"total"`
|
||||
ByUser map[string]UsageStats `json:"by_user"`
|
||||
}
|
||||
|
||||
type AggregatedUsage struct {
|
||||
@@ -68,9 +70,10 @@ type UsageStatsJSON struct {
|
||||
}
|
||||
|
||||
type CostCombinationJSON struct {
|
||||
Model string `json:"model"`
|
||||
Total UsageStatsJSON `json:"total"`
|
||||
ByUser map[string]UsageStatsJSON `json:"by_user"`
|
||||
Model string `json:"model"`
|
||||
ServiceTier string `json:"service_tier,omitempty"`
|
||||
Total UsageStatsJSON `json:"total"`
|
||||
ByUser map[string]UsageStatsJSON `json:"by_user"`
|
||||
}
|
||||
|
||||
type CostsSummaryJSON struct {
|
||||
@@ -95,7 +98,123 @@ type modelFamily struct {
|
||||
pricing ModelPricing
|
||||
}
|
||||
|
||||
// Service tier identifiers as they are compared, after lower-casing and
// trimming, by normalizeServiceTier.
const (
	serviceTierAuto     = "auto"
	serviceTierDefault  = "default"
	serviceTierFlex     = "flex"
	serviceTierPriority = "priority"
	serviceTierScale    = "scale"
)
|
||||
|
||||
var (
|
||||
gpt52Pricing = ModelPricing{
|
||||
InputPrice: 1.75,
|
||||
OutputPrice: 14.0,
|
||||
CachedInputPrice: 0.175,
|
||||
}
|
||||
|
||||
gpt5Pricing = ModelPricing{
|
||||
InputPrice: 1.25,
|
||||
OutputPrice: 10.0,
|
||||
CachedInputPrice: 0.125,
|
||||
}
|
||||
|
||||
gpt5MiniPricing = ModelPricing{
|
||||
InputPrice: 0.25,
|
||||
OutputPrice: 2.0,
|
||||
CachedInputPrice: 0.025,
|
||||
}
|
||||
|
||||
gpt5NanoPricing = ModelPricing{
|
||||
InputPrice: 0.05,
|
||||
OutputPrice: 0.4,
|
||||
CachedInputPrice: 0.005,
|
||||
}
|
||||
|
||||
gpt52CodexPricing = ModelPricing{
|
||||
InputPrice: 1.75,
|
||||
OutputPrice: 14.0,
|
||||
CachedInputPrice: 0.175,
|
||||
}
|
||||
|
||||
gpt51CodexPricing = ModelPricing{
|
||||
InputPrice: 1.25,
|
||||
OutputPrice: 10.0,
|
||||
CachedInputPrice: 0.125,
|
||||
}
|
||||
|
||||
gpt51CodexMiniPricing = ModelPricing{
|
||||
InputPrice: 0.25,
|
||||
OutputPrice: 2.0,
|
||||
CachedInputPrice: 0.025,
|
||||
}
|
||||
|
||||
gpt52ProPricing = ModelPricing{
|
||||
InputPrice: 21.0,
|
||||
OutputPrice: 168.0,
|
||||
CachedInputPrice: 21.0,
|
||||
}
|
||||
|
||||
gpt5ProPricing = ModelPricing{
|
||||
InputPrice: 15.0,
|
||||
OutputPrice: 120.0,
|
||||
CachedInputPrice: 15.0,
|
||||
}
|
||||
|
||||
gpt52FlexPricing = ModelPricing{
|
||||
InputPrice: 0.875,
|
||||
OutputPrice: 7.0,
|
||||
CachedInputPrice: 0.0875,
|
||||
}
|
||||
|
||||
gpt5FlexPricing = ModelPricing{
|
||||
InputPrice: 0.625,
|
||||
OutputPrice: 5.0,
|
||||
CachedInputPrice: 0.0625,
|
||||
}
|
||||
|
||||
gpt5MiniFlexPricing = ModelPricing{
|
||||
InputPrice: 0.125,
|
||||
OutputPrice: 1.0,
|
||||
CachedInputPrice: 0.0125,
|
||||
}
|
||||
|
||||
gpt5NanoFlexPricing = ModelPricing{
|
||||
InputPrice: 0.025,
|
||||
OutputPrice: 0.2,
|
||||
CachedInputPrice: 0.0025,
|
||||
}
|
||||
|
||||
gpt52PriorityPricing = ModelPricing{
|
||||
InputPrice: 3.5,
|
||||
OutputPrice: 28.0,
|
||||
CachedInputPrice: 0.35,
|
||||
}
|
||||
|
||||
gpt5PriorityPricing = ModelPricing{
|
||||
InputPrice: 2.5,
|
||||
OutputPrice: 20.0,
|
||||
CachedInputPrice: 0.25,
|
||||
}
|
||||
|
||||
gpt5MiniPriorityPricing = ModelPricing{
|
||||
InputPrice: 0.45,
|
||||
OutputPrice: 3.6,
|
||||
CachedInputPrice: 0.045,
|
||||
}
|
||||
|
||||
gpt52CodexPriorityPricing = ModelPricing{
|
||||
InputPrice: 3.5,
|
||||
OutputPrice: 28.0,
|
||||
CachedInputPrice: 0.35,
|
||||
}
|
||||
|
||||
gpt51CodexPriorityPricing = ModelPricing{
|
||||
InputPrice: 2.5,
|
||||
OutputPrice: 20.0,
|
||||
CachedInputPrice: 0.25,
|
||||
}
|
||||
|
||||
gpt4oPricing = ModelPricing{
|
||||
InputPrice: 2.5,
|
||||
OutputPrice: 10.0,
|
||||
@@ -111,7 +230,19 @@ var (
|
||||
gpt4oAudioPricing = ModelPricing{
|
||||
InputPrice: 2.5,
|
||||
OutputPrice: 10.0,
|
||||
CachedInputPrice: 1.25,
|
||||
CachedInputPrice: 2.5,
|
||||
}
|
||||
|
||||
gpt4oMiniAudioPricing = ModelPricing{
|
||||
InputPrice: 0.15,
|
||||
OutputPrice: 0.6,
|
||||
CachedInputPrice: 0.15,
|
||||
}
|
||||
|
||||
gptAudioMiniPricing = ModelPricing{
|
||||
InputPrice: 0.6,
|
||||
OutputPrice: 2.4,
|
||||
CachedInputPrice: 0.6,
|
||||
}
|
||||
|
||||
o1Pricing = ModelPricing{
|
||||
@@ -120,6 +251,12 @@ var (
|
||||
CachedInputPrice: 7.5,
|
||||
}
|
||||
|
||||
o1ProPricing = ModelPricing{
|
||||
InputPrice: 150.0,
|
||||
OutputPrice: 600.0,
|
||||
CachedInputPrice: 150.0,
|
||||
}
|
||||
|
||||
o1MiniPricing = ModelPricing{
|
||||
InputPrice: 1.1,
|
||||
OutputPrice: 4.4,
|
||||
@@ -135,13 +272,55 @@ var (
|
||||
o3Pricing = ModelPricing{
|
||||
InputPrice: 2.0,
|
||||
OutputPrice: 8.0,
|
||||
CachedInputPrice: 1.0,
|
||||
CachedInputPrice: 0.5,
|
||||
}
|
||||
|
||||
o3ProPricing = ModelPricing{
|
||||
InputPrice: 20.0,
|
||||
OutputPrice: 80.0,
|
||||
CachedInputPrice: 20.0,
|
||||
}
|
||||
|
||||
o3DeepResearchPricing = ModelPricing{
|
||||
InputPrice: 10.0,
|
||||
OutputPrice: 40.0,
|
||||
CachedInputPrice: 2.5,
|
||||
}
|
||||
|
||||
o4MiniPricing = ModelPricing{
|
||||
InputPrice: 1.1,
|
||||
OutputPrice: 4.4,
|
||||
CachedInputPrice: 0.55,
|
||||
CachedInputPrice: 0.275,
|
||||
}
|
||||
|
||||
o4MiniDeepResearchPricing = ModelPricing{
|
||||
InputPrice: 2.0,
|
||||
OutputPrice: 8.0,
|
||||
CachedInputPrice: 0.5,
|
||||
}
|
||||
|
||||
o3FlexPricing = ModelPricing{
|
||||
InputPrice: 1.0,
|
||||
OutputPrice: 4.0,
|
||||
CachedInputPrice: 0.25,
|
||||
}
|
||||
|
||||
o4MiniFlexPricing = ModelPricing{
|
||||
InputPrice: 0.55,
|
||||
OutputPrice: 2.2,
|
||||
CachedInputPrice: 0.138,
|
||||
}
|
||||
|
||||
o3PriorityPricing = ModelPricing{
|
||||
InputPrice: 3.5,
|
||||
OutputPrice: 14.0,
|
||||
CachedInputPrice: 0.875,
|
||||
}
|
||||
|
||||
o4MiniPriorityPricing = ModelPricing{
|
||||
InputPrice: 2.0,
|
||||
OutputPrice: 8.0,
|
||||
CachedInputPrice: 0.5,
|
||||
}
|
||||
|
||||
gpt41Pricing = ModelPricing{
|
||||
@@ -162,69 +341,358 @@ var (
|
||||
CachedInputPrice: 0.025,
|
||||
}
|
||||
|
||||
modelFamilies = []modelFamily{
|
||||
gpt41PriorityPricing = ModelPricing{
|
||||
InputPrice: 3.5,
|
||||
OutputPrice: 14.0,
|
||||
CachedInputPrice: 0.875,
|
||||
}
|
||||
|
||||
gpt41MiniPriorityPricing = ModelPricing{
|
||||
InputPrice: 0.7,
|
||||
OutputPrice: 2.8,
|
||||
CachedInputPrice: 0.175,
|
||||
}
|
||||
|
||||
gpt41NanoPriorityPricing = ModelPricing{
|
||||
InputPrice: 0.2,
|
||||
OutputPrice: 0.8,
|
||||
CachedInputPrice: 0.05,
|
||||
}
|
||||
|
||||
gpt4oPriorityPricing = ModelPricing{
|
||||
InputPrice: 4.25,
|
||||
OutputPrice: 17.0,
|
||||
CachedInputPrice: 2.125,
|
||||
}
|
||||
|
||||
gpt4oMiniPriorityPricing = ModelPricing{
|
||||
InputPrice: 0.25,
|
||||
OutputPrice: 1.0,
|
||||
CachedInputPrice: 0.125,
|
||||
}
|
||||
|
||||
standardModelFamilies = []modelFamily{
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-4\.1-nano`),
|
||||
pricing: gpt41NanoPricing,
|
||||
pattern: regexp.MustCompile(`^gpt-5\.2-codex(?:$|-)`),
|
||||
pricing: gpt52CodexPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-4\.1-mini`),
|
||||
pricing: gpt41MiniPricing,
|
||||
pattern: regexp.MustCompile(`^gpt-5\.1-codex-max(?:$|-)`),
|
||||
pricing: gpt51CodexPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-4\.1`),
|
||||
pricing: gpt41Pricing,
|
||||
pattern: regexp.MustCompile(`^gpt-5\.1-codex-mini(?:$|-)`),
|
||||
pricing: gpt51CodexMiniPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^o4-mini`),
|
||||
pattern: regexp.MustCompile(`^gpt-5\.1-codex(?:$|-)`),
|
||||
pricing: gpt51CodexPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5-codex(?:$|-)`),
|
||||
pricing: gpt51CodexPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5\.2-chat-latest$`),
|
||||
pricing: gpt52Pricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5\.1-chat-latest$`),
|
||||
pricing: gpt5Pricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5-chat-latest$`),
|
||||
pricing: gpt5Pricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5\.2-pro(?:$|-)`),
|
||||
pricing: gpt52ProPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5-pro(?:$|-)`),
|
||||
pricing: gpt5ProPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5-mini(?:$|-)`),
|
||||
pricing: gpt5MiniPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5-nano(?:$|-)`),
|
||||
pricing: gpt5NanoPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5\.2(?:$|-)`),
|
||||
pricing: gpt52Pricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5\.1(?:$|-)`),
|
||||
pricing: gpt5Pricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5(?:$|-)`),
|
||||
pricing: gpt5Pricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^o4-mini-deep-research(?:$|-)`),
|
||||
pricing: o4MiniDeepResearchPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^o4-mini(?:$|-)`),
|
||||
pricing: o4MiniPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^o3-mini`),
|
||||
pattern: regexp.MustCompile(`^o3-pro(?:$|-)`),
|
||||
pricing: o3ProPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^o3-deep-research(?:$|-)`),
|
||||
pricing: o3DeepResearchPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^o3-mini(?:$|-)`),
|
||||
pricing: o3MiniPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^o3`),
|
||||
pattern: regexp.MustCompile(`^o3(?:$|-)`),
|
||||
pricing: o3Pricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^o1-mini`),
|
||||
pattern: regexp.MustCompile(`^o1-pro(?:$|-)`),
|
||||
pricing: o1ProPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^o1-mini(?:$|-)`),
|
||||
pricing: o1MiniPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^o1`),
|
||||
pattern: regexp.MustCompile(`^o1(?:$|-)`),
|
||||
pricing: o1Pricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-4o-audio`),
|
||||
pattern: regexp.MustCompile(`^gpt-4o-mini-audio(?:$|-)`),
|
||||
pricing: gpt4oMiniAudioPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-audio-mini(?:$|-)`),
|
||||
pricing: gptAudioMiniPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^(?:gpt-4o-audio|gpt-audio)(?:$|-)`),
|
||||
pricing: gpt4oAudioPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-4o-mini`),
|
||||
pattern: regexp.MustCompile(`^gpt-4\.1-nano(?:$|-)`),
|
||||
pricing: gpt41NanoPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-4\.1-mini(?:$|-)`),
|
||||
pricing: gpt41MiniPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-4\.1(?:$|-)`),
|
||||
pricing: gpt41Pricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-4o-mini(?:$|-)`),
|
||||
pricing: gpt4oMiniPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-4o`),
|
||||
pattern: regexp.MustCompile(`^gpt-4o(?:$|-)`),
|
||||
pricing: gpt4oPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^chatgpt-4o`),
|
||||
pattern: regexp.MustCompile(`^chatgpt-4o(?:$|-)`),
|
||||
pricing: gpt4oPricing,
|
||||
},
|
||||
}
|
||||
|
||||
flexModelFamilies = []modelFamily{
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5-mini(?:$|-)`),
|
||||
pricing: gpt5MiniFlexPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5-nano(?:$|-)`),
|
||||
pricing: gpt5NanoFlexPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5\.2(?:$|-)`),
|
||||
pricing: gpt52FlexPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5\.1(?:$|-)`),
|
||||
pricing: gpt5FlexPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5(?:$|-)`),
|
||||
pricing: gpt5FlexPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^o4-mini(?:$|-)`),
|
||||
pricing: o4MiniFlexPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^o3(?:$|-)`),
|
||||
pricing: o3FlexPricing,
|
||||
},
|
||||
}
|
||||
|
||||
priorityModelFamilies = []modelFamily{
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5\.2-codex(?:$|-)`),
|
||||
pricing: gpt52CodexPriorityPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5\.1-codex-max(?:$|-)`),
|
||||
pricing: gpt51CodexPriorityPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5\.1-codex(?:$|-)`),
|
||||
pricing: gpt51CodexPriorityPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5-codex(?:$|-)`),
|
||||
pricing: gpt51CodexPriorityPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5-mini(?:$|-)`),
|
||||
pricing: gpt5MiniPriorityPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5\.2(?:$|-)`),
|
||||
pricing: gpt52PriorityPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5\.1(?:$|-)`),
|
||||
pricing: gpt5PriorityPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-5(?:$|-)`),
|
||||
pricing: gpt5PriorityPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^o4-mini(?:$|-)`),
|
||||
pricing: o4MiniPriorityPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^o3(?:$|-)`),
|
||||
pricing: o3PriorityPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-4\.1-nano(?:$|-)`),
|
||||
pricing: gpt41NanoPriorityPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-4\.1-mini(?:$|-)`),
|
||||
pricing: gpt41MiniPriorityPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-4\.1(?:$|-)`),
|
||||
pricing: gpt41PriorityPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-4o-mini(?:$|-)`),
|
||||
pricing: gpt4oMiniPriorityPricing,
|
||||
},
|
||||
{
|
||||
pattern: regexp.MustCompile(`^gpt-4o(?:$|-)`),
|
||||
pricing: gpt4oPriorityPricing,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
func getPricing(model string) ModelPricing {
|
||||
func modelFamiliesForTier(serviceTier string) []modelFamily {
|
||||
switch serviceTier {
|
||||
case serviceTierFlex:
|
||||
return flexModelFamilies
|
||||
case serviceTierPriority:
|
||||
return priorityModelFamilies
|
||||
default:
|
||||
return standardModelFamilies
|
||||
}
|
||||
}
|
||||
|
||||
func findPricingInFamilies(model string, modelFamilies []modelFamily) (ModelPricing, bool) {
|
||||
for _, family := range modelFamilies {
|
||||
if family.pattern.MatchString(model) {
|
||||
return family.pricing
|
||||
return family.pricing, true
|
||||
}
|
||||
}
|
||||
return ModelPricing{}, false
|
||||
}
|
||||
|
||||
func normalizeServiceTier(serviceTier string) string {
|
||||
switch strings.ToLower(strings.TrimSpace(serviceTier)) {
|
||||
case "", serviceTierAuto, serviceTierDefault:
|
||||
return serviceTierDefault
|
||||
case serviceTierFlex:
|
||||
return serviceTierFlex
|
||||
case serviceTierPriority:
|
||||
return serviceTierPriority
|
||||
case serviceTierScale:
|
||||
// Scale-tier requests are prepaid differently and not listed in this usage file.
|
||||
return serviceTierDefault
|
||||
default:
|
||||
return serviceTierDefault
|
||||
}
|
||||
}
|
||||
|
||||
func getPricing(model string, serviceTier string) ModelPricing {
|
||||
normalizedServiceTier := normalizeServiceTier(serviceTier)
|
||||
modelFamilies := modelFamiliesForTier(normalizedServiceTier)
|
||||
|
||||
if pricing, found := findPricingInFamilies(model, modelFamilies); found {
|
||||
return pricing
|
||||
}
|
||||
|
||||
normalizedModel := normalizeGPT5Model(model)
|
||||
if normalizedModel != model {
|
||||
if pricing, found := findPricingInFamilies(normalizedModel, modelFamilies); found {
|
||||
return pricing
|
||||
}
|
||||
}
|
||||
|
||||
if normalizedServiceTier != serviceTierDefault {
|
||||
if pricing, found := findPricingInFamilies(model, standardModelFamilies); found {
|
||||
return pricing
|
||||
}
|
||||
if normalizedModel != model {
|
||||
if pricing, found := findPricingInFamilies(normalizedModel, standardModelFamilies); found {
|
||||
return pricing
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return gpt4oPricing
|
||||
}
|
||||
|
||||
func calculateCost(stats UsageStats, model string) float64 {
|
||||
pricing := getPricing(model)
|
||||
// normalizeGPT5Model maps a dotted GPT-5 model name ("gpt-5.<something>") onto
// a fixed canonical name chosen by the variant suffix it contains. Names that
// do not start with "gpt-5." are returned unchanged.
//
// Case order is significant: "-codex-mini" and "-codex-max" are tested before
// "-codex", and "-codex-mini" before "-mini", so the longer suffix wins.
func normalizeGPT5Model(model string) string {
	if !strings.HasPrefix(model, "gpt-5.") {
		return model
	}

	switch {
	case strings.Contains(model, "-codex-mini"):
		return "gpt-5.1-codex-mini"
	case strings.Contains(model, "-codex-max"):
		return "gpt-5.1-codex-max"
	case strings.Contains(model, "-codex"):
		return "gpt-5.2-codex"
	case strings.Contains(model, "-chat-latest"):
		return "gpt-5.2-chat-latest"
	case strings.Contains(model, "-pro"):
		return "gpt-5.2-pro"
	case strings.Contains(model, "-mini"):
		return "gpt-5-mini"
	case strings.Contains(model, "-nano"):
		return "gpt-5-nano"
	default:
		return "gpt-5.2"
	}
}
|
||||
|
||||
func calculateCost(stats UsageStats, model string, serviceTier string) float64 {
|
||||
pricing := getPricing(model, serviceTier)
|
||||
|
||||
regularInputTokens := stats.InputTokens - stats.CachedTokens
|
||||
if regularInputTokens < 0 {
|
||||
@@ -252,12 +720,13 @@ func (u *AggregatedUsage) ToJSON() *AggregatedUsageJSON {
|
||||
}
|
||||
|
||||
for i, combo := range u.Combinations {
|
||||
totalCost := calculateCost(combo.Total, combo.Model)
|
||||
totalCost := calculateCost(combo.Total, combo.Model, combo.ServiceTier)
|
||||
|
||||
result.Costs.TotalUSD += totalCost
|
||||
|
||||
comboJSON := CostCombinationJSON{
|
||||
Model: combo.Model,
|
||||
Model: combo.Model,
|
||||
ServiceTier: combo.ServiceTier,
|
||||
Total: UsageStatsJSON{
|
||||
RequestCount: combo.Total.RequestCount,
|
||||
InputTokens: combo.Total.InputTokens,
|
||||
@@ -269,7 +738,7 @@ func (u *AggregatedUsage) ToJSON() *AggregatedUsageJSON {
|
||||
}
|
||||
|
||||
for user, userStats := range combo.ByUser {
|
||||
userCost := calculateCost(userStats, combo.Model)
|
||||
userCost := calculateCost(userStats, combo.Model, combo.ServiceTier)
|
||||
result.Costs.ByUser[user] += userCost
|
||||
|
||||
comboJSON.ByUser[user] = UsageStatsJSON{
|
||||
@@ -318,6 +787,7 @@ func (u *AggregatedUsage) Load() error {
|
||||
u.Combinations = temp.Combinations
|
||||
|
||||
for i := range u.Combinations {
|
||||
u.Combinations[i].ServiceTier = normalizeServiceTier(u.Combinations[i].ServiceTier)
|
||||
if u.Combinations[i].ByUser == nil {
|
||||
u.Combinations[i].ByUser = make(map[string]UsageStats)
|
||||
}
|
||||
@@ -349,11 +819,13 @@ func (u *AggregatedUsage) Save() error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (u *AggregatedUsage) AddUsage(model string, inputTokens, outputTokens, cachedTokens int64, user string) error {
|
||||
func (u *AggregatedUsage) AddUsage(model string, inputTokens, outputTokens, cachedTokens int64, serviceTier string, user string) error {
|
||||
if model == "" {
|
||||
return E.New("model cannot be empty")
|
||||
}
|
||||
|
||||
normalizedServiceTier := normalizeServiceTier(serviceTier)
|
||||
|
||||
u.mutex.Lock()
|
||||
defer u.mutex.Unlock()
|
||||
|
||||
@@ -361,7 +833,11 @@ func (u *AggregatedUsage) AddUsage(model string, inputTokens, outputTokens, cach
|
||||
|
||||
var combo *CostCombination
|
||||
for i := range u.Combinations {
|
||||
if u.Combinations[i].Model == model {
|
||||
comboServiceTier := normalizeServiceTier(u.Combinations[i].ServiceTier)
|
||||
if u.Combinations[i].ServiceTier != comboServiceTier {
|
||||
u.Combinations[i].ServiceTier = comboServiceTier
|
||||
}
|
||||
if u.Combinations[i].Model == model && comboServiceTier == normalizedServiceTier {
|
||||
combo = &u.Combinations[i]
|
||||
break
|
||||
}
|
||||
@@ -369,9 +845,10 @@ func (u *AggregatedUsage) AddUsage(model string, inputTokens, outputTokens, cach
|
||||
|
||||
if combo == nil {
|
||||
newCombo := CostCombination{
|
||||
Model: model,
|
||||
Total: UsageStats{},
|
||||
ByUser: make(map[string]UsageStats),
|
||||
Model: model,
|
||||
ServiceTier: normalizedServiceTier,
|
||||
Total: UsageStats{},
|
||||
ByUser: make(map[string]UsageStats),
|
||||
}
|
||||
u.Combinations = append(u.Combinations, newCombo)
|
||||
combo = &u.Combinations[len(u.Combinations)-1]
|
||||
|
||||
Reference in New Issue
Block a user