Fix OCM usage counting & Update pricing

2026-02-24 19:16:40 +08:00
parent d48236da94
commit 22cf675263
6 changed files with 748 additions and 156 deletions
--- a/service/ocm/service.go
+++ b/service/ocm/service.go
@@ -406,7 +406,9 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
 	isChatCompletions := path == "/v1/chat/completions"
 	mediaType, _, err := mime.ParseMediaType(response.Header.Get("Content-Type"))
 	isStreaming := err == nil && mediaType == "text/event-stream"
-
+	if !isStreaming && !isChatCompletions && response.Header.Get("Content-Type") == "" {
+		isStreaming = true
+	}
 	if !isStreaming {
 		bodyBytes, err := io.ReadAll(response.Body)
 		if err != nil {
@@ -414,13 +416,14 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
 			return
 		}

-		var responseModel string
+		var responseModel, serviceTier string
 		var inputTokens, outputTokens, cachedTokens int64

 		if isChatCompletions {
 			var chatCompletion openai.ChatCompletion
 			if json.Unmarshal(bodyBytes, &chatCompletion) == nil {
 				responseModel = chatCompletion.Model
+				serviceTier = string(chatCompletion.ServiceTier)
 				inputTokens = chatCompletion.Usage.PromptTokens
 				outputTokens = chatCompletion.Usage.CompletionTokens
 				cachedTokens = chatCompletion.Usage.PromptTokensDetails.CachedTokens
@@ -429,6 +432,7 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
 			var responsesResponse responses.Response
 			if json.Unmarshal(bodyBytes, &responsesResponse) == nil {
 				responseModel = string(responsesResponse.Model)
+				serviceTier = string(responsesResponse.ServiceTier)
 				inputTokens = responsesResponse.Usage.InputTokens
 				outputTokens = responsesResponse.Usage.OutputTokens
 				cachedTokens = responsesResponse.Usage.InputTokensDetails.CachedTokens
@@ -440,7 +444,7 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
 				responseModel = requestModel
 			}
 			if responseModel != "" {
-				s.usageTracker.AddUsage(responseModel, inputTokens, outputTokens, cachedTokens, username)
+				s.usageTracker.AddUsage(responseModel, inputTokens, outputTokens, cachedTokens, serviceTier, username)
 			}
 		}

@@ -455,7 +459,7 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
 	}

 	var inputTokens, outputTokens, cachedTokens int64
-	var responseModel string
+	var responseModel, serviceTier string
 	buffer := make([]byte, buf.BufferSize)
 	var leftover []byte

@@ -490,6 +494,9 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
 							if chatChunk.Model != "" {
 								responseModel = chatChunk.Model
 							}
+							if chatChunk.ServiceTier != "" {
+								serviceTier = string(chatChunk.ServiceTier)
+							}
 							if chatChunk.Usage.PromptTokens > 0 {
 								inputTokens = chatChunk.Usage.PromptTokens
 								cachedTokens = chatChunk.Usage.PromptTokensDetails.CachedTokens
@@ -506,6 +513,9 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
 								if string(completedEvent.Response.Model) != "" {
 									responseModel = string(completedEvent.Response.Model)
 								}
+								if completedEvent.Response.ServiceTier != "" {
+									serviceTier = string(completedEvent.Response.ServiceTier)
+								}
 								if completedEvent.Response.Usage.InputTokens > 0 {
 									inputTokens = completedEvent.Response.Usage.InputTokens
 									cachedTokens = completedEvent.Response.Usage.InputTokensDetails.CachedTokens
@@ -534,7 +544,7 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons

 			if inputTokens > 0 || outputTokens > 0 {
 				if responseModel != "" {
-					s.usageTracker.AddUsage(responseModel, inputTokens, outputTokens, cachedTokens, username)
+					s.usageTracker.AddUsage(responseModel, inputTokens, outputTokens, cachedTokens, serviceTier, username)
 				}
 			}
 			return
--- a/service/ocm/service_usage.go
+++ b/service/ocm/service_usage.go
@@ -5,6 +5,7 @@ import (
 	"math"
 	"os"
 	"regexp"
+	"strings"
 	"sync"
 	"time"

@@ -42,9 +43,10 @@ func (u *UsageStats) UnmarshalJSON(data []byte) error {
 }

+// CostCombination holds the usage counters accumulated for one
+// (Model, ServiceTier) pair: Total across all users and ByUser keyed by user.
+// ServiceTier is omitted from the JSON encoding when empty.
 type CostCombination struct {
-	Model  string                `json:"model"`
-	Total  UsageStats            `json:"total"`
-	ByUser map[string]UsageStats `json:"by_user"`
+	Model       string                `json:"model"`
+	ServiceTier string                `json:"service_tier,omitempty"`
+	Total       UsageStats            `json:"total"`
+	ByUser      map[string]UsageStats `json:"by_user"`
 }

 type AggregatedUsage struct {
@@ -68,9 +70,10 @@ type UsageStatsJSON struct {
 }

+// CostCombinationJSON is the JSON-facing counterpart of CostCombination that
+// AggregatedUsage.ToJSON builds for each combination; it carries the same
+// model and service tier, with the statistics expressed as UsageStatsJSON.
 type CostCombinationJSON struct {
-	Model  string                    `json:"model"`
-	Total  UsageStatsJSON            `json:"total"`
-	ByUser map[string]UsageStatsJSON `json:"by_user"`
+	Model       string                    `json:"model"`
+	ServiceTier string                    `json:"service_tier,omitempty"`
+	Total       UsageStatsJSON            `json:"total"`
+	ByUser      map[string]UsageStatsJSON `json:"by_user"`
 }

 type CostsSummaryJSON struct {
@@ -95,7 +98,123 @@ type modelFamily struct {
 	pricing ModelPricing
 }

+// Service tier names recognized by normalizeServiceTier. After normalization
+// only serviceTierDefault, serviceTierFlex and serviceTierPriority remain:
+// auto, scale, the empty string and any unrecognized value all collapse to
+// serviceTierDefault.
+const (
+	serviceTierAuto     = "auto"
+	serviceTierDefault  = "default"
+	serviceTierFlex     = "flex"
+	serviceTierPriority = "priority"
+	serviceTierScale    = "scale"
+)
+
 var (
+	gpt52Pricing = ModelPricing{
+		InputPrice:       1.75,
+		OutputPrice:      14.0,
+		CachedInputPrice: 0.175,
+	}
+
+	gpt5Pricing = ModelPricing{
+		InputPrice:       1.25,
+		OutputPrice:      10.0,
+		CachedInputPrice: 0.125,
+	}
+
+	gpt5MiniPricing = ModelPricing{
+		InputPrice:       0.25,
+		OutputPrice:      2.0,
+		CachedInputPrice: 0.025,
+	}
+
+	gpt5NanoPricing = ModelPricing{
+		InputPrice:       0.05,
+		OutputPrice:      0.4,
+		CachedInputPrice: 0.005,
+	}
+
+	gpt52CodexPricing = ModelPricing{
+		InputPrice:       1.75,
+		OutputPrice:      14.0,
+		CachedInputPrice: 0.175,
+	}
+
+	gpt51CodexPricing = ModelPricing{
+		InputPrice:       1.25,
+		OutputPrice:      10.0,
+		CachedInputPrice: 0.125,
+	}
+
+	gpt51CodexMiniPricing = ModelPricing{
+		InputPrice:       0.25,
+		OutputPrice:      2.0,
+		CachedInputPrice: 0.025,
+	}
+
+	gpt52ProPricing = ModelPricing{
+		InputPrice:       21.0,
+		OutputPrice:      168.0,
+		CachedInputPrice: 21.0,
+	}
+
+	gpt5ProPricing = ModelPricing{
+		InputPrice:       15.0,
+		OutputPrice:      120.0,
+		CachedInputPrice: 15.0,
+	}
+
+	gpt52FlexPricing = ModelPricing{
+		InputPrice:       0.875,
+		OutputPrice:      7.0,
+		CachedInputPrice: 0.0875,
+	}
+
+	gpt5FlexPricing = ModelPricing{
+		InputPrice:       0.625,
+		OutputPrice:      5.0,
+		CachedInputPrice: 0.0625,
+	}
+
+	gpt5MiniFlexPricing = ModelPricing{
+		InputPrice:       0.125,
+		OutputPrice:      1.0,
+		CachedInputPrice: 0.0125,
+	}
+
+	gpt5NanoFlexPricing = ModelPricing{
+		InputPrice:       0.025,
+		OutputPrice:      0.2,
+		CachedInputPrice: 0.0025,
+	}
+
+	gpt52PriorityPricing = ModelPricing{
+		InputPrice:       3.5,
+		OutputPrice:      28.0,
+		CachedInputPrice: 0.35,
+	}
+
+	gpt5PriorityPricing = ModelPricing{
+		InputPrice:       2.5,
+		OutputPrice:      20.0,
+		CachedInputPrice: 0.25,
+	}
+
+	gpt5MiniPriorityPricing = ModelPricing{
+		InputPrice:       0.45,
+		OutputPrice:      3.6,
+		CachedInputPrice: 0.045,
+	}
+
+	gpt52CodexPriorityPricing = ModelPricing{
+		InputPrice:       3.5,
+		OutputPrice:      28.0,
+		CachedInputPrice: 0.35,
+	}
+
+	gpt51CodexPriorityPricing = ModelPricing{
+		InputPrice:       2.5,
+		OutputPrice:      20.0,
+		CachedInputPrice: 0.25,
+	}
+
 	gpt4oPricing = ModelPricing{
 		InputPrice:       2.5,
 		OutputPrice:      10.0,
@@ -111,7 +230,19 @@ var (
 	gpt4oAudioPricing = ModelPricing{
 		InputPrice:       2.5,
 		OutputPrice:      10.0,
-		CachedInputPrice: 1.25,
+		CachedInputPrice: 2.5,
+	}
+
+	gpt4oMiniAudioPricing = ModelPricing{
+		InputPrice:       0.15,
+		OutputPrice:      0.6,
+		CachedInputPrice: 0.15,
+	}
+
+	gptAudioMiniPricing = ModelPricing{
+		InputPrice:       0.6,
+		OutputPrice:      2.4,
+		CachedInputPrice: 0.6,
 	}

 	o1Pricing = ModelPricing{
@@ -120,6 +251,12 @@ var (
 		CachedInputPrice: 7.5,
 	}

+	o1ProPricing = ModelPricing{
+		InputPrice:       150.0,
+		OutputPrice:      600.0,
+		CachedInputPrice: 150.0,
+	}
+
 	o1MiniPricing = ModelPricing{
 		InputPrice:       1.1,
 		OutputPrice:      4.4,
@@ -135,13 +272,55 @@ var (
 	o3Pricing = ModelPricing{
 		InputPrice:       2.0,
 		OutputPrice:      8.0,
-		CachedInputPrice: 1.0,
+		CachedInputPrice: 0.5,
+	}
+
+	o3ProPricing = ModelPricing{
+		InputPrice:       20.0,
+		OutputPrice:      80.0,
+		CachedInputPrice: 20.0,
+	}
+
+	o3DeepResearchPricing = ModelPricing{
+		InputPrice:       10.0,
+		OutputPrice:      40.0,
+		CachedInputPrice: 2.5,
 	}

 	o4MiniPricing = ModelPricing{
 		InputPrice:       1.1,
 		OutputPrice:      4.4,
-		CachedInputPrice: 0.55,
+		CachedInputPrice: 0.275,
+	}
+
+	o4MiniDeepResearchPricing = ModelPricing{
+		InputPrice:       2.0,
+		OutputPrice:      8.0,
+		CachedInputPrice: 0.5,
+	}
+
+	o3FlexPricing = ModelPricing{
+		InputPrice:       1.0,
+		OutputPrice:      4.0,
+		CachedInputPrice: 0.25,
+	}
+
+	o4MiniFlexPricing = ModelPricing{
+		InputPrice:       0.55,
+		OutputPrice:      2.2,
+		CachedInputPrice: 0.138,
+	}
+
+	o3PriorityPricing = ModelPricing{
+		InputPrice:       3.5,
+		OutputPrice:      14.0,
+		CachedInputPrice: 0.875,
+	}
+
+	o4MiniPriorityPricing = ModelPricing{
+		InputPrice:       2.0,
+		OutputPrice:      8.0,
+		CachedInputPrice: 0.5,
 	}

 	gpt41Pricing = ModelPricing{
@@ -162,69 +341,358 @@ var (
 		CachedInputPrice: 0.025,
 	}

-	modelFamilies = []modelFamily{
+	gpt41PriorityPricing = ModelPricing{
+		InputPrice:       3.5,
+		OutputPrice:      14.0,
+		CachedInputPrice: 0.875,
+	}
+
+	gpt41MiniPriorityPricing = ModelPricing{
+		InputPrice:       0.7,
+		OutputPrice:      2.8,
+		CachedInputPrice: 0.175,
+	}
+
+	gpt41NanoPriorityPricing = ModelPricing{
+		InputPrice:       0.2,
+		OutputPrice:      0.8,
+		CachedInputPrice: 0.05,
+	}
+
+	gpt4oPriorityPricing = ModelPricing{
+		InputPrice:       4.25,
+		OutputPrice:      17.0,
+		CachedInputPrice: 2.125,
+	}
+
+	gpt4oMiniPriorityPricing = ModelPricing{
+		InputPrice:       0.25,
+		OutputPrice:      1.0,
+		CachedInputPrice: 0.125,
+	}
+
+	standardModelFamilies = []modelFamily{
 		{
-			pattern: regexp.MustCompile(`^gpt-4\.1-nano`),
-			pricing: gpt41NanoPricing,
+			pattern: regexp.MustCompile(`^gpt-5\.2-codex(?:$|-)`),
+			pricing: gpt52CodexPricing,
 		},
 		{
-			pattern: regexp.MustCompile(`^gpt-4\.1-mini`),
-			pricing: gpt41MiniPricing,
+			pattern: regexp.MustCompile(`^gpt-5\.1-codex-max(?:$|-)`),
+			pricing: gpt51CodexPricing,
 		},
 		{
-			pattern: regexp.MustCompile(`^gpt-4\.1`),
-			pricing: gpt41Pricing,
+			pattern: regexp.MustCompile(`^gpt-5\.1-codex-mini(?:$|-)`),
+			pricing: gpt51CodexMiniPricing,
 		},
 		{
-			pattern: regexp.MustCompile(`^o4-mini`),
+			pattern: regexp.MustCompile(`^gpt-5\.1-codex(?:$|-)`),
+			pricing: gpt51CodexPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5-codex(?:$|-)`),
+			pricing: gpt51CodexPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5\.2-chat-latest$`),
+			pricing: gpt52Pricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5\.1-chat-latest$`),
+			pricing: gpt5Pricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5-chat-latest$`),
+			pricing: gpt5Pricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5\.2-pro(?:$|-)`),
+			pricing: gpt52ProPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5-pro(?:$|-)`),
+			pricing: gpt5ProPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5-mini(?:$|-)`),
+			pricing: gpt5MiniPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5-nano(?:$|-)`),
+			pricing: gpt5NanoPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5\.2(?:$|-)`),
+			pricing: gpt52Pricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5\.1(?:$|-)`),
+			pricing: gpt5Pricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5(?:$|-)`),
+			pricing: gpt5Pricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^o4-mini-deep-research(?:$|-)`),
+			pricing: o4MiniDeepResearchPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^o4-mini(?:$|-)`),
 			pricing: o4MiniPricing,
 		},
 		{
-			pattern: regexp.MustCompile(`^o3-mini`),
+			pattern: regexp.MustCompile(`^o3-pro(?:$|-)`),
+			pricing: o3ProPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^o3-deep-research(?:$|-)`),
+			pricing: o3DeepResearchPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^o3-mini(?:$|-)`),
 			pricing: o3MiniPricing,
 		},
 		{
-			pattern: regexp.MustCompile(`^o3`),
+			pattern: regexp.MustCompile(`^o3(?:$|-)`),
 			pricing: o3Pricing,
 		},
 		{
-			pattern: regexp.MustCompile(`^o1-mini`),
+			pattern: regexp.MustCompile(`^o1-pro(?:$|-)`),
+			pricing: o1ProPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^o1-mini(?:$|-)`),
 			pricing: o1MiniPricing,
 		},
 		{
-			pattern: regexp.MustCompile(`^o1`),
+			pattern: regexp.MustCompile(`^o1(?:$|-)`),
 			pricing: o1Pricing,
 		},
 		{
-			pattern: regexp.MustCompile(`^gpt-4o-audio`),
+			pattern: regexp.MustCompile(`^gpt-4o-mini-audio(?:$|-)`),
+			pricing: gpt4oMiniAudioPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-audio-mini(?:$|-)`),
+			pricing: gptAudioMiniPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^(?:gpt-4o-audio|gpt-audio)(?:$|-)`),
 			pricing: gpt4oAudioPricing,
 		},
 		{
-			pattern: regexp.MustCompile(`^gpt-4o-mini`),
+			pattern: regexp.MustCompile(`^gpt-4\.1-nano(?:$|-)`),
+			pricing: gpt41NanoPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-4\.1-mini(?:$|-)`),
+			pricing: gpt41MiniPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-4\.1(?:$|-)`),
+			pricing: gpt41Pricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-4o-mini(?:$|-)`),
 			pricing: gpt4oMiniPricing,
 		},
 		{
-			pattern: regexp.MustCompile(`^gpt-4o`),
+			pattern: regexp.MustCompile(`^gpt-4o(?:$|-)`),
 			pricing: gpt4oPricing,
 		},
 		{
-			pattern: regexp.MustCompile(`^chatgpt-4o`),
+			pattern: regexp.MustCompile(`^chatgpt-4o(?:$|-)`),
 			pricing: gpt4oPricing,
 		},
 	}
+
+	flexModelFamilies = []modelFamily{
+		{
+			pattern: regexp.MustCompile(`^gpt-5-mini(?:$|-)`),
+			pricing: gpt5MiniFlexPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5-nano(?:$|-)`),
+			pricing: gpt5NanoFlexPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5\.2(?:$|-)`),
+			pricing: gpt52FlexPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5\.1(?:$|-)`),
+			pricing: gpt5FlexPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5(?:$|-)`),
+			pricing: gpt5FlexPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^o4-mini(?:$|-)`),
+			pricing: o4MiniFlexPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^o3(?:$|-)`),
+			pricing: o3FlexPricing,
+		},
+	}
+
+	priorityModelFamilies = []modelFamily{
+		{
+			pattern: regexp.MustCompile(`^gpt-5\.2-codex(?:$|-)`),
+			pricing: gpt52CodexPriorityPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5\.1-codex-max(?:$|-)`),
+			pricing: gpt51CodexPriorityPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5\.1-codex(?:$|-)`),
+			pricing: gpt51CodexPriorityPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5-codex(?:$|-)`),
+			pricing: gpt51CodexPriorityPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5-mini(?:$|-)`),
+			pricing: gpt5MiniPriorityPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5\.2(?:$|-)`),
+			pricing: gpt52PriorityPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5\.1(?:$|-)`),
+			pricing: gpt5PriorityPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-5(?:$|-)`),
+			pricing: gpt5PriorityPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^o4-mini(?:$|-)`),
+			pricing: o4MiniPriorityPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^o3(?:$|-)`),
+			pricing: o3PriorityPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-4\.1-nano(?:$|-)`),
+			pricing: gpt41NanoPriorityPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-4\.1-mini(?:$|-)`),
+			pricing: gpt41MiniPriorityPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-4\.1(?:$|-)`),
+			pricing: gpt41PriorityPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-4o-mini(?:$|-)`),
+			pricing: gpt4oMiniPriorityPricing,
+		},
+		{
+			pattern: regexp.MustCompile(`^gpt-4o(?:$|-)`),
+			pricing: gpt4oPriorityPricing,
+		},
+	}
 )

-func getPricing(model string) ModelPricing {
+// modelFamiliesForTier picks the pricing table to consult for serviceTier.
+// Only the flex and priority tiers have dedicated tables; every other value
+// is served from the standard table.
+func modelFamiliesForTier(serviceTier string) []modelFamily {
+	if serviceTier == serviceTierFlex {
+		return flexModelFamilies
+	}
+	if serviceTier == serviceTierPriority {
+		return priorityModelFamilies
+	}
+	return standardModelFamilies
+}
+
+// findPricingInFamilies returns the pricing of the first entry in
+// modelFamilies whose pattern matches model. Entries are tried in slice
+// order, so a more specific pattern must be listed before a broader one that
+// would also match. The boolean reports whether any entry matched; when none
+// did, the returned ModelPricing is the zero value.
+func findPricingInFamilies(model string, modelFamilies []modelFamily) (ModelPricing, bool) {
 	for _, family := range modelFamilies {
 		if family.pattern.MatchString(model) {
-			return family.pricing
+			return family.pricing, true
 		}
 	}
+	return ModelPricing{}, false
+}
+
+// normalizeServiceTier maps a raw service tier string onto one of the three
+// tiers that are priced separately. Matching ignores case and surrounding
+// whitespace.
+func normalizeServiceTier(serviceTier string) string {
+	tier := strings.ToLower(strings.TrimSpace(serviceTier))
+	if tier == serviceTierFlex || tier == serviceTierPriority {
+		return tier
+	}
+	// Everything else is treated as the default tier: the empty string, auto,
+	// default itself, unrecognized values, and scale. Scale-tier requests are
+	// prepaid differently and not listed in this usage file.
+	return serviceTierDefault
+}
+
+// getPricing resolves the per-token pricing for model under serviceTier.
+//
+// The tier-specific table is searched first, then (for non-default tiers) the
+// standard table. Within each table the literal model name is tried before
+// its canonical GPT-5 alias from normalizeGPT5Model. Models that match
+// nothing are priced as gpt-4o.
+func getPricing(model string, serviceTier string) ModelPricing {
+	tier := normalizeServiceTier(serviceTier)
+	alias := normalizeGPT5Model(model)
+
+	// lookup tries the literal name and falls back to the alias only when the
+	// alias actually differs from the literal name.
+	lookup := func(families []modelFamily) (ModelPricing, bool) {
+		if pricing, ok := findPricingInFamilies(model, families); ok {
+			return pricing, true
+		}
+		if alias == model {
+			return ModelPricing{}, false
+		}
+		return findPricingInFamilies(alias, families)
+	}
+
+	if pricing, ok := lookup(modelFamiliesForTier(tier)); ok {
+		return pricing
+	}
+	// A model with no flex/priority entry is priced from the standard table.
+	if tier != serviceTierDefault {
+		if pricing, ok := lookup(standardModelFamilies); ok {
+			return pricing
+		}
+	}
+
 	return gpt4oPricing
 }

-func calculateCost(stats UsageStats, model string) float64 {
-	pricing := getPricing(model)
+// normalizeGPT5Model maps a dotted GPT-5 model name ("gpt-5.<something>")
+// onto a canonical model name chosen by the variant marker it contains.
+// Names without the "gpt-5." prefix are returned unchanged; dotted names with
+// no known marker map to "gpt-5.2".
+func normalizeGPT5Model(model string) string {
+	if !strings.HasPrefix(model, "gpt-5.") {
+		return model
+	}
+
+	// Order matters: longer markers come before the shorter markers they
+	// contain ("-codex-mini" before "-codex" and "-mini").
+	variants := [...]struct{ marker, canonical string }{
+		{"-codex-mini", "gpt-5.1-codex-mini"},
+		{"-codex-max", "gpt-5.1-codex-max"},
+		{"-codex", "gpt-5.2-codex"},
+		{"-chat-latest", "gpt-5.2-chat-latest"},
+		{"-pro", "gpt-5.2-pro"},
+		{"-mini", "gpt-5-mini"},
+		{"-nano", "gpt-5-nano"},
+	}
+	for _, variant := range variants {
+		if strings.Contains(model, variant.marker) {
+			return variant.canonical
+		}
+	}
+	return "gpt-5.2"
+}
+
+func calculateCost(stats UsageStats, model string, serviceTier string) float64 {
+	pricing := getPricing(model, serviceTier)

 	regularInputTokens := stats.InputTokens - stats.CachedTokens
 	if regularInputTokens < 0 {
@@ -252,12 +720,13 @@ func (u *AggregatedUsage) ToJSON() *AggregatedUsageJSON {
 	}

 	for i, combo := range u.Combinations {
-		totalCost := calculateCost(combo.Total, combo.Model)
+		totalCost := calculateCost(combo.Total, combo.Model, combo.ServiceTier)

 		result.Costs.TotalUSD += totalCost

 		comboJSON := CostCombinationJSON{
-			Model: combo.Model,
+			Model:       combo.Model,
+			ServiceTier: combo.ServiceTier,
 			Total: UsageStatsJSON{
 				RequestCount: combo.Total.RequestCount,
 				InputTokens:  combo.Total.InputTokens,
@@ -269,7 +738,7 @@ func (u *AggregatedUsage) ToJSON() *AggregatedUsageJSON {
 		}

 		for user, userStats := range combo.ByUser {
-			userCost := calculateCost(userStats, combo.Model)
+			userCost := calculateCost(userStats, combo.Model, combo.ServiceTier)
 			result.Costs.ByUser[user] += userCost

 			comboJSON.ByUser[user] = UsageStatsJSON{
@@ -318,6 +787,7 @@ func (u *AggregatedUsage) Load() error {
 	u.Combinations = temp.Combinations

 	for i := range u.Combinations {
+		u.Combinations[i].ServiceTier = normalizeServiceTier(u.Combinations[i].ServiceTier)
 		if u.Combinations[i].ByUser == nil {
 			u.Combinations[i].ByUser = make(map[string]UsageStats)
 		}
@@ -349,11 +819,13 @@ func (u *AggregatedUsage) Save() error {
 	return err
 }

-func (u *AggregatedUsage) AddUsage(model string, inputTokens, outputTokens, cachedTokens int64, user string) error {
+func (u *AggregatedUsage) AddUsage(model string, inputTokens, outputTokens, cachedTokens int64, serviceTier string, user string) error {
 	if model == "" {
 		return E.New("model cannot be empty")
 	}

+	normalizedServiceTier := normalizeServiceTier(serviceTier)
+
 	u.mutex.Lock()
 	defer u.mutex.Unlock()

@@ -361,7 +833,11 @@ func (u *AggregatedUsage) AddUsage(model string, inputTokens, outputTokens, cach

 	var combo *CostCombination
 	for i := range u.Combinations {
-		if u.Combinations[i].Model == model {
+		comboServiceTier := normalizeServiceTier(u.Combinations[i].ServiceTier)
+		if u.Combinations[i].ServiceTier != comboServiceTier {
+			u.Combinations[i].ServiceTier = comboServiceTier
+		}
+		if u.Combinations[i].Model == model && comboServiceTier == normalizedServiceTier {
 			combo = &u.Combinations[i]
 			break
 		}
@@ -369,9 +845,10 @@ func (u *AggregatedUsage) AddUsage(model string, inputTokens, outputTokens, cach

 	if combo == nil {
 		newCombo := CostCombination{
-			Model:  model,
-			Total:  UsageStats{},
-			ByUser: make(map[string]UsageStats),
+			Model:       model,
+			ServiceTier: normalizedServiceTier,
+			Total:       UsageStats{},
+			ByUser:      make(map[string]UsageStats),
 		}
 		u.Combinations = append(u.Combinations, newCombo)
 		combo = &u.Combinations[len(u.Combinations)-1]