mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-24 18:24:20 +00:00

Merge branch 'main' into litellm_fixes_bedrock_nova_transform
commit e418076b35
33 changed files with 2248 additions and 765 deletions

docs/my-website/package-lock.json (generated, 7 changes)
@@ -12455,9 +12455,10 @@
       }
     },
     "node_modules/http-proxy-middleware": {
-      "version": "2.0.7",
-      "resolved": "https://registry.npmjs.org/http-proxy-middleware/-/http-proxy-middleware-2.0.7.tgz",
-      "integrity": "sha512-fgVY8AV7qU7z/MmXJ/rxwbrtQH4jBQ9m7kp3llF0liB7glmFeVZFBepQb32T3y8n8k2+AEYuMPCpinYW+/CuRA==",
+      "version": "2.0.9",
+      "resolved": "https://registry.npmjs.org/http-proxy-middleware/-/http-proxy-middleware-2.0.9.tgz",
+      "integrity": "sha512-c1IyJYLYppU574+YI7R4QyX2ystMtVXZwIdzazUIPIJsHuWNd+mho2j+bKoHftndicGj9yh+xjd+l0yj7VeT1Q==",
+      "license": "MIT",
       "dependencies": {
         "@types/http-proxy": "^1.17.8",
         "http-proxy": "^1.18.1",
@@ -0,0 +1,45 @@
+-- AlterTable
+ALTER TABLE "LiteLLM_DailyTeamSpend" ADD COLUMN "cache_creation_input_tokens" INTEGER NOT NULL DEFAULT 0,
+ADD COLUMN "cache_read_input_tokens" INTEGER NOT NULL DEFAULT 0;
+
+-- CreateTable
+CREATE TABLE "LiteLLM_DailyTagSpend" (
+    "id" TEXT NOT NULL,
+    "tag" TEXT NOT NULL,
+    "date" TEXT NOT NULL,
+    "api_key" TEXT NOT NULL,
+    "model" TEXT NOT NULL,
+    "model_group" TEXT,
+    "custom_llm_provider" TEXT,
+    "prompt_tokens" INTEGER NOT NULL DEFAULT 0,
+    "completion_tokens" INTEGER NOT NULL DEFAULT 0,
+    "cache_read_input_tokens" INTEGER NOT NULL DEFAULT 0,
+    "cache_creation_input_tokens" INTEGER NOT NULL DEFAULT 0,
+    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
+    "api_requests" INTEGER NOT NULL DEFAULT 0,
+    "successful_requests" INTEGER NOT NULL DEFAULT 0,
+    "failed_requests" INTEGER NOT NULL DEFAULT 0,
+    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "updated_at" TIMESTAMP(3) NOT NULL,
+
+    CONSTRAINT "LiteLLM_DailyTagSpend_pkey" PRIMARY KEY ("id")
+);
+
+-- CreateIndex
+CREATE UNIQUE INDEX "LiteLLM_DailyTagSpend_tag_key" ON "LiteLLM_DailyTagSpend"("tag");
+
+-- CreateIndex
+CREATE INDEX "LiteLLM_DailyTagSpend_date_idx" ON "LiteLLM_DailyTagSpend"("date");
+
+-- CreateIndex
+CREATE INDEX "LiteLLM_DailyTagSpend_tag_idx" ON "LiteLLM_DailyTagSpend"("tag");
+
+-- CreateIndex
+CREATE INDEX "LiteLLM_DailyTagSpend_api_key_idx" ON "LiteLLM_DailyTagSpend"("api_key");
+
+-- CreateIndex
+CREATE INDEX "LiteLLM_DailyTagSpend_model_idx" ON "LiteLLM_DailyTagSpend"("model");
+
+-- CreateIndex
+CREATE UNIQUE INDEX "LiteLLM_DailyTagSpend_tag_date_api_key_model_custom_llm_pro_key" ON "LiteLLM_DailyTagSpend"("tag", "date", "api_key", "model", "custom_llm_provider");
@@ -0,0 +1,3 @@
+-- DropIndex
+DROP INDEX "LiteLLM_DailyTagSpend_tag_key";
+
@@ -342,6 +342,60 @@ model LiteLLM_DailyUserSpend {
   @@index([model])
 }
 
+// Track daily team spend metrics per model and key
+model LiteLLM_DailyTeamSpend {
+  id                          String   @id @default(uuid())
+  team_id                     String
+  date                        String
+  api_key                     String
+  model                       String
+  model_group                 String?
+  custom_llm_provider         String?
+  prompt_tokens               Int      @default(0)
+  completion_tokens           Int      @default(0)
+  cache_read_input_tokens     Int      @default(0)
+  cache_creation_input_tokens Int      @default(0)
+  spend                       Float    @default(0.0)
+  api_requests                Int      @default(0)
+  successful_requests         Int      @default(0)
+  failed_requests             Int      @default(0)
+  created_at                  DateTime @default(now())
+  updated_at                  DateTime @updatedAt
+
+  @@unique([team_id, date, api_key, model, custom_llm_provider])
+  @@index([date])
+  @@index([team_id])
+  @@index([api_key])
+  @@index([model])
+}
+
+// Track daily tag spend metrics per model and key
+model LiteLLM_DailyTagSpend {
+  id                          String   @id @default(uuid())
+  tag                         String
+  date                        String
+  api_key                     String
+  model                       String
+  model_group                 String?
+  custom_llm_provider         String?
+  prompt_tokens               Int      @default(0)
+  completion_tokens           Int      @default(0)
+  cache_read_input_tokens     Int      @default(0)
+  cache_creation_input_tokens Int      @default(0)
+  spend                       Float    @default(0.0)
+  api_requests                Int      @default(0)
+  successful_requests         Int      @default(0)
+  failed_requests             Int      @default(0)
+  created_at                  DateTime @default(now())
+  updated_at                  DateTime @updatedAt
+
+  @@unique([tag, date, api_key, model, custom_llm_provider])
+  @@index([date])
+  @@index([tag])
+  @@index([api_key])
+  @@index([model])
+}
+
 
 // Track the status of cron jobs running. Only allow one pod to run the job at a time
 model LiteLLM_CronJob {
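The composite unique key on (tag, date, api_key, model, custom_llm_provider) is what makes the daily aggregation idempotent: repeated flushes for the same bucket increment counters rather than inserting duplicate rows. A minimal sketch of that write, assuming the Prisma Python client and mirroring the upsert pattern used in db_spend_update_writer.py later in this diff (the transaction dict values are illustrative):

    # Sketch only; matches the upsert shape used later in this diff.
    async def bump_tag_spend(prisma_client, tx: dict) -> None:
        await prisma_client.db.litellm_dailytagspend.upsert(
            where={
                "tag_date_api_key_model_custom_llm_provider": {
                    "tag": tx["tag"],
                    "date": tx["date"],
                    "api_key": tx["api_key"],
                    "model": tx["model"],
                    "custom_llm_provider": tx["custom_llm_provider"],
                }
            },
            data={
                # first write for this bucket creates the row ...
                "create": {**tx, "api_requests": 1},
                # ... later writes atomically increment the counters
                "update": {
                    "spend": {"increment": tx["spend"]},
                    "prompt_tokens": {"increment": tx["prompt_tokens"]},
                    "completion_tokens": {"increment": tx["completion_tokens"]},
                    "api_requests": {"increment": 1},
                },
            },
        )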
@@ -21,10 +21,14 @@ DEFAULT_MAX_TOKENS = 256 # used when providers need a default
 MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024  # 1MB = 1024KB
 SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000  # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
 
+########## Networking constants ##############################################################
+_DEFAULT_TTL_FOR_HTTPX_CLIENTS = 3600  # 1 hour, re-use the same httpx client for 1 hour
+
 ########### v2 Architecture constants for managing writing updates to the database ###########
 REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer"
 REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_spend_update_buffer"
 REDIS_DAILY_TEAM_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_team_spend_update_buffer"
+REDIS_DAILY_TAG_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_tag_spend_update_buffer"
 MAX_REDIS_BUFFER_DEQUEUE_COUNT = 100
 MAX_SIZE_IN_MEMORY_QUEUE = 10000
 MAX_IN_MEMORY_QUEUE_FLUSH_COUNT = 1000
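These buffer keys are consumed in redis_update_buffer.py further down in this diff: each pod pushes serialized batches onto a Redis list, and the leader pod drains up to MAX_REDIS_BUFFER_DEQUEUE_COUNT batches at a time. A minimal sketch of that enqueue/dequeue contract, assuming a plain redis.asyncio client (litellm wraps this in its own RedisCache):

    import json
    import redis.asyncio as redis  # assumed client for the sketch

    r = redis.Redis()

    async def enqueue(update: dict) -> int:
        # Producer side: RPUSH one serialized batch of tag spend updates.
        return await r.rpush("litellm_daily_tag_spend_update_buffer", json.dumps(update))

    async def dequeue(max_items: int = 100) -> list:
        # Consumer side (leader pod): LPOP up to MAX_REDIS_BUFFER_DEQUEUE_COUNT batches.
        raw = await r.lpop("litellm_daily_tag_spend_update_buffer", max_items)
        return [json.loads(item) for item in (raw or [])]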
@@ -1,12 +1,13 @@
 import asyncio
 import json
 import os
+import time
 import uuid
 from datetime import datetime, timedelta
 from typing import List, Optional
 
 from litellm._logging import verbose_logger
-from litellm.constants import AZURE_STORAGE_MSFT_VERSION
+from litellm.constants import _DEFAULT_TTL_FOR_HTTPX_CLIENTS, AZURE_STORAGE_MSFT_VERSION
 from litellm.integrations.custom_batch_logger import CustomBatchLogger
 from litellm.llms.azure.common_utils import get_azure_ad_token_from_entra_id
 from litellm.llms.custom_httpx.http_handler import (
@@ -48,14 +49,17 @@ class AzureBlobStorageLogger(CustomBatchLogger):
                 "Missing required environment variable: AZURE_STORAGE_FILE_SYSTEM"
             )
         self.azure_storage_file_system: str = _azure_storage_file_system
+        self._service_client = None
+        # Time that the azure service client expires, in order to reset the connection pool and keep it fresh
+        self._service_client_timeout: Optional[float] = None
 
         # Internal variables used for Token based authentication
-        self.azure_auth_token: Optional[
-            str
-        ] = None  # the Azure AD token to use for Azure Storage API requests
-        self.token_expiry: Optional[
-            datetime
-        ] = None  # the expiry time of the currentAzure AD token
+        self.azure_auth_token: Optional[str] = (
+            None  # the Azure AD token to use for Azure Storage API requests
+        )
+        self.token_expiry: Optional[datetime] = (
+            None  # the expiry time of the current Azure AD token
+        )
 
         asyncio.create_task(self.periodic_flush())
         self.flush_lock = asyncio.Lock()
@@ -324,6 +328,25 @@ class AzureBlobStorageLogger(CustomBatchLogger):
                 f"AzureBlobStorageLogger is only available for premium users. {CommonProxyErrors.not_premium_user}"
             )
 
+    async def get_service_client(self):
+        from azure.storage.filedatalake.aio import DataLakeServiceClient
+
+        # expire old clients to recover from connection issues
+        if (
+            self._service_client_timeout
+            and self._service_client
+            and self._service_client_timeout > time.time()
+        ):
+            await self._service_client.close()
+            self._service_client = None
+        if not self._service_client:
+            self._service_client = DataLakeServiceClient(
+                account_url=f"https://{self.azure_storage_account_name}.dfs.core.windows.net",
+                credential=self.azure_storage_account_key,
+            )
+            self._service_client_timeout = time.time() + _DEFAULT_TTL_FOR_HTTPX_CLIENTS
+        return self._service_client
+
     async def upload_to_azure_data_lake_with_azure_account_key(
         self, payload: StandardLoggingPayload
     ):
@@ -332,13 +355,10 @@ class AzureBlobStorageLogger(CustomBatchLogger):
 
         This is used when Azure Storage Account Key is set - Azure Storage Account Key does not work directly with Azure Rest API
         """
-        from azure.storage.filedatalake.aio import DataLakeServiceClient
-
-        # Create an async service client
-        service_client = DataLakeServiceClient(
-            account_url=f"https://{self.azure_storage_account_name}.dfs.core.windows.net",
-            credential=self.azure_storage_account_key,
-        )
-
+        service_client = await self.get_service_client()
         # Get file system client
         file_system_client = service_client.get_file_system_client(
             file_system=self.azure_storage_file_system
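The two hunks above replace a per-upload DataLakeServiceClient with one cached client recycled on a TTL, the same idea the constants hunk applies to httpx clients. A minimal generic sketch of TTL-bounded client reuse (names are illustrative and the expiry check is shown in the conventional now-past-deadline direction; this is not the verbatim logger code):

    import time
    from typing import Callable, Optional

    _client: Optional[object] = None
    _expires_at: Optional[float] = None  # unix timestamp when the cached client goes stale

    def get_cached_client(make_client: Callable[[], object], ttl_seconds: float = 3600):
        """Return a cached client, rebuilding it once the TTL has elapsed."""
        global _client, _expires_at
        now = time.time()
        if _client is not None and _expires_at is not None and now >= _expires_at:
            _client = None  # drop the stale client so the connection pool is rebuilt
        if _client is None:
            _client = make_client()
            _expires_at = now + ttl_seconds
        return _client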
@@ -161,7 +161,6 @@ class ChunkProcessor:
                         name=tool_call_data["name"],
                     ),
-                    type=tool_call_data["type"] or "function",
                     index=index,
                 )
             )
 
@@ -1,9 +1,16 @@
 import json
 from abc import abstractmethod
-from typing import Optional, Union
+from typing import List, Optional, Union, cast
 
 import litellm
-from litellm.types.utils import GenericStreamingChunk, ModelResponseStream
+from litellm.types.utils import (
+    Choices,
+    Delta,
+    GenericStreamingChunk,
+    ModelResponse,
+    ModelResponseStream,
+    StreamingChoices,
+)
 
 
 class BaseModelResponseIterator:
@@ -121,6 +128,59 @@ class BaseModelResponseIterator:
             raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")
 
 
+class MockResponseIterator:  # for returning ai21 streaming responses
+    def __init__(
+        self, model_response: ModelResponse, json_mode: Optional[bool] = False
+    ):
+        self.model_response = model_response
+        self.json_mode = json_mode
+        self.is_done = False
+
+    # Sync iterator
+    def __iter__(self):
+        return self
+
+    def _chunk_parser(self, chunk_data: ModelResponse) -> ModelResponseStream:
+        try:
+            streaming_choices: List[StreamingChoices] = []
+            for choice in chunk_data.choices:
+                streaming_choices.append(
+                    StreamingChoices(
+                        index=choice.index,
+                        delta=Delta(
+                            **cast(Choices, choice).message.model_dump(),
+                        ),
+                        finish_reason=choice.finish_reason,
+                    )
+                )
+            processed_chunk = ModelResponseStream(
+                id=chunk_data.id,
+                object="chat.completion",
+                created=chunk_data.created,
+                model=chunk_data.model,
+                choices=streaming_choices,
+            )
+            return processed_chunk
+        except Exception as e:
+            raise ValueError(f"Failed to decode chunk: {chunk_data}. Error: {e}")
+
+    def __next__(self):
+        if self.is_done:
+            raise StopIteration
+        self.is_done = True
+        return self._chunk_parser(self.model_response)
+
+    # Async iterator
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self):
+        if self.is_done:
+            raise StopAsyncIteration
+        self.is_done = True
+        return self._chunk_parser(self.model_response)
+
+
 class FakeStreamResponseIterator:
     def __init__(self, model_response, json_mode: Optional[bool] = False):
         self.model_response = model_response
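MockResponseIterator converts one complete ModelResponse into a single-chunk stream, which is what lets the fake_stream paths further down behave like real streams. A minimal sketch of how a caller consumes it (the response object here is illustrative; in the handler it comes from transform_response):

    import asyncio
    from litellm.types.utils import ModelResponse

    full_response = ModelResponse()  # assume this was filled in by transform_response()
    stream = MockResponseIterator(model_response=full_response)

    # Sync: yields exactly one chunk, then raises StopIteration.
    for chunk in stream:
        print(chunk.choices)

    async def consume():
        # Async: same contract, one chunk then StopAsyncIteration.
        async for chunk in MockResponseIterator(model_response=full_response):
            print(chunk.choices)

    asyncio.run(consume())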
@@ -8,6 +8,7 @@ import httpx
 from httpx import USE_CLIENT_DEFAULT, AsyncHTTPTransport, HTTPTransport
 
 import litellm
+from litellm.constants import _DEFAULT_TTL_FOR_HTTPX_CLIENTS
 from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
 from litellm.types.llms.custom_http import *
 
@@ -31,7 +32,6 @@ headers = {
 
 # https://www.python-httpx.org/advanced/timeouts
 _DEFAULT_TIMEOUT = httpx.Timeout(timeout=5.0, connect=5.0)
-_DEFAULT_TTL_FOR_HTTPX_CLIENTS = 3600  # 1 hour, re-use the same httpx client for 1 hour
 
 
 def mask_sensitive_info(error_message):
@@ -11,6 +11,7 @@ from litellm._logging import verbose_logger
 from litellm.llms.base_llm.audio_transcription.transformation import (
     BaseAudioTranscriptionConfig,
 )
+from litellm.llms.base_llm.base_model_iterator import MockResponseIterator
 from litellm.llms.base_llm.chat.transformation import BaseConfig
 from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
 from litellm.llms.base_llm.files.transformation import BaseFilesConfig
@@ -231,6 +232,7 @@ class BaseLLMHTTPHandler:
     ):
         json_mode: bool = optional_params.pop("json_mode", False)
         extra_body: Optional[dict] = optional_params.pop("extra_body", None)
+        fake_stream = fake_stream or optional_params.pop("fake_stream", False)
 
         provider_config = ProviderConfigManager.get_provider_chat_config(
             model=model, provider=litellm.LlmProviders(custom_llm_provider)
@@ -317,6 +319,7 @@ class BaseLLMHTTPHandler:
                 ),
                 litellm_params=litellm_params,
                 json_mode=json_mode,
+                optional_params=optional_params,
             )
 
         else:
@@ -378,6 +381,7 @@ class BaseLLMHTTPHandler:
                 ),
                 litellm_params=litellm_params,
                 json_mode=json_mode,
+                optional_params=optional_params,
             )
             return CustomStreamWrapper(
                 completion_stream=completion_stream,
@@ -426,6 +430,7 @@ class BaseLLMHTTPHandler:
         model: str,
         messages: list,
         logging_obj,
+        optional_params: dict,
         litellm_params: dict,
         timeout: Union[float, httpx.Timeout],
         fake_stream: bool = False,
@@ -457,11 +462,22 @@ class BaseLLMHTTPHandler:
             )
 
         if fake_stream is True:
-            completion_stream = provider_config.get_model_response_iterator(
-                streaming_response=response.json(),
-                sync_stream=True,
+            model_response: (ModelResponse) = provider_config.transform_response(
+                model=model,
+                raw_response=response,
+                model_response=litellm.ModelResponse(),
+                logging_obj=logging_obj,
+                request_data=data,
+                messages=messages,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                encoding=None,
+                json_mode=json_mode,
+            )
+
+            completion_stream: Any = MockResponseIterator(
+                model_response=model_response, json_mode=json_mode
             )
         else:
             completion_stream = provider_config.get_model_response_iterator(
                 streaming_response=response.iter_lines(),
@@ -491,6 +507,7 @@ class BaseLLMHTTPHandler:
         logging_obj: LiteLLMLoggingObj,
         data: dict,
         litellm_params: dict,
+        optional_params: dict,
         fake_stream: bool = False,
         client: Optional[AsyncHTTPHandler] = None,
         json_mode: Optional[bool] = None,
@@ -509,6 +526,7 @@ class BaseLLMHTTPHandler:
         )
 
         completion_stream, _response_headers = await self.make_async_call_stream_helper(
+            model=model,
             custom_llm_provider=custom_llm_provider,
             provider_config=provider_config,
             api_base=api_base,
@@ -520,6 +538,8 @@ class BaseLLMHTTPHandler:
             fake_stream=fake_stream,
             client=client,
             litellm_params=litellm_params,
+            optional_params=optional_params,
+            json_mode=json_mode,
         )
         streamwrapper = CustomStreamWrapper(
             completion_stream=completion_stream,
@@ -531,6 +551,7 @@ class BaseLLMHTTPHandler:
 
     async def make_async_call_stream_helper(
         self,
+        model: str,
         custom_llm_provider: str,
         provider_config: BaseConfig,
         api_base: str,
@@ -540,8 +561,10 @@ class BaseLLMHTTPHandler:
         logging_obj: LiteLLMLoggingObj,
         timeout: Union[float, httpx.Timeout],
         litellm_params: dict,
+        optional_params: dict,
         fake_stream: bool = False,
         client: Optional[AsyncHTTPHandler] = None,
+        json_mode: Optional[bool] = None,
     ) -> Tuple[Any, httpx.Headers]:
         """
         Helper function for making an async call with stream.
@@ -572,8 +595,21 @@ class BaseLLMHTTPHandler:
             )
 
         if fake_stream is True:
-            completion_stream = provider_config.get_model_response_iterator(
-                streaming_response=response.json(), sync_stream=False
+            model_response: (ModelResponse) = provider_config.transform_response(
+                model=model,
+                raw_response=response,
+                model_response=litellm.ModelResponse(),
+                logging_obj=logging_obj,
+                request_data=data,
+                messages=messages,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                encoding=None,
+                json_mode=json_mode,
+            )
+
+            completion_stream: Any = MockResponseIterator(
+                model_response=model_response, json_mode=json_mode
             )
         else:
             completion_stream = provider_config.get_model_response_iterator(
@@ -598,8 +634,12 @@ class BaseLLMHTTPHandler:
         """
         Some providers like Bedrock invoke do not support the stream parameter in the request body; we only pass `stream` in the request body if the provider supports it.
         """
+
         if fake_stream is True:
-            return data
+            # remove 'stream' from data
+            new_data = data.copy()
+            new_data.pop("stream", None)
+            return new_data
         if provider_config.supports_stream_param_in_request_body is True:
             data["stream"] = True
         return data
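Taken together with the fake_stream branches above: when a provider cannot stream, the handler strips `stream` from the request body, makes a normal completion call, transforms the full response once, and wraps it in MockResponseIterator so callers still see a stream. A condensed sketch of that control flow (abbreviated from the hunks above, not the verbatim handler code):

    # Condensed sketch of the fake-stream path; names abbreviated for the example.
    def prepare_request_body(data: dict, supports_stream: bool, fake_stream: bool) -> dict:
        if fake_stream:
            new_data = data.copy()
            new_data.pop("stream", None)  # provider receives a plain, non-streaming request
            return new_data
        if supports_stream:
            data["stream"] = True
        return data

    def wrap_response(response, fake_stream: bool, transform_response, get_iterator):
        if fake_stream:
            # Transform the full JSON response once, then replay it as one chunk.
            full_response = transform_response(response)
            return MockResponseIterator(model_response=full_response)
        # Real streaming: hand the raw line iterator to the provider's parser.
        return get_iterator(response.iter_lines())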
@@ -14,10 +14,10 @@ from litellm.types.llms.openai import (
     ChatCompletionToolParamFunctionChunk,
 )
 
-from ...openai.chat.gpt_transformation import OpenAIGPTConfig
+from ...openai_like.chat.transformation import OpenAILikeChatConfig
 
 
-class GroqChatConfig(OpenAIGPTConfig):
+class GroqChatConfig(OpenAILikeChatConfig):
     frequency_penalty: Optional[int] = None
     function_call: Optional[Union[str, dict]] = None
     functions: Optional[list] = None
@@ -132,8 +132,11 @@ class GroqChatConfig(OpenAIGPTConfig):
         optional_params: dict,
         model: str,
         drop_params: bool = False,
+        replace_max_completion_tokens_with_max_tokens: bool = False,  # groq supports max_completion_tokens
     ) -> dict:
         _response_format = non_default_params.get("response_format")
+        if self._should_fake_stream(non_default_params):
+            optional_params["fake_stream"] = True
         if _response_format is not None and isinstance(_response_format, dict):
             json_schema: Optional[dict] = None
             if "response_schema" in _response_format:
@@ -160,6 +163,8 @@ class GroqChatConfig(OpenAIGPTConfig):
             non_default_params.pop(
                 "response_format", None
             )  # only remove if it's a json_schema - handled via using groq's tool calling params.
-        return super().map_openai_params(
+        optional_params = super().map_openai_params(
             non_default_params, optional_params, model, drop_params
         )
+
+        return optional_params
@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union
 import httpx
 
 from litellm.secret_managers.main import get_secret_str
-from litellm.types.llms.openai import ChatCompletionAssistantMessage
+from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage
 from litellm.types.utils import ModelResponse
 
 from ...openai.chat.gpt_transformation import OpenAIGPTConfig
@@ -25,7 +25,6 @@ class OpenAILikeChatConfig(OpenAIGPTConfig):
         self,
         api_base: Optional[str],
         api_key: Optional[str],
-        model: Optional[str] = None,
     ) -> Tuple[Optional[str], Optional[str]]:
         api_base = api_base or get_secret_str("OPENAI_LIKE_API_BASE")  # type: ignore
         dynamic_api_key = (
@@ -74,8 +73,8 @@ class OpenAILikeChatConfig(OpenAIGPTConfig):
         messages: List,
         print_verbose,
         encoding,
-        json_mode: bool,
-        custom_llm_provider: str,
+        json_mode: Optional[bool],
+        custom_llm_provider: Optional[str],
         base_model: Optional[str],
     ) -> ModelResponse:
         response_json = response.json()
@@ -97,14 +96,46 @@ class OpenAILikeChatConfig(OpenAIGPTConfig):
 
         returned_response = ModelResponse(**response_json)
 
-        returned_response.model = (
-            custom_llm_provider + "/" + (returned_response.model or "")
-        )
+        if custom_llm_provider is not None:
+            returned_response.model = (
+                custom_llm_provider + "/" + (returned_response.model or "")
+            )
 
         if base_model is not None:
             returned_response._hidden_params["model"] = base_model
         return returned_response
 
+    def transform_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        model_response: ModelResponse,
+        logging_obj: LiteLLMLoggingObj,
+        request_data: dict,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        encoding: Any,
+        api_key: Optional[str] = None,
+        json_mode: Optional[bool] = None,
+    ) -> ModelResponse:
+        return OpenAILikeChatConfig._transform_response(
+            model=model,
+            response=raw_response,
+            model_response=model_response,
+            stream=optional_params.get("stream", False),
+            logging_obj=logging_obj,
+            optional_params=optional_params,
+            api_key=api_key,
+            data=request_data,
+            messages=messages,
+            print_verbose=None,
+            encoding=None,
+            json_mode=json_mode,
+            custom_llm_provider=None,
+            base_model=None,
+        )
+
     def map_openai_params(
         self,
         non_default_params: dict,
@@ -600,6 +600,40 @@
         "supports_vision": true,
         "supports_prompt_caching": true
     },
+    "o3": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 1e-5,
+        "output_cost_per_token": 4e-5,
+        "cache_read_input_token_cost": 2.5e-6,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": false,
+        "supports_vision": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "supports_reasoning": true,
+        "supports_tool_choice": true
+    },
+    "o3-2025-04-16": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 1e-5,
+        "output_cost_per_token": 4e-5,
+        "cache_read_input_token_cost": 2.5e-6,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": false,
+        "supports_vision": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "supports_reasoning": true,
+        "supports_tool_choice": true
+    },
     "o3-mini": {
         "max_tokens": 100000,
         "max_input_tokens": 200000,
@@ -634,6 +668,40 @@
         "supports_reasoning": true,
         "supports_tool_choice": true
     },
+    "o4-mini": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 1.1e-6,
+        "output_cost_per_token": 4.4e-6,
+        "cache_read_input_token_cost": 2.75e-7,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": false,
+        "supports_vision": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "supports_reasoning": true,
+        "supports_tool_choice": true
+    },
+    "o4-mini-2025-04-16": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 1.1e-6,
+        "output_cost_per_token": 4.4e-6,
+        "cache_read_input_token_cost": 2.75e-7,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": false,
+        "supports_vision": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "supports_reasoning": true,
+        "supports_tool_choice": true
+    },
     "o1-mini-2024-09-12": {
         "max_tokens": 65536,
         "max_input_tokens": 128000,

File diff suppressed because one or more lines are too long
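The per-token prices above are fractions of a dollar, so costs come out of straight multiplication. A quick arithmetic check of what a call against the "o4-mini" entry would cost (token counts are illustrative, and subtracting cached tokens from billable input is shown as one plausible convention, not litellm's exact accounting):

    # Cost check against the "o4-mini" entry above (illustrative token counts).
    input_cost_per_token = 1.1e-6        # $1.10 per 1M input tokens
    output_cost_per_token = 4.4e-6       # $4.40 per 1M output tokens
    cache_read_cost_per_token = 2.75e-7  # $0.275 per 1M cached input tokens

    prompt_tokens, cached_tokens, completion_tokens = 10_000, 4_000, 2_000

    cost = (
        (prompt_tokens - cached_tokens) * input_cost_per_token
        + cached_tokens * cache_read_cost_per_token
        + completion_tokens * output_cost_per_token
    )
    print(f"${cost:.6f}")  # 0.0066 + 0.0011 + 0.0088 = $0.016500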
@@ -650,9 +650,9 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase):
     allowed_cache_controls: Optional[list] = []
     config: Optional[dict] = {}
     permissions: Optional[dict] = {}
-    model_max_budget: Optional[dict] = (
-        {}
-    )  # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
+    model_max_budget: Optional[
+        dict
+    ] = {}  # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
 
     model_config = ConfigDict(protected_namespaces=())
     model_rpm_limit: Optional[dict] = None
@@ -908,12 +908,12 @@ class NewCustomerRequest(BudgetNewRequest):
     alias: Optional[str] = None  # human-friendly alias
     blocked: bool = False  # allow/disallow requests for this end-user
    budget_id: Optional[str] = None  # give either a budget_id or max_budget
-    allowed_model_region: Optional[AllowedModelRegion] = (
-        None  # require all user requests to use models in this specific region
-    )
-    default_model: Optional[str] = (
-        None  # if no equivalent model in allowed region - default all requests to this model
-    )
+    allowed_model_region: Optional[
+        AllowedModelRegion
+    ] = None  # require all user requests to use models in this specific region
+    default_model: Optional[
+        str
+    ] = None  # if no equivalent model in allowed region - default all requests to this model
 
     @model_validator(mode="before")
     @classmethod
@@ -935,12 +935,12 @@ class UpdateCustomerRequest(LiteLLMPydanticObjectBase):
     blocked: bool = False  # allow/disallow requests for this end-user
     max_budget: Optional[float] = None
     budget_id: Optional[str] = None  # give either a budget_id or max_budget
-    allowed_model_region: Optional[AllowedModelRegion] = (
-        None  # require all user requests to use models in this specific region
-    )
-    default_model: Optional[str] = (
-        None  # if no equivalent model in allowed region - default all requests to this model
-    )
+    allowed_model_region: Optional[
+        AllowedModelRegion
+    ] = None  # require all user requests to use models in this specific region
+    default_model: Optional[
+        str
+    ] = None  # if no equivalent model in allowed region - default all requests to this model
 
 
 class DeleteCustomerRequest(LiteLLMPydanticObjectBase):
@@ -1076,9 +1076,9 @@ class BlockKeyRequest(LiteLLMPydanticObjectBase):
 
 class AddTeamCallback(LiteLLMPydanticObjectBase):
     callback_name: str
-    callback_type: Optional[Literal["success", "failure", "success_and_failure"]] = (
-        "success_and_failure"
-    )
+    callback_type: Optional[
+        Literal["success", "failure", "success_and_failure"]
+    ] = "success_and_failure"
     callback_vars: Dict[str, str]
 
     @model_validator(mode="before")
@@ -1335,9 +1335,9 @@ class ConfigList(LiteLLMPydanticObjectBase):
     stored_in_db: Optional[bool]
     field_default_value: Any
     premium_field: bool = False
-    nested_fields: Optional[List[FieldDetail]] = (
-        None  # For nested dictionary or Pydantic fields
-    )
+    nested_fields: Optional[
+        List[FieldDetail]
+    ] = None  # For nested dictionary or Pydantic fields
 
 
 class ConfigGeneralSettings(LiteLLMPydanticObjectBase):
@@ -1604,9 +1604,9 @@ class LiteLLM_OrganizationMembershipTable(LiteLLMPydanticObjectBase):
     budget_id: Optional[str] = None
     created_at: datetime
     updated_at: datetime
-    user: Optional[Any] = (
-        None  # You might want to replace 'Any' with a more specific type if available
-    )
+    user: Optional[
+        Any
+    ] = None  # You might want to replace 'Any' with a more specific type if available
     litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
 
     model_config = ConfigDict(protected_namespaces=())
@@ -2354,9 +2354,9 @@ class TeamModelDeleteRequest(BaseModel):
 # Organization Member Requests
 class OrganizationMemberAddRequest(OrgMemberAddRequest):
     organization_id: str
-    max_budget_in_organization: Optional[float] = (
-        None  # Users max budget within the organization
-    )
+    max_budget_in_organization: Optional[
+        float
+    ] = None  # Users max budget within the organization
 
 
 class OrganizationMemberDeleteRequest(MemberDeleteRequest):
@@ -2545,9 +2545,9 @@ class ProviderBudgetResponse(LiteLLMPydanticObjectBase):
     Maps provider names to their budget configs.
     """
 
-    providers: Dict[str, ProviderBudgetResponseObject] = (
-        {}
-    )  # Dictionary mapping provider names to their budget configurations
+    providers: Dict[
+        str, ProviderBudgetResponseObject
+    ] = {}  # Dictionary mapping provider names to their budget configurations
 
 
 class ProxyStateVariables(TypedDict):
@@ -2675,9 +2675,9 @@ class LiteLLM_JWTAuth(LiteLLMPydanticObjectBase):
     enforce_rbac: bool = False
     roles_jwt_field: Optional[str] = None  # v2 on role mappings
     role_mappings: Optional[List[RoleMapping]] = None
-    object_id_jwt_field: Optional[str] = (
-        None  # can be either user / team, inferred from the role mapping
-    )
+    object_id_jwt_field: Optional[
+        str
+    ] = None  # can be either user / team, inferred from the role mapping
     scope_mappings: Optional[List[ScopeMapping]] = None
     enforce_scope_based_access: bool = False
     enforce_team_based_model_access: bool = False
@@ -2799,6 +2799,10 @@ class DailyUserSpendTransaction(BaseDailySpendTransaction):
     user_id: str
 
 
+class DailyTagSpendTransaction(BaseDailySpendTransaction):
+    tag: str
+
+
 class DBSpendUpdateTransactions(TypedDict):
     """
     Internal Data Structure for buffering spend updates in Redis or in memory before committing them to the database
@@ -11,7 +11,7 @@ import os
 import time
 import traceback
 from datetime import datetime, timedelta
-from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Union, cast
+from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Union, cast, overload
 
 import litellm
 from litellm._logging import verbose_proxy_logger
@@ -20,6 +20,7 @@ from litellm.constants import DB_SPEND_UPDATE_JOB_NAME
 from litellm.proxy._types import (
     DB_CONNECTION_ERROR_TYPES,
     BaseDailySpendTransaction,
+    DailyTagSpendTransaction,
     DailyTeamSpendTransaction,
     DailyUserSpendTransaction,
     DBSpendUpdateTransactions,
@@ -61,6 +62,7 @@ class DBSpendUpdateWriter:
         self.spend_update_queue = SpendUpdateQueue()
         self.daily_spend_update_queue = DailySpendUpdateQueue()
         self.daily_team_spend_update_queue = DailySpendUpdateQueue()
+        self.daily_tag_spend_update_queue = DailySpendUpdateQueue()
 
     async def update_database(
         # LiteLLM management object fields
@@ -170,6 +172,13 @@ class DBSpendUpdateWriter:
                 )
             )
 
+            asyncio.create_task(
+                self.add_spend_log_transaction_to_daily_tag_transaction(
+                    payload=payload,
+                    prisma_client=prisma_client,
+                )
+            )
+
             verbose_proxy_logger.debug("Runs spend update on all tables")
         except Exception:
             verbose_proxy_logger.debug(
@@ -394,6 +403,7 @@ class DBSpendUpdateWriter:
             spend_update_queue=self.spend_update_queue,
             daily_spend_update_queue=self.daily_spend_update_queue,
             daily_team_spend_update_queue=self.daily_team_spend_update_queue,
+            daily_tag_spend_update_queue=self.daily_tag_spend_update_queue,
         )
 
         # Only commit from redis to db if this pod is the leader
@@ -495,6 +505,20 @@ class DBSpendUpdateWriter:
             daily_spend_transactions=daily_team_spend_update_transactions,
         )
 
+        ################## Daily Tag Spend Update Transactions ##################
+        # Aggregate all in memory daily tag spend transactions and commit to db
+        daily_tag_spend_update_transactions = cast(
+            Dict[str, DailyTagSpendTransaction],
+            await self.daily_tag_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions(),
+        )
+
+        await DBSpendUpdateWriter.update_daily_tag_spend(
+            n_retry_times=n_retry_times,
+            prisma_client=prisma_client,
+            proxy_logging_obj=proxy_logging_obj,
+            daily_spend_transactions=daily_tag_spend_update_transactions,
+        )
+
     async def _commit_spend_updates_to_db(  # noqa: PLR0915
         self,
         prisma_client: PrismaClient,
@@ -740,6 +764,208 @@ class DBSpendUpdateWriter:
                 e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
             )
 
+    @overload
+    @staticmethod
+    async def _update_daily_spend(
+        n_retry_times: int,
+        prisma_client: PrismaClient,
+        proxy_logging_obj: ProxyLogging,
+        daily_spend_transactions: Dict[str, DailyUserSpendTransaction],
+        entity_type: Literal["user"],
+        entity_id_field: str,
+        table_name: str,
+        unique_constraint_name: str,
+    ) -> None:
+        ...
+
+    @overload
+    @staticmethod
+    async def _update_daily_spend(
+        n_retry_times: int,
+        prisma_client: PrismaClient,
+        proxy_logging_obj: ProxyLogging,
+        daily_spend_transactions: Dict[str, DailyTeamSpendTransaction],
+        entity_type: Literal["team"],
+        entity_id_field: str,
+        table_name: str,
+        unique_constraint_name: str,
+    ) -> None:
+        ...
+
+    @overload
+    @staticmethod
+    async def _update_daily_spend(
+        n_retry_times: int,
+        prisma_client: PrismaClient,
+        proxy_logging_obj: ProxyLogging,
+        daily_spend_transactions: Dict[str, DailyTagSpendTransaction],
+        entity_type: Literal["tag"],
+        entity_id_field: str,
+        table_name: str,
+        unique_constraint_name: str,
+    ) -> None:
+        ...
+
+    @staticmethod
+    async def _update_daily_spend(
+        n_retry_times: int,
+        prisma_client: PrismaClient,
+        proxy_logging_obj: ProxyLogging,
+        daily_spend_transactions: Union[
+            Dict[str, DailyUserSpendTransaction],
+            Dict[str, DailyTeamSpendTransaction],
+            Dict[str, DailyTagSpendTransaction],
+        ],
+        entity_type: Literal["user", "team", "tag"],
+        entity_id_field: str,
+        table_name: str,
+        unique_constraint_name: str,
+    ) -> None:
+        """
+        Generic function to update daily spend for any entity type (user, team, tag)
+        """
+        from litellm.proxy.utils import _raise_failed_update_spend_exception
+
+        verbose_proxy_logger.debug(
+            f"Daily {entity_type.capitalize()} Spend transactions: {len(daily_spend_transactions)}"
+        )
+        BATCH_SIZE = 100
+        start_time = time.time()
+
+        try:
+            for i in range(n_retry_times + 1):
+                try:
+                    transactions_to_process = dict(
+                        list(daily_spend_transactions.items())[:BATCH_SIZE]
+                    )
+
+                    if len(transactions_to_process) == 0:
+                        verbose_proxy_logger.debug(
+                            f"No new transactions to process for daily {entity_type} spend update"
+                        )
+                        break
+
+                    async with prisma_client.db.batch_() as batcher:
+                        for _, transaction in transactions_to_process.items():
+                            entity_id = transaction.get(entity_id_field)
+                            if not entity_id:
+                                continue
+
+                            # Construct the where clause dynamically
+                            where_clause = {
+                                unique_constraint_name: {
+                                    entity_id_field: entity_id,
+                                    "date": transaction["date"],
+                                    "api_key": transaction["api_key"],
+                                    "model": transaction["model"],
+                                    "custom_llm_provider": transaction.get(
+                                        "custom_llm_provider"
+                                    ),
+                                }
+                            }
+
+                            # Get the table dynamically
+                            table = getattr(batcher, table_name)
+
+                            # Common data structure for both create and update
+                            common_data = {
+                                entity_id_field: entity_id,
+                                "date": transaction["date"],
+                                "api_key": transaction["api_key"],
+                                "model": transaction["model"],
+                                "model_group": transaction.get("model_group"),
+                                "custom_llm_provider": transaction.get(
+                                    "custom_llm_provider"
+                                ),
+                                "prompt_tokens": transaction["prompt_tokens"],
+                                "completion_tokens": transaction["completion_tokens"],
+                                "spend": transaction["spend"],
+                                "api_requests": transaction["api_requests"],
+                                "successful_requests": transaction[
+                                    "successful_requests"
+                                ],
+                                "failed_requests": transaction["failed_requests"],
+                            }
+
+                            # Add cache-related fields if they exist
+                            if "cache_read_input_tokens" in transaction:
+                                common_data[
+                                    "cache_read_input_tokens"
+                                ] = transaction.get("cache_read_input_tokens", 0)
+                            if "cache_creation_input_tokens" in transaction:
+                                common_data[
+                                    "cache_creation_input_tokens"
+                                ] = transaction.get("cache_creation_input_tokens", 0)
+
+                            # Create update data structure
+                            update_data = {
+                                "prompt_tokens": {
+                                    "increment": transaction["prompt_tokens"]
+                                },
+                                "completion_tokens": {
+                                    "increment": transaction["completion_tokens"]
+                                },
+                                "spend": {"increment": transaction["spend"]},
+                                "api_requests": {
+                                    "increment": transaction["api_requests"]
+                                },
+                                "successful_requests": {
+                                    "increment": transaction["successful_requests"]
+                                },
+                                "failed_requests": {
+                                    "increment": transaction["failed_requests"]
+                                },
+                            }
+
+                            # Add cache-related fields to update if they exist
+                            if "cache_read_input_tokens" in transaction:
+                                update_data["cache_read_input_tokens"] = {
+                                    "increment": transaction.get(
+                                        "cache_read_input_tokens", 0
+                                    )
+                                }
+                            if "cache_creation_input_tokens" in transaction:
+                                update_data["cache_creation_input_tokens"] = {
+                                    "increment": transaction.get(
+                                        "cache_creation_input_tokens", 0
+                                    )
+                                }
+
+                            table.upsert(
+                                where=where_clause,
+                                data={
+                                    "create": common_data,
+                                    "update": update_data,
+                                },
+                            )
+
+                    verbose_proxy_logger.info(
+                        f"Processed {len(transactions_to_process)} daily {entity_type} transactions in {time.time() - start_time:.2f}s"
+                    )
+
+                    # Remove processed transactions
+                    for key in transactions_to_process.keys():
+                        daily_spend_transactions.pop(key, None)
+
+                    break
+
+                except DB_CONNECTION_ERROR_TYPES as e:
+                    if i >= n_retry_times:
+                        _raise_failed_update_spend_exception(
+                            e=e,
+                            start_time=start_time,
+                            proxy_logging_obj=proxy_logging_obj,
+                        )
+                    await asyncio.sleep(2**i)
+
+        except Exception as e:
+            if "transactions_to_process" in locals():
+                for key in transactions_to_process.keys():  # type: ignore
+                    daily_spend_transactions.pop(key, None)
+            _raise_failed_update_spend_exception(
+                e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
+            )
+
     @staticmethod
     async def update_daily_user_spend(
         n_retry_times: int,
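The three @overload stubs above give type-checkers a precise mapping from the entity_type literal to the transaction dict it accepts, while a single runtime implementation handles all three tables. A minimal standalone illustration of the same pattern (toy names invented for the example, not litellm's):

    # Toy illustration of Literal-driven overloads.
    from typing import Dict, Literal, overload

    @overload
    def describe(kind: Literal["user"], payload: Dict[str, int]) -> str: ...
    @overload
    def describe(kind: Literal["team"], payload: Dict[str, float]) -> str: ...

    def describe(kind: str, payload: dict) -> str:
        # Single runtime implementation; the overloads only constrain callers.
        return f"{kind}: {len(payload)} entries"

    describe("user", {"tokens": 3})    # accepted by a type-checker
    describe("team", {"spend": 0.25})  # accepted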
@@ -750,144 +976,16 @@ class DBSpendUpdateWriter:
         """
         Batch job to update LiteLLM_DailyUserSpend table using in-memory daily_spend_transactions
         """
-        from litellm.proxy.utils import _raise_failed_update_spend_exception
-
-        ### UPDATE DAILY USER SPEND ###
-        verbose_proxy_logger.debug(
-            "Daily User Spend transactions: {}".format(len(daily_spend_transactions))
+        await DBSpendUpdateWriter._update_daily_spend(
+            n_retry_times=n_retry_times,
+            prisma_client=prisma_client,
+            proxy_logging_obj=proxy_logging_obj,
+            daily_spend_transactions=daily_spend_transactions,
+            entity_type="user",
+            entity_id_field="user_id",
+            table_name="litellm_dailyuserspend",
+            unique_constraint_name="user_id_date_api_key_model_custom_llm_provider",
         )
-        BATCH_SIZE = (
-            100  # Number of aggregated records to update in each database operation
-        )
-        start_time = time.time()
-
-        try:
-            for i in range(n_retry_times + 1):
-                try:
-                    # Get transactions to process
-                    transactions_to_process = dict(
-                        list(daily_spend_transactions.items())[:BATCH_SIZE]
-                    )
-
-                    if len(transactions_to_process) == 0:
-                        verbose_proxy_logger.debug(
-                            "No new transactions to process for daily spend update"
-                        )
-                        break
-
-                    # Update DailyUserSpend table in batches
-                    async with prisma_client.db.batch_() as batcher:
-                        for _, transaction in transactions_to_process.items():
-                            user_id = transaction.get("user_id")
-                            if not user_id:  # Skip if no user_id
-                                continue
-
-                            batcher.litellm_dailyuserspend.upsert(
-                                where={
-                                    "user_id_date_api_key_model_custom_llm_provider": {
-                                        "user_id": user_id,
-                                        "date": transaction["date"],
-                                        "api_key": transaction["api_key"],
-                                        "model": transaction["model"],
-                                        "custom_llm_provider": transaction.get(
-                                            "custom_llm_provider"
-                                        ),
-                                    }
-                                },
-                                data={
-                                    "create": {
-                                        "user_id": user_id,
-                                        "date": transaction["date"],
-                                        "api_key": transaction["api_key"],
-                                        "model": transaction["model"],
-                                        "model_group": transaction.get("model_group"),
-                                        "custom_llm_provider": transaction.get(
-                                            "custom_llm_provider"
-                                        ),
-                                        "prompt_tokens": transaction["prompt_tokens"],
-                                        "completion_tokens": transaction[
-                                            "completion_tokens"
-                                        ],
-                                        "cache_read_input_tokens": transaction.get(
-                                            "cache_read_input_tokens", 0
-                                        ),
-                                        "cache_creation_input_tokens": transaction.get(
-                                            "cache_creation_input_tokens", 0
-                                        ),
-                                        "spend": transaction["spend"],
-                                        "api_requests": transaction["api_requests"],
-                                        "successful_requests": transaction[
-                                            "successful_requests"
-                                        ],
-                                        "failed_requests": transaction[
-                                            "failed_requests"
-                                        ],
-                                    },
-                                    "update": {
-                                        "prompt_tokens": {
-                                            "increment": transaction["prompt_tokens"]
-                                        },
-                                        "completion_tokens": {
-                                            "increment": transaction[
-                                                "completion_tokens"
-                                            ]
-                                        },
-                                        "cache_read_input_tokens": {
-                                            "increment": transaction.get(
-                                                "cache_read_input_tokens", 0
-                                            )
-                                        },
-                                        "cache_creation_input_tokens": {
-                                            "increment": transaction.get(
-                                                "cache_creation_input_tokens", 0
-                                            )
-                                        },
-                                        "spend": {"increment": transaction["spend"]},
-                                        "api_requests": {
-                                            "increment": transaction["api_requests"]
-                                        },
-                                        "successful_requests": {
-                                            "increment": transaction[
-                                                "successful_requests"
-                                            ]
-                                        },
-                                        "failed_requests": {
-                                            "increment": transaction["failed_requests"]
-                                        },
-                                    },
-                                },
-                            )
-
-                    verbose_proxy_logger.info(
-                        f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
-                    )
-
-                    # Remove processed transactions
-                    for key in transactions_to_process.keys():
-                        daily_spend_transactions.pop(key, None)
-
-                    verbose_proxy_logger.debug(
-                        f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
-                    )
-                    break
-
-                except DB_CONNECTION_ERROR_TYPES as e:
-                    if i >= n_retry_times:
-                        _raise_failed_update_spend_exception(
-                            e=e,
-                            start_time=start_time,
-                            proxy_logging_obj=proxy_logging_obj,
-                        )
-                    await asyncio.sleep(2**i)  # Exponential backoff
-
-        except Exception as e:
-            # Remove processed transactions even if there was an error
-            if "transactions_to_process" in locals():
-                for key in transactions_to_process.keys():  # type: ignore
-                    daily_spend_transactions.pop(key, None)
-            _raise_failed_update_spend_exception(
-                e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
-            )
 
     @staticmethod
     async def update_daily_team_spend(
@@ -899,140 +997,53 @@ class DBSpendUpdateWriter:
         """
         Batch job to update LiteLLM_DailyTeamSpend table using in-memory daily_spend_transactions
         """
-        from litellm.proxy.utils import _raise_failed_update_spend_exception
-
-        ### UPDATE DAILY TEAM SPEND ###
-        verbose_proxy_logger.debug(
-            "Daily Team Spend transactions: {}".format(len(daily_spend_transactions))
+        await DBSpendUpdateWriter._update_daily_spend(
+            n_retry_times=n_retry_times,
+            prisma_client=prisma_client,
+            proxy_logging_obj=proxy_logging_obj,
+            daily_spend_transactions=daily_spend_transactions,
+            entity_type="team",
+            entity_id_field="team_id",
+            table_name="litellm_dailyteamspend",
+            unique_constraint_name="team_id_date_api_key_model_custom_llm_provider",
         )
-        BATCH_SIZE = (
-            100  # Number of aggregated records to update in each database operation
+
+    @staticmethod
+    async def update_daily_tag_spend(
+        n_retry_times: int,
+        prisma_client: PrismaClient,
+        proxy_logging_obj: ProxyLogging,
+        daily_spend_transactions: Dict[str, DailyTagSpendTransaction],
+    ):
+        """
+        Batch job to update LiteLLM_DailyTagSpend table using in-memory daily_spend_transactions
+        """
+        await DBSpendUpdateWriter._update_daily_spend(
+            n_retry_times=n_retry_times,
+            prisma_client=prisma_client,
+            proxy_logging_obj=proxy_logging_obj,
+            daily_spend_transactions=daily_spend_transactions,
+            entity_type="tag",
+            entity_id_field="tag",
+            table_name="litellm_dailytagspend",
+            unique_constraint_name="tag_date_api_key_model_custom_llm_provider",
         )
-        start_time = time.time()
-
-        try:
-            for i in range(n_retry_times + 1):
-                try:
-                    # Get transactions to process
-                    transactions_to_process = dict(
-                        list(daily_spend_transactions.items())[:BATCH_SIZE]
-                    )
-
-                    if len(transactions_to_process) == 0:
-                        verbose_proxy_logger.debug(
-                            "No new transactions to process for daily spend update"
-                        )
-                        break
-
-                    # Update DailyTeamSpend table in batches
-                    async with prisma_client.db.batch_() as batcher:
-                        for _, transaction in transactions_to_process.items():
-                            team_id = transaction.get("team_id")
-                            if not team_id:  # Skip if no team_id
-                                continue
-
-                            batcher.litellm_dailyteamspend.upsert(
-                                where={
-                                    "team_id_date_api_key_model_custom_llm_provider": {
-                                        "team_id": team_id,
-                                        "date": transaction["date"],
-                                        "api_key": transaction["api_key"],
-                                        "model": transaction["model"],
-                                        "custom_llm_provider": transaction.get(
-                                            "custom_llm_provider"
-                                        ),
-                                    }
-                                },
-                                data={
-                                    "create": {
-                                        "team_id": team_id,
-                                        "date": transaction["date"],
-                                        "api_key": transaction["api_key"],
-                                        "model": transaction["model"],
-                                        "model_group": transaction.get("model_group"),
-                                        "custom_llm_provider": transaction.get(
-                                            "custom_llm_provider"
-                                        ),
-                                        "prompt_tokens": transaction["prompt_tokens"],
-                                        "completion_tokens": transaction[
-                                            "completion_tokens"
-                                        ],
-                                        "spend": transaction["spend"],
-                                        "api_requests": transaction["api_requests"],
-                                        "successful_requests": transaction[
-                                            "successful_requests"
-                                        ],
-                                        "failed_requests": transaction[
-                                            "failed_requests"
-                                        ],
-                                    },
-                                    "update": {
-                                        "prompt_tokens": {
-                                            "increment": transaction["prompt_tokens"]
-                                        },
-                                        "completion_tokens": {
-                                            "increment": transaction[
-                                                "completion_tokens"
-                                            ]
-                                        },
-                                        "spend": {"increment": transaction["spend"]},
-                                        "api_requests": {
-                                            "increment": transaction["api_requests"]
-                                        },
-                                        "successful_requests": {
-                                            "increment": transaction[
-                                                "successful_requests"
-                                            ]
-                                        },
-                                        "failed_requests": {
-                                            "increment": transaction["failed_requests"]
-                                        },
-                                    },
-                                },
-                            )
-
-                    verbose_proxy_logger.info(
-                        f"Processed {len(transactions_to_process)} daily team transactions in {time.time() - start_time:.2f}s"
-                    )
-
-                    # Remove processed transactions
-                    for key in transactions_to_process.keys():
-                        daily_spend_transactions.pop(key, None)
-
-                    verbose_proxy_logger.debug(
-                        f"Processed {len(transactions_to_process)} daily spend transactions in {time.time() - start_time:.2f}s"
-                    )
-                    break
-
-                except DB_CONNECTION_ERROR_TYPES as e:
-                    if i >= n_retry_times:
-                        _raise_failed_update_spend_exception(
-                            e=e,
-                            start_time=start_time,
-                            proxy_logging_obj=proxy_logging_obj,
-                        )
-                    await asyncio.sleep(2**i)  # Exponential backoff
-
-        except Exception as e:
-            # Remove processed transactions even if there was an error
-            if "transactions_to_process" in locals():
-                for key in transactions_to_process.keys():  # type: ignore
-                    daily_spend_transactions.pop(key, None)
-            _raise_failed_update_spend_exception(
-                e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
-            )
 
     async def _common_add_spend_log_transaction_to_daily_transaction(
         self,
         payload: Union[dict, SpendLogsPayload],
         prisma_client: PrismaClient,
-        type: Literal["user", "team"] = "user",
+        type: Literal["user", "team", "request_tags"] = "user",
     ) -> Optional[BaseDailySpendTransaction]:
         common_expected_keys = ["startTime", "api_key", "model", "custom_llm_provider"]
         if type == "user":
             expected_keys = ["user", *common_expected_keys]
-        else:
+        elif type == "team":
             expected_keys = ["team_id", *common_expected_keys]
+        elif type == "request_tags":
+            expected_keys = ["request_tags", *common_expected_keys]
+        else:
+            raise ValueError(f"Invalid type: {type}")
 
         if not all(key in payload for key in expected_keys):
             verbose_proxy_logger.debug(
@@ -1143,3 +1154,44 @@ class DBSpendUpdateWriter:
         await self.daily_team_spend_update_queue.add_update(
             update={daily_transaction_key: daily_transaction}
         )
+
+    async def add_spend_log_transaction_to_daily_tag_transaction(
+        self,
+        payload: SpendLogsPayload,
+        prisma_client: Optional[PrismaClient] = None,
+    ) -> None:
+        if prisma_client is None:
+            verbose_proxy_logger.debug(
+                "prisma_client is None. Skipping writing spend logs to db."
+            )
+            return
+
+        base_daily_transaction = (
+            await self._common_add_spend_log_transaction_to_daily_transaction(
+                payload, prisma_client, "request_tags"
+            )
+        )
+        if base_daily_transaction is None:
+            return
+        if payload["request_tags"] is None:
+            verbose_proxy_logger.debug(
+                "request_tags is None for request. Skipping incrementing tag spend."
+            )
+            return
+
+        request_tags = []
+        if isinstance(payload["request_tags"], str):
+            request_tags = json.loads(payload["request_tags"])
+        elif isinstance(payload["request_tags"], list):
+            request_tags = payload["request_tags"]
+        else:
+            raise ValueError(f"Invalid request_tags: {payload['request_tags']}")
+        for tag in request_tags:
+            daily_transaction_key = f"{tag}_{base_daily_transaction['date']}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}"
+            daily_transaction = DailyTagSpendTransaction(
+                tag=tag, **base_daily_transaction
+            )
+
+            await self.daily_tag_spend_update_queue.add_update(
+                update={daily_transaction_key: daily_transaction}
+            )
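Each tag on a request fans out into its own buffered transaction, keyed so that identical (tag, date, api_key, model, provider) buckets merge in the queue. A small worked example of the key format above (payload values are illustrative; the truncated key string is made up for the example):

    import json

    # Illustrative SpendLogs payload fields.
    payload = {
        "api_key": "88a6c1",                     # hashed virtual key (made-up value)
        "model": "claude-3-sonnet",
        "custom_llm_provider": "bedrock",
        "request_tags": '["prod", "chatbot"]',   # stored as a JSON string in SpendLogs
    }
    date = "2025-04-24"

    for tag in json.loads(payload["request_tags"]):
        key = f"{tag}_{date}_{payload['api_key']}_{payload['model']}_{payload['custom_llm_provider']}"
        print(key)
    # prod_2025-04-24_88a6c1_claude-3-sonnet_bedrock
    # chatbot_2025-04-24_88a6c1_claude-3-sonnet_bedrock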
@@ -13,6 +13,7 @@ from litellm.caching import RedisCache
 from litellm.constants import (
     MAX_REDIS_BUFFER_DEQUEUE_COUNT,
     REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
+    REDIS_DAILY_TAG_SPEND_UPDATE_BUFFER_KEY,
     REDIS_DAILY_TEAM_SPEND_UPDATE_BUFFER_KEY,
     REDIS_UPDATE_BUFFER_KEY,
 )
@@ -68,11 +69,41 @@ class RedisUpdateBuffer:
                 return False
         return _use_redis_transaction_buffer
 
+    async def _store_transactions_in_redis(
+        self,
+        transactions: Any,
+        redis_key: str,
+        service_type: ServiceTypes,
+    ) -> None:
+        """
+        Helper method to store transactions in Redis and emit an event
+
+        Args:
+            transactions: The transactions to store
+            redis_key: The Redis key to store under
+            service_type: The service type for event emission
+        """
+        if transactions is None or len(transactions) == 0:
+            return
+
+        list_of_transactions = [safe_dumps(transactions)]
+        if self.redis_cache is None:
+            return
+        current_redis_buffer_size = await self.redis_cache.async_rpush(
+            key=redis_key,
+            values=list_of_transactions,
+        )
+        await self._emit_new_item_added_to_redis_buffer_event(
+            queue_size=current_redis_buffer_size,
+            service=service_type,
+        )
+
     async def store_in_memory_spend_updates_in_redis(
         self,
         spend_update_queue: SpendUpdateQueue,
         daily_spend_update_queue: DailySpendUpdateQueue,
         daily_team_spend_update_queue: DailySpendUpdateQueue,
+        daily_tag_spend_update_queue: DailySpendUpdateQueue,
     ):
         """
         Stores the in-memory spend updates to Redis
@@ -124,18 +155,23 @@ class RedisUpdateBuffer:
             )
             return
 
-        # Get all transactions
         db_spend_update_transactions = (
             await spend_update_queue.flush_and_get_aggregated_db_spend_update_transactions()
         )
-        verbose_proxy_logger.debug(
-            "ALL DB SPEND UPDATE TRANSACTIONS: %s", db_spend_update_transactions
-        )
         daily_spend_update_transactions = (
             await daily_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
         )
         daily_team_spend_update_transactions = (
             await daily_team_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
         )
+        daily_tag_spend_update_transactions = (
+            await daily_tag_spend_update_queue.flush_and_get_aggregated_daily_spend_update_transactions()
+        )
 
+        verbose_proxy_logger.debug(
+            "ALL DB SPEND UPDATE TRANSACTIONS: %s", db_spend_update_transactions
+        )
         verbose_proxy_logger.debug(
             "ALL DAILY SPEND UPDATE TRANSACTIONS: %s", daily_spend_update_transactions
         )
@@ -147,40 +183,29 @@ class RedisUpdateBuffer:
        ):
            return

        list_of_transactions = [safe_dumps(db_spend_update_transactions)]
        current_redis_buffer_size = await self.redis_cache.async_rpush(
            key=REDIS_UPDATE_BUFFER_KEY,
            values=list_of_transactions,
        )
        await self._emit_new_item_added_to_redis_buffer_event(
            queue_size=current_redis_buffer_size,
            service=ServiceTypes.REDIS_SPEND_UPDATE_QUEUE,
        # Store all transaction types using the helper method
        await self._store_transactions_in_redis(
            transactions=db_spend_update_transactions,
            redis_key=REDIS_UPDATE_BUFFER_KEY,
            service_type=ServiceTypes.REDIS_SPEND_UPDATE_QUEUE,
        )

        list_of_daily_spend_update_transactions = [
            safe_dumps(daily_spend_update_transactions)
        ]

        current_redis_buffer_size = await self.redis_cache.async_rpush(
            key=REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
            values=list_of_daily_spend_update_transactions,
        )
        await self._emit_new_item_added_to_redis_buffer_event(
            queue_size=current_redis_buffer_size,
            service=ServiceTypes.REDIS_DAILY_SPEND_UPDATE_QUEUE,
        await self._store_transactions_in_redis(
            transactions=daily_spend_update_transactions,
            redis_key=REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY,
            service_type=ServiceTypes.REDIS_DAILY_SPEND_UPDATE_QUEUE,
        )

        list_of_daily_team_spend_update_transactions = [
            safe_dumps(daily_team_spend_update_transactions)
        ]

        current_redis_buffer_size = await self.redis_cache.async_rpush(
            key=REDIS_DAILY_TEAM_SPEND_UPDATE_BUFFER_KEY,
            values=list_of_daily_team_spend_update_transactions,
        await self._store_transactions_in_redis(
            transactions=daily_team_spend_update_transactions,
            redis_key=REDIS_DAILY_TEAM_SPEND_UPDATE_BUFFER_KEY,
            service_type=ServiceTypes.REDIS_DAILY_TEAM_SPEND_UPDATE_QUEUE,
        )
        await self._emit_new_item_added_to_redis_buffer_event(
            queue_size=current_redis_buffer_size,
            service=ServiceTypes.REDIS_DAILY_TEAM_SPEND_UPDATE_QUEUE,

        await self._store_transactions_in_redis(
            transactions=daily_tag_spend_update_transactions,
            redis_key=REDIS_DAILY_TAG_SPEND_UPDATE_BUFFER_KEY,
            service_type=ServiceTypes.REDIS_DAILY_TAG_SPEND_UPDATE_QUEUE,
        )

    @staticmethod
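
Editor's note: the refactor above collapses three near-identical rpush-and-emit blocks into one `_store_transactions_in_redis` helper. A minimal, self-contained sketch of that pattern, assuming a `redis.asyncio` client and substituting `json.dumps` and a log line for the repo's `safe_dumps` helper and service-event emitter:

import asyncio
import json
from typing import Any, Optional

import redis.asyncio as redis


async def store_transactions(
    client: Optional[redis.Redis],
    transactions: Any,
    redis_key: str,
    service_type: str,
) -> None:
    # Mirror the guard clauses above: skip empty payloads and a missing client.
    if client is None or not transactions:
        return
    # RPUSH returns the new list length, i.e. the current buffer size.
    buffer_size = await client.rpush(redis_key, json.dumps(transactions, default=str))
    print(f"{service_type}: buffer size is now {buffer_size}")


async def main() -> None:
    client = redis.Redis()
    await store_transactions(
        client, {"user_1": {"spend": 0.02}}, "spend_update_buffer", "redis_spend_update_queue"
    )
    await client.aclose()


if __name__ == "__main__":
    asyncio.run(main())
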
254
litellm/proxy/management_endpoints/common_daily_activity.py
Normal file
@@ -0,0 +1,254 @@
from datetime import datetime
from typing import Any, Dict, List, Optional, Union

from fastapi import HTTPException, status

from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import CommonProxyErrors
from litellm.proxy.utils import PrismaClient
from litellm.types.proxy.management_endpoints.common_daily_activity import (
    BreakdownMetrics,
    DailySpendData,
    DailySpendMetadata,
    KeyMetadata,
    KeyMetricWithMetadata,
    MetricWithMetadata,
    SpendAnalyticsPaginatedResponse,
    SpendMetrics,
)


def update_metrics(existing_metrics: SpendMetrics, record: Any) -> SpendMetrics:
    """Update metrics with new record data."""
    existing_metrics.spend += record.spend
    existing_metrics.prompt_tokens += record.prompt_tokens
    existing_metrics.completion_tokens += record.completion_tokens
    existing_metrics.total_tokens += record.prompt_tokens + record.completion_tokens
    existing_metrics.cache_read_input_tokens += record.cache_read_input_tokens
    existing_metrics.cache_creation_input_tokens += record.cache_creation_input_tokens
    existing_metrics.api_requests += record.api_requests
    existing_metrics.successful_requests += record.successful_requests
    existing_metrics.failed_requests += record.failed_requests
    return existing_metrics

def update_breakdown_metrics(
    breakdown: BreakdownMetrics,
    record: Any,
    model_metadata: Dict[str, Dict[str, Any]],
    provider_metadata: Dict[str, Dict[str, Any]],
    api_key_metadata: Dict[str, Dict[str, Any]],
    entity_id_field: Optional[str] = None,
) -> BreakdownMetrics:
    """Updates breakdown metrics for a single record using the existing update_metrics function"""

    # Update model breakdown
    if record.model not in breakdown.models:
        breakdown.models[record.model] = MetricWithMetadata(
            metrics=SpendMetrics(),
            metadata=model_metadata.get(
                record.model, {}
            ),  # Add any model-specific metadata here
        )
    breakdown.models[record.model].metrics = update_metrics(
        breakdown.models[record.model].metrics, record
    )

    # Update provider breakdown
    provider = record.custom_llm_provider or "unknown"
    if provider not in breakdown.providers:
        breakdown.providers[provider] = MetricWithMetadata(
            metrics=SpendMetrics(),
            metadata=provider_metadata.get(
                provider, {}
            ),  # Add any provider-specific metadata here
        )
    breakdown.providers[provider].metrics = update_metrics(
        breakdown.providers[provider].metrics, record
    )

    # Update api key breakdown
    if record.api_key not in breakdown.api_keys:
        breakdown.api_keys[record.api_key] = KeyMetricWithMetadata(
            metrics=SpendMetrics(),
            metadata=KeyMetadata(
                key_alias=api_key_metadata.get(record.api_key, {}).get(
                    "key_alias", None
                )
            ),  # Add any api_key-specific metadata here
        )
    breakdown.api_keys[record.api_key].metrics = update_metrics(
        breakdown.api_keys[record.api_key].metrics, record
    )

    # Update entity-specific metrics if entity_id_field is provided
    if entity_id_field:
        entity_value = getattr(record, entity_id_field, None)
        if entity_value:
            if entity_value not in breakdown.entities:
                breakdown.entities[entity_value] = MetricWithMetadata(
                    metrics=SpendMetrics(), metadata={}
                )
            breakdown.entities[entity_value].metrics = update_metrics(
                breakdown.entities[entity_value].metrics, record
            )

    return breakdown

async def get_daily_activity(
    prisma_client: Optional[PrismaClient],
    table_name: str,
    entity_id_field: str,
    entity_id: Optional[Union[str, List[str]]],
    start_date: Optional[str],
    end_date: Optional[str],
    model: Optional[str],
    api_key: Optional[str],
    page: int,
    page_size: int,
) -> SpendAnalyticsPaginatedResponse:
    """Common function to get daily activity for any entity type."""
    if prisma_client is None:
        raise HTTPException(
            status_code=500,
            detail={"error": CommonProxyErrors.db_not_connected_error.value},
        )

    if start_date is None or end_date is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": "Please provide start_date and end_date"},
        )

    try:
        # Build filter conditions
        where_conditions: Dict[str, Any] = {
            "date": {
                "gte": start_date,
                "lte": end_date,
            }
        }

        if model:
            where_conditions["model"] = model
        if api_key:
            where_conditions["api_key"] = api_key
        if entity_id:
            if isinstance(entity_id, list):
                where_conditions[entity_id_field] = {"in": entity_id}
            else:
                where_conditions[entity_id_field] = entity_id

        # Get total count for pagination
        total_count = await getattr(prisma_client.db, table_name).count(
            where=where_conditions
        )

        # Fetch paginated results
        daily_spend_data = await getattr(prisma_client.db, table_name).find_many(
            where=where_conditions,
            order=[
                {"date": "desc"},
            ],
            skip=(page - 1) * page_size,
            take=page_size,
        )

        # Get all unique API keys from the spend data
        api_keys = set()
        for record in daily_spend_data:
            if record.api_key:
                api_keys.add(record.api_key)

        # Fetch key aliases in bulk
        api_key_metadata: Dict[str, Dict[str, Any]] = {}
        model_metadata: Dict[str, Dict[str, Any]] = {}
        provider_metadata: Dict[str, Dict[str, Any]] = {}
        if api_keys:
            key_records = await prisma_client.db.litellm_verificationtoken.find_many(
                where={"token": {"in": list(api_keys)}}
            )
            api_key_metadata.update(
                {k.token: {"key_alias": k.key_alias} for k in key_records}
            )

        # Process results
        results = []
        total_metrics = SpendMetrics()
        grouped_data: Dict[str, Dict[str, Any]] = {}

        for record in daily_spend_data:
            date_str = record.date
            if date_str not in grouped_data:
                grouped_data[date_str] = {
                    "metrics": SpendMetrics(),
                    "breakdown": BreakdownMetrics(),
                }

            # Update metrics
            grouped_data[date_str]["metrics"] = update_metrics(
                grouped_data[date_str]["metrics"], record
            )
            # Update breakdowns
            grouped_data[date_str]["breakdown"] = update_breakdown_metrics(
                grouped_data[date_str]["breakdown"],
                record,
                model_metadata,
                provider_metadata,
                api_key_metadata,
                entity_id_field=entity_id_field,
            )

            # Update total metrics
            total_metrics.spend += record.spend
            total_metrics.prompt_tokens += record.prompt_tokens
            total_metrics.completion_tokens += record.completion_tokens
            total_metrics.total_tokens += (
                record.prompt_tokens + record.completion_tokens
            )
            total_metrics.cache_read_input_tokens += record.cache_read_input_tokens
            total_metrics.cache_creation_input_tokens += (
                record.cache_creation_input_tokens
            )
            total_metrics.api_requests += record.api_requests
            total_metrics.successful_requests += record.successful_requests
            total_metrics.failed_requests += record.failed_requests

        # Convert grouped data to response format
        for date_str, data in grouped_data.items():
            results.append(
                DailySpendData(
                    date=datetime.strptime(date_str, "%Y-%m-%d").date(),
                    metrics=data["metrics"],
                    breakdown=data["breakdown"],
                )
            )

        # Sort results by date
        results.sort(key=lambda x: x.date, reverse=True)

        return SpendAnalyticsPaginatedResponse(
            results=results,
            metadata=DailySpendMetadata(
                total_spend=total_metrics.spend,
                total_prompt_tokens=total_metrics.prompt_tokens,
                total_completion_tokens=total_metrics.completion_tokens,
                total_tokens=total_metrics.total_tokens,
                total_api_requests=total_metrics.api_requests,
                total_successful_requests=total_metrics.successful_requests,
                total_failed_requests=total_metrics.failed_requests,
                total_cache_read_input_tokens=total_metrics.cache_read_input_tokens,
                total_cache_creation_input_tokens=total_metrics.cache_creation_input_tokens,
                page=page,
                total_pages=-(-total_count // page_size),  # Ceiling division
                has_more=(page * page_size) < total_count,
            ),
        )

    except Exception as e:
        verbose_proxy_logger.exception(f"Error fetching daily activity: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={"error": f"Failed to fetch analytics: {str(e)}"},
        )
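
Editor's note: two details in `get_daily_activity` are easy to misread. `total_pages` uses the negative floor-division trick for ceiling division, and `has_more` compares consumed rows against the total count. A standalone check of both:

import math

total_count, page_size = 103, 10
total_pages = -(-total_count // page_size)  # ceiling division without math.ceil
assert total_pages == math.ceil(total_count / page_size) == 11

page = 10
has_more = (page * page_size) < total_count  # 100 < 103 -> one more page remains
assert has_more is True
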
@@ -14,9 +14,8 @@ These are members of a Team on LiteLLM

import asyncio
import traceback
import uuid
from datetime import date, datetime, timedelta, timezone
from enum import Enum
from typing import Any, Dict, List, Optional, TypedDict, Union, cast
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional, Union, cast

import fastapi
from fastapi import APIRouter, Depends, Header, HTTPException, Request, status
@@ -33,6 +32,17 @@ from litellm.proxy.management_endpoints.key_management_endpoints import (
from litellm.proxy.management_helpers.audit_logs import create_audit_log_for_update
from litellm.proxy.management_helpers.utils import management_endpoint_wrapper
from litellm.proxy.utils import handle_exception_on_proxy
from litellm.types.proxy.management_endpoints.common_daily_activity import (
    BreakdownMetrics,
    DailySpendData,
    DailySpendMetadata,
    KeyMetadata,
    KeyMetricWithMetadata,
    LiteLLM_DailyUserSpend,
    MetricWithMetadata,
    SpendAnalyticsPaginatedResponse,
    SpendMetrics,
)

router = APIRouter()
@@ -82,9 +92,9 @@ def _update_internal_new_user_params(data_json: dict, data: NewUserRequest) -> d
        data_json["user_id"] = str(uuid.uuid4())
    auto_create_key = data_json.pop("auto_create_key", True)
    if auto_create_key is False:
        data_json["table_name"] = (
            "user"  # only create a user, don't create key if 'auto_create_key' set to False
        )
        data_json[
            "table_name"
        ] = "user"  # only create a user, don't create key if 'auto_create_key' set to False

    is_internal_user = False
    if data.user_role and data.user_role.is_internal_user_role:
@@ -651,9 +661,9 @@ def _update_internal_user_params(data_json: dict, data: UpdateUserRequest) -> di
        "budget_duration" not in non_default_values
    ):  # applies internal user limits, if user role updated
        if is_internal_user and litellm.internal_user_budget_duration is not None:
            non_default_values["budget_duration"] = (
                litellm.internal_user_budget_duration
            )
            non_default_values[
                "budget_duration"
            ] = litellm.internal_user_budget_duration
            duration_s = duration_in_seconds(
                duration=non_default_values["budget_duration"]
            )
@@ -964,13 +974,13 @@ async def get_users(
            "in": user_id_list,  # Now passing a list of strings as required by Prisma
        }

    users: Optional[List[LiteLLM_UserTable]] = (
        await prisma_client.db.litellm_usertable.find_many(
            where=where_conditions,
            skip=skip,
            take=page_size,
            order={"created_at": "desc"},
        )
    users: Optional[
        List[LiteLLM_UserTable]
    ] = await prisma_client.db.litellm_usertable.find_many(
        where=where_conditions,
        skip=skip,
        take=page_size,
        order={"created_at": "desc"},
    )

    # Get total count of user rows
@@ -1225,13 +1235,13 @@ async def ui_view_users(
    }

    # Query users with pagination and filters
    users: Optional[List[BaseModel]] = (
        await prisma_client.db.litellm_usertable.find_many(
            where=where_conditions,
            skip=skip,
            take=page_size,
            order={"created_at": "desc"},
        )
    users: Optional[
        List[BaseModel]
    ] = await prisma_client.db.litellm_usertable.find_many(
        where=where_conditions,
        skip=skip,
        take=page_size,
        order={"created_at": "desc"},
    )

    if not users:
@@ -1244,111 +1254,6 @@ async def ui_view_users(
        raise HTTPException(status_code=500, detail=f"Error searching users: {str(e)}")


class GroupByDimension(str, Enum):
    DATE = "date"
    MODEL = "model"
    API_KEY = "api_key"
    TEAM = "team"
    ORGANIZATION = "organization"
    MODEL_GROUP = "model_group"
    PROVIDER = "custom_llm_provider"


class SpendMetrics(BaseModel):
    spend: float = Field(default=0.0)
    prompt_tokens: int = Field(default=0)
    completion_tokens: int = Field(default=0)
    cache_read_input_tokens: int = Field(default=0)
    cache_creation_input_tokens: int = Field(default=0)
    total_tokens: int = Field(default=0)
    successful_requests: int = Field(default=0)
    failed_requests: int = Field(default=0)
    api_requests: int = Field(default=0)


class MetricBase(BaseModel):
    metrics: SpendMetrics


class MetricWithMetadata(MetricBase):
    metadata: Dict[str, Any] = Field(default_factory=dict)


class KeyMetadata(BaseModel):
    """Metadata for a key"""

    key_alias: Optional[str] = None


class KeyMetricWithMetadata(MetricBase):
    """Base class for metrics with additional metadata"""

    metadata: KeyMetadata = Field(default_factory=KeyMetadata)


class BreakdownMetrics(BaseModel):
    """Breakdown of spend by different dimensions"""

    models: Dict[str, MetricWithMetadata] = Field(
        default_factory=dict
    )  # model -> {metrics, metadata}
    providers: Dict[str, MetricWithMetadata] = Field(
        default_factory=dict
    )  # provider -> {metrics, metadata}
    api_keys: Dict[str, KeyMetricWithMetadata] = Field(
        default_factory=dict
    )  # api_key -> {metrics, metadata}


class DailySpendData(BaseModel):
    date: date
    metrics: SpendMetrics
    breakdown: BreakdownMetrics = Field(default_factory=BreakdownMetrics)


class DailySpendMetadata(BaseModel):
    total_spend: float = Field(default=0.0)
    total_prompt_tokens: int = Field(default=0)
    total_completion_tokens: int = Field(default=0)
    total_tokens: int = Field(default=0)
    total_api_requests: int = Field(default=0)
    total_successful_requests: int = Field(default=0)
    total_failed_requests: int = Field(default=0)
    total_cache_read_input_tokens: int = Field(default=0)
    total_cache_creation_input_tokens: int = Field(default=0)
    page: int = Field(default=1)
    total_pages: int = Field(default=1)
    has_more: bool = Field(default=False)


class SpendAnalyticsPaginatedResponse(BaseModel):
    results: List[DailySpendData]
    metadata: DailySpendMetadata = Field(default_factory=DailySpendMetadata)


class LiteLLM_DailyUserSpend(BaseModel):
    id: str
    user_id: str
    date: str
    api_key: str
    model: str
    model_group: Optional[str] = None
    custom_llm_provider: Optional[str] = None
    prompt_tokens: int = 0
    completion_tokens: int = 0
    cache_read_input_tokens: int = 0
    cache_creation_input_tokens: int = 0
    spend: float = 0.0
    api_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0


class GroupedData(TypedDict):
    metrics: SpendMetrics
    breakdown: BreakdownMetrics


def update_metrics(
    group_metrics: SpendMetrics, record: LiteLLM_DailyUserSpend
) -> SpendMetrics:
@@ -1494,9 +1399,9 @@ async def get_user_daily_activity(
        user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
        and user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
    ):
        where_conditions["user_id"] = (
            user_api_key_dict.user_id
        )  # only allow access to own data
        where_conditions[
            "user_id"
        ] = user_api_key_dict.user_id  # only allow access to own data

    # Get total count for pagination
    total_count = await prisma_client.db.litellm_dailyuserspend.count(
@@ -12,7 +12,7 @@ All /tag management endpoints

import datetime
import json
from typing import Dict
from typing import Dict, Optional

from fastapi import APIRouter, Depends, HTTPException
@@ -20,6 +20,10 @@ from litellm._logging import verbose_proxy_logger
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy.management_endpoints.common_daily_activity import (
    SpendAnalyticsPaginatedResponse,
    get_daily_activity,
)
from litellm.types.tag_management import (
    TagConfig,
    TagDeleteRequest,
@@ -354,3 +358,52 @@ async def delete_tag(
        return {"message": f"Tag {data.name} deleted successfully"}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@router.get(
    "/tag/daily/activity",
    response_model=SpendAnalyticsPaginatedResponse,
    tags=["tag management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def get_tag_daily_activity(
    tags: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    model: Optional[str] = None,
    api_key: Optional[str] = None,
    page: int = 1,
    page_size: int = 10,
):
    """
    Get daily activity for specific tags or all tags.

    Args:
        tags (Optional[str]): Comma-separated list of tags to filter by. If not provided, returns data for all tags.
        start_date (Optional[str]): Start date for the activity period (YYYY-MM-DD).
        end_date (Optional[str]): End date for the activity period (YYYY-MM-DD).
        model (Optional[str]): Filter by model name.
        api_key (Optional[str]): Filter by API key.
        page (int): Page number for pagination.
        page_size (int): Number of items per page.

    Returns:
        SpendAnalyticsPaginatedResponse: Paginated response containing daily activity data.
    """
    from litellm.proxy.proxy_server import prisma_client

    # Convert comma-separated tags string to list if provided
    tag_list = tags.split(",") if tags else None

    return await get_daily_activity(
        prisma_client=prisma_client,
        table_name="litellm_dailytagspend",
        entity_id_field="tag",
        entity_id=tag_list,
        start_date=start_date,
        end_date=end_date,
        model=model,
        api_key=api_key,
        page=page,
        page_size=page_size,
    )
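
Editor's note: a hedged sketch of calling the new endpoint against a locally running proxy. The base URL and bearer key are placeholders; the query parameters mirror the signature above:

import requests

resp = requests.get(
    "http://localhost:4000/tag/daily/activity",  # placeholder proxy URL
    headers={"Authorization": "Bearer sk-1234"},  # placeholder admin key
    params={
        "tags": "prod,batch-jobs",  # optional, comma-separated
        "start_date": "2025-04-01",
        "end_date": "2025-04-16",
        "page": 1,
        "page_size": 10,
    },
    timeout=30,
)
resp.raise_for_status()
print(resp.json()["metadata"]["total_spend"])
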
@@ -62,6 +62,9 @@ from litellm.proxy.management_endpoints.common_utils import (
    _is_user_team_admin,
    _set_object_metadata_field,
)
from litellm.proxy.management_endpoints.tag_management_endpoints import (
    get_daily_activity,
)
from litellm.proxy.management_helpers.team_member_permission_checks import (
    TeamMemberPermissionChecks,
)
@@ -75,6 +78,9 @@ from litellm.proxy.utils import (
    handle_exception_on_proxy,
)
from litellm.router import Router
from litellm.types.proxy.management_endpoints.common_daily_activity import (
    SpendAnalyticsPaginatedResponse,
)
from litellm.types.proxy.management_endpoints.team_endpoints import (
    GetTeamMemberPermissionsResponse,
    UpdateTeamMemberPermissionsRequest,
@@ -515,12 +521,12 @@ async def update_team(
        updated_kv["model_id"] = _model_id

    updated_kv = prisma_client.jsonify_team_object(db_data=updated_kv)
    team_row: Optional[LiteLLM_TeamTable] = (
        await prisma_client.db.litellm_teamtable.update(
            where={"team_id": data.team_id},
            data=updated_kv,
            include={"litellm_model_table": True},  # type: ignore
        )
    team_row: Optional[
        LiteLLM_TeamTable
    ] = await prisma_client.db.litellm_teamtable.update(
        where={"team_id": data.team_id},
        data=updated_kv,
        include={"litellm_model_table": True},  # type: ignore
    )

    if team_row is None or team_row.team_id is None:
@@ -1146,10 +1152,10 @@ async def delete_team(
    team_rows: List[LiteLLM_TeamTable] = []
    for team_id in data.team_ids:
        try:
            team_row_base: Optional[BaseModel] = (
                await prisma_client.db.litellm_teamtable.find_unique(
                    where={"team_id": team_id}
                )
            team_row_base: Optional[
                BaseModel
            ] = await prisma_client.db.litellm_teamtable.find_unique(
                where={"team_id": team_id}
            )
            if team_row_base is None:
                raise Exception
@@ -1307,10 +1313,10 @@ async def team_info(
        )

    try:
        team_info: Optional[BaseModel] = (
            await prisma_client.db.litellm_teamtable.find_unique(
                where={"team_id": team_id}
            )
        team_info: Optional[
            BaseModel
        ] = await prisma_client.db.litellm_teamtable.find_unique(
            where={"team_id": team_id}
        )
        if team_info is None:
            raise Exception
@@ -2079,3 +2085,52 @@ async def update_team_member_permissions(
    )

    return updated_team


@router.get(
    "/team/daily/activity",
    response_model=SpendAnalyticsPaginatedResponse,
    tags=["team management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def get_team_daily_activity(
    team_ids: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    model: Optional[str] = None,
    api_key: Optional[str] = None,
    page: int = 1,
    page_size: int = 10,
):
    """
    Get daily activity for specific teams or all teams.

    Args:
        team_ids (Optional[str]): Comma-separated list of team IDs to filter by. If not provided, returns data for all teams.
        start_date (Optional[str]): Start date for the activity period (YYYY-MM-DD).
        end_date (Optional[str]): End date for the activity period (YYYY-MM-DD).
        model (Optional[str]): Filter by model name.
        api_key (Optional[str]): Filter by API key.
        page (int): Page number for pagination.
        page_size (int): Number of items per page.

    Returns:
        SpendAnalyticsPaginatedResponse: Paginated response containing daily activity data.
    """
    from litellm.proxy.proxy_server import prisma_client

    # Convert comma-separated team IDs string to a list if provided
    team_ids_list = team_ids.split(",") if team_ids else None

    return await get_daily_activity(
        prisma_client=prisma_client,
        table_name="litellm_dailyteamspend",
        entity_id_field="team_id",
        entity_id=team_ids_list,
        start_date=start_date,
        end_date=end_date,
        model=model,
        api_key=api_key,
        page=page,
        page_size=page_size,
    )
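
Editor's note: the team endpoint is called the same way, with `team_ids` in place of `tags`; the same placeholder caveats apply:

import requests

resp = requests.get(
    "http://localhost:4000/team/daily/activity",  # placeholder proxy URL
    headers={"Authorization": "Bearer sk-1234"},  # placeholder admin key
    params={"team_ids": "team-a,team-b", "start_date": "2025-04-01", "end_date": "2025-04-16"},
    timeout=30,
)
for day in resp.json()["results"]:
    print(day["date"], day["metrics"]["spend"])
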
@@ -353,6 +353,8 @@ model LiteLLM_DailyTeamSpend {
  custom_llm_provider         String?
  prompt_tokens               Int      @default(0)
  completion_tokens           Int      @default(0)
  cache_read_input_tokens     Int      @default(0)
  cache_creation_input_tokens Int      @default(0)
  spend                       Float    @default(0.0)
  api_requests                Int      @default(0)
  successful_requests         Int      @default(0)
@@ -367,6 +369,33 @@ model LiteLLM_DailyTeamSpend {
  @@index([model])
}

// Track daily tag spend metrics per model and key
model LiteLLM_DailyTagSpend {
  id                          String   @id @default(uuid())
  tag                         String
  date                        String
  api_key                     String
  model                       String
  model_group                 String?
  custom_llm_provider         String?
  prompt_tokens               Int      @default(0)
  completion_tokens           Int      @default(0)
  cache_read_input_tokens     Int      @default(0)
  cache_creation_input_tokens Int      @default(0)
  spend                       Float    @default(0.0)
  api_requests                Int      @default(0)
  successful_requests         Int      @default(0)
  failed_requests             Int      @default(0)
  created_at                  DateTime @default(now())
  updated_at                  DateTime @updatedAt

  @@unique([tag, date, api_key, model, custom_llm_provider])
  @@index([date])
  @@index([tag])
  @@index([api_key])
  @@index([model])
}


// Track the status of cron jobs running. Only allow one pod to run the job at a time
model LiteLLM_CronJob {
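
Editor's note: with the composite unique key above in place, per-tag aggregates can be upserted much like the user-spend rows were in the helper removed below. A hedged sketch using the Prisma Python client; the where-key name is an assumption based on Prisma's naming convention for `@@unique`, and all values are illustrative:

from typing import Any


async def upsert_daily_tag_spend(prisma_client: Any) -> None:
    # Composite key name assumed from the @@unique([tag, date, api_key, model,
    # custom_llm_provider]) constraint above; values are illustrative.
    await prisma_client.db.litellm_dailytagspend.upsert(
        where={
            "tag_date_api_key_model_custom_llm_provider": {
                "tag": "prod",
                "date": "2025-04-16",
                "api_key": "hashed-key",
                "model": "gpt-4o",
                "custom_llm_provider": "openai",
            }
        },
        data={
            "create": {
                "tag": "prod",
                "date": "2025-04-16",
                "api_key": "hashed-key",
                "model": "gpt-4o",
                "custom_llm_provider": "openai",
                "spend": 0.02,
                "prompt_tokens": 1000,
                "completion_tokens": 200,
            },
            "update": {
                "spend": {"increment": 0.02},
                "prompt_tokens": {"increment": 1000},
                "completion_tokens": {"increment": 200},
            },
        },
    )
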
@@ -2796,50 +2796,3 @@ def _premium_user_check():
            "error": f"This feature is only available for LiteLLM Enterprise users. {CommonProxyErrors.not_premium_user.value}"
        },
    )


async def _update_daily_spend_batch(prisma_client, spend_aggregates):
    """Helper function to update daily spend in batches"""
    async with prisma_client.db.batch_() as batcher:
        for (
            user_id,
            date,
            api_key,
            model,
            model_group,
            provider,
        ), metrics in spend_aggregates.items():
            if not user_id:  # Skip if no user_id
                continue

            batcher.litellm_dailyuserspend.upsert(
                where={
                    "user_id_date_api_key_model_custom_llm_provider": {
                        "user_id": user_id,
                        "date": date,
                        "api_key": api_key,
                        "model": model,
                        "custom_llm_provider": provider,
                    }
                },
                data={
                    "create": {
                        "user_id": user_id,
                        "date": date,
                        "api_key": api_key,
                        "model": model,
                        "model_group": model_group,
                        "custom_llm_provider": provider,
                        "prompt_tokens": metrics["prompt_tokens"],
                        "completion_tokens": metrics["completion_tokens"],
                        "spend": metrics["spend"],
                    },
                    "update": {
                        "prompt_tokens": {"increment": metrics["prompt_tokens"]},
                        "completion_tokens": {
                            "increment": metrics["completion_tokens"]
                        },
                        "spend": {"increment": metrics["spend"]},
                    },
                },
            )
@@ -0,0 +1,113 @@
from datetime import date
from enum import Enum
from typing import Any, Dict, List, Optional, TypedDict

from pydantic import BaseModel, Field


class GroupByDimension(str, Enum):
    DATE = "date"
    MODEL = "model"
    API_KEY = "api_key"
    TEAM = "team"
    ORGANIZATION = "organization"
    MODEL_GROUP = "model_group"
    PROVIDER = "custom_llm_provider"


class SpendMetrics(BaseModel):
    spend: float = Field(default=0.0)
    prompt_tokens: int = Field(default=0)
    completion_tokens: int = Field(default=0)
    cache_read_input_tokens: int = Field(default=0)
    cache_creation_input_tokens: int = Field(default=0)
    total_tokens: int = Field(default=0)
    successful_requests: int = Field(default=0)
    failed_requests: int = Field(default=0)
    api_requests: int = Field(default=0)


class MetricBase(BaseModel):
    metrics: SpendMetrics


class MetricWithMetadata(MetricBase):
    metadata: Dict[str, Any] = Field(default_factory=dict)


class KeyMetadata(BaseModel):
    """Metadata for a key"""

    key_alias: Optional[str] = None


class KeyMetricWithMetadata(MetricBase):
    """Base class for metrics with additional metadata"""

    metadata: KeyMetadata = Field(default_factory=KeyMetadata)


class BreakdownMetrics(BaseModel):
    """Breakdown of spend by different dimensions"""

    models: Dict[str, MetricWithMetadata] = Field(
        default_factory=dict
    )  # model -> {metrics, metadata}
    providers: Dict[str, MetricWithMetadata] = Field(
        default_factory=dict
    )  # provider -> {metrics, metadata}
    api_keys: Dict[str, KeyMetricWithMetadata] = Field(
        default_factory=dict
    )  # api_key -> {metrics, metadata}
    entities: Dict[str, MetricWithMetadata] = Field(
        default_factory=dict
    )  # entity -> {metrics, metadata}


class DailySpendData(BaseModel):
    date: date
    metrics: SpendMetrics
    breakdown: BreakdownMetrics = Field(default_factory=BreakdownMetrics)


class DailySpendMetadata(BaseModel):
    total_spend: float = Field(default=0.0)
    total_prompt_tokens: int = Field(default=0)
    total_completion_tokens: int = Field(default=0)
    total_tokens: int = Field(default=0)
    total_api_requests: int = Field(default=0)
    total_successful_requests: int = Field(default=0)
    total_failed_requests: int = Field(default=0)
    total_cache_read_input_tokens: int = Field(default=0)
    total_cache_creation_input_tokens: int = Field(default=0)
    page: int = Field(default=1)
    total_pages: int = Field(default=1)
    has_more: bool = Field(default=False)


class SpendAnalyticsPaginatedResponse(BaseModel):
    results: List[DailySpendData]
    metadata: DailySpendMetadata = Field(default_factory=DailySpendMetadata)


class LiteLLM_DailyUserSpend(BaseModel):
    id: str
    user_id: str
    date: str
    api_key: str
    model: str
    model_group: Optional[str] = None
    custom_llm_provider: Optional[str] = None
    prompt_tokens: int = 0
    completion_tokens: int = 0
    cache_read_input_tokens: int = 0
    cache_creation_input_tokens: int = 0
    spend: float = 0.0
    api_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0


class GroupedData(TypedDict):
    metrics: SpendMetrics
    breakdown: BreakdownMetrics
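
Editor's note: a quick, runnable check of the relocated response models, assuming the module path added in this commit and the pydantic 2.x pin from requirements.txt:

from datetime import date

from litellm.types.proxy.management_endpoints.common_daily_activity import (
    DailySpendData,
    SpendAnalyticsPaginatedResponse,
    SpendMetrics,
)

metrics = SpendMetrics(spend=1.25, prompt_tokens=1000, completion_tokens=200,
                       total_tokens=1200, api_requests=10,
                       successful_requests=9, failed_requests=1)
day = DailySpendData(date=date(2025, 4, 16), metrics=metrics)
page = SpendAnalyticsPaginatedResponse(results=[day])
print(page.metadata.total_pages)  # defaults to 1 until the helper fills it in
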
@@ -34,6 +34,7 @@ class ServiceTypes(str, enum.Enum):
    IN_MEMORY_DAILY_SPEND_UPDATE_QUEUE = "in_memory_daily_spend_update_queue"
    REDIS_DAILY_SPEND_UPDATE_QUEUE = "redis_daily_spend_update_queue"
    REDIS_DAILY_TEAM_SPEND_UPDATE_QUEUE = "redis_daily_team_spend_update_queue"
    REDIS_DAILY_TAG_SPEND_UPDATE_QUEUE = "redis_daily_tag_spend_update_queue"
    # spend update queue - current spend of key, user, team
    IN_MEMORY_SPEND_UPDATE_QUEUE = "in_memory_spend_update_queue"
    REDIS_SPEND_UPDATE_QUEUE = "redis_spend_update_queue"
@@ -1011,7 +1011,9 @@ class ModelResponseStream(ModelResponseBase):

    def __init__(
        self,
        choices: Optional[List[Union[StreamingChoices, dict, BaseModel]]] = None,
        choices: Optional[
            Union[List[StreamingChoices], Union[StreamingChoices, dict, BaseModel]]
        ] = None,
        id: Optional[str] = None,
        created: Optional[int] = None,
        provider_specific_fields: Optional[Dict[str, Any]] = None,
@@ -600,6 +600,40 @@
        "supports_vision": true,
        "supports_prompt_caching": true
    },
    "o3": {
        "max_tokens": 100000,
        "max_input_tokens": 200000,
        "max_output_tokens": 100000,
        "input_cost_per_token": 1e-5,
        "output_cost_per_token": 4e-5,
        "cache_read_input_token_cost": 2.5e-6,
        "litellm_provider": "openai",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": false,
        "supports_vision": true,
        "supports_prompt_caching": true,
        "supports_response_schema": true,
        "supports_reasoning": true,
        "supports_tool_choice": true
    },
    "o3-2025-04-16": {
        "max_tokens": 100000,
        "max_input_tokens": 200000,
        "max_output_tokens": 100000,
        "input_cost_per_token": 1e-5,
        "output_cost_per_token": 4e-5,
        "cache_read_input_token_cost": 2.5e-6,
        "litellm_provider": "openai",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": false,
        "supports_vision": true,
        "supports_prompt_caching": true,
        "supports_response_schema": true,
        "supports_reasoning": true,
        "supports_tool_choice": true
    },
    "o3-mini": {
        "max_tokens": 100000,
        "max_input_tokens": 200000,
@@ -634,6 +668,40 @@
        "supports_reasoning": true,
        "supports_tool_choice": true
    },
    "o4-mini": {
        "max_tokens": 100000,
        "max_input_tokens": 200000,
        "max_output_tokens": 100000,
        "input_cost_per_token": 1.1e-6,
        "output_cost_per_token": 4.4e-6,
        "cache_read_input_token_cost": 2.75e-7,
        "litellm_provider": "openai",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": false,
        "supports_vision": true,
        "supports_prompt_caching": true,
        "supports_response_schema": true,
        "supports_reasoning": true,
        "supports_tool_choice": true
    },
    "o4-mini-2025-04-16": {
        "max_tokens": 100000,
        "max_input_tokens": 200000,
        "max_output_tokens": 100000,
        "input_cost_per_token": 1.1e-6,
        "output_cost_per_token": 4.4e-6,
        "cache_read_input_token_cost": 2.75e-7,
        "litellm_provider": "openai",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_parallel_function_calling": false,
        "supports_vision": true,
        "supports_prompt_caching": true,
        "supports_response_schema": true,
        "supports_reasoning": true,
        "supports_tool_choice": true
    },
    "o1-mini-2024-09-12": {
        "max_tokens": 65536,
        "max_input_tokens": 128000,
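
Editor's note: the per-token costs above make a quick sanity check easy; for example, pricing a hypothetical o4-mini call by hand:

# Values copied from the o4-mini entry above (USD per token).
input_cost_per_token = 1.1e-6
output_cost_per_token = 4.4e-6

prompt_tokens, completion_tokens = 10_000, 2_000  # illustrative counts
cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
print(f"${cost:.4f}")  # $0.0198
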
@@ -29,7 +29,7 @@ python-multipart==0.0.18 # admin UI
Pillow==11.0.0
azure-ai-contentsafety==1.0.0 # for azure content safety
azure-identity==1.16.1 # for azure content safety
azure-storage-file-datalake==12.15.0 # for azure buck storage logging
azure-storage-file-datalake==12.20.0 # for azure buck storage logging
opentelemetry-api==1.25.0
opentelemetry-sdk==1.25.0
opentelemetry-exporter-otlp==1.25.0
@@ -51,4 +51,4 @@ tenacity==8.2.3 # for retrying requests, when litellm.num_retries set
pydantic==2.10.2 # proxy + openai req.
jsonschema==4.22.0 # validating json schema
websockets==13.1.0 # for realtime API
####
####
@@ -353,6 +353,8 @@ model LiteLLM_DailyTeamSpend {
  custom_llm_provider         String?
  prompt_tokens               Int      @default(0)
  completion_tokens           Int      @default(0)
  cache_read_input_tokens     Int      @default(0)
  cache_creation_input_tokens Int      @default(0)
  spend                       Float    @default(0.0)
  api_requests                Int      @default(0)
  successful_requests         Int      @default(0)

@@ -367,6 +369,33 @@ model LiteLLM_DailyTeamSpend {
  @@index([model])
}

// Track daily tag spend metrics per model and key
model LiteLLM_DailyTagSpend {
  id                          String   @id @default(uuid())
  tag                         String
  date                        String
  api_key                     String
  model                       String
  model_group                 String?
  custom_llm_provider         String?
  prompt_tokens               Int      @default(0)
  completion_tokens           Int      @default(0)
  cache_read_input_tokens     Int      @default(0)
  cache_creation_input_tokens Int      @default(0)
  spend                       Float    @default(0.0)
  api_requests                Int      @default(0)
  successful_requests         Int      @default(0)
  failed_requests             Int      @default(0)
  created_at                  DateTime @default(now())
  updated_at                  DateTime @updatedAt

  @@unique([tag, date, api_key, model, custom_llm_provider])
  @@index([date])
  @@index([tag])
  @@index([api_key])
  @@index([model])
}


// Track the status of cron jobs running. Only allow one pod to run the job at a time
model LiteLLM_CronJob {
@@ -135,7 +135,6 @@ def test_get_combined_tool_content():
                    name="get_current_weather",
                ),
                type="function",
                index=0,
            ),
            ChatCompletionMessageToolCall(
                id="call_rrns",
@@ -144,7 +143,6 @@ def test_get_combined_tool_content():
                    name="get_current_weather",
                ),
                type="function",
                index=1,
            ),
            ChatCompletionMessageToolCall(
                id="call_0k29",
@@ -153,6 +151,5 @@ def test_get_combined_tool_content():
                    name="get_current_weather",
                ),
                type="function",
                index=2,
            ),
        ]
503
ui/litellm-dashboard/src/components/entity_usage.tsx
Normal file
@@ -0,0 +1,503 @@
import React, { useState, useEffect } from "react";
import {
  BarChart, Card, Title, Text,
  Grid, Col, DateRangePicker, DateRangePickerValue,
  Table, TableHead, TableRow, TableHeaderCell, TableBody, TableCell,
  DonutChart,
  TabPanel, TabGroup, TabList, Tab, TabPanels
} from "@tremor/react";
import { Select } from 'antd';
import { ActivityMetrics, processActivityData } from './activity_metrics';
import { SpendMetrics, DailyData } from './usage/types';
import { tagDailyActivityCall, teamDailyActivityCall } from './networking';

interface EntityMetrics {
  metrics: {
    spend: number;
    prompt_tokens: number;
    completion_tokens: number;
    cache_read_input_tokens: number;
    cache_creation_input_tokens: number;
    total_tokens: number;
    successful_requests: number;
    failed_requests: number;
    api_requests: number;
  };
  metadata: Record<string, any>;
}

interface BreakdownMetrics {
  models: Record<string, any>;
  providers: Record<string, any>;
  api_keys: Record<string, any>;
  entities: Record<string, EntityMetrics>;
}

interface ExtendedDailyData extends DailyData {
  breakdown: BreakdownMetrics;
}

interface EntitySpendData {
  results: ExtendedDailyData[];
  metadata: {
    total_spend: number;
    total_api_requests: number;
    total_successful_requests: number;
    total_failed_requests: number;
    total_tokens: number;
  };
}

interface EntityUsageProps {
  accessToken: string | null;
  entityType: 'tag' | 'team';
  entityId?: string | null;
}

const EntityUsage: React.FC<EntityUsageProps> = ({
  accessToken,
  entityType,
  entityId
}) => {
  const [spendData, setSpendData] = useState<EntitySpendData>({
    results: [],
    metadata: {
      total_spend: 0,
      total_api_requests: 0,
      total_successful_requests: 0,
      total_failed_requests: 0,
      total_tokens: 0
    }
  });

  const modelMetrics = processActivityData(spendData);

  const [selectedTags, setSelectedTags] = useState<string[]>([]);
  const [dateValue, setDateValue] = useState<DateRangePickerValue>({
    from: new Date(Date.now() - 28 * 24 * 60 * 60 * 1000),
    to: new Date(),
  });

  const fetchSpendData = async () => {
    if (!accessToken || !dateValue.from || !dateValue.to) return;
    const startTime = dateValue.from;
    const endTime = dateValue.to;

    if (entityType === 'tag') {
      const data = await tagDailyActivityCall(
        accessToken,
        startTime,
        endTime,
        1,
        selectedTags.length > 0 ? selectedTags : null
      );
      setSpendData(data);
    } else if (entityType === 'team') {
      const data = await teamDailyActivityCall(
        accessToken,
        startTime,
        endTime,
        1,
        selectedTags.length > 0 ? selectedTags : null
      );
      setSpendData(data);
    } else {
      throw new Error("Invalid entity type");
    }
  };

  useEffect(() => {
    fetchSpendData();
  }, [accessToken, dateValue, entityId, selectedTags]);
  const getTopModels = () => {
    const modelSpend: { [key: string]: any } = {};
    spendData.results.forEach(day => {
      Object.entries(day.breakdown.models || {}).forEach(([model, metrics]) => {
        if (!modelSpend[model]) {
          modelSpend[model] = {
            spend: 0,
            requests: 0,
            successful_requests: 0,
            failed_requests: 0,
            tokens: 0
          };
        }
        try {
          modelSpend[model].spend += metrics.metrics.spend;
        } catch (e) {
          console.log(`Error adding spend for ${model}: ${e}, got metrics: ${JSON.stringify(metrics)}`);
        }
        modelSpend[model].requests += metrics.metrics.api_requests;
        modelSpend[model].successful_requests += metrics.metrics.successful_requests;
        modelSpend[model].failed_requests += metrics.metrics.failed_requests;
        modelSpend[model].tokens += metrics.metrics.total_tokens;
      });
    });

    return Object.entries(modelSpend)
      .map(([model, metrics]) => ({
        key: model,
        ...metrics
      }))
      .sort((a, b) => b.spend - a.spend)
      .slice(0, 5);
  };

  const getTopApiKeys = () => {
    const apiKeySpend: { [key: string]: any } = {};
    spendData.results.forEach(day => {
      Object.entries(day.breakdown.api_keys || {}).forEach(([key, metrics]) => {
        if (!apiKeySpend[key]) {
          apiKeySpend[key] = {
            key: key,
            spend: 0,
            requests: 0,
            successful_requests: 0,
            failed_requests: 0,
            tokens: 0
          };
        }
        apiKeySpend[key].spend += metrics.metrics.spend;
        apiKeySpend[key].requests += metrics.metrics.api_requests;
        apiKeySpend[key].successful_requests += metrics.metrics.successful_requests;
        apiKeySpend[key].failed_requests += metrics.metrics.failed_requests;
        apiKeySpend[key].tokens += metrics.metrics.total_tokens;
      });
    });

    return Object.values(apiKeySpend)
      .sort((a, b) => b.spend - a.spend)
      .slice(0, 5);
  };

  const getProviderSpend = () => {
    const providerSpend: { [key: string]: any } = {};
    spendData.results.forEach(day => {
      Object.entries(day.breakdown.providers || {}).forEach(([provider, metrics]) => {
        if (!providerSpend[provider]) {
          providerSpend[provider] = {
            provider,
            spend: 0,
            requests: 0,
            successful_requests: 0,
            failed_requests: 0,
            tokens: 0
          };
        }
        try {
          providerSpend[provider].spend += metrics.metrics.spend;
          providerSpend[provider].requests += metrics.metrics.api_requests;
          providerSpend[provider].successful_requests += metrics.metrics.successful_requests;
          providerSpend[provider].failed_requests += metrics.metrics.failed_requests;
          providerSpend[provider].tokens += metrics.metrics.total_tokens;
        } catch (e) {
          console.log(`Error processing provider ${provider}: ${e}`);
        }
      });
    });

    return Object.values(providerSpend)
      .filter(provider => provider.spend > 0)
      .sort((a, b) => b.spend - a.spend);
  };

  const getAllTags = () => {
    const tags = new Set<string>();
    spendData.results.forEach(day => {
      Object.keys(day.breakdown.entities || {}).forEach(tag => {
        tags.add(tag);
      });
    });
    return Array.from(tags).map(tag => ({
      label: tag,
      value: tag
    }));
  };

  const filterDataByTags = (data: any[]) => {
    if (selectedTags.length === 0) return data;
    return data.filter(item => selectedTags.includes(item.entity));
  };

  const getEntityBreakdown = () => {
    const entitySpend: { [key: string]: any } = {};
    spendData.results.forEach(day => {
      Object.entries(day.breakdown.entities || {}).forEach(([entity, data]) => {
        if (!entitySpend[entity]) {
          entitySpend[entity] = {
            entity,
            spend: 0,
            requests: 0,
            successful_requests: 0,
            failed_requests: 0,
            tokens: 0
          };
        }
        entitySpend[entity].spend += data.metrics.spend;
        entitySpend[entity].requests += data.metrics.api_requests;
        entitySpend[entity].successful_requests += data.metrics.successful_requests;
        entitySpend[entity].failed_requests += data.metrics.failed_requests;
        entitySpend[entity].tokens += data.metrics.total_tokens;
      });
    });

    const result = Object.values(entitySpend)
      .sort((a, b) => b.spend - a.spend);

    return filterDataByTags(result);
  };
  return (
    <div style={{ width: "100%" }}>
      <Grid numItems={2} className="gap-2 w-full mb-4">
        <Col>
          <Text>Select Time Range</Text>
          <DateRangePicker
            enableSelect={true}
            value={dateValue}
            onValueChange={setDateValue}
          />
        </Col>
        <Col>
          <Text>Filter by {entityType === 'tag' ? 'Tags' : 'Teams'}</Text>
          <Select
            mode="multiple"
            style={{ width: '100%' }}
            placeholder={`Select ${entityType === 'tag' ? 'tags' : 'teams'} to filter...`}
            value={selectedTags}
            onChange={setSelectedTags}
            options={getAllTags()}
            className="mt-2"
            allowClear
          />
        </Col>
      </Grid>
      <TabGroup>
        <TabList variant="solid" className="mt-1">
          <Tab>Cost</Tab>
          <Tab>Activity</Tab>
        </TabList>
        <TabPanels>
          <TabPanel>
            <Grid numItems={2} className="gap-2 w-full">
              {/* Total Spend Card */}
              <Col numColSpan={2}>
                <Card>
                  <Title>{entityType === 'tag' ? 'Tag' : 'Team'} Spend Overview</Title>
                  <Grid numItems={5} className="gap-4 mt-4">
                    <Card>
                      <Title>Total Spend</Title>
                      <Text className="text-2xl font-bold mt-2">
                        ${spendData.metadata.total_spend.toFixed(2)}
                      </Text>
                    </Card>
                    <Card>
                      <Title>Total Requests</Title>
                      <Text className="text-2xl font-bold mt-2">
                        {spendData.metadata.total_api_requests.toLocaleString()}
                      </Text>
                    </Card>
                    <Card>
                      <Title>Successful Requests</Title>
                      <Text className="text-2xl font-bold mt-2 text-green-600">
                        {spendData.metadata.total_successful_requests.toLocaleString()}
                      </Text>
                    </Card>
                    <Card>
                      <Title>Failed Requests</Title>
                      <Text className="text-2xl font-bold mt-2 text-red-600">
                        {spendData.metadata.total_failed_requests.toLocaleString()}
                      </Text>
                    </Card>
                    <Card>
                      <Title>Total Tokens</Title>
                      <Text className="text-2xl font-bold mt-2">
                        {spendData.metadata.total_tokens.toLocaleString()}
                      </Text>
                    </Card>
                  </Grid>
                </Card>
              </Col>

              {/* Daily Spend Chart */}
              <Col numColSpan={2}>
                <Card>
                  <Title>Daily Spend</Title>
                  <BarChart
                    data={[...spendData.results].sort((a, b) =>
                      new Date(a.date).getTime() - new Date(b.date).getTime()
                    )}
                    index="date"
                    categories={["metrics.spend"]}
                    colors={["cyan"]}
                    valueFormatter={(value) => `$${value.toFixed(2)}`}
                    yAxisWidth={100}
                    showLegend={false}
                  />
                </Card>
              </Col>

              {/* Entity Breakdown Section */}
              <Col numColSpan={2}>
                <Card>
                  <div className="flex flex-col space-y-4">
                    <div className="flex flex-col space-y-2">
                      <Title>Spend Per {entityType === 'tag' ? 'Tag' : 'Team'}</Title>
                      <div className="flex items-center text-sm text-gray-500">
                        <span>Get started tracking cost per {entityType} </span>
                        <a href="https://docs.litellm.ai/docs/proxy/tags" className="text-blue-500 hover:text-blue-700 ml-1">
                          here
                        </a>
                      </div>
                    </div>
                    <Grid numItems={2}>
                      <Col numColSpan={1}>
                        <BarChart
                          className="mt-4 h-52"
                          data={getEntityBreakdown()}
                          index="entity"
                          categories={["spend"]}
                          colors={["cyan"]}
                          valueFormatter={(value) => `$${value.toFixed(4)}`}
                          layout="vertical"
                          showLegend={false}
                          yAxisWidth={100}
                        />
                      </Col>
                      <Col numColSpan={1}>
                        <Table>
                          <TableHead>
                            <TableRow>
                              <TableHeaderCell>{entityType === 'tag' ? 'Tag' : 'Team'}</TableHeaderCell>
                              <TableHeaderCell>Spend</TableHeaderCell>
                              <TableHeaderCell className="text-green-600">Successful</TableHeaderCell>
                              <TableHeaderCell className="text-red-600">Failed</TableHeaderCell>
                              <TableHeaderCell>Tokens</TableHeaderCell>
                            </TableRow>
                          </TableHead>
                          <TableBody>
                            {getEntityBreakdown()
                              .filter(entity => entity.spend > 0)
                              .map((entity) => (
                                <TableRow key={entity.entity}>
                                  <TableCell>{entity.entity}</TableCell>
                                  <TableCell>${entity.spend.toFixed(4)}</TableCell>
                                  <TableCell className="text-green-600">
                                    {entity.successful_requests.toLocaleString()}
                                  </TableCell>
                                  <TableCell className="text-red-600">
                                    {entity.failed_requests.toLocaleString()}
                                  </TableCell>
                                  <TableCell>{entity.tokens.toLocaleString()}</TableCell>
                                </TableRow>
                              ))}
                          </TableBody>
                        </Table>
                      </Col>
                    </Grid>
                  </div>
                </Card>
              </Col>

              {/* Top API Keys */}
              <Col numColSpan={1}>
                <Card>
                  <Title>Top API Keys</Title>
                  <BarChart
                    className="mt-4 h-40"
                    data={getTopApiKeys()}
                    index="key"
                    categories={["spend"]}
                    colors={["cyan"]}
                    valueFormatter={(value) => `$${value.toFixed(2)}`}
                    layout="vertical"
                    yAxisWidth={200}
                    showLegend={false}
                  />
                </Card>
              </Col>

              {/* Top Models */}
              <Col numColSpan={1}>
                <Card>
                  <Title>Top Models</Title>
                  <BarChart
                    className="mt-4 h-40"
                    data={getTopModels()}
                    index="key"
                    categories={["spend"]}
                    colors={["cyan"]}
                    valueFormatter={(value) => `$${value.toFixed(2)}`}
                    layout="vertical"
                    yAxisWidth={200}
                    showLegend={false}
                  />
                </Card>
              </Col>

              {/* Spend by Provider */}
              <Col numColSpan={2}>
                <Card>
                  <div className="flex flex-col space-y-4">
                    <Title>Provider Usage</Title>
                    <Grid numItems={2}>
                      <Col numColSpan={1}>
                        <DonutChart
                          className="mt-4 h-40"
                          data={getProviderSpend()}
                          index="provider"
                          category="spend"
                          valueFormatter={(value) => `$${value.toFixed(2)}`}
                          colors={["cyan", "blue", "indigo", "violet", "purple"]}
                        />
                      </Col>
                      <Col numColSpan={1}>
                        <Table>
                          <TableHead>
                            <TableRow>
                              <TableHeaderCell>Provider</TableHeaderCell>
                              <TableHeaderCell>Spend</TableHeaderCell>
                              <TableHeaderCell className="text-green-600">Successful</TableHeaderCell>
                              <TableHeaderCell className="text-red-600">Failed</TableHeaderCell>
                              <TableHeaderCell>Tokens</TableHeaderCell>
                            </TableRow>
                          </TableHead>
                          <TableBody>
                            {getProviderSpend().map((provider) => (
                              <TableRow key={provider.provider}>
                                <TableCell>{provider.provider}</TableCell>
                                <TableCell>${provider.spend.toFixed(2)}</TableCell>
                                <TableCell className="text-green-600">
                                  {provider.successful_requests.toLocaleString()}
                                </TableCell>
                                <TableCell className="text-red-600">
                                  {provider.failed_requests.toLocaleString()}
                                </TableCell>
                                <TableCell>{provider.tokens.toLocaleString()}</TableCell>
                              </TableRow>
                            ))}
                          </TableBody>
                        </Table>
                      </Col>
                    </Grid>
                  </div>
                </Card>
              </Col>
            </Grid>
          </TabPanel>
          <TabPanel>
            <ActivityMetrics modelMetrics={modelMetrics} />
          </TabPanel>
        </TabPanels>
      </TabGroup>
    </div>
  );
};

export default EntityUsage;
@ -1144,6 +1144,89 @@ export const userDailyActivityCall = async (accessToken: String, startTime: Date
|
|||
    throw error;
  }
};

export const tagDailyActivityCall = async (accessToken: String, startTime: Date, endTime: Date, page: number = 1, tags: string[] | null = null) => {
  /**
   * Get daily tag activity on proxy
   */
  try {
    let url = proxyBaseUrl ? `${proxyBaseUrl}/tag/daily/activity` : `/tag/daily/activity`;
    const queryParams = new URLSearchParams();
    queryParams.append('start_date', startTime.toISOString());
    queryParams.append('end_date', endTime.toISOString());
    queryParams.append('page_size', '1000');
    queryParams.append('page', page.toString());
    if (tags) {
      queryParams.append('tags', tags.join(','));
    }
    const queryString = queryParams.toString();
    if (queryString) {
      url += `?${queryString}`;
    }

    const response = await fetch(url, {
      method: "GET",
      headers: {
        [globalLitellmHeaderName]: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
    });

    if (!response.ok) {
      const errorData = await response.text();
      handleError(errorData);
      throw new Error("Network response was not ok");
    }

    const data = await response.json();
    return data;
  } catch (error) {
    console.error("Failed to fetch tag daily activity:", error);
    throw error;
  }
};
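A minimal sketch of calling tagDailyActivityCall from UI code, paging until everything is fetched. The response fields used here (results, metadata.total_pages) are assumptions based on how the usage pages consume this endpoint, not guarantees of the API:

// Illustrative only: collect all tag activity for the last 7 days.
async function fetchAllTagActivity(accessToken: string, tags: string[] | null) {
  const endTime = new Date();
  const startTime = new Date(endTime.getTime() - 7 * 24 * 60 * 60 * 1000);
  const allResults: any[] = [];
  let page = 1;
  let totalPages = 1;
  do {
    const data = await tagDailyActivityCall(accessToken, startTime, endTime, page, tags);
    allResults.push(...(data.results ?? []));     // assumed response field
    totalPages = data.metadata?.total_pages ?? 1; // assumed response field
    page += 1;
  } while (page <= totalPages);
  return allResults;
}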
export const teamDailyActivityCall = async (accessToken: String, startTime: Date, endTime: Date, page: number = 1, teamIds: string[] | null = null) => {
  /**
   * Get daily team activity on proxy
   */
  try {
    let url = proxyBaseUrl ? `${proxyBaseUrl}/team/daily/activity` : `/team/daily/activity`;
    const queryParams = new URLSearchParams();
    queryParams.append('start_date', startTime.toISOString());
    queryParams.append('end_date', endTime.toISOString());
    queryParams.append('page_size', '1000');
    queryParams.append('page', page.toString());
    if (teamIds) {
      queryParams.append('team_ids', teamIds.join(','));
    }
    const queryString = queryParams.toString();
    if (queryString) {
      url += `?${queryString}`;
    }

    const response = await fetch(url, {
      method: "GET",
      headers: {
        [globalLitellmHeaderName]: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
    });

    if (!response.ok) {
      const errorData = await response.text();
      handleError(errorData);
      throw new Error("Network response was not ok");
    }

    const data = await response.json();
    return data;
  } catch (error) {
    console.error("Failed to fetch team daily activity:", error);
    throw error;
  }
};
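teamDailyActivityCall above differs from tagDailyActivityCall only in the endpoint path and the name of the filter parameter. A hedged sketch of how the two could share one implementation; the helper name, route map, and entity union are hypothetical, not part of this change, and the sketch reuses proxyBaseUrl, globalLitellmHeaderName, and handleError from this module:

// Hypothetical consolidation; not part of this change.
const DAILY_ACTIVITY_ROUTES = {
  tag: { path: "/tag/daily/activity", filterParam: "tags" },
  team: { path: "/team/daily/activity", filterParam: "team_ids" },
} as const;

const entityDailyActivityCall = async (
  accessToken: String,
  entity: keyof typeof DAILY_ACTIVITY_ROUTES,
  startTime: Date,
  endTime: Date,
  page: number = 1,
  filterValues: string[] | null = null,
) => {
  const { path, filterParam } = DAILY_ACTIVITY_ROUTES[entity];
  let url = proxyBaseUrl ? `${proxyBaseUrl}${path}` : path;
  const queryParams = new URLSearchParams();
  queryParams.append("start_date", startTime.toISOString());
  queryParams.append("end_date", endTime.toISOString());
  queryParams.append("page_size", "1000");
  queryParams.append("page", page.toString());
  if (filterValues) {
    queryParams.append(filterParam, filterValues.join(","));
  }
  url += `?${queryParams.toString()}`;

  const response = await fetch(url, {
    method: "GET",
    headers: {
      [globalLitellmHeaderName]: `Bearer ${accessToken}`,
      "Content-Type": "application/json",
    },
  });
  if (!response.ok) {
    handleError(await response.text());
    throw new Error("Network response was not ok");
  }
  return response.json();
};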
export const getTotalSpendCall = async (accessToken: String) => {
  /**
   * Get total spend on proxy
@@ -22,6 +22,7 @@ import ViewUserSpend from "./view_user_spend";
import TopKeyView from "./top_key_view";
import { ActivityMetrics, processActivityData } from './activity_metrics';
import { SpendMetrics, DailyData, ModelActivityData, MetricWithMetadata, KeyMetricWithMetadata } from './usage/types';
import EntityUsage from './entity_usage';

interface NewUsagePageProps {
  accessToken: string | null;

@@ -230,228 +231,255 @@ const NewUsagePage: React.FC<NewUsagePageProps> = ({

return (
<div style={{ width: "100%" }} className="p-8">
<Text>Experimental Usage page, using new `/user/daily/activity` endpoint.</Text>
<Grid numItems={2} className="gap-2 w-full mb-4">
<Col>
<Text>Select Time Range</Text>
<DateRangePicker
enableSelect={true}
value={dateValue}
onValueChange={(value) => {
setDateValue(value);
}}
/>
</Col>
</Grid>
<Text>Usage Analytics Dashboard</Text>
<TabGroup>
<TabList variant="solid" className="mt-1">
<Tab>Cost</Tab>
<Tab>Activity</Tab>
<Tab>Your Usage</Tab>
<Tab>Tag Usage</Tab>
<Tab>Team Usage</Tab>
</TabList>
<TabPanels>
{/* Cost Panel */}
{/* Your Usage Panel */}
<TabPanel>
<Grid numItems={2} className="gap-2 w-full">
{/* Total Spend Card */}
<Col numColSpan={2}>
<Text className="text-tremor-default text-tremor-content dark:text-dark-tremor-content mb-2 mt-2 text-lg">
Project Spend {new Date().toLocaleString('default', { month: 'long' })} 1 - {new Date(new Date().getFullYear(), new Date().getMonth() + 1, 0).getDate()}
</Text>

<ViewUserSpend
userID={userID}
userRole={userRole}
accessToken={accessToken}
userSpend={totalSpend}
selectedTeam={null}
userMaxBudget={null}
<Grid numItems={2} className="gap-2 w-full mb-4">
<Col>
<Text>Select Time Range</Text>
<DateRangePicker
enableSelect={true}
value={dateValue}
onValueChange={(value) => {
setDateValue(value);
}}
/>
</Col>

<Col numColSpan={2}>
<Card>
<Title>Usage Metrics</Title>
<Grid numItems={5} className="gap-4 mt-4">
<Card>
<Title>Total Requests</Title>
<Text className="text-2xl font-bold mt-2">
{userSpendData.metadata?.total_api_requests?.toLocaleString() || 0}
</Grid>
<TabGroup>
<TabList variant="solid" className="mt-1">
<Tab>Cost</Tab>
<Tab>Activity</Tab>
</TabList>
<TabPanels>
{/* Cost Panel */}
<TabPanel>
<Grid numItems={2} className="gap-2 w-full">
{/* Total Spend Card */}
<Col numColSpan={2}>
<Text className="text-tremor-default text-tremor-content dark:text-dark-tremor-content mb-2 mt-2 text-lg">
Project Spend {new Date().toLocaleString('default', { month: 'long' })} 1 - {new Date(new Date().getFullYear(), new Date().getMonth() + 1, 0).getDate()}
</Text>
</Card>
<Card>
<Title>Successful Requests</Title>
<Text className="text-2xl font-bold mt-2 text-green-600">
{userSpendData.metadata?.total_successful_requests?.toLocaleString() || 0}
</Text>
</Card>
<Card>
<Title>Failed Requests</Title>
<Text className="text-2xl font-bold mt-2 text-red-600">
{userSpendData.metadata?.total_failed_requests?.toLocaleString() || 0}
</Text>
</Card>
<Card>
<Title>Total Tokens</Title>
<Text className="text-2xl font-bold mt-2">
{userSpendData.metadata?.total_tokens?.toLocaleString() || 0}
</Text>
</Card>
<Card>
<Title>Average Cost per Request</Title>
<Text className="text-2xl font-bold mt-2">
${((totalSpend || 0) / (userSpendData.metadata?.total_api_requests || 1)).toFixed(4)}
</Text>
</Card>
</Grid>
</Card>
</Col>

{/* Daily Spend Chart */}
<Col numColSpan={2}>
<Card>
<Title>Daily Spend</Title>
<BarChart
data={[...userSpendData.results].sort((a, b) =>
new Date(a.date).getTime() - new Date(b.date).getTime()
)}
index="date"
categories={["metrics.spend"]}
colors={["cyan"]}
valueFormatter={(value) => `$${value.toFixed(2)}`}
yAxisWidth={100}
showLegend={false}
customTooltip={({ payload, active }) => {
if (!active || !payload?.[0]) return null;
const data = payload[0].payload;
return (
<div className="bg-white p-4 shadow-lg rounded-lg border">
<p className="font-bold">{data.date}</p>
<p className="text-cyan-500">Spend: ${data.metrics.spend.toFixed(2)}</p>
<p className="text-gray-600">Requests: {data.metrics.api_requests}</p>
<p className="text-gray-600">Successful: {data.metrics.successful_requests}</p>
<p className="text-gray-600">Failed: {data.metrics.failed_requests}</p>
<p className="text-gray-600">Tokens: {data.metrics.total_tokens}</p>
</div>
);
}}
/>
</Card>
</Col>
{/* Top API Keys */}
<Col numColSpan={1}>
<Card className="h-full">
<Title>Top API Keys</Title>
<TopKeyView
topKeys={getTopKeys()}
accessToken={accessToken}
userID={userID}
userRole={userRole}
teams={null}
/>
</Card>
</Col>

{/* Top Models */}
<Col numColSpan={1}>
<Card className="h-full">
<div className="flex justify-between items-center mb-4">
<Title>Top Models</Title>
</div>
<BarChart
className="mt-4 h-40"
data={getTopModels()}
index="key"
categories={["spend"]}
colors={["cyan"]}
valueFormatter={(value) => `$${value.toFixed(2)}`}
layout="vertical"
yAxisWidth={200}
showLegend={false}
customTooltip={({ payload, active }) => {
if (!active || !payload?.[0]) return null;
const data = payload[0].payload;
return (
<div className="bg-white p-4 shadow-lg rounded-lg border">
<p className="font-bold">{data.key}</p>
<p className="text-cyan-500">Spend: ${data.spend.toFixed(2)}</p>
<p className="text-gray-600">Total Requests: {data.requests.toLocaleString()}</p>
<p className="text-green-600">Successful: {data.successful_requests.toLocaleString()}</p>
<p className="text-red-600">Failed: {data.failed_requests.toLocaleString()}</p>
<p className="text-gray-600">Tokens: {data.tokens.toLocaleString()}</p>
</div>
);
}}
/>
</Card>
</Col>

{/* Spend by Provider */}
<Col numColSpan={2}>
<Card className="h-full">
<div className="flex justify-between items-center mb-4">
<Title>Spend by Provider</Title>
</div>
<Grid numItems={2}>
<Col numColSpan={1}>
<DonutChart
className="mt-4 h-40"
data={getProviderSpend()}
index="provider"
category="spend"
valueFormatter={(value) => `$${value.toFixed(2)}`}
colors={["cyan"]}

<ViewUserSpend
userID={userID}
userRole={userRole}
accessToken={accessToken}
userSpend={totalSpend}
selectedTeam={null}
userMaxBudget={null}
/>
</Col>
<Col numColSpan={1}>
<Table>
<TableHead>
<TableRow>
<TableHeaderCell>Provider</TableHeaderCell>
<TableHeaderCell>Spend</TableHeaderCell>
<TableHeaderCell className="text-green-600">Successful</TableHeaderCell>
<TableHeaderCell className="text-red-600">Failed</TableHeaderCell>
<TableHeaderCell>Tokens</TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
{getProviderSpend()
.filter(provider => provider.spend > 0)
.map((provider) => (
<TableRow key={provider.provider}>
<TableCell>{provider.provider}</TableCell>
<TableCell>
${provider.spend < 0.00001
? "less than 0.00001"
: provider.spend.toFixed(2)}
</TableCell>
<TableCell className="text-green-600">
{provider.successful_requests.toLocaleString()}
</TableCell>
<TableCell className="text-red-600">
{provider.failed_requests.toLocaleString()}
</TableCell>
<TableCell>{provider.tokens.toLocaleString()}</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</Col>
</Grid>
</Card>
</Col>

{/* Usage Metrics */}

</Grid>
<Col numColSpan={2}>
<Card>
<Title>Usage Metrics</Title>
<Grid numItems={5} className="gap-4 mt-4">
<Card>
<Title>Total Requests</Title>
<Text className="text-2xl font-bold mt-2">
{userSpendData.metadata?.total_api_requests?.toLocaleString() || 0}
</Text>
</Card>
<Card>
<Title>Successful Requests</Title>
<Text className="text-2xl font-bold mt-2 text-green-600">
{userSpendData.metadata?.total_successful_requests?.toLocaleString() || 0}
</Text>
</Card>
<Card>
<Title>Failed Requests</Title>
<Text className="text-2xl font-bold mt-2 text-red-600">
{userSpendData.metadata?.total_failed_requests?.toLocaleString() || 0}
</Text>
</Card>
<Card>
<Title>Total Tokens</Title>
<Text className="text-2xl font-bold mt-2">
{userSpendData.metadata?.total_tokens?.toLocaleString() || 0}
</Text>
</Card>
<Card>
<Title>Average Cost per Request</Title>
<Text className="text-2xl font-bold mt-2">
${((totalSpend || 0) / (userSpendData.metadata?.total_api_requests || 1)).toFixed(4)}
</Text>
</Card>
</Grid>
</Card>
</Col>

{/* Daily Spend Chart */}
<Col numColSpan={2}>
<Card>
<Title>Daily Spend</Title>
<BarChart
data={[...userSpendData.results].sort((a, b) =>
new Date(a.date).getTime() - new Date(b.date).getTime()
)}
index="date"
categories={["metrics.spend"]}
colors={["cyan"]}
valueFormatter={(value) => `$${value.toFixed(2)}`}
yAxisWidth={100}
showLegend={false}
customTooltip={({ payload, active }) => {
if (!active || !payload?.[0]) return null;
const data = payload[0].payload;
return (
<div className="bg-white p-4 shadow-lg rounded-lg border">
<p className="font-bold">{data.date}</p>
<p className="text-cyan-500">Spend: ${data.metrics.spend.toFixed(2)}</p>
<p className="text-gray-600">Requests: {data.metrics.api_requests}</p>
<p className="text-gray-600">Successful: {data.metrics.successful_requests}</p>
<p className="text-gray-600">Failed: {data.metrics.failed_requests}</p>
<p className="text-gray-600">Tokens: {data.metrics.total_tokens}</p>
</div>
);
}}
/>
</Card>
</Col>
{/* Top API Keys */}
<Col numColSpan={1}>
<Card className="h-full">
<Title>Top API Keys</Title>
<TopKeyView
topKeys={getTopKeys()}
accessToken={accessToken}
userID={userID}
userRole={userRole}
teams={null}
/>
</Card>
</Col>

{/* Top Models */}
<Col numColSpan={1}>
<Card className="h-full">
<div className="flex justify-between items-center mb-4">
<Title>Top Models</Title>
</div>
<BarChart
className="mt-4 h-40"
data={getTopModels()}
index="key"
categories={["spend"]}
colors={["cyan"]}
valueFormatter={(value) => `$${value.toFixed(2)}`}
layout="vertical"
yAxisWidth={200}
showLegend={false}
customTooltip={({ payload, active }) => {
if (!active || !payload?.[0]) return null;
const data = payload[0].payload;
return (
<div className="bg-white p-4 shadow-lg rounded-lg border">
<p className="font-bold">{data.key}</p>
<p className="text-cyan-500">Spend: ${data.spend.toFixed(2)}</p>
<p className="text-gray-600">Total Requests: {data.requests.toLocaleString()}</p>
<p className="text-green-600">Successful: {data.successful_requests.toLocaleString()}</p>
<p className="text-red-600">Failed: {data.failed_requests.toLocaleString()}</p>
<p className="text-gray-600">Tokens: {data.tokens.toLocaleString()}</p>
</div>
);
}}
/>
</Card>
</Col>

{/* Spend by Provider */}
<Col numColSpan={2}>
<Card className="h-full">
<div className="flex justify-between items-center mb-4">
<Title>Spend by Provider</Title>
</div>
<Grid numItems={2}>
<Col numColSpan={1}>
<DonutChart
className="mt-4 h-40"
data={getProviderSpend()}
index="provider"
category="spend"
valueFormatter={(value) => `$${value.toFixed(2)}`}
colors={["cyan"]}
/>
</Col>
<Col numColSpan={1}>
<Table>
<TableHead>
<TableRow>
<TableHeaderCell>Provider</TableHeaderCell>
<TableHeaderCell>Spend</TableHeaderCell>
<TableHeaderCell className="text-green-600">Successful</TableHeaderCell>
<TableHeaderCell className="text-red-600">Failed</TableHeaderCell>
<TableHeaderCell>Tokens</TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
{getProviderSpend()
.filter(provider => provider.spend > 0)
.map((provider) => (
<TableRow key={provider.provider}>
<TableCell>{provider.provider}</TableCell>
<TableCell>
${provider.spend < 0.00001
? "less than 0.00001"
: provider.spend.toFixed(2)}
</TableCell>
<TableCell className="text-green-600">
{provider.successful_requests.toLocaleString()}
</TableCell>
<TableCell className="text-red-600">
{provider.failed_requests.toLocaleString()}
</TableCell>
<TableCell>{provider.tokens.toLocaleString()}</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</Col>
</Grid>
</Card>
</Col>

{/* Usage Metrics */}

</Grid>
</TabPanel>

{/* Activity Panel */}
<TabPanel>
<ActivityMetrics modelMetrics={modelMetrics} />
</TabPanel>
</TabPanels>
</TabGroup>
</TabPanel>

{/* Activity Panel */}
{/* Tag Usage Panel */}
<TabPanel>
<ActivityMetrics modelMetrics={modelMetrics} />
<EntityUsage
accessToken={accessToken}
entityType="tag"
/>
</TabPanel>

{/* Team Usage Panel */}
<TabPanel>
<EntityUsage
accessToken={accessToken}
entityType="team"
/>
</TabPanel>
</TabPanels>
</TabGroup>

</div>
);
};