Merge branch 'main' into litellm_gemini_refactoring

Commit 63216f42b8 by Krish Dholakia, 2024-06-17 19:50:56 -07:00, committed by GitHub
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
27 changed files with 335 additions and 182 deletions

View file

@@ -2,6 +2,15 @@ import Image from '@theme/IdealImage';
 # Athina
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 [Athina](https://athina.ai/) is an evaluation framework and production monitoring platform for your LLM-powered app. Athina is designed to enhance the performance and reliability of AI applications through real-time monitoring, granular analytics, and plug-and-play evaluations.
 <Image img={require('../../img/athina_dashboard.png')} />

View file

@@ -1,5 +1,14 @@
 # Greenscale - Track LLM Spend and Responsible Usage
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 [Greenscale](https://greenscale.ai/) is a production monitoring platform for your LLM-powered app that provides you granular key insights into your GenAI spending and responsible usage. Greenscale only captures metadata to minimize the exposure risk of personally identifiable information (PII).
 ## Getting Started

View file

@@ -1,4 +1,13 @@
 # Helicone Tutorial
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 [Helicone](https://helicone.ai/) is an open source observability platform that proxies your OpenAI traffic and provides you key insights into your spend, latency and usage.
 ## Use Helicone to log requests across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM)

View file

@@ -1,6 +1,6 @@
 import Image from '@theme/IdealImage';
-# Langfuse - Logging LLM Input/Output
+# 🔥 Langfuse - Logging LLM Input/Output
 LangFuse is open Source Observability & Analytics for LLM Apps
 Detailed production traces and a granular view on quality, cost and latency

View file

@@ -1,6 +1,16 @@
 import Image from '@theme/IdealImage';
 # Langsmith - Logging LLM Input/Output
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 An all-in-one developer platform for every step of the application lifecycle
 https://smith.langchain.com/

View file

@@ -1,6 +1,6 @@
 import Image from '@theme/IdealImage';
-# Logfire - Logging LLM Input/Output
+# 🔥 Logfire - Logging LLM Input/Output
 Logfire is open Source Observability & Analytics for LLM Apps
 Detailed production traces and a granular view on quality, cost and latency
@@ -14,10 +14,14 @@ join our [discord](https://discord.gg/wuPM9dRgDw)
 ## Pre-Requisites
-Ensure you have run `pip install logfire` for this integration
+Ensure you have installed the following packages to use this integration
 ```shell
-pip install logfire litellm
+pip install litellm
+
+pip install opentelemetry-api==1.25.0
+pip install opentelemetry-sdk==1.25.0
+pip install opentelemetry-exporter-otlp==1.25.0
 ```
 ## Quick Start
@@ -25,8 +29,7 @@ pip install logfire litellm
 Get your Logfire token from [Logfire](https://logfire.pydantic.dev/)
 ```python
-litellm.success_callback = ["logfire"]
-litellm.failure_callback = ["logfire"] # logs errors to logfire
+litellm.callbacks = ["logfire"]
 ```
 ```python

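For context, a minimal end-to-end sketch of the callback-based setup the updated Logfire docs describe. The model, prompt, and token value are illustrative; it assumes `LOGFIRE_TOKEN` is set, which the new initializer requires.

```python
import os

import litellm

# The Logfire integration now rides on OpenTelemetry; the token must be present
# before the callback is initialized (the initializer raises ValueError otherwise).
os.environ["LOGFIRE_TOKEN"] = "your-logfire-token"  # placeholder value

# One registration replaces the old success_callback/failure_callback pair;
# successes and failures are both exported.
litellm.callbacks = ["logfire"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are u"}],
    max_tokens=10,
)
print(response)
```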
View file

@@ -1,5 +1,13 @@
 # Lunary - Logging and tracing LLM input/output
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 [Lunary](https://lunary.ai/) is an open-source AI developer platform providing observability, prompt management, and evaluation tools for AI developers.
 <video controls width='900' >

View file

@@ -2,6 +2,15 @@ import Image from '@theme/IdealImage';
 # Promptlayer Tutorial
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 Promptlayer is a platform for prompt engineers. Log OpenAI requests. Search usage history. Track performance. Visually manage prompt templates.
 <Image img={require('../../img/promptlayer.png')} />

View file

@@ -1,5 +1,14 @@
 import Image from '@theme/IdealImage';
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 # Sentry - Log LLM Exceptions
 [Sentry](https://sentry.io/) provides error monitoring for production. LiteLLM can add breadcrumbs and send exceptions to Sentry with this integration

View file

@@ -1,4 +1,12 @@
 # Supabase Tutorial
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 [Supabase](https://supabase.com/) is an open source Firebase alternative.
 Start your project with a Postgres database, Authentication, instant APIs, Edge Functions, Realtime subscriptions, Storage, and Vector embeddings.

View file

@@ -1,6 +1,16 @@
 import Image from '@theme/IdealImage';
 # Weights & Biases - Logging LLM Input/Output
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 Weights & Biases helps AI developers build better models faster https://wandb.ai
 <Image img={require('../../img/wandb.png')} />

View file

@@ -172,10 +172,8 @@ const sidebars = {
 "proxy/custom_pricing",
 "routing",
 "scheduler",
-"rules",
 "set_keys",
 "budget_manager",
-"contributing",
 "secret",
 "completion/token_usage",
 "load_test",
@@ -183,11 +181,11 @@ const sidebars = {
 type: "category",
 label: "Logging & Observability",
 items: [
+"observability/langfuse_integration",
+"observability/logfire_integration",
 "debugging/local_debugging",
 "observability/raw_request_response",
-"observability/callbacks",
 "observability/custom_callback",
-"observability/langfuse_integration",
 "observability/sentry",
 "observability/lago",
 "observability/openmeter",
@@ -233,6 +231,8 @@ const sidebars = {
 label: "Extras",
 items: [
 "extras/contributing",
+"contributing",
+"rules",
 "proxy_server",
 {
 type: "category",

View file

@@ -37,7 +37,7 @@ input_callback: List[Union[str, Callable]] = []
 success_callback: List[Union[str, Callable]] = []
 failure_callback: List[Union[str, Callable]] = []
 service_callback: List[Union[str, Callable]] = []
-_custom_logger_compatible_callbacks_literal = Literal["lago", "openmeter"]
+_custom_logger_compatible_callbacks_literal = Literal["lago", "openmeter", "logfire"]
 callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = []
 _langfuse_default_tags: Optional[
     List[

View file

@@ -23,8 +23,12 @@ class JsonFormatter(Formatter):
         super(JsonFormatter, self).__init__()

     def format(self, record):
-        json_record = {}
-        json_record["message"] = record.getMessage()
+        json_record = {
+            "message": record.getMessage(),
+            "level": record.levelname,
+            "timestamp": self.formatTime(record, self.datefmt),
+        }
         return json.dumps(json_record)

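For illustration, a self-contained sketch of how the enriched formatter behaves; the class body mirrors the diff, while the logger name and message are invented. On the proxy this output path is enabled via the `json_logs: true` setting added to the example config further down.

```python
import json
import logging
from logging import Formatter


class JsonFormatter(Formatter):
    def __init__(self):
        super(JsonFormatter, self).__init__()

    def format(self, record):
        # Each record now carries its level and timestamp alongside the message.
        json_record = {
            "message": record.getMessage(),
            "level": record.levelname,
            "timestamp": self.formatTime(record, self.datefmt),
        }
        return json.dumps(json_record)


handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter())
logger = logging.getLogger("json_log_demo")
logger.addHandler(handler)
logger.setLevel(logging.INFO)

# Prints something like:
# {"message": "proxy started", "level": "INFO", "timestamp": "2024-06-17 19:50:56,000"}
logger.info("proxy started")
```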
View file

@@ -1,20 +1,21 @@
 import os
 from dataclasses import dataclass
 from datetime import datetime
-import litellm
-from litellm.integrations.custom_logger import CustomLogger
-from litellm._logging import verbose_logger
-from litellm.types.services import ServiceLoggerPayload
 from functools import wraps
-from typing import Union, Optional, TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Optional, Union
+
+import litellm
+from litellm._logging import verbose_logger
+from litellm.integrations.custom_logger import CustomLogger
+from litellm.types.services import ServiceLoggerPayload

 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
-    from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth
     from litellm.proxy._types import (
         ManagementEndpointLoggingPayload as _ManagementEndpointLoggingPayload,
     )
+    from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth

     Span = _Span
     UserAPIKeyAuth = _UserAPIKeyAuth
@@ -107,8 +108,9 @@ class OpenTelemetry(CustomLogger):
         start_time: Optional[datetime] = None,
         end_time: Optional[datetime] = None,
     ):
-        from opentelemetry import trace
         from datetime import datetime
+        from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode

         _start_time_ns = start_time
@@ -145,8 +147,9 @@ class OpenTelemetry(CustomLogger):
         start_time: Optional[datetime] = None,
         end_time: Optional[datetime] = None,
     ):
-        from opentelemetry import trace
         from datetime import datetime
+        from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode

         _start_time_ns = start_time
@@ -179,8 +182,8 @@ class OpenTelemetry(CustomLogger):
     async def async_post_call_failure_hook(
         self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
     ):
-        from opentelemetry.trace import Status, StatusCode
         from opentelemetry import trace
+        from opentelemetry.trace import Status, StatusCode

         parent_otel_span = user_api_key_dict.parent_otel_span
         if parent_otel_span is not None:
@@ -202,8 +205,8 @@ class OpenTelemetry(CustomLogger):
             parent_otel_span.end(end_time=self._to_ns(datetime.now()))

     def _handle_sucess(self, kwargs, response_obj, start_time, end_time):
-        from opentelemetry.trace import Status, StatusCode
         from opentelemetry import trace
+        from opentelemetry.trace import Status, StatusCode

         verbose_logger.debug(
             "OpenTelemetry Logger: Logging kwargs: %s, OTEL config settings=%s",
@@ -253,9 +256,10 @@ class OpenTelemetry(CustomLogger):
         span.end(end_time=self._to_ns(end_time))

     def set_tools_attributes(self, span: Span, tools):
-        from litellm.proxy._types import SpanAttributes
         import json
+
+        from litellm.proxy._types import SpanAttributes

         if not tools:
             return
@@ -320,7 +324,7 @@ class OpenTelemetry(CustomLogger):
         )

         span.set_attribute(
-            SpanAttributes.LLM_IS_STREAMING, optional_params.get("stream", False)
+            SpanAttributes.LLM_IS_STREAMING, str(optional_params.get("stream", False))
         )

         if optional_params.get("tools"):
@@ -439,7 +443,7 @@ class OpenTelemetry(CustomLogger):
         #############################################
         ########## LLM Response Attributes ##########
         #############################################
-        if _raw_response:
+        if _raw_response and isinstance(_raw_response, str):
             # cast sr -> dict
             import json
@@ -478,10 +482,10 @@ class OpenTelemetry(CustomLogger):
         return _parent_context

     def _get_span_context(self, kwargs):
+        from opentelemetry import trace
         from opentelemetry.trace.propagation.tracecontext import (
             TraceContextTextMapPropagator,
         )
-        from opentelemetry import trace

         litellm_params = kwargs.get("litellm_params", {}) or {}
         proxy_server_request = litellm_params.get("proxy_server_request", {}) or {}
@@ -505,17 +509,17 @@ class OpenTelemetry(CustomLogger):
         return TraceContextTextMapPropagator().extract(carrier=carrier), None

     def _get_span_processor(self):
-        from opentelemetry.sdk.trace.export import (
-            SpanExporter,
-            SimpleSpanProcessor,
-            BatchSpanProcessor,
-            ConsoleSpanExporter,
+        from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
+            OTLPSpanExporter as OTLPSpanExporterGRPC,
         )
         from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
             OTLPSpanExporter as OTLPSpanExporterHTTP,
         )
-        from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
-            OTLPSpanExporter as OTLPSpanExporterGRPC,
+        from opentelemetry.sdk.trace.export import (
+            BatchSpanProcessor,
+            ConsoleSpanExporter,
+            SimpleSpanProcessor,
+            SpanExporter,
         )

         verbose_logger.debug(
@@ -574,8 +578,9 @@ class OpenTelemetry(CustomLogger):
         logging_payload: ManagementEndpointLoggingPayload,
         parent_otel_span: Optional[Span] = None,
     ):
-        from opentelemetry import trace
         from datetime import datetime
+        from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode

         _start_time_ns = logging_payload.start_time
@@ -619,8 +624,9 @@ class OpenTelemetry(CustomLogger):
         logging_payload: ManagementEndpointLoggingPayload,
         parent_otel_span: Optional[Span] = None,
     ):
-        from opentelemetry import trace
         from datetime import datetime
+        from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode

         _start_time_ns = logging_payload.start_time

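A small standalone sketch of the span-attribute pattern touched above: OpenTelemetry attribute values are limited to primitive types, so casting the streaming flag with `str()` guards against missing or non-primitive values. The exporter wiring and attribute keys below are illustrative, not the proxy's actual configuration.

```python
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

# A console exporter stands in for the OTLP exporters selected in _get_span_processor().
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)

tracer = trace.get_tracer("litellm-example")
optional_params = {"stream": True, "max_tokens": 10}

with tracer.start_as_current_span("litellm_request") as span:
    # str() keeps the attribute a valid primitive even if "stream" is absent or None.
    span.set_attribute("llm.is_streaming", str(optional_params.get("stream", False)))
    span.set_attribute("llm.max_tokens", optional_params.get("max_tokens", 0))
```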
View file

@@ -10,7 +10,7 @@ import sys
 import time
 import traceback
 import uuid
-from typing import Callable, Optional
+from typing import Any, Callable, Dict, List, Optional

 import litellm
 from litellm import (
@@ -72,6 +72,8 @@ from ..integrations.supabase import Supabase
 from ..integrations.traceloop import TraceloopLogger
 from ..integrations.weights_biases import WeightsBiasesLogger

+_in_memory_loggers: List[Any] = []
+

 class Logging:
     global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, logfireLogger, prometheusLogger, slack_app
@@ -1612,6 +1614,7 @@ class Logging:
                         level=LogfireLevel.ERROR.value,
                         print_verbose=print_verbose,
                     )
+
             except Exception as e:
                 print_verbose(
                     f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}"
@@ -1786,6 +1789,37 @@ def _init_custom_logger_compatible_class(
     logging_integration: litellm._custom_logger_compatible_callbacks_literal,
 ) -> Callable:
     if logging_integration == "lago":
-        return LagoLogger()  # type: ignore
+        for callback in _in_memory_loggers:
+            if isinstance(callback, LagoLogger):
+                return callback  # type: ignore
+
+        lago_logger = LagoLogger()
+        _in_memory_loggers.append(lago_logger)
+        return lago_logger  # type: ignore
     elif logging_integration == "openmeter":
-        return OpenMeterLogger()  # type: ignore
+        for callback in _in_memory_loggers:
+            if isinstance(callback, OpenMeterLogger):
+                return callback  # type: ignore
+
+        _openmeter_logger = OpenMeterLogger()
+        _in_memory_loggers.append(_openmeter_logger)
+        return _openmeter_logger  # type: ignore
+    elif logging_integration == "logfire":
+        if "LOGFIRE_TOKEN" not in os.environ:
+            raise ValueError("LOGFIRE_TOKEN not found in environment variables")
+        from litellm.integrations.opentelemetry import (
+            OpenTelemetry,
+            OpenTelemetryConfig,
+        )
+
+        otel_config = OpenTelemetryConfig(
+            exporter="otlp_http",
+            endpoint="https://logfire-api.pydantic.dev/v1/traces",
+            headers=f"Authorization={os.getenv('LOGFIRE_TOKEN')}",
+        )
+
+        for callback in _in_memory_loggers:
+            if isinstance(callback, OpenTelemetry):
+                return callback  # type: ignore
+        _otel_logger = OpenTelemetry(config=otel_config)
+        _in_memory_loggers.append(_otel_logger)
+        return _otel_logger  # type: ignore

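For context, a stripped-down sketch of the caching behaviour the new `_in_memory_loggers` list introduces: requesting the same custom-logger-compatible callback twice now returns the existing instance instead of constructing a duplicate. `DummyLogger` is a stand-in invented for illustration.

```python
from typing import Any, List

_in_memory_loggers: List[Any] = []


class DummyLogger:  # stand-in for LagoLogger / OpenMeterLogger / OpenTelemetry
    pass


def get_or_create_logger() -> DummyLogger:
    # Reuse an already-initialized instance if one exists...
    for callback in _in_memory_loggers:
        if isinstance(callback, DummyLogger):
            return callback
    # ...otherwise create it once and remember it for subsequent calls.
    logger = DummyLogger()
    _in_memory_loggers.append(logger)
    return logger


assert get_or_create_logger() is get_or_create_logger()  # same object both times
```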
View file

@@ -107,19 +107,17 @@ from .llms.databricks import DatabricksChatCompletion
 from .llms.huggingface_restapi import Huggingface
 from .llms.openai import OpenAIChatCompletion, OpenAITextCompletion
 from .llms.predibase import PredibaseChatCompletion
-from .llms.bedrock_httpx import BedrockLLM, BedrockConverseLLM
-from .llms.vertex_httpx import VertexLLM
-from .llms.triton import TritonChatCompletion
-from .llms.text_completion_codestral import CodestralTextCompletion
 from .llms.prompt_templates.factory import (
     custom_prompt,
     function_call_prompt,
     map_system_message_pt,
     prompt_factory,
 )
+from .llms.text_completion_codestral import CodestralTextCompletion
 from .llms.triton import TritonChatCompletion
 from .llms.vertex_httpx import VertexLLM
 from .types.llms.openai import HttpxBinaryResponseContent
+from .types.utils import ChatCompletionMessageToolCall

 encoding = tiktoken.get_encoding("cl100k_base")
 from litellm.utils import (
@@ -431,6 +429,7 @@ def mock_completion(
     messages: List,
     stream: Optional[bool] = False,
     mock_response: Union[str, Exception] = "This is a mock request",
+    mock_tool_calls: Optional[List] = None,
     logging=None,
     custom_llm_provider=None,
     **kwargs,
@@ -499,6 +498,12 @@
     model_response["created"] = int(time.time())
     model_response["model"] = model

+    if mock_tool_calls:
+        model_response["choices"][0]["message"]["tool_calls"] = [
+            ChatCompletionMessageToolCall(**tool_call)
+            for tool_call in mock_tool_calls
+        ]
+
     setattr(
         model_response,
         "usage",
@@ -612,6 +617,7 @@ def completion(
     args = locals()
     api_base = kwargs.get("api_base", None)
     mock_response = kwargs.get("mock_response", None)
+    mock_tool_calls = kwargs.get("mock_tool_calls", None)
     force_timeout = kwargs.get("force_timeout", 600)  ## deprecated
     logger_fn = kwargs.get("logger_fn", None)
     verbose = kwargs.get("verbose", False)
@@ -930,12 +936,13 @@ def completion(
             litellm_params=litellm_params,
             custom_llm_provider=custom_llm_provider,
         )
-        if mock_response:
+        if mock_response or mock_tool_calls:
             return mock_completion(
                 model,
                 messages,
                 stream=stream,
                 mock_response=mock_response,
+                mock_tool_calls=mock_tool_calls,
                 logging=logging,
                 acompletion=acompletion,
                 mock_delay=kwargs.get("mock_delay", None),

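For context, a hedged sketch of what the new `mock_tool_calls` parameter enables: forcing a tool-call response without hitting a provider. The payload assumes the standard OpenAI tool-call shape; the id, function name, and arguments are made up.

```python
import litellm

# With mock_tool_calls set, completion() short-circuits into mock_completion()
# even without mock_response, and the tool calls land on the first choice's message.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What is the weather in SF?"}],
    mock_tool_calls=[
        {
            "id": "call_abc123",  # illustrative id
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "arguments": '{"location": "San Francisco", "unit": "celsius"}',
            },
        }
    ],
)
print(response.choices[0].message.tool_calls)
```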
View file

@@ -1,7 +1,8 @@
 import json
 import logging
-from logging import Formatter
 import os
+from logging import Formatter
+
 from litellm import json_logs

 # Set default log level to INFO
@@ -14,8 +15,11 @@
         super(JsonFormatter, self).__init__()

     def format(self, record):
-        json_record = {}
-        json_record["message"] = record.getMessage()
+        json_record = {
+            "message": record.getMessage(),
+            "level": record.levelname,
+            "timestamp": self.formatTime(record, self.datefmt),
+        }
         return json.dumps(json_record)

View file

@@ -79,6 +79,7 @@ litellm_settings:
   success_callback: ["langfuse"]
   failure_callback: ["langfuse"]
   cache: true
+  json_logs: true

 general_settings:
   alerting: ["slack"]

View file

@@ -1,13 +1,17 @@
-from pydantic import BaseModel, Extra, Field, model_validator, Json, ConfigDict
-from dataclasses import fields
 import enum
-from typing import Optional, List, Union, Dict, Literal, Any, TypedDict, TYPE_CHECKING
+import json
+import os
+import sys
+import uuid
+from dataclasses import fields
 from datetime import datetime
-import uuid, json, sys, os
+from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, TypedDict, Union
-from litellm.types.router import UpdateRouterConfig
-from litellm.types.utils import ProviderField
+
+from pydantic import BaseModel, ConfigDict, Extra, Field, Json, model_validator
 from typing_extensions import Annotated
+
+from litellm.types.router import UpdateRouterConfig
+from litellm.types.utils import ProviderField

 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
@@ -283,12 +287,16 @@ class LiteLLMRoutes(enum.Enum):
         "/metrics",
     ]

-    internal_user_routes: List = [
+    internal_user_routes: List = (
+        [
         "/key/generate",
         "/key/update",
         "/key/delete",
         "/key/info",
-    ] + spend_tracking_routes
+        ]
+        + spend_tracking_routes
+        + sso_only_routes
+    )

 # class LiteLLMAllowedRoutes(LiteLLMBase):

View file

@@ -7,59 +7,56 @@ Returns a UserAPIKeyAuth object if the API key is valid
 """

+import asyncio
 import json
+import secrets
+import traceback
 from datetime import datetime, timedelta, timezone
 from typing import Optional
-import secrets
 from uuid import uuid4

 import fastapi
-from fastapi import Request
-from pydantic import BaseModel
-import litellm
-import traceback
-import asyncio
 from fastapi import (
-    FastAPI,
-    Request,
-    HTTPException,
-    status,
-    Path,
     Depends,
+    FastAPI,
-    Header,
-    Response,
-    Form,
-    UploadFile,
     File,
+    Form,
+    Header,
+    HTTPException,
+    Path,
+    Request,
+    Response,
+    UploadFile,
+    status,
 )
-from fastapi.responses import (
-    StreamingResponse,
-    FileResponse,
-    ORJSONResponse,
-    JSONResponse,
-)
-from fastapi.openapi.utils import get_openapi
-from fastapi.responses import RedirectResponse
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.staticfiles import StaticFiles
+from fastapi.openapi.utils import get_openapi
+from fastapi.responses import (
+    FileResponse,
+    JSONResponse,
+    ORJSONResponse,
+    RedirectResponse,
+    StreamingResponse,
+)
 from fastapi.security.api_key import APIKeyHeader
-from litellm.proxy._types import *
-from litellm._logging import verbose_logger, verbose_proxy_logger
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+
+import litellm
+from litellm._logging import verbose_logger, verbose_proxy_logger
+from litellm.proxy._types import *
 from litellm.proxy.auth.auth_checks import (
+    allowed_routes_check,
     common_checks,
+    get_actual_routes,
     get_end_user_object,
     get_org_object,
     get_team_object,
     get_user_object,
-    allowed_routes_check,
-    get_actual_routes,
     log_to_opentelemetry,
 )
-from litellm.proxy.utils import _to_ns
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
+from litellm.proxy.utils import _to_ns

 api_key_header = APIKeyHeader(
     name="Authorization", auto_error=False, description="Bearer token"
@@ -88,20 +85,20 @@ async def user_api_key_auth(
 ) -> UserAPIKeyAuth:
     from litellm.proxy.proxy_server import (
-        litellm_proxy_admin_name,
+        allowed_routes_check,
         common_checks,
-        master_key,
-        prisma_client,
-        llm_model_list,
-        user_custom_auth,
         custom_db_client,
         general_settings,
-        proxy_logging_obj,
-        open_telemetry_logger,
-        user_api_key_cache,
-        jwt_handler,
-        allowed_routes_check,
         get_actual_routes,
+        jwt_handler,
+        litellm_proxy_admin_name,
+        llm_model_list,
+        master_key,
+        open_telemetry_logger,
+        prisma_client,
+        proxy_logging_obj,
+        user_api_key_cache,
+        user_custom_auth,
     )

     try:
@@ -1004,7 +1001,7 @@
             ):
                 pass
-            elif _user_role == LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY:
+            elif _user_role == LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY.value:
                 if route in LiteLLMRoutes.openai_routes.value:
                     raise HTTPException(
                         status_code=status.HTTP_403_FORBIDDEN,
@@ -1031,7 +1028,7 @@
                         detail=f"user not allowed to access this route, role= {_user_role}. Trying to access: {route}",
                     )
             elif (
-                _user_role == LitellmUserRoles.INTERNAL_USER
+                _user_role == LitellmUserRoles.INTERNAL_USER.value
                 and route in LiteLLMRoutes.internal_user_routes.value
             ):
                 pass
@@ -1059,6 +1056,7 @@
             # this token is only used for managing the ui
             allowed_routes = [
                 "/sso",
+                "/sso/get/logout_url",
                 "/login",
                 "/key/generate",
                 "/key/update",
@@ -1144,8 +1142,8 @@
             raise Exception()
     except Exception as e:
         verbose_proxy_logger.error(
-            "litellm.proxy.proxy_server.user_api_key_auth(): Exception occured - {}".format(
-                str(e)
+            "litellm.proxy.proxy_server.user_api_key_auth(): Exception occured - {}\n{}".format(
+                str(e), traceback.format_exc()
             )
         )
@@ -1156,7 +1154,6 @@
             user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span),
         )
-        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, litellm.BudgetExceededError):
             raise ProxyException(
                 message=e.message, type="auth_error", param=None, code=400

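For context, a small illustration of the bug class the `.value` comparisons above address: a role stored as a plain string never compares equal to a plain enum member, but does compare equal to the member's `.value`. The enum below is a simplified stand-in for `LitellmUserRoles`; the member values are illustrative.

```python
import enum


class UserRoles(enum.Enum):  # simplified stand-in, not the actual LitellmUserRoles definition
    INTERNAL_USER = "internal_user"
    PROXY_ADMIN_VIEW_ONLY = "proxy_admin_viewer"


_user_role = "internal_user"  # roles often arrive as plain strings (DB rows, JWT claims)

print(_user_role == UserRoles.INTERNAL_USER)        # False: str vs Enum member
print(_user_role == UserRoles.INTERNAL_USER.value)  # True: str vs str
```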
View file

@@ -24,9 +24,9 @@ general_settings:
 litellm_settings:
   success_callback: ["prometheus"]
+  callbacks: ["otel"]
   failure_callback: ["prometheus"]
   store_audit_logs: true
-  turn_off_message_logging: true
   redact_messages_in_exceptions: True
   enforced_params:
     - user

View file

@@ -12,6 +12,7 @@ import os
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
+
 import os
 from unittest.mock import MagicMock, patch

View file

@@ -0,0 +1,54 @@
+from litellm.types.files import (
+    FILE_EXTENSIONS,
+    FILE_MIME_TYPES,
+    FileType,
+    get_file_extension_from_mime_type,
+    get_file_type_from_extension,
+    get_file_extension_for_file_type,
+    get_file_mime_type_for_file_type,
+    get_file_mime_type_from_extension,
+)
+
+import pytest
+
+
+class TestFileConsts:
+    def test_all_file_types_have_extensions(self):
+        for file_type in FileType:
+            assert file_type in FILE_EXTENSIONS.keys()
+
+    def test_all_file_types_have_mime_types(self):
+        for file_type in FileType:
+            assert file_type in FILE_MIME_TYPES.keys()
+
+    def test_get_file_extension_from_mime_type(self):
+        assert get_file_extension_from_mime_type("audio/aac") == "aac"
+        assert get_file_extension_from_mime_type("application/pdf") == "pdf"
+        with pytest.raises(ValueError):
+            get_file_extension_from_mime_type("application/unknown")
+
+    def test_get_file_type_from_extension(self):
+        assert get_file_type_from_extension("aac") == FileType.AAC
+        assert get_file_type_from_extension("pdf") == FileType.PDF
+        with pytest.raises(ValueError):
+            get_file_type_from_extension("unknown")
+
+    def test_get_file_extension_for_file_type(self):
+        assert get_file_extension_for_file_type(FileType.AAC) == "aac"
+        assert get_file_extension_for_file_type(FileType.PDF) == "pdf"
+
+    def test_get_file_mime_type_for_file_type(self):
+        assert get_file_mime_type_for_file_type(FileType.AAC) == "audio/aac"
+        assert get_file_mime_type_for_file_type(FileType.PDF) == "application/pdf"
+
+    def test_get_file_mime_type_from_extension(self):
+        assert get_file_mime_type_from_extension("aac") == "audio/aac"
+        assert get_file_mime_type_from_extension("pdf") == "application/pdf"
+
+    def test_uppercase_extensions(self):
+        # Test that uppercase extensions return the correct file type
+        assert get_file_type_from_extension("AAC") == FileType.AAC
+        assert get_file_type_from_extension("PDF") == FileType.PDF
+
+        # Test that uppercase extensions return the correct MIME type
+        assert get_file_mime_type_from_extension("AAC") == "audio/aac"
+        assert get_file_mime_type_from_extension("PDF") == "application/pdf"
View file

@@ -1,12 +1,16 @@
-import sys
-import os
+import asyncio
 import json
+import logging
+import os
+import sys
 import time
-import logfire
-import litellm
 import pytest
-from logfire.testing import TestExporter, SimpleSpanProcessor
+
+import litellm
+from litellm._logging import verbose_logger, verbose_proxy_logger
+
+verbose_logger.setLevel(logging.DEBUG)

 sys.path.insert(0, os.path.abspath("../.."))
@@ -17,19 +21,13 @@ sys.path.insert(0, os.path.abspath("../.."))
 # 4. Test logfire logging for completion while streaming is enabled

-@pytest.mark.skip(reason="Breaks on ci/cd")
+@pytest.mark.skip(reason="Breaks on ci/cd but works locally")
 @pytest.mark.parametrize("stream", [False, True])
 def test_completion_logfire_logging(stream):
-    litellm.success_callback = ["logfire"]
-    litellm.set_verbose = True
-    exporter = TestExporter()
-    logfire.configure(
-        send_to_logfire=False,
-        console=False,
-        processors=[SimpleSpanProcessor(exporter)],
-        collect_system_metrics=False,
-    )
+    from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
+
+    litellm.callbacks = ["logfire"]
+    litellm.set_verbose = True
+
     messages = [{"role": "user", "content": "what llm are u"}]
     temperature = 0.3
     max_tokens = 10
@@ -47,41 +45,16 @@ def test_completion_logfire_logging(stream):
            print(chunk)
     time.sleep(5)

-    exported_spans = exporter.exported_spans_as_dict()
-    assert len(exported_spans) == 1
-    assert (
-        exported_spans[0]["attributes"]["logfire.msg"]
-        == "Chat Completion with 'gpt-3.5-turbo'"
-    )
-
-    request_data = json.loads(exported_spans[0]["attributes"]["request_data"])
-    assert request_data["model"] == "gpt-3.5-turbo"
-    assert request_data["messages"] == messages
-    assert "completion_tokens" in request_data["usage"]
-    assert "prompt_tokens" in request_data["usage"]
-    assert "total_tokens" in request_data["usage"]
-    assert request_data["response"]["choices"][0]["message"]["content"]
-    assert request_data["modelParameters"]["max_tokens"] == max_tokens
-    assert request_data["modelParameters"]["temperature"] == temperature

-@pytest.mark.skip(reason="Breaks on ci/cd")
+@pytest.mark.skip(reason="Breaks on ci/cd but works locally")
 @pytest.mark.asyncio
 @pytest.mark.parametrize("stream", [False, True])
 async def test_acompletion_logfire_logging(stream):
-    litellm.success_callback = ["logfire"]
-    litellm.set_verbose = True
-    exporter = TestExporter()
-    logfire.configure(
-        send_to_logfire=False,
-        console=False,
-        processors=[SimpleSpanProcessor(exporter)],
-        collect_system_metrics=False,
-    )
+    from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
+
+    litellm.callbacks = ["logfire"]
+    litellm.set_verbose = True
+
     messages = [{"role": "user", "content": "what llm are u"}]
     temperature = 0.3
     max_tokens = 10
@@ -90,30 +63,11 @@ async def test_acompletion_logfire_logging(stream):
         messages=messages,
         max_tokens=max_tokens,
         temperature=temperature,
+        stream=stream,
     )
     print(response)
     if stream:
-        for chunk in response:
+        async for chunk in response:
             print(chunk)
-    time.sleep(5)
+    await asyncio.sleep(5)

-    exported_spans = exporter.exported_spans_as_dict()
-    print("exported_spans", exported_spans)
-    assert len(exported_spans) == 1
-    assert (
-        exported_spans[0]["attributes"]["logfire.msg"]
-        == "Chat Completion with 'gpt-3.5-turbo'"
-    )
-
-    request_data = json.loads(exported_spans[0]["attributes"]["request_data"])
-    assert request_data["model"] == "gpt-3.5-turbo"
-    assert request_data["messages"] == messages
-    assert "completion_tokens" in request_data["usage"]
-    assert "prompt_tokens" in request_data["usage"]
-    assert "total_tokens" in request_data["usage"]
-    assert request_data["response"]["choices"][0]["message"]["content"]
-    assert request_data["modelParameters"]["max_tokens"] == max_tokens
-    assert request_data["modelParameters"]["temperature"] == temperature

View file

@@ -151,23 +151,16 @@ Util Functions
 """

-def get_file_mime_type_from_extension(extension: str) -> str:
-    for file_type, extensions in FILE_EXTENSIONS.items():
-        if extension in extensions:
-            return FILE_MIME_TYPES[file_type]
-    raise ValueError(f"Unknown mime type for extension: {extension}")
-

 def get_file_extension_from_mime_type(mime_type: str) -> str:
     for file_type, mime in FILE_MIME_TYPES.items():
-        if mime == mime_type:
+        if mime.lower() == mime_type.lower():
             return FILE_EXTENSIONS[file_type][0]
     raise ValueError(f"Unknown extension for mime type: {mime_type}")


 def get_file_type_from_extension(extension: str) -> FileType:
     for file_type, extensions in FILE_EXTENSIONS.items():
-        if extension in extensions:
+        if extension.lower() in extensions:
             return file_type
     raise ValueError(f"Unknown file type for extension: {extension}")

@@ -181,6 +174,11 @@ def get_file_mime_type_for_file_type(file_type: FileType) -> str:
     return FILE_MIME_TYPES[file_type]


+def get_file_mime_type_from_extension(extension: str) -> str:
+    file_type = get_file_type_from_extension(extension)
+    return get_file_mime_type_for_file_type(file_type)
+
+
 """
 FileType Type Groupings (Videos, Images, etc)
 """

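A short usage sketch of the case-insensitive lookups added above; the assertions mirror the new unit tests earlier in this commit.

```python
from litellm.types.files import (
    FileType,
    get_file_mime_type_from_extension,
    get_file_type_from_extension,
)

# Lookups are now case-insensitive, so uppercase extensions resolve too.
assert get_file_type_from_extension("PDF") == FileType.PDF
assert get_file_mime_type_from_extension("PDF") == "application/pdf"

# get_file_mime_type_from_extension is now a thin composition:
# extension -> FileType -> MIME type.
assert get_file_mime_type_from_extension("aac") == "audio/aac"
```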
View file

@@ -340,6 +340,7 @@ def function_setup(
     try:
         global callback_list, add_breadcrumb, user_logger_fn, Logging
         function_id = kwargs["id"] if "id" in kwargs else None
+
         if len(litellm.callbacks) > 0:
             for callback in litellm.callbacks:
                 # check if callback is a string - e.g. "lago", "openmeter"