Merge branch 'main' into litellm_gemini_refactoring

Commit 63216f42b8 by Krish Dholakia, 2024-06-17 19:50:56 -07:00, committed by GitHub
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
27 changed files with 335 additions and 182 deletions

View file

@@ -2,6 +2,15 @@ import Image from '@theme/IdealImage';
 # Athina
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 [Athina](https://athina.ai/) is an evaluation framework and production monitoring platform for your LLM-powered app. Athina is designed to enhance the performance and reliability of AI applications through real-time monitoring, granular analytics, and plug-and-play evaluations.
 <Image img={require('../../img/athina_dashboard.png')} />

View file

@@ -1,5 +1,14 @@
 # Greenscale - Track LLM Spend and Responsible Usage
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 [Greenscale](https://greenscale.ai/) is a production monitoring platform for your LLM-powered app that provides you granular key insights into your GenAI spending and responsible usage. Greenscale only captures metadata to minimize the exposure risk of personally identifiable information (PII).
 ## Getting Started

View file

@@ -1,4 +1,13 @@
 # Helicone Tutorial
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 [Helicone](https://helicone.ai/) is an open source observability platform that proxies your OpenAI traffic and provides you key insights into your spend, latency and usage.
 ## Use Helicone to log requests across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM)

View file

@@ -1,6 +1,6 @@
 import Image from '@theme/IdealImage';
-# Langfuse - Logging LLM Input/Output
+# 🔥 Langfuse - Logging LLM Input/Output
 LangFuse is open Source Observability & Analytics for LLM Apps
 Detailed production traces and a granular view on quality, cost and latency

View file

@@ -1,6 +1,16 @@
 import Image from '@theme/IdealImage';
 # Langsmith - Logging LLM Input/Output
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 An all-in-one developer platform for every step of the application lifecycle
 https://smith.langchain.com/

View file

@@ -1,6 +1,6 @@
 import Image from '@theme/IdealImage';
-# Logfire - Logging LLM Input/Output
+# 🔥 Logfire - Logging LLM Input/Output
 Logfire is open Source Observability & Analytics for LLM Apps
 Detailed production traces and a granular view on quality, cost and latency
@@ -14,10 +14,14 @@ join our [discord](https://discord.gg/wuPM9dRgDw)
 ## Pre-Requisites
-Ensure you have run `pip install logfire` for this integration
+Ensure you have installed the following packages to use this integration
 ```shell
-pip install logfire litellm
+pip install litellm
+
+pip install opentelemetry-api==1.25.0
+pip install opentelemetry-sdk==1.25.0
+pip install opentelemetry-exporter-otlp==1.25.0
 ```
 ## Quick Start
@@ -25,8 +29,7 @@ pip install logfire litellm
 Get your Logfire token from [Logfire](https://logfire.pydantic.dev/)
 ```python
-litellm.success_callback = ["logfire"]
-litellm.failure_callback = ["logfire"] # logs errors to logfire
+litellm.callbacks = ["logfire"]
 ```
 ```python

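For context, a minimal end-to-end sketch of the callback-based setup the updated Logfire docs describe. The model, prompt, and token value are illustrative; it assumes `LOGFIRE_TOKEN` is set, which the new initializer requires.

```python
import os

import litellm

# The Logfire integration now rides on OpenTelemetry; the token must be present
# before the callback is initialized (the initializer raises ValueError otherwise).
os.environ["LOGFIRE_TOKEN"] = "your-logfire-token"  # placeholder value

# One registration replaces the old success_callback/failure_callback pair;
# successes and failures are both exported.
litellm.callbacks = ["logfire"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are u"}],
    max_tokens=10,
)
print(response)
```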
View file

@@ -1,5 +1,13 @@
 # Lunary - Logging and tracing LLM input/output
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 [Lunary](https://lunary.ai/) is an open-source AI developer platform providing observability, prompt management, and evaluation tools for AI developers.
 <video controls width='900' >

View file

@@ -2,6 +2,15 @@ import Image from '@theme/IdealImage';
 # Promptlayer Tutorial
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 Promptlayer is a platform for prompt engineers. Log OpenAI requests. Search usage history. Track performance. Visually manage prompt templates.
 <Image img={require('../../img/promptlayer.png')} />

View file

@@ -1,5 +1,14 @@
 import Image from '@theme/IdealImage';
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 # Sentry - Log LLM Exceptions
 [Sentry](https://sentry.io/) provides error monitoring for production. LiteLLM can add breadcrumbs and send exceptions to Sentry with this integration

View file

@@ -1,4 +1,12 @@
 # Supabase Tutorial
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 [Supabase](https://supabase.com/) is an open source Firebase alternative.
 Start your project with a Postgres database, Authentication, instant APIs, Edge Functions, Realtime subscriptions, Storage, and Vector embeddings.

View file

@@ -1,6 +1,16 @@
 import Image from '@theme/IdealImage';
 # Weights & Biases - Logging LLM Input/Output
+:::tip
+This is community maintained, Please make an issue if you run into a bug
+https://github.com/BerriAI/litellm
+:::
 Weights & Biases helps AI developers build better models faster https://wandb.ai
 <Image img={require('../../img/wandb.png')} />

View file

@@ -172,10 +172,8 @@ const sidebars = {
 "proxy/custom_pricing",
 "routing",
 "scheduler",
-"rules",
 "set_keys",
 "budget_manager",
-"contributing",
 "secret",
 "completion/token_usage",
 "load_test",
@@ -183,11 +181,11 @@ const sidebars = {
 type: "category",
 label: "Logging & Observability",
 items: [
+"observability/langfuse_integration",
+"observability/logfire_integration",
 "debugging/local_debugging",
 "observability/raw_request_response",
-"observability/callbacks",
 "observability/custom_callback",
-"observability/langfuse_integration",
 "observability/sentry",
 "observability/lago",
 "observability/openmeter",
@@ -233,6 +231,8 @@ const sidebars = {
 label: "Extras",
 items: [
 "extras/contributing",
+"contributing",
+"rules",
 "proxy_server",
 {
 type: "category",

View file

@@ -37,7 +37,7 @@ input_callback: List[Union[str, Callable]] = []
 success_callback: List[Union[str, Callable]] = []
 failure_callback: List[Union[str, Callable]] = []
 service_callback: List[Union[str, Callable]] = []
-_custom_logger_compatible_callbacks_literal = Literal["lago", "openmeter"]
+_custom_logger_compatible_callbacks_literal = Literal["lago", "openmeter", "logfire"]
 callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = []
 _langfuse_default_tags: Optional[
     List[

View file

@@ -23,8 +23,12 @@ class JsonFormatter(Formatter):
         super(JsonFormatter, self).__init__()

     def format(self, record):
-        json_record = {}
-        json_record["message"] = record.getMessage()
+        json_record = {
+            "message": record.getMessage(),
+            "level": record.levelname,
+            "timestamp": self.formatTime(record, self.datefmt),
+        }
         return json.dumps(json_record)

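For illustration, a self-contained sketch of how the enriched formatter behaves; the class body mirrors the diff, while the logger name and message are invented. On the proxy this output path is enabled via the `json_logs: true` setting added to the example config further down.

```python
import json
import logging
from logging import Formatter


class JsonFormatter(Formatter):
    def __init__(self):
        super(JsonFormatter, self).__init__()

    def format(self, record):
        # Each record now carries its level and timestamp alongside the message.
        json_record = {
            "message": record.getMessage(),
            "level": record.levelname,
            "timestamp": self.formatTime(record, self.datefmt),
        }
        return json.dumps(json_record)


handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter())
logger = logging.getLogger("json_log_demo")
logger.addHandler(handler)
logger.setLevel(logging.INFO)

# Prints something like:
# {"message": "proxy started", "level": "INFO", "timestamp": "2024-06-17 19:50:56,000"}
logger.info("proxy started")
```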
View file

@@ -1,20 +1,21 @@
 import os
 from dataclasses import dataclass
 from datetime import datetime
-import litellm
-from litellm.integrations.custom_logger import CustomLogger
-from litellm._logging import verbose_logger
-from litellm.types.services import ServiceLoggerPayload
 from functools import wraps
-from typing import Union, Optional, TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Optional, Union
+
+import litellm
+from litellm._logging import verbose_logger
+from litellm.integrations.custom_logger import CustomLogger
+from litellm.types.services import ServiceLoggerPayload

 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
-    from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth
     from litellm.proxy._types import (
         ManagementEndpointLoggingPayload as _ManagementEndpointLoggingPayload,
     )
+    from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth

     Span = _Span
     UserAPIKeyAuth = _UserAPIKeyAuth
@@ -107,8 +108,9 @@ class OpenTelemetry(CustomLogger):
         start_time: Optional[datetime] = None,
         end_time: Optional[datetime] = None,
     ):
-        from opentelemetry import trace
         from datetime import datetime
+        from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode

         _start_time_ns = start_time
@@ -145,8 +147,9 @@ class OpenTelemetry(CustomLogger):
         start_time: Optional[datetime] = None,
         end_time: Optional[datetime] = None,
     ):
-        from opentelemetry import trace
         from datetime import datetime
+        from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode

         _start_time_ns = start_time
@@ -179,8 +182,8 @@ class OpenTelemetry(CustomLogger):
     async def async_post_call_failure_hook(
         self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
     ):
-        from opentelemetry.trace import Status, StatusCode
         from opentelemetry import trace
+        from opentelemetry.trace import Status, StatusCode

         parent_otel_span = user_api_key_dict.parent_otel_span
         if parent_otel_span is not None:
@@ -202,8 +205,8 @@ class OpenTelemetry(CustomLogger):
             parent_otel_span.end(end_time=self._to_ns(datetime.now()))

     def _handle_sucess(self, kwargs, response_obj, start_time, end_time):
-        from opentelemetry.trace import Status, StatusCode
         from opentelemetry import trace
+        from opentelemetry.trace import Status, StatusCode

         verbose_logger.debug(
             "OpenTelemetry Logger: Logging kwargs: %s, OTEL config settings=%s",
@@ -253,9 +256,10 @@ class OpenTelemetry(CustomLogger):
         span.end(end_time=self._to_ns(end_time))

     def set_tools_attributes(self, span: Span, tools):
-        from litellm.proxy._types import SpanAttributes
         import json
+
+        from litellm.proxy._types import SpanAttributes

         if not tools:
             return
@@ -320,7 +324,7 @@ class OpenTelemetry(CustomLogger):
         )

         span.set_attribute(
-            SpanAttributes.LLM_IS_STREAMING, optional_params.get("stream", False)
+            SpanAttributes.LLM_IS_STREAMING, str(optional_params.get("stream", False))
         )

         if optional_params.get("tools"):
@@ -439,7 +443,7 @@ class OpenTelemetry(CustomLogger):
         #############################################
         ########## LLM Response Attributes ##########
         #############################################
-        if _raw_response:
+        if _raw_response and isinstance(_raw_response, str):
             # cast sr -> dict
             import json
@@ -478,10 +482,10 @@ class OpenTelemetry(CustomLogger):
         return _parent_context

     def _get_span_context(self, kwargs):
+        from opentelemetry import trace
         from opentelemetry.trace.propagation.tracecontext import (
             TraceContextTextMapPropagator,
         )
-        from opentelemetry import trace

         litellm_params = kwargs.get("litellm_params", {}) or {}
         proxy_server_request = litellm_params.get("proxy_server_request", {}) or {}
@@ -505,17 +509,17 @@ class OpenTelemetry(CustomLogger):
         return TraceContextTextMapPropagator().extract(carrier=carrier), None

     def _get_span_processor(self):
-        from opentelemetry.sdk.trace.export import (
-            SpanExporter,
-            SimpleSpanProcessor,
-            BatchSpanProcessor,
-            ConsoleSpanExporter,
+        from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
+            OTLPSpanExporter as OTLPSpanExporterGRPC,
         )
         from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
             OTLPSpanExporter as OTLPSpanExporterHTTP,
         )
-        from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
-            OTLPSpanExporter as OTLPSpanExporterGRPC,
+        from opentelemetry.sdk.trace.export import (
+            BatchSpanProcessor,
+            ConsoleSpanExporter,
+            SimpleSpanProcessor,
+            SpanExporter,
         )

         verbose_logger.debug(
@@ -574,8 +578,9 @@ class OpenTelemetry(CustomLogger):
         logging_payload: ManagementEndpointLoggingPayload,
         parent_otel_span: Optional[Span] = None,
     ):
-        from opentelemetry import trace
         from datetime import datetime
+        from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode

         _start_time_ns = logging_payload.start_time
@@ -619,8 +624,9 @@ class OpenTelemetry(CustomLogger):
         logging_payload: ManagementEndpointLoggingPayload,
         parent_otel_span: Optional[Span] = None,
     ):
-        from opentelemetry import trace
         from datetime import datetime
+        from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode

         _start_time_ns = logging_payload.start_time

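A small standalone sketch of the span-attribute pattern touched above: OpenTelemetry attribute values are limited to primitive types, so casting the streaming flag with `str()` guards against missing or non-primitive values. The exporter wiring and attribute keys below are illustrative, not the proxy's actual configuration.

```python
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

# A console exporter stands in for the OTLP exporters selected in _get_span_processor().
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)

tracer = trace.get_tracer("litellm-example")
optional_params = {"stream": True, "max_tokens": 10}

with tracer.start_as_current_span("litellm_request") as span:
    # str() keeps the attribute a valid primitive even if "stream" is absent or None.
    span.set_attribute("llm.is_streaming", str(optional_params.get("stream", False)))
    span.set_attribute("llm.max_tokens", optional_params.get("max_tokens", 0))
```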
View file

@@ -10,7 +10,7 @@ import sys
 import time
 import traceback
 import uuid
-from typing import Callable, Optional
+from typing import Any, Callable, Dict, List, Optional

 import litellm
 from litellm import (
@@ -72,6 +72,8 @@ from ..integrations.supabase import Supabase
 from ..integrations.traceloop import TraceloopLogger
 from ..integrations.weights_biases import WeightsBiasesLogger

+_in_memory_loggers: List[Any] = []
+

 class Logging:
     global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, logfireLogger, prometheusLogger, slack_app
@@ -1612,6 +1614,7 @@ class Logging:
                         level=LogfireLevel.ERROR.value,
                         print_verbose=print_verbose,
                     )
+
             except Exception as e:
                 print_verbose(
                     f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}"
@@ -1786,6 +1789,37 @@ def _init_custom_logger_compatible_class(
     logging_integration: litellm._custom_logger_compatible_callbacks_literal,
 ) -> Callable:
     if logging_integration == "lago":
-        return LagoLogger()  # type: ignore
+        for callback in _in_memory_loggers:
+            if isinstance(callback, LagoLogger):
+                return callback  # type: ignore
+
+        lago_logger = LagoLogger()
+        _in_memory_loggers.append(lago_logger)
+        return lago_logger  # type: ignore
     elif logging_integration == "openmeter":
-        return OpenMeterLogger()  # type: ignore
+        for callback in _in_memory_loggers:
+            if isinstance(callback, OpenMeterLogger):
+                return callback  # type: ignore
+
+        _openmeter_logger = OpenMeterLogger()
+        _in_memory_loggers.append(_openmeter_logger)
+        return _openmeter_logger  # type: ignore
+    elif logging_integration == "logfire":
+        if "LOGFIRE_TOKEN" not in os.environ:
+            raise ValueError("LOGFIRE_TOKEN not found in environment variables")
+        from litellm.integrations.opentelemetry import (
+            OpenTelemetry,
+            OpenTelemetryConfig,
+        )
+
+        otel_config = OpenTelemetryConfig(
+            exporter="otlp_http",
+            endpoint="https://logfire-api.pydantic.dev/v1/traces",
+            headers=f"Authorization={os.getenv('LOGFIRE_TOKEN')}",
+        )
+
+        for callback in _in_memory_loggers:
+            if isinstance(callback, OpenTelemetry):
+                return callback  # type: ignore
+        _otel_logger = OpenTelemetry(config=otel_config)
+        _in_memory_loggers.append(_otel_logger)
+        return _otel_logger  # type: ignore

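For context, a stripped-down sketch of the caching behaviour the new `_in_memory_loggers` list introduces: requesting the same custom-logger-compatible callback twice now returns the existing instance instead of constructing a duplicate. `DummyLogger` is a stand-in invented for illustration.

```python
from typing import Any, List

_in_memory_loggers: List[Any] = []


class DummyLogger:  # stand-in for LagoLogger / OpenMeterLogger / OpenTelemetry
    pass


def get_or_create_logger() -> DummyLogger:
    # Reuse an already-initialized instance if one exists...
    for callback in _in_memory_loggers:
        if isinstance(callback, DummyLogger):
            return callback
    # ...otherwise create it once and remember it for subsequent calls.
    logger = DummyLogger()
    _in_memory_loggers.append(logger)
    return logger


assert get_or_create_logger() is get_or_create_logger()  # same object both times
```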
View file

@@ -107,19 +107,17 @@ from .llms.databricks import DatabricksChatCompletion
 from .llms.huggingface_restapi import Huggingface
 from .llms.openai import OpenAIChatCompletion, OpenAITextCompletion
 from .llms.predibase import PredibaseChatCompletion
-from .llms.bedrock_httpx import BedrockLLM, BedrockConverseLLM
-from .llms.vertex_httpx import VertexLLM
-from .llms.triton import TritonChatCompletion
-from .llms.text_completion_codestral import CodestralTextCompletion
 from .llms.prompt_templates.factory import (
     custom_prompt,
     function_call_prompt,
     map_system_message_pt,
     prompt_factory,
 )
+from .llms.text_completion_codestral import CodestralTextCompletion
 from .llms.triton import TritonChatCompletion
 from .llms.vertex_httpx import VertexLLM
 from .types.llms.openai import HttpxBinaryResponseContent
+from .types.utils import ChatCompletionMessageToolCall

 encoding = tiktoken.get_encoding("cl100k_base")
 from litellm.utils import (
@@ -431,6 +429,7 @@ def mock_completion(
     messages: List,
     stream: Optional[bool] = False,
     mock_response: Union[str, Exception] = "This is a mock request",
+    mock_tool_calls: Optional[List] = None,
     logging=None,
     custom_llm_provider=None,
     **kwargs,
@@ -499,6 +498,12 @@
     model_response["created"] = int(time.time())
     model_response["model"] = model

+    if mock_tool_calls:
+        model_response["choices"][0]["message"]["tool_calls"] = [
+            ChatCompletionMessageToolCall(**tool_call)
+            for tool_call in mock_tool_calls
+        ]
+
     setattr(
         model_response,
         "usage",
@@ -612,6 +617,7 @@ def completion(
     args = locals()
     api_base = kwargs.get("api_base", None)
     mock_response = kwargs.get("mock_response", None)
+    mock_tool_calls = kwargs.get("mock_tool_calls", None)
     force_timeout = kwargs.get("force_timeout", 600)  ## deprecated
     logger_fn = kwargs.get("logger_fn", None)
     verbose = kwargs.get("verbose", False)
@@ -930,12 +936,13 @@ def completion(
             litellm_params=litellm_params,
             custom_llm_provider=custom_llm_provider,
         )
-        if mock_response:
+        if mock_response or mock_tool_calls:
             return mock_completion(
                 model,
                 messages,
                 stream=stream,
                 mock_response=mock_response,
+                mock_tool_calls=mock_tool_calls,
                 logging=logging,
                 acompletion=acompletion,
                 mock_delay=kwargs.get("mock_delay", None),

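For context, a hedged sketch of what the new `mock_tool_calls` parameter enables: forcing a tool-call response without hitting a provider. The payload assumes the standard OpenAI tool-call shape; the id, function name, and arguments are made up.

```python
import litellm

# With mock_tool_calls set, completion() short-circuits into mock_completion()
# even without mock_response, and the tool calls land on the first choice's message.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What is the weather in SF?"}],
    mock_tool_calls=[
        {
            "id": "call_abc123",  # illustrative id
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "arguments": '{"location": "San Francisco", "unit": "celsius"}',
            },
        }
    ],
)
print(response.choices[0].message.tool_calls)
```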
View file

@@ -1,7 +1,8 @@
 import json
 import logging
-from logging import Formatter
 import os
+from logging import Formatter
+
 from litellm import json_logs

 # Set default log level to INFO
@@ -14,8 +15,11 @@
         super(JsonFormatter, self).__init__()

     def format(self, record):
-        json_record = {}
-        json_record["message"] = record.getMessage()
+        json_record = {
+            "message": record.getMessage(),
+            "level": record.levelname,
+            "timestamp": self.formatTime(record, self.datefmt),
+        }
         return json.dumps(json_record)

View file

@@ -79,6 +79,7 @@ litellm_settings:
   success_callback: ["langfuse"]
   failure_callback: ["langfuse"]
   cache: true
+  json_logs: true

 general_settings:
   alerting: ["slack"]

View file

@@ -1,13 +1,17 @@
-from pydantic import BaseModel, Extra, Field, model_validator, Json, ConfigDict
-from dataclasses import fields
 import enum
-from typing import Optional, List, Union, Dict, Literal, Any, TypedDict, TYPE_CHECKING
+import json
+import os
+import sys
+import uuid
+from dataclasses import fields
 from datetime import datetime
-import uuid, json, sys, os
+from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, TypedDict, Union
-from litellm.types.router import UpdateRouterConfig
-from litellm.types.utils import ProviderField
+
+from pydantic import BaseModel, ConfigDict, Extra, Field, Json, model_validator
 from typing_extensions import Annotated
+
+from litellm.types.router import UpdateRouterConfig
+from litellm.types.utils import ProviderField

 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
@@ -283,12 +287,16 @@ class LiteLLMRoutes(enum.Enum):
         "/metrics",
     ]

-    internal_user_routes: List = [
+    internal_user_routes: List = (
+        [
         "/key/generate",
         "/key/update",
         "/key/delete",
         "/key/info",
-    ] + spend_tracking_routes
+        ]
+        + spend_tracking_routes
+        + sso_only_routes
+    )

 # class LiteLLMAllowedRoutes(LiteLLMBase):

View file

@@ -7,59 +7,56 @@ Returns a UserAPIKeyAuth object if the API key is valid
 """

+import asyncio
 import json
+import secrets
+import traceback
 from datetime import datetime, timedelta, timezone
 from typing import Optional
-import secrets
 from uuid import uuid4

 import fastapi
-from fastapi import Request
-from pydantic import BaseModel
-import litellm
-import traceback
-import asyncio
 from fastapi import (
-    FastAPI,
-    Request,
-    HTTPException,
-    status,
-    Path,
     Depends,
+    FastAPI,
-    Header,
-    Response,
-    Form,
-    UploadFile,
     File,
+    Form,
+    Header,
+    HTTPException,
+    Path,
+    Request,
+    Response,
+    UploadFile,
+    status,
 )
-from fastapi.responses import (
-    StreamingResponse,
-    FileResponse,
-    ORJSONResponse,
-    JSONResponse,
-)
-from fastapi.openapi.utils import get_openapi
-from fastapi.responses import RedirectResponse
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.staticfiles import StaticFiles
+from fastapi.openapi.utils import get_openapi
+from fastapi.responses import (
+    FileResponse,
+    JSONResponse,
+    ORJSONResponse,
+    RedirectResponse,
+    StreamingResponse,
+)
 from fastapi.security.api_key import APIKeyHeader
-from litellm.proxy._types import *
-from litellm._logging import verbose_logger, verbose_proxy_logger
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+
+import litellm
+from litellm._logging import verbose_logger, verbose_proxy_logger
+from litellm.proxy._types import *
 from litellm.proxy.auth.auth_checks import (
+    allowed_routes_check,
     common_checks,
+    get_actual_routes,
     get_end_user_object,
     get_org_object,
     get_team_object,
     get_user_object,
-    allowed_routes_check,
-    get_actual_routes,
     log_to_opentelemetry,
 )
-from litellm.proxy.utils import _to_ns
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
+from litellm.proxy.utils import _to_ns

 api_key_header = APIKeyHeader(
     name="Authorization", auto_error=False, description="Bearer token"
@@ -88,20 +85,20 @@ async def user_api_key_auth(
 ) -> UserAPIKeyAuth:
     from litellm.proxy.proxy_server import (
-        litellm_proxy_admin_name,
+        allowed_routes_check,
         common_checks,
-        master_key,
-        prisma_client,
-        llm_model_list,
-        user_custom_auth,
         custom_db_client,
         general_settings,
-        proxy_logging_obj,
-        open_telemetry_logger,
-        user_api_key_cache,
-        jwt_handler,
-        allowed_routes_check,
         get_actual_routes,
+        jwt_handler,
+        litellm_proxy_admin_name,
+        llm_model_list,
+        master_key,
+        open_telemetry_logger,
+        prisma_client,
+        proxy_logging_obj,
+        user_api_key_cache,
+        user_custom_auth,
     )

     try:
@@ -1004,7 +1001,7 @@
             ):
                 pass
-            elif _user_role == LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY:
+            elif _user_role == LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY.value:
                 if route in LiteLLMRoutes.openai_routes.value:
                     raise HTTPException(
                         status_code=status.HTTP_403_FORBIDDEN,
@@ -1031,7 +1028,7 @@
                         detail=f"user not allowed to access this route, role= {_user_role}. Trying to access: {route}",
                     )
             elif (
-                _user_role == LitellmUserRoles.INTERNAL_USER
+                _user_role == LitellmUserRoles.INTERNAL_USER.value
                 and route in LiteLLMRoutes.internal_user_routes.value
             ):
                 pass
@@ -1059,6 +1056,7 @@
             # this token is only used for managing the ui
             allowed_routes = [
                 "/sso",
+                "/sso/get/logout_url",
                 "/login",
                 "/key/generate",
                 "/key/update",
@@ -1144,8 +1142,8 @@
             raise Exception()
     except Exception as e:
         verbose_proxy_logger.error(
-            "litellm.proxy.proxy_server.user_api_key_auth(): Exception occured - {}".format(
-                str(e)
+            "litellm.proxy.proxy_server.user_api_key_auth(): Exception occured - {}\n{}".format(
+                str(e), traceback.format_exc()
             )
         )
@@ -1156,7 +1154,6 @@
             user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span),
         )
-        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, litellm.BudgetExceededError):
             raise ProxyException(
                 message=e.message, type="auth_error", param=None, code=400

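For context, a small illustration of the bug class the `.value` comparisons above address: a role stored as a plain string never compares equal to a plain enum member, but does compare equal to the member's `.value`. The enum below is a simplified stand-in for `LitellmUserRoles`; the member values are illustrative.

```python
import enum


class UserRoles(enum.Enum):  # simplified stand-in, not the actual LitellmUserRoles definition
    INTERNAL_USER = "internal_user"
    PROXY_ADMIN_VIEW_ONLY = "proxy_admin_viewer"


_user_role = "internal_user"  # roles often arrive as plain strings (DB rows, JWT claims)

print(_user_role == UserRoles.INTERNAL_USER)        # False: str vs Enum member
print(_user_role == UserRoles.INTERNAL_USER.value)  # True: str vs str
```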
View file

@@ -24,9 +24,9 @@ general_settings:
 litellm_settings:
   success_callback: ["prometheus"]
+  callbacks: ["otel"]
   failure_callback: ["prometheus"]
   store_audit_logs: true
-  turn_off_message_logging: true
   redact_messages_in_exceptions: True
   enforced_params:
     - user

View file

@@ -12,6 +12,7 @@ import os
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
+
 import os
 from unittest.mock import MagicMock, patch

View file

@@ -0,0 +1,54 @@
+from litellm.types.files import (
+    FILE_EXTENSIONS,
+    FILE_MIME_TYPES,
+    FileType,
+    get_file_extension_from_mime_type,
+    get_file_type_from_extension,
+    get_file_extension_for_file_type,
+    get_file_mime_type_for_file_type,
+    get_file_mime_type_from_extension,
+)
+
+import pytest
+
+
+class TestFileConsts:
+    def test_all_file_types_have_extensions(self):
+        for file_type in FileType:
+            assert file_type in FILE_EXTENSIONS.keys()
+
+    def test_all_file_types_have_mime_types(self):
+        for file_type in FileType:
+            assert file_type in FILE_MIME_TYPES.keys()
+
+    def test_get_file_extension_from_mime_type(self):
+        assert get_file_extension_from_mime_type("audio/aac") == "aac"
+        assert get_file_extension_from_mime_type("application/pdf") == "pdf"
+        with pytest.raises(ValueError):
+            get_file_extension_from_mime_type("application/unknown")
+
+    def test_get_file_type_from_extension(self):
+        assert get_file_type_from_extension("aac") == FileType.AAC
+        assert get_file_type_from_extension("pdf") == FileType.PDF
+        with pytest.raises(ValueError):
+            get_file_type_from_extension("unknown")
+
+    def test_get_file_extension_for_file_type(self):
+        assert get_file_extension_for_file_type(FileType.AAC) == "aac"
+        assert get_file_extension_for_file_type(FileType.PDF) == "pdf"
+
+    def test_get_file_mime_type_for_file_type(self):
+        assert get_file_mime_type_for_file_type(FileType.AAC) == "audio/aac"
+        assert get_file_mime_type_for_file_type(FileType.PDF) == "application/pdf"
+
+    def test_get_file_mime_type_from_extension(self):
+        assert get_file_mime_type_from_extension("aac") == "audio/aac"
+        assert get_file_mime_type_from_extension("pdf") == "application/pdf"
+
+    def test_uppercase_extensions(self):
+        # Test that uppercase extensions return the correct file type
+        assert get_file_type_from_extension("AAC") == FileType.AAC
+        assert get_file_type_from_extension("PDF") == FileType.PDF
+
+        # Test that uppercase extensions return the correct MIME type
+        assert get_file_mime_type_from_extension("AAC") == "audio/aac"
+        assert get_file_mime_type_from_extension("PDF") == "application/pdf"
View file

@@ -1,12 +1,16 @@
-import sys
-import os
+import asyncio
 import json
+import logging
+import os
+import sys
 import time
-import logfire
-import litellm
 import pytest
-from logfire.testing import TestExporter, SimpleSpanProcessor
+
+import litellm
+from litellm._logging import verbose_logger, verbose_proxy_logger
+
+verbose_logger.setLevel(logging.DEBUG)

 sys.path.insert(0, os.path.abspath("../.."))
@@ -17,19 +21,13 @@ sys.path.insert(0, os.path.abspath("../.."))
 # 4. Test logfire logging for completion while streaming is enabled

-@pytest.mark.skip(reason="Breaks on ci/cd")
+@pytest.mark.skip(reason="Breaks on ci/cd but works locally")
 @pytest.mark.parametrize("stream", [False, True])
 def test_completion_logfire_logging(stream):
-    litellm.success_callback = ["logfire"]
-    litellm.set_verbose = True
-    exporter = TestExporter()
-    logfire.configure(
-        send_to_logfire=False,
-        console=False,
-        processors=[SimpleSpanProcessor(exporter)],
-        collect_system_metrics=False,
-    )
+    from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
+
+    litellm.callbacks = ["logfire"]
+    litellm.set_verbose = True
+
     messages = [{"role": "user", "content": "what llm are u"}]
     temperature = 0.3
     max_tokens = 10
@@ -47,41 +45,16 @@ def test_completion_logfire_logging(stream):
            print(chunk)
     time.sleep(5)

-    exported_spans = exporter.exported_spans_as_dict()
-    assert len(exported_spans) == 1
-    assert (
-        exported_spans[0]["attributes"]["logfire.msg"]
-        == "Chat Completion with 'gpt-3.5-turbo'"
-    )
-
-    request_data = json.loads(exported_spans[0]["attributes"]["request_data"])
-    assert request_data["model"] == "gpt-3.5-turbo"
-    assert request_data["messages"] == messages
-    assert "completion_tokens" in request_data["usage"]
-    assert "prompt_tokens" in request_data["usage"]
-    assert "total_tokens" in request_data["usage"]
-    assert request_data["response"]["choices"][0]["message"]["content"]
-    assert request_data["modelParameters"]["max_tokens"] == max_tokens
-    assert request_data["modelParameters"]["temperature"] == temperature

-@pytest.mark.skip(reason="Breaks on ci/cd")
+@pytest.mark.skip(reason="Breaks on ci/cd but works locally")
 @pytest.mark.asyncio
 @pytest.mark.parametrize("stream", [False, True])
 async def test_acompletion_logfire_logging(stream):
-    litellm.success_callback = ["logfire"]
-    litellm.set_verbose = True
-    exporter = TestExporter()
-    logfire.configure(
-        send_to_logfire=False,
-        console=False,
-        processors=[SimpleSpanProcessor(exporter)],
-        collect_system_metrics=False,
-    )
+    from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
+
+    litellm.callbacks = ["logfire"]
+    litellm.set_verbose = True
+
     messages = [{"role": "user", "content": "what llm are u"}]
     temperature = 0.3
     max_tokens = 10
@@ -90,30 +63,11 @@ async def test_acompletion_logfire_logging(stream):
         messages=messages,
         max_tokens=max_tokens,
         temperature=temperature,
+        stream=stream,
     )
     print(response)
     if stream:
-        for chunk in response:
+        async for chunk in response:
             print(chunk)
-    time.sleep(5)
+    await asyncio.sleep(5)

-    exported_spans = exporter.exported_spans_as_dict()
-    print("exported_spans", exported_spans)
-    assert len(exported_spans) == 1
-    assert (
-        exported_spans[0]["attributes"]["logfire.msg"]
-        == "Chat Completion with 'gpt-3.5-turbo'"
-    )
-
-    request_data = json.loads(exported_spans[0]["attributes"]["request_data"])
-    assert request_data["model"] == "gpt-3.5-turbo"
-    assert request_data["messages"] == messages
-    assert "completion_tokens" in request_data["usage"]
-    assert "prompt_tokens" in request_data["usage"]
-    assert "total_tokens" in request_data["usage"]
-    assert request_data["response"]["choices"][0]["message"]["content"]
-    assert request_data["modelParameters"]["max_tokens"] == max_tokens
-    assert request_data["modelParameters"]["temperature"] == temperature

View file

@@ -151,23 +151,16 @@ Util Functions
 """

-def get_file_mime_type_from_extension(extension: str) -> str:
-    for file_type, extensions in FILE_EXTENSIONS.items():
-        if extension in extensions:
-            return FILE_MIME_TYPES[file_type]
-    raise ValueError(f"Unknown mime type for extension: {extension}")
-

 def get_file_extension_from_mime_type(mime_type: str) -> str:
     for file_type, mime in FILE_MIME_TYPES.items():
-        if mime == mime_type:
+        if mime.lower() == mime_type.lower():
             return FILE_EXTENSIONS[file_type][0]
     raise ValueError(f"Unknown extension for mime type: {mime_type}")


 def get_file_type_from_extension(extension: str) -> FileType:
     for file_type, extensions in FILE_EXTENSIONS.items():
-        if extension in extensions:
+        if extension.lower() in extensions:
             return file_type
     raise ValueError(f"Unknown file type for extension: {extension}")

@@ -181,6 +174,11 @@ def get_file_mime_type_for_file_type(file_type: FileType) -> str:
     return FILE_MIME_TYPES[file_type]


+def get_file_mime_type_from_extension(extension: str) -> str:
+    file_type = get_file_type_from_extension(extension)
+    return get_file_mime_type_for_file_type(file_type)
+
+
 """
 FileType Type Groupings (Videos, Images, etc)
 """

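A short usage sketch of the case-insensitive lookups added above; the assertions mirror the new unit tests earlier in this commit.

```python
from litellm.types.files import (
    FileType,
    get_file_mime_type_from_extension,
    get_file_type_from_extension,
)

# Lookups are now case-insensitive, so uppercase extensions resolve too.
assert get_file_type_from_extension("PDF") == FileType.PDF
assert get_file_mime_type_from_extension("PDF") == "application/pdf"

# get_file_mime_type_from_extension is now a thin composition:
# extension -> FileType -> MIME type.
assert get_file_mime_type_from_extension("aac") == "audio/aac"
```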
View file

@@ -340,6 +340,7 @@ def function_setup(
     try:
         global callback_list, add_breadcrumb, user_logger_fn, Logging
         function_id = kwargs["id"] if "id" in kwargs else None
+
         if len(litellm.callbacks) > 0:
             for callback in litellm.callbacks:
                 # check if callback is a string - e.g. "lago", "openmeter"