forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_braintrust_integration
commit e770ac48a2
39 changed files with 1369 additions and 76 deletions
@@ -15,7 +15,7 @@ services:
    ports:
      - "4000:4000" # Map the container port to the host, change the host port if necessary
    environment:
      DATABASE_URL: "postgresql://postgres:example@db:5432/postgres"
      DATABASE_URL: "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
      STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
    env_file:
      - .env # Load local .env file

@@ -25,9 +25,11 @@ services:
    image: postgres
    restart: always
    environment:
      POSTGRES_PASSWORD: example
      POSTGRES_DB: litellm
      POSTGRES_USER: llmproxy
      POSTGRES_PASSWORD: dbpassword9090
    healthcheck:
      test: ["CMD-SHELL", "pg_isready"]
      test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
      interval: 1s
      timeout: 5s
      retries: 10

docs/my-website/docs/observability/arize_integration.md (new file, 72 additions)
@@ -0,0 +1,72 @@
import Image from '@theme/IdealImage';

# 🔥 Arize AI - Logging LLM Input/Output

AI Observability and Evaluation Platform

:::tip

This integration is community maintained. Please open an issue if you run into a bug:
https://github.com/BerriAI/litellm

:::

## Pre-Requisites

Make an account on [Arize AI](https://app.arize.com/auth/login)

## Quick Start

Use just 2 lines of code to instantly log your responses **across all providers** with Arize

```python
litellm.callbacks = ["arize"]
```

```python
import litellm
import os

os.environ["ARIZE_SPACE_KEY"] = ""
os.environ["ARIZE_API_KEY"] = "" # defaults to litellm-completion

# LLM API Keys
os.environ['OPENAI_API_KEY']=""

# set arize as a callback, litellm will send the data to arize
litellm.callbacks = ["arize"]

# openai call
response = litellm.completion(
  model="gpt-3.5-turbo",
  messages=[
    {"role": "user", "content": "Hi 👋 - i'm openai"}
  ]
)
```

### Using with LiteLLM Proxy

```yaml
model_list:
  - model_name: gpt-4
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  callbacks: ["arize"]

environment_variables:
  ARIZE_SPACE_KEY: "d0*****"
  ARIZE_API_KEY: "141a****"
```
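
With the proxy running against this config, a quick way to confirm traces reach Arize is to send a test request through the proxy. This is a minimal sketch, assuming the proxy is listening on `http://0.0.0.0:4000` with an illustrative master key `sk-1234`; the model name `gpt-4` comes from the example config above.

```python
import openai

# Point the OpenAI client at the LiteLLM proxy (illustrative key and URL)
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-4",  # model_name from the proxy config above
    messages=[{"role": "user", "content": "ping - checking Arize logging"}],
)
print(response.choices[0].message.content)
```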

## Support & Talk to Founders

- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

@@ -1,6 +1,6 @@
import Image from '@theme/IdealImage';

# Langsmith - Logging LLM Input/Output
# 🦜 Langsmith - Logging LLM Input/Output

:::tip

@@ -48,6 +48,20 @@ A number of these headers could be useful for troubleshooting, but the
`x-litellm-call-id` is the one that is most useful for tracking a request across
components in your system, including in logging tools.

## Redacting UserAPIKeyInfo

Redact information about the user API key (hashed token, user_id, team_id, etc.) from logs.

Currently supported for Langfuse, OpenTelemetry, Logfire, and ArizeAI logging.

```yaml
litellm_settings:
  callbacks: ["langfuse"]
  redact_user_api_key_info: true
```

Removes any field with `user_api_key_*` from metadata.
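
In practice the redaction is a simple key filter on the logging metadata. A minimal sketch of the behavior (the example metadata values are illustrative, not taken from this PR):

```python
# Hypothetical metadata attached to a request before it is sent to the logger
metadata = {
    "user_api_key_hash": "88dc...",
    "user_api_key_user_id": "u-123",
    "user_api_key_team_id": "t-456",
    "requester_ip_address": "10.0.0.1",
}

# With redact_user_api_key_info enabled, every user_api_key_* field is dropped
redacted = {
    k: v
    for k, v in metadata.items()
    if not (isinstance(k, str) and k.startswith("user_api_key"))
}
print(redacted)  # {'requester_ip_address': '10.0.0.1'}
```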

## Logging Proxy Input/Output - Langfuse

We will use `--config` to set `litellm.success_callback = ["langfuse"]`. This will log all successful LLM calls to Langfuse. Make sure to set `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` in your environment.

@@ -202,6 +216,9 @@ print(response)

### Team based Logging to Langfuse

[👉 Tutorial - Allow each team to use their own Langfuse Project / custom callbacks](team_logging)
<!--

**Example:**

This config would send langfuse logs to 2 different langfuse projects, based on the team id

@@ -228,7 +245,7 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
-d '{"team_id": "ishaans-secret-project"}'
```

All requests made with these keys will log data to their team-specific logging.
All requests made with these keys will log data to their team-specific logging. -->

### Redacting Messages, Response Content from Langfuse Logging

@@ -1106,6 +1123,52 @@ environment_variables:
```

2. Start Proxy

```
litellm --config /path/to/config.yaml
```

3. Test it!

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "fake-openai-endpoint",
    "messages": [
        {
            "role": "user",
            "content": "Hello, Claude gm!"
        }
    ]
}'
```

Expect to see your log on Langfuse

<Image img={require('../../img/langsmith_new.png')} />

## Logging LLM IO to Arize AI

1. Set `success_callback: ["arize"]` in your litellm config.yaml

```yaml
model_list:
  - model_name: gpt-4
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  callbacks: ["arize"]

environment_variables:
  ARIZE_SPACE_KEY: "d0*****"
  ARIZE_API_KEY: "141a****"
```

2. Start Proxy

```

@@ -71,7 +71,13 @@ curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
}'
```

## Team Based Logging

[👉 Tutorial - Allow each team to use their own Langfuse Project / custom callbacks](team_logging.md)

<!--
## Logging / Caching

Turn on/off logging and caching for a specific team id.

@@ -102,4 +108,4 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
-d '{"team_id": "ishaans-secret-project"}'
```

All requests made with these keys will log data to their team-specific logging.
All requests made with these keys will log data to their team-specific logging. -->

docs/my-website/docs/proxy/team_logging.md (new file, 84 additions)
@@ -0,0 +1,84 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# 👥📊 Team Based Logging

Allow each team to use their own Langfuse Project / custom callbacks

**This allows you to do the following:**
```
Team 1 -> Logs to Langfuse Project 1
Team 2 -> Logs to Langfuse Project 2
Team 3 -> Logs to Langsmith
```

## Quick Start

## 1. Set callback for team

```shell
curl -X POST 'http://localhost:4000/team/dbe2f686-a686-4896-864a-4c3924458709/callback' \
  -H 'Content-Type: application/json' \
  -H 'Authorization: Bearer sk-1234' \
  -d '{
    "callback_name": "langfuse",
    "callback_type": "success",
    "callback_vars": {
      "langfuse_public_key": "pk",
      "langfuse_secret_key": "sk_",
      "langfuse_host": "https://cloud.langfuse.com"
    }
  }'
```

#### Supported Values

| Field | Supported Values | Notes |
|-------|------------------|-------|
| `callback_name` | `"langfuse"` | Currently only supports "langfuse" |
| `callback_type` | `"success"`, `"failure"`, `"success_and_failure"` | |
| `callback_vars` | | dict of callback settings |
| `langfuse_public_key` | string | Required |
| `langfuse_secret_key` | string | Required |
| `langfuse_host` | string | Optional (defaults to https://cloud.langfuse.com) |

## 2. Create key for team

All keys created for team `dbe2f686-a686-4896-864a-4c3924458709` will log to the Langfuse project specified in [Step 1. Set callback for team](#1-set-callback-for-team)

```shell
curl --location 'http://0.0.0.0:4000/key/generate' \
  --header 'Authorization: Bearer sk-1234' \
  --header 'Content-Type: application/json' \
  --data '{
    "team_id": "dbe2f686-a686-4896-864a-4c3924458709"
  }'
```

## 3. Make `/chat/completions` request for team

```shell
curl -i http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-KbUuE0WNptC0jXapyMmLBA" \
  -d '{
    "model": "gpt-4",
    "messages": [
      {"role": "user", "content": "Hello, Claude gm!"}
    ]
  }'
```

Expect this to be logged to the Langfuse project specified in [Step 1. Set callback for team](#1-set-callback-for-team)

## Team Logging Endpoints

- [`POST /team/{team_id}/callback` - Add a success/failure callback to a team](https://litellm-api.up.railway.app/#/team%20management/add_team_callbacks_team__team_id__callback_post)
- [`GET /team/{team_id}/callback` - Get the success/failure callbacks and variables for a team](https://litellm-api.up.railway.app/#/team%20management/get_team_callbacks_team__team_id__callback_get)
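
For teams that prefer scripting this over raw curl, the same endpoints can be called from Python. A minimal sketch using `requests`, assuming the proxy runs on `http://localhost:4000` with the illustrative master key `sk-1234` and the team id from the examples above:

```python
import requests

base_url = "http://localhost:4000"
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
team_id = "dbe2f686-a686-4896-864a-4c3924458709"

# Read back the callbacks configured for the team
resp = requests.get(f"{base_url}/team/{team_id}/callback", headers=headers)
resp.raise_for_status()
# Expected shape (per the endpoint docs): {"status": "success", "data": {"team_id": ..., "success_callbacks": [...], ...}}
print(resp.json())
```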

@@ -44,19 +44,20 @@ const sidebars = {
"proxy/cost_tracking",
"proxy/self_serve",
"proxy/virtual_keys",
"proxy/tag_routing",
"proxy/users",
"proxy/team_budgets",
"proxy/customers",
"proxy/billing",
"proxy/guardrails",
"proxy/token_auth",
"proxy/alerting",
{
  type: "category",
  label: "🪢 Logging",
  items: ["proxy/logging", "proxy/streaming_logging"],
},
"proxy/team_logging",
"proxy/guardrails",
"proxy/tag_routing",
"proxy/users",
"proxy/team_budgets",
"proxy/customers",
"proxy/billing",
"proxy/token_auth",
"proxy/alerting",
"proxy/ui",
"proxy/prometheus",
"proxy/pass_through",

@@ -192,6 +193,8 @@ const sidebars = {
items: [
  "observability/langfuse_integration",
  "observability/logfire_integration",
  "observability/langsmith_integration",
  "observability/arize_integration",
  "debugging/local_debugging",
  "observability/raw_request_response",
  "observability/custom_callback",

@@ -203,7 +206,6 @@ const sidebars = {
"observability/openmeter",
"observability/promptlayer_integration",
"observability/wandb_integration",
"observability/langsmith_integration",
"observability/slack_integration",
"observability/traceloop_integration",
"observability/athina_integration",
@@ -4,7 +4,7 @@ import warnings
warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*")
### INIT VARIABLES ###
import threading, requests, os
from typing import Callable, List, Optional, Dict, Union, Any, Literal
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.caching import Cache
from litellm._logging import (

@@ -45,7 +45,11 @@ _custom_logger_compatible_callbacks_literal = Literal[
    "langsmith",
    "galileo",
    "braintrust",
    "arize",
]
_known_custom_logger_compatible_callbacks: List = list(
    get_args(_custom_logger_compatible_callbacks_literal)
)
callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = []
_langfuse_default_tags: Optional[
    List[
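
The newly imported `get_args` is what turns the `Literal` above into a plain runtime list. A standalone illustration of that pattern (only the values visible in this hunk are included; the real literal has more entries):

```python
from typing import List, Literal, get_args

_custom_logger_compatible_callbacks_literal = Literal[
    "langsmith", "galileo", "braintrust", "arize"
]
# list(get_args(...)) extracts the Literal's allowed values at runtime
_known_custom_logger_compatible_callbacks: List = list(
    get_args(_custom_logger_compatible_callbacks_literal)
)
print(_known_custom_logger_compatible_callbacks)
# ['langsmith', 'galileo', 'braintrust', 'arize']
```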

@@ -73,6 +77,7 @@ post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
log_raw_request_response: bool = False
redact_messages_in_exceptions: Optional[bool] = False
redact_user_api_key_info: Optional[bool] = False
store_audit_logs = False  # Enterprise feature, allow users to see audit logs
## end of callbacks #############

litellm/integrations/_types/open_inference.py (new file, 286 additions)
@@ -0,0 +1,286 @@
|
|||
from enum import Enum
|
||||
|
||||
|
||||
class SpanAttributes:
|
||||
OUTPUT_VALUE = "output.value"
|
||||
OUTPUT_MIME_TYPE = "output.mime_type"
|
||||
"""
|
||||
The type of output.value. If unspecified, the type is plain text by default.
|
||||
If type is JSON, the value is a string representing a JSON object.
|
||||
"""
|
||||
INPUT_VALUE = "input.value"
|
||||
INPUT_MIME_TYPE = "input.mime_type"
|
||||
"""
|
||||
The type of input.value. If unspecified, the type is plain text by default.
|
||||
If type is JSON, the value is a string representing a JSON object.
|
||||
"""
|
||||
|
||||
EMBEDDING_EMBEDDINGS = "embedding.embeddings"
|
||||
"""
|
||||
A list of objects containing embedding data, including the vector and represented piece of text.
|
||||
"""
|
||||
EMBEDDING_MODEL_NAME = "embedding.model_name"
|
||||
"""
|
||||
The name of the embedding model.
|
||||
"""
|
||||
|
||||
LLM_FUNCTION_CALL = "llm.function_call"
|
||||
"""
|
||||
For models and APIs that support function calling. Records attributes such as the function
|
||||
name and arguments to the called function.
|
||||
"""
|
||||
LLM_INVOCATION_PARAMETERS = "llm.invocation_parameters"
|
||||
"""
|
||||
Invocation parameters passed to the LLM or API, such as the model name, temperature, etc.
|
||||
"""
|
||||
LLM_INPUT_MESSAGES = "llm.input_messages"
|
||||
"""
|
||||
Messages provided to a chat API.
|
||||
"""
|
||||
LLM_OUTPUT_MESSAGES = "llm.output_messages"
|
||||
"""
|
||||
Messages received from a chat API.
|
||||
"""
|
||||
LLM_MODEL_NAME = "llm.model_name"
|
||||
"""
|
||||
The name of the model being used.
|
||||
"""
|
||||
LLM_PROMPTS = "llm.prompts"
|
||||
"""
|
||||
Prompts provided to a completions API.
|
||||
"""
|
||||
LLM_PROMPT_TEMPLATE = "llm.prompt_template.template"
|
||||
"""
|
||||
The prompt template as a Python f-string.
|
||||
"""
|
||||
LLM_PROMPT_TEMPLATE_VARIABLES = "llm.prompt_template.variables"
|
||||
"""
|
||||
A list of input variables to the prompt template.
|
||||
"""
|
||||
LLM_PROMPT_TEMPLATE_VERSION = "llm.prompt_template.version"
|
||||
"""
|
||||
The version of the prompt template being used.
|
||||
"""
|
||||
LLM_TOKEN_COUNT_PROMPT = "llm.token_count.prompt"
|
||||
"""
|
||||
Number of tokens in the prompt.
|
||||
"""
|
||||
LLM_TOKEN_COUNT_COMPLETION = "llm.token_count.completion"
|
||||
"""
|
||||
Number of tokens in the completion.
|
||||
"""
|
||||
LLM_TOKEN_COUNT_TOTAL = "llm.token_count.total"
|
||||
"""
|
||||
Total number of tokens, including both prompt and completion.
|
||||
"""
|
||||
|
||||
TOOL_NAME = "tool.name"
|
||||
"""
|
||||
Name of the tool being used.
|
||||
"""
|
||||
TOOL_DESCRIPTION = "tool.description"
|
||||
"""
|
||||
Description of the tool's purpose, typically used to select the tool.
|
||||
"""
|
||||
TOOL_PARAMETERS = "tool.parameters"
|
||||
"""
|
||||
Parameters of the tool represented as a dictionary JSON string, e.g.
|
||||
see https://platform.openai.com/docs/guides/gpt/function-calling
|
||||
"""
|
||||
|
||||
RETRIEVAL_DOCUMENTS = "retrieval.documents"
|
||||
|
||||
METADATA = "metadata"
|
||||
"""
|
||||
Metadata attributes are used to store user-defined key-value pairs.
|
||||
For example, LangChain uses metadata to store user-defined attributes for a chain.
|
||||
"""
|
||||
|
||||
TAG_TAGS = "tag.tags"
|
||||
"""
|
||||
Custom categorical tags for the span.
|
||||
"""
|
||||
|
||||
OPENINFERENCE_SPAN_KIND = "openinference.span.kind"
|
||||
|
||||
SESSION_ID = "session.id"
|
||||
"""
|
||||
The id of the session
|
||||
"""
|
||||
USER_ID = "user.id"
|
||||
"""
|
||||
The id of the user
|
||||
"""
|
||||
|
||||
|
||||
class MessageAttributes:
|
||||
"""
|
||||
Attributes for a message sent to or from an LLM
|
||||
"""
|
||||
|
||||
MESSAGE_ROLE = "message.role"
|
||||
"""
|
||||
The role of the message, such as "user", "agent", "function".
|
||||
"""
|
||||
MESSAGE_CONTENT = "message.content"
|
||||
"""
|
||||
The content of the message to or from the llm, must be a string.
|
||||
"""
|
||||
MESSAGE_CONTENTS = "message.contents"
|
||||
"""
|
||||
The message contents to the llm, it is an array of
|
||||
`message_content` prefixed attributes.
|
||||
"""
|
||||
MESSAGE_NAME = "message.name"
|
||||
"""
|
||||
The name of the message, often used to identify the function
|
||||
that was used to generate the message.
|
||||
"""
|
||||
MESSAGE_TOOL_CALLS = "message.tool_calls"
|
||||
"""
|
||||
The tool calls generated by the model, such as function calls.
|
||||
"""
|
||||
MESSAGE_FUNCTION_CALL_NAME = "message.function_call_name"
|
||||
"""
|
||||
The function name that is a part of the message list.
|
||||
This is populated for role 'function' or 'agent' as a mechanism to identify
|
||||
the function that was called during the execution of a tool.
|
||||
"""
|
||||
MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = "message.function_call_arguments_json"
|
||||
"""
|
||||
The JSON string representing the arguments passed to the function
|
||||
during a function call.
|
||||
"""
|
||||
|
||||
|
||||
class MessageContentAttributes:
|
||||
"""
|
||||
Attributes for the contents of user messages sent to an LLM.
|
||||
"""
|
||||
|
||||
MESSAGE_CONTENT_TYPE = "message_content.type"
|
||||
"""
|
||||
The type of the content, such as "text" or "image".
|
||||
"""
|
||||
MESSAGE_CONTENT_TEXT = "message_content.text"
|
||||
"""
|
||||
The text content of the message, if the type is "text".
|
||||
"""
|
||||
MESSAGE_CONTENT_IMAGE = "message_content.image"
|
||||
"""
|
||||
The image content of the message, if the type is "image".
|
||||
An image can be made available to the model by passing a link to
|
||||
the image or by passing the base64 encoded image directly in the
|
||||
request.
|
||||
"""
|
||||
|
||||
|
||||
class ImageAttributes:
|
||||
"""
|
||||
Attributes for images
|
||||
"""
|
||||
|
||||
IMAGE_URL = "image.url"
|
||||
"""
|
||||
An http or base64 image url
|
||||
"""
|
||||
|
||||
|
||||
class DocumentAttributes:
|
||||
"""
|
||||
Attributes for a document.
|
||||
"""
|
||||
|
||||
DOCUMENT_ID = "document.id"
|
||||
"""
|
||||
The id of the document.
|
||||
"""
|
||||
DOCUMENT_SCORE = "document.score"
|
||||
"""
|
||||
The score of the document
|
||||
"""
|
||||
DOCUMENT_CONTENT = "document.content"
|
||||
"""
|
||||
The content of the document.
|
||||
"""
|
||||
DOCUMENT_METADATA = "document.metadata"
|
||||
"""
|
||||
The metadata of the document represented as a dictionary
|
||||
JSON string, e.g. `"{ 'title': 'foo' }"`
|
||||
"""
|
||||
|
||||
|
||||
class RerankerAttributes:
|
||||
"""
|
||||
Attributes for a reranker
|
||||
"""
|
||||
|
||||
RERANKER_INPUT_DOCUMENTS = "reranker.input_documents"
|
||||
"""
|
||||
List of documents as input to the reranker
|
||||
"""
|
||||
RERANKER_OUTPUT_DOCUMENTS = "reranker.output_documents"
|
||||
"""
|
||||
List of documents as output from the reranker
|
||||
"""
|
||||
RERANKER_QUERY = "reranker.query"
|
||||
"""
|
||||
Query string for the reranker
|
||||
"""
|
||||
RERANKER_MODEL_NAME = "reranker.model_name"
|
||||
"""
|
||||
Model name of the reranker
|
||||
"""
|
||||
RERANKER_TOP_K = "reranker.top_k"
|
||||
"""
|
||||
Top K parameter of the reranker
|
||||
"""
|
||||
|
||||
|
||||
class EmbeddingAttributes:
|
||||
"""
|
||||
Attributes for an embedding
|
||||
"""
|
||||
|
||||
EMBEDDING_TEXT = "embedding.text"
|
||||
"""
|
||||
The text represented by the embedding.
|
||||
"""
|
||||
EMBEDDING_VECTOR = "embedding.vector"
|
||||
"""
|
||||
The embedding vector.
|
||||
"""
|
||||
|
||||
|
||||
class ToolCallAttributes:
|
||||
"""
|
||||
Attributes for a tool call
|
||||
"""
|
||||
|
||||
TOOL_CALL_FUNCTION_NAME = "tool_call.function.name"
|
||||
"""
|
||||
The name of the function that is being called during a tool call.
|
||||
"""
|
||||
TOOL_CALL_FUNCTION_ARGUMENTS_JSON = "tool_call.function.arguments"
|
||||
"""
|
||||
The JSON string representing the arguments passed to the function
|
||||
during a tool call.
|
||||
"""
|
||||
|
||||
|
||||
class OpenInferenceSpanKindValues(Enum):
|
||||
TOOL = "TOOL"
|
||||
CHAIN = "CHAIN"
|
||||
LLM = "LLM"
|
||||
RETRIEVER = "RETRIEVER"
|
||||
EMBEDDING = "EMBEDDING"
|
||||
AGENT = "AGENT"
|
||||
RERANKER = "RERANKER"
|
||||
UNKNOWN = "UNKNOWN"
|
||||
GUARDRAIL = "GUARDRAIL"
|
||||
EVALUATOR = "EVALUATOR"
|
||||
|
||||
|
||||
class OpenInferenceMimeTypeValues(Enum):
|
||||
TEXT = "text/plain"
|
||||
JSON = "application/json"
|
litellm/integrations/arize_ai.py (new file, 114 additions)
@@ -0,0 +1,114 @@
|
|||
"""
|
||||
arize AI is OTEL compatible
|
||||
|
||||
this file has Arize ai specific helper functions
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from opentelemetry.trace import Span as _Span
|
||||
|
||||
Span = _Span
|
||||
else:
|
||||
Span = Any
|
||||
|
||||
|
||||
def set_arize_ai_attributes(span: Span, kwargs, response_obj):
|
||||
from litellm.integrations._types.open_inference import (
|
||||
MessageAttributes,
|
||||
MessageContentAttributes,
|
||||
OpenInferenceSpanKindValues,
|
||||
SpanAttributes,
|
||||
)
|
||||
|
||||
optional_params = kwargs.get("optional_params", {})
|
||||
litellm_params = kwargs.get("litellm_params", {}) or {}
|
||||
|
||||
#############################################
|
||||
############ LLM CALL METADATA ##############
|
||||
#############################################
|
||||
# commented out for now - looks like Arize AI could not log this
|
||||
# metadata = litellm_params.get("metadata", {}) or {}
|
||||
# span.set_attribute(SpanAttributes.METADATA, str(metadata))
|
||||
|
||||
#############################################
|
||||
########## LLM Request Attributes ###########
|
||||
#############################################
|
||||
|
||||
# The name of the LLM a request is being made to
|
||||
if kwargs.get("model"):
|
||||
span.set_attribute(SpanAttributes.LLM_MODEL_NAME, kwargs.get("model"))
|
||||
|
||||
span.set_attribute(
|
||||
SpanAttributes.OPENINFERENCE_SPAN_KIND, OpenInferenceSpanKindValues.LLM.value
|
||||
)
|
||||
messages = kwargs.get("messages")
|
||||
|
||||
# for /chat/completions
|
||||
# https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
|
||||
if messages:
|
||||
span.set_attribute(
|
||||
SpanAttributes.INPUT_VALUE,
|
||||
messages[-1].get("content", ""), # get the last message for input
|
||||
)
|
||||
|
||||
# LLM_INPUT_MESSAGES shows up under `input_messages` tab on the span page
|
||||
for idx, msg in enumerate(messages):
|
||||
# Set the role per message
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}.{MessageAttributes.MESSAGE_ROLE}",
|
||||
msg["role"],
|
||||
)
|
||||
# Set the content per message
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}.{MessageAttributes.MESSAGE_CONTENT}",
|
||||
msg.get("content", ""),
|
||||
)
|
||||
|
||||
# Invocation parameters (temperature, max_tokens, etc.) passed on this request
|
||||
span.set_attribute(SpanAttributes.LLM_INVOCATION_PARAMETERS, str(optional_params))
|
||||
|
||||
if optional_params.get("user"):
|
||||
span.set_attribute(SpanAttributes.USER_ID, optional_params.get("user"))
|
||||
|
||||
#############################################
|
||||
########## LLM Response Attributes ##########
|
||||
# https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
|
||||
#############################################
|
||||
for choice in response_obj.get("choices"):
|
||||
response_message = choice.get("message", {})
|
||||
span.set_attribute(
|
||||
SpanAttributes.OUTPUT_VALUE, response_message.get("content", "")
|
||||
)
|
||||
|
||||
# This shows up under `output_messages` tab on the span page
|
||||
# This code assumes a single response
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0.{MessageAttributes.MESSAGE_ROLE}",
|
||||
response_message["role"],
|
||||
)
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0.{MessageAttributes.MESSAGE_CONTENT}",
|
||||
response_message.get("content", ""),
|
||||
)
|
||||
|
||||
usage = response_obj.get("usage")
|
||||
if usage:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_TOKEN_COUNT_TOTAL,
|
||||
usage.get("total_tokens"),
|
||||
)
|
||||
|
||||
# The number of tokens used in the LLM response (completion).
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_TOKEN_COUNT_COMPLETION,
|
||||
usage.get("completion_tokens"),
|
||||
)
|
||||
|
||||
# The number of tokens used in the LLM prompt.
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_TOKEN_COUNT_PROMPT,
|
||||
usage.get("prompt_tokens"),
|
||||
)
|
||||
pass
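
To see what this helper actually writes onto a span, here is a standalone sketch with a fake span object and an illustrative payload; it assumes litellm (with this PR applied) is installed, and the sample kwargs/response are not taken from the PR.

```python
from litellm.integrations.arize_ai import set_arize_ai_attributes

class FakeSpan:
    """Minimal stand-in for an OTEL span that just records attributes."""
    def __init__(self):
        self.attributes = {}
    def set_attribute(self, key, value):
        self.attributes[key] = value

kwargs = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hi"}],
    "optional_params": {"temperature": 0.1, "user": "u-123"},
    "litellm_params": {},
}
response_obj = {
    "choices": [{"message": {"role": "assistant", "content": "Hello!"}}],
    "usage": {"total_tokens": 12, "completion_tokens": 4, "prompt_tokens": 8},
}

span = FakeSpan()
set_arize_ai_attributes(span, kwargs, response_obj)
print(span.attributes["llm.model_name"])  # gpt-3.5-turbo
```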
|
|
@ -8,6 +8,7 @@ from packaging.version import Version
|
|||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.litellm_core_utils.redact_messages import redact_user_api_key_info
|
||||
|
||||
|
||||
class LangFuseLogger:
|
||||
|
@ -382,6 +383,8 @@ class LangFuseLogger:
|
|||
mask_input = clean_metadata.pop("mask_input", False)
|
||||
mask_output = clean_metadata.pop("mask_output", False)
|
||||
|
||||
clean_metadata = redact_user_api_key_info(metadata=clean_metadata)
|
||||
|
||||
if trace_name is None and existing_trace_id is None:
|
||||
# just log `litellm-{call_type}` as the trace name
|
||||
## DO NOT SET TRACE_NAME if trace-id set. this can lead to overwriting of past traces.
|
||||
|
|
|
@ -1,17 +1,21 @@
|
|||
#### What this does ####
|
||||
# On success + failure, log events to Logfire
|
||||
|
||||
import dotenv, os
|
||||
import os
|
||||
|
||||
import dotenv
|
||||
|
||||
dotenv.load_dotenv() # Loading env variables using dotenv
|
||||
import traceback
|
||||
import uuid
|
||||
from litellm._logging import print_verbose, verbose_logger
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, NamedTuple
|
||||
|
||||
from typing_extensions import LiteralString
|
||||
|
||||
from litellm._logging import print_verbose, verbose_logger
|
||||
from litellm.litellm_core_utils.redact_messages import redact_user_api_key_info
|
||||
|
||||
|
||||
class SpanConfig(NamedTuple):
|
||||
message_template: LiteralString
|
||||
|
@ -135,6 +139,8 @@ class LogfireLogger:
|
|||
else:
|
||||
clean_metadata[key] = value
|
||||
|
||||
clean_metadata = redact_user_api_key_info(metadata=clean_metadata)
|
||||
|
||||
# Build the initial payload
|
||||
payload = {
|
||||
"id": id,
|
||||
|
|
|
@ -2,11 +2,12 @@ import os
|
|||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from functools import wraps
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional, Union
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.litellm_core_utils.redact_messages import redact_user_api_key_info
|
||||
from litellm.types.services import ServiceLoggerPayload
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -27,9 +28,10 @@ else:
|
|||
|
||||
|
||||
LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")
|
||||
LITELLM_RESOURCE = {
|
||||
LITELLM_RESOURCE: Dict[Any, Any] = {
|
||||
"service.name": os.getenv("OTEL_SERVICE_NAME", "litellm"),
|
||||
"deployment.environment": os.getenv("OTEL_ENVIRONMENT_NAME", "production"),
|
||||
"model_id": os.getenv("OTEL_SERVICE_NAME", "litellm"),
|
||||
}
|
||||
RAW_REQUEST_SPAN_NAME = "raw_gen_ai_request"
|
||||
LITELLM_REQUEST_SPAN_NAME = "litellm_request"
|
||||
|
@ -68,7 +70,9 @@ class OpenTelemetryConfig:
|
|||
|
||||
|
||||
class OpenTelemetry(CustomLogger):
|
||||
def __init__(self, config=OpenTelemetryConfig.from_env()):
|
||||
def __init__(
|
||||
self, config=OpenTelemetryConfig.from_env(), callback_name: Optional[str] = None
|
||||
):
|
||||
from opentelemetry import trace
|
||||
from opentelemetry.sdk.resources import Resource
|
||||
from opentelemetry.sdk.trace import TracerProvider
|
||||
|
@ -79,6 +83,7 @@ class OpenTelemetry(CustomLogger):
|
|||
self.OTEL_HEADERS = self.config.headers
|
||||
provider = TracerProvider(resource=Resource(attributes=LITELLM_RESOURCE))
|
||||
provider.add_span_processor(self._get_span_processor())
|
||||
self.callback_name = callback_name
|
||||
|
||||
trace.set_tracer_provider(provider)
|
||||
self.tracer = trace.get_tracer(LITELLM_TRACER_NAME)
|
||||
|
@ -120,8 +125,8 @@ class OpenTelemetry(CustomLogger):
|
|||
from opentelemetry import trace
|
||||
from opentelemetry.trace import Status, StatusCode
|
||||
|
||||
_start_time_ns = start_time
|
||||
_end_time_ns = end_time
|
||||
_start_time_ns = 0
|
||||
_end_time_ns = 0
|
||||
|
||||
if isinstance(start_time, float):
|
||||
_start_time_ns = int(int(start_time) * 1e9)
|
||||
|
@ -159,8 +164,8 @@ class OpenTelemetry(CustomLogger):
|
|||
from opentelemetry import trace
|
||||
from opentelemetry.trace import Status, StatusCode
|
||||
|
||||
_start_time_ns = start_time
|
||||
_end_time_ns = end_time
|
||||
_start_time_ns = 0
|
||||
_end_time_ns = 0
|
||||
|
||||
if isinstance(start_time, float):
|
||||
_start_time_ns = int(int(start_time) * 1e9)
|
||||
|
@ -294,6 +299,11 @@ class OpenTelemetry(CustomLogger):
|
|||
return isinstance(value, (str, bool, int, float))
|
||||
|
||||
def set_attributes(self, span: Span, kwargs, response_obj):
|
||||
if self.callback_name == "arize":
|
||||
from litellm.integrations.arize_ai import set_arize_ai_attributes
|
||||
|
||||
set_arize_ai_attributes(span, kwargs, response_obj)
|
||||
return
|
||||
from litellm.proxy._types import SpanAttributes
|
||||
|
||||
optional_params = kwargs.get("optional_params", {})
|
||||
|
@ -306,7 +316,9 @@ class OpenTelemetry(CustomLogger):
|
|||
#############################################
|
||||
metadata = litellm_params.get("metadata", {}) or {}
|
||||
|
||||
for key, value in metadata.items():
|
||||
clean_metadata = redact_user_api_key_info(metadata=metadata)
|
||||
|
||||
for key, value in clean_metadata.items():
|
||||
if self.is_primitive(value):
|
||||
span.set_attribute("metadata.{}".format(key), value)
|
||||
|
||||
|
@ -612,8 +624,8 @@ class OpenTelemetry(CustomLogger):
|
|||
from opentelemetry import trace
|
||||
from opentelemetry.trace import Status, StatusCode
|
||||
|
||||
_start_time_ns = logging_payload.start_time
|
||||
_end_time_ns = logging_payload.end_time
|
||||
_start_time_ns = 0
|
||||
_end_time_ns = 0
|
||||
|
||||
start_time = logging_payload.start_time
|
||||
end_time = logging_payload.end_time
|
||||
|
@ -658,8 +670,8 @@ class OpenTelemetry(CustomLogger):
|
|||
from opentelemetry import trace
|
||||
from opentelemetry.trace import Status, StatusCode
|
||||
|
||||
_start_time_ns = logging_payload.start_time
|
||||
_end_time_ns = logging_payload.end_time
|
||||
_start_time_ns = 0
|
||||
_end_time_ns = 0
|
||||
|
||||
start_time = logging_payload.start_time
|
||||
end_time = logging_payload.end_time
|
||||
|
|
|
@ -1962,6 +1962,43 @@ def _init_custom_logger_compatible_class(
|
|||
_langsmith_logger = LangsmithLogger()
|
||||
_in_memory_loggers.append(_langsmith_logger)
|
||||
return _langsmith_logger # type: ignore
|
||||
elif logging_integration == "arize":
|
||||
if "ARIZE_SPACE_KEY" not in os.environ:
|
||||
raise ValueError("ARIZE_SPACE_KEY not found in environment variables")
|
||||
if "ARIZE_API_KEY" not in os.environ:
|
||||
raise ValueError("ARIZE_API_KEY not found in environment variables")
|
||||
from litellm.integrations.opentelemetry import (
|
||||
OpenTelemetry,
|
||||
OpenTelemetryConfig,
|
||||
)
|
||||
|
||||
otel_config = OpenTelemetryConfig(
|
||||
exporter="otlp_grpc",
|
||||
endpoint="https://otlp.arize.com/v1",
|
||||
)
|
||||
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
|
||||
f"space_key={os.getenv('ARIZE_SPACE_KEY')},api_key={os.getenv('ARIZE_API_KEY')}"
|
||||
)
|
||||
for callback in _in_memory_loggers:
|
||||
if (
|
||||
isinstance(callback, OpenTelemetry)
|
||||
and callback.callback_name == "arize"
|
||||
):
|
||||
return callback # type: ignore
|
||||
_otel_logger = OpenTelemetry(config=otel_config, callback_name="arize")
|
||||
_in_memory_loggers.append(_otel_logger)
|
||||
return _otel_logger # type: ignore
|
||||
|
||||
elif logging_integration == "otel":
|
||||
from litellm.integrations.opentelemetry import OpenTelemetry
|
||||
|
||||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, OpenTelemetry):
|
||||
return callback # type: ignore
|
||||
|
||||
otel_logger = OpenTelemetry()
|
||||
_in_memory_loggers.append(otel_logger)
|
||||
return otel_logger # type: ignore
|
||||
|
||||
elif logging_integration == "galileo":
|
||||
for callback in _in_memory_loggers:
|
||||
|
@ -2039,6 +2076,25 @@ def get_custom_logger_compatible_class(
|
|||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, LangsmithLogger):
|
||||
return callback
|
||||
elif logging_integration == "otel":
|
||||
from litellm.integrations.opentelemetry import OpenTelemetry
|
||||
|
||||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, OpenTelemetry):
|
||||
return callback
|
||||
elif logging_integration == "arize":
|
||||
from litellm.integrations.opentelemetry import OpenTelemetry
|
||||
|
||||
if "ARIZE_SPACE_KEY" not in os.environ:
|
||||
raise ValueError("ARIZE_SPACE_KEY not found in environment variables")
|
||||
if "ARIZE_API_KEY" not in os.environ:
|
||||
raise ValueError("ARIZE_API_KEY not found in environment variables")
|
||||
for callback in _in_memory_loggers:
|
||||
if (
|
||||
isinstance(callback, OpenTelemetry)
|
||||
and callback.callback_name == "arize"
|
||||
):
|
||||
return callback
|
||||
elif logging_integration == "logfire":
|
||||
if "LOGFIRE_TOKEN" not in os.environ:
|
||||
raise ValueError("LOGFIRE_TOKEN not found in environment variables")
|
||||
|
|
|
@ -87,3 +87,33 @@ def redact_message_input_output_from_logging(
|
|||
|
||||
# by default return result
|
||||
return result
|
||||
|
||||
|
||||
def redact_user_api_key_info(metadata: dict) -> dict:
|
||||
"""
|
||||
removes any user_api_key_info before passing to logging object, if flag set
|
||||
|
||||
Usage:
|
||||
|
||||
SDK
|
||||
```python
|
||||
litellm.redact_user_api_key_info = True
|
||||
```
|
||||
|
||||
PROXY:
|
||||
```yaml
|
||||
litellm_settings:
|
||||
redact_user_api_key_info: true
|
||||
```
|
||||
"""
|
||||
if litellm.redact_user_api_key_info is not True:
|
||||
return metadata
|
||||
|
||||
new_metadata = {}
|
||||
for k, v in metadata.items():
|
||||
if isinstance(k, str) and k.startswith("user_api_key"):
|
||||
pass
|
||||
else:
|
||||
new_metadata[k] = v
|
||||
|
||||
return new_metadata
|
||||
|
|
|
@ -968,7 +968,7 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
except openai.UnprocessableEntityError as e:
|
||||
## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
|
||||
if litellm.drop_params is True or drop_params is True:
|
||||
if e.body is not None and e.body.get("detail"): # type: ignore
|
||||
if e.body is not None and isinstance(e.body, dict) and e.body.get("detail"): # type: ignore
|
||||
detail = e.body.get("detail") # type: ignore
|
||||
invalid_params: List[str] = []
|
||||
if (
|
||||
|
@ -1100,7 +1100,7 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
except openai.UnprocessableEntityError as e:
|
||||
## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
|
||||
if litellm.drop_params is True or drop_params is True:
|
||||
if e.body is not None and e.body.get("detail"): # type: ignore
|
||||
if e.body is not None and isinstance(e.body, dict) and e.body.get("detail"): # type: ignore
|
||||
detail = e.body.get("detail") # type: ignore
|
||||
invalid_params: List[str] = []
|
||||
if (
|
||||
|
@ -1231,7 +1231,7 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
except openai.UnprocessableEntityError as e:
|
||||
## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
|
||||
if litellm.drop_params is True or drop_params is True:
|
||||
if e.body is not None and e.body.get("detail"): # type: ignore
|
||||
if e.body is not None and isinstance(e.body, dict) and e.body.get("detail"): # type: ignore
|
||||
detail = e.body.get("detail") # type: ignore
|
||||
invalid_params: List[str] = []
|
||||
if (
|
||||
|
|
|
@ -1491,6 +1491,10 @@ def completion(
|
|||
or get_secret("ANTHROPIC_BASE_URL")
|
||||
or "https://api.anthropic.com/v1/complete"
|
||||
)
|
||||
|
||||
if api_base is not None and not api_base.endswith("/v1/complete"):
|
||||
api_base += "/v1/complete"
|
||||
|
||||
response = anthropic_text_completions.completion(
|
||||
model=model,
|
||||
messages=messages,
|
||||
|
@ -1517,6 +1521,10 @@ def completion(
|
|||
or get_secret("ANTHROPIC_BASE_URL")
|
||||
or "https://api.anthropic.com/v1/messages"
|
||||
)
|
||||
|
||||
if api_base is not None and not api_base.endswith("/v1/messages"):
|
||||
api_base += "/v1/messages"
|
||||
|
||||
response = anthropic_chat_completions.completion(
|
||||
model=model,
|
||||
messages=messages,
|
||||
|
|
|
@ -4,6 +4,6 @@ model_list:
|
|||
model: groq/llama3-groq-70b-8192-tool-use-preview
|
||||
api_key: os.environ/GROQ_API_KEY
|
||||
|
||||
|
||||
litellm_settings:
|
||||
callbacks: ["braintrust"]
|
||||
callbacks: ["logfire"]
|
||||
redact_user_api_key_info: true
|
||||
|
|
|
@ -228,6 +228,10 @@ class LiteLLMRoutes(enum.Enum):
|
|||
"/utils/token_counter",
|
||||
]
|
||||
|
||||
anthropic_routes: List = [
|
||||
"/v1/messages",
|
||||
]
|
||||
|
||||
info_routes: List = [
|
||||
"/key/info",
|
||||
"/team/info",
|
||||
|
@ -880,6 +884,26 @@ class BlockTeamRequest(LiteLLMBase):
|
|||
team_id: str # required
|
||||
|
||||
|
||||
class AddTeamCallback(LiteLLMBase):
|
||||
callback_name: str
|
||||
callback_type: Literal["success", "failure", "success_and_failure"]
|
||||
# for now - only supported for langfuse
|
||||
callback_vars: Dict[
|
||||
Literal["langfuse_public_key", "langfuse_secret_key", "langfuse_host"], str
|
||||
]
|
||||
|
||||
|
||||
class TeamCallbackMetadata(LiteLLMBase):
|
||||
success_callback: Optional[List[str]] = []
|
||||
failure_callback: Optional[List[str]] = []
|
||||
# for now - only supported for langfuse
|
||||
callback_vars: Optional[
|
||||
Dict[
|
||||
Literal["langfuse_public_key", "langfuse_secret_key", "langfuse_host"], str
|
||||
]
|
||||
] = {}
|
||||
|
||||
|
||||
class LiteLLM_TeamTable(TeamBase):
|
||||
spend: Optional[float] = None
|
||||
max_parallel_requests: Optional[int] = None
|
||||
|
@ -1232,6 +1256,7 @@ class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
|
|||
soft_budget: Optional[float] = None
|
||||
team_model_aliases: Optional[Dict] = None
|
||||
team_member_spend: Optional[float] = None
|
||||
team_metadata: Optional[Dict] = None
|
||||
|
||||
# End User Params
|
||||
end_user_id: Optional[str] = None
|
||||
|
@ -1677,3 +1702,5 @@ class ProxyErrorTypes(str, enum.Enum):
|
|||
budget_exceeded = "budget_exceeded"
|
||||
expired_key = "expired_key"
|
||||
auth_error = "auth_error"
|
||||
internal_server_error = "internal_server_error"
|
||||
bad_request_error = "bad_request_error"
|
||||
|
|
|
@ -24,7 +24,7 @@ from litellm.proxy._types import (
|
|||
LitellmUserRoles,
|
||||
UserAPIKeyAuth,
|
||||
)
|
||||
from litellm.proxy.auth.auth_utils import is_openai_route
|
||||
from litellm.proxy.auth.auth_utils import is_llm_api_route
|
||||
from litellm.proxy.utils import PrismaClient, ProxyLogging, log_to_opentelemetry
|
||||
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
|
||||
|
||||
|
@ -57,6 +57,7 @@ def common_checks(
|
|||
4. If end_user (either via JWT or 'user' passed to /chat/completions, /embeddings endpoint) is in budget
|
||||
5. [OPTIONAL] If 'enforce_end_user' enabled - did developer pass in 'user' param for openai endpoints
|
||||
6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
|
||||
7. [OPTIONAL] If guardrails modified - is request allowed to change this
|
||||
"""
|
||||
_model = request_body.get("model", None)
|
||||
if team_object is not None and team_object.blocked is True:
|
||||
|
@ -106,7 +107,7 @@ def common_checks(
|
|||
general_settings.get("enforce_user_param", None) is not None
|
||||
and general_settings["enforce_user_param"] == True
|
||||
):
|
||||
if is_openai_route(route=route) and "user" not in request_body:
|
||||
if is_llm_api_route(route=route) and "user" not in request_body:
|
||||
raise Exception(
|
||||
f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
|
||||
)
|
||||
|
@ -122,7 +123,7 @@ def common_checks(
|
|||
+ CommonProxyErrors.not_premium_user.value
|
||||
)
|
||||
|
||||
if is_openai_route(route=route):
|
||||
if is_llm_api_route(route=route):
|
||||
# loop through each enforced param
|
||||
# example enforced_params ['user', 'metadata', 'metadata.generation_name']
|
||||
for enforced_param in general_settings["enforced_params"]:
|
||||
|
@ -150,7 +151,7 @@ def common_checks(
|
|||
and global_proxy_spend is not None
|
||||
# only run global budget checks for OpenAI routes
|
||||
# Reason - the Admin UI should continue working if the proxy crosses its global budget
|
||||
and is_openai_route(route=route)
|
||||
and is_llm_api_route(route=route)
|
||||
and route != "/v1/models"
|
||||
and route != "/models"
|
||||
):
|
||||
|
@ -158,6 +159,22 @@ def common_checks(
|
|||
raise litellm.BudgetExceededError(
|
||||
current_cost=global_proxy_spend, max_budget=litellm.max_budget
|
||||
)
|
||||
|
||||
_request_metadata: dict = request_body.get("metadata", {}) or {}
|
||||
if _request_metadata.get("guardrails"):
|
||||
# check if team allowed to modify guardrails
|
||||
from litellm.proxy.guardrails.guardrail_helpers import can_modify_guardrails
|
||||
|
||||
can_modify: bool = can_modify_guardrails(team_object)
|
||||
if can_modify is False:
|
||||
from fastapi import HTTPException
|
||||
|
||||
raise HTTPException(
|
||||
status_code=403,
|
||||
detail={
|
||||
"error": "Your team does not have permission to modify guardrails."
|
||||
},
|
||||
)
|
||||
return True
|
||||
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ def route_in_additonal_public_routes(current_route: str):
|
|||
return False
|
||||
|
||||
|
||||
def is_openai_route(route: str) -> bool:
|
||||
def is_llm_api_route(route: str) -> bool:
|
||||
"""
|
||||
Helper to check if the provided route is an OpenAI route
|
||||
|
||||
|
@ -59,6 +59,9 @@ def is_openai_route(route: str) -> bool:
|
|||
if route in LiteLLMRoutes.openai_routes.value:
|
||||
return True
|
||||
|
||||
if route in LiteLLMRoutes.anthropic_routes.value:
|
||||
return True
|
||||
|
||||
# fuzzy match routes like "/v1/threads/thread_49EIN5QF32s4mH20M7GFKdlZ"
|
||||
# Check for routes with placeholders
|
||||
for openai_route in LiteLLMRoutes.openai_routes.value:
|
||||
|
|
|
@ -57,7 +57,7 @@ from litellm.proxy.auth.auth_checks import (
|
|||
log_to_opentelemetry,
|
||||
)
|
||||
from litellm.proxy.auth.auth_utils import (
|
||||
is_openai_route,
|
||||
is_llm_api_route,
|
||||
route_in_additonal_public_routes,
|
||||
)
|
||||
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
|
||||
|
@ -924,6 +924,7 @@ async def user_api_key_auth(
|
|||
rpm_limit=valid_token.team_rpm_limit,
|
||||
blocked=valid_token.team_blocked,
|
||||
models=valid_token.team_models,
|
||||
metadata=valid_token.team_metadata,
|
||||
)
|
||||
|
||||
user_api_key_cache.set_cache(
|
||||
|
@ -994,9 +995,9 @@ async def user_api_key_auth(
|
|||
_user_role = _get_user_role(user_id_information=user_id_information)
|
||||
|
||||
if not _is_user_proxy_admin(user_id_information): # if non-admin
|
||||
if is_openai_route(route=route):
|
||||
if is_llm_api_route(route=route):
|
||||
pass
|
||||
elif is_openai_route(route=request["route"].name):
|
||||
elif is_llm_api_route(route=request["route"].name):
|
||||
pass
|
||||
elif (
|
||||
route in LiteLLMRoutes.info_routes.value
|
||||
|
@ -1049,7 +1050,7 @@ async def user_api_key_auth(
|
|||
|
||||
pass
|
||||
elif _user_role == LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY.value:
|
||||
if is_openai_route(route=route):
|
||||
if is_llm_api_route(route=route):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail=f"user not allowed to access this OpenAI routes, role= {_user_role}",
|
||||
|
|
|
@ -23,12 +23,11 @@ def initialize_callbacks_on_proxy(
|
|||
)
|
||||
if isinstance(value, list):
|
||||
imported_list: List[Any] = []
|
||||
known_compatible_callbacks = list(
|
||||
get_args(litellm._custom_logger_compatible_callbacks_literal)
|
||||
)
|
||||
for callback in value: # ["presidio", <my-custom-callback>]
|
||||
|
||||
if isinstance(callback, str) and callback in known_compatible_callbacks:
|
||||
if (
|
||||
isinstance(callback, str)
|
||||
and callback in litellm._known_custom_logger_compatible_callbacks
|
||||
):
|
||||
imported_list.append(callback)
|
||||
elif isinstance(callback, str) and callback == "otel":
|
||||
from litellm.integrations.opentelemetry import OpenTelemetry
|
||||
|
|
|
@ -1,9 +1,26 @@
|
|||
from typing import Dict
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy.proxy_server import UserAPIKeyAuth
|
||||
from litellm.proxy.proxy_server import LiteLLM_TeamTable, UserAPIKeyAuth
|
||||
from litellm.types.guardrails import *
|
||||
|
||||
|
||||
def can_modify_guardrails(team_obj: Optional[LiteLLM_TeamTable]) -> bool:
|
||||
if team_obj is None:
|
||||
return True
|
||||
|
||||
team_metadata = team_obj.metadata or {}
|
||||
|
||||
if team_metadata.get("guardrails", None) is not None and isinstance(
|
||||
team_metadata.get("guardrails"), Dict
|
||||
):
|
||||
if team_metadata.get("guardrails", {}).get("modify_guardrails", None) is False:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
async def should_proceed_based_on_metadata(data: dict, guardrail_name: str) -> bool:
|
||||
"""
|
||||
checks if this guardrail should be applied to this call
|
||||
|
|
|
@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Any, Dict, Optional
|
|||
from fastapi import Request
|
||||
|
||||
from litellm._logging import verbose_logger, verbose_proxy_logger
|
||||
from litellm.proxy._types import CommonProxyErrors, UserAPIKeyAuth
|
||||
from litellm.proxy._types import CommonProxyErrors, TeamCallbackMetadata, UserAPIKeyAuth
|
||||
from litellm.types.utils import SupportedCacheControls
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -207,6 +207,29 @@ async def add_litellm_data_to_request(
|
|||
**data,
|
||||
} # add the team-specific configs to the completion call
|
||||
|
||||
# Team Callbacks controls
|
||||
if user_api_key_dict.team_metadata is not None:
|
||||
team_metadata = user_api_key_dict.team_metadata
|
||||
if "callback_settings" in team_metadata:
|
||||
callback_settings = team_metadata.get("callback_settings", None) or {}
|
||||
callback_settings_obj = TeamCallbackMetadata(**callback_settings)
|
||||
"""
|
||||
callback_settings = {
|
||||
{
|
||||
'callback_vars': {'langfuse_public_key': 'pk', 'langfuse_secret_key': 'sk_'},
|
||||
'failure_callback': [],
|
||||
'success_callback': ['langfuse', 'langfuse']
|
||||
}
|
||||
}
|
||||
"""
|
||||
data["success_callback"] = callback_settings_obj.success_callback
|
||||
data["failure_callback"] = callback_settings_obj.failure_callback
|
||||
|
||||
if callback_settings_obj.callback_vars is not None:
|
||||
# unpack callback_vars in data
|
||||
for k, v in callback_settings_obj.callback_vars.items():
|
||||
data[k] = v
|
||||
|
||||
return data
|
||||
|
||||
|
||||
|
|
litellm/proxy/management_endpoints/team_callback_endpoints.py (new file, 279 additions)
@@ -0,0 +1,279 @@
|
|||
"""
|
||||
Endpoints to control callbacks per team
|
||||
|
||||
Use this when each team should control its own callbacks
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import copy
|
||||
import json
|
||||
import traceback
|
||||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import List, Optional
|
||||
|
||||
import fastapi
|
||||
from fastapi import APIRouter, Depends, Header, HTTPException, Request, status
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy._types import (
|
||||
AddTeamCallback,
|
||||
LiteLLM_TeamTable,
|
||||
ProxyErrorTypes,
|
||||
ProxyException,
|
||||
TeamCallbackMetadata,
|
||||
UserAPIKeyAuth,
|
||||
)
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy.management_helpers.utils import (
|
||||
add_new_member,
|
||||
management_endpoint_wrapper,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post(
|
||||
"/team/{team_id:path}/callback",
|
||||
tags=["team management"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
@management_endpoint_wrapper
|
||||
async def add_team_callbacks(
|
||||
data: AddTeamCallback,
|
||||
http_request: Request,
|
||||
team_id: str,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
litellm_changed_by: Optional[str] = Header(
|
||||
None,
|
||||
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
|
||||
),
|
||||
):
|
||||
"""
|
||||
Add a success/failure callback to a team
|
||||
|
||||
Use this if you want different teams to have different success/failure callbacks
|
||||
|
||||
Example curl:
|
||||
```
|
||||
curl -X POST 'http://localhost:4000/team/dbe2f686-a686-4896-864a-4c3924458709/callback' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-H 'Authorization: Bearer sk-1234' \
|
||||
-d '{
|
||||
"callback_name": "langfuse",
|
||||
"callback_type": "success",
|
||||
"callback_vars": {"langfuse_public_key": "pk-lf-xxxx1", "langfuse_secret_key": "sk-xxxxx"}
|
||||
|
||||
}'
|
||||
```
|
||||
|
||||
This means for the team where team_id = dbe2f686-a686-4896-864a-4c3924458709, all LLM calls will be logged to langfuse using the public key pk-lf-xxxx1 and the secret key sk-xxxxx
|
||||
|
||||
"""
|
||||
try:
|
||||
from litellm.proxy.proxy_server import (
|
||||
_duration_in_seconds,
|
||||
create_audit_log_for_update,
|
||||
litellm_proxy_admin_name,
|
||||
prisma_client,
|
||||
)
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No db connected"})
|
||||
|
||||
# Check if team_id exists already
|
||||
_existing_team = await prisma_client.get_data(
|
||||
team_id=team_id, table_name="team", query_type="find_unique"
|
||||
)
|
||||
if _existing_team is None:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={
|
||||
"error": f"Team id = {team_id} does not exist. Please use a different team id."
|
||||
},
|
||||
)
|
||||
|
||||
# store team callback settings in metadata
|
||||
team_metadata = _existing_team.metadata
|
||||
team_callback_settings = team_metadata.get("callback_settings", {})
|
||||
# expect callback settings to be
|
||||
team_callback_settings_obj = TeamCallbackMetadata(**team_callback_settings)
|
||||
if data.callback_type == "success":
|
||||
if team_callback_settings_obj.success_callback is None:
|
||||
team_callback_settings_obj.success_callback = []
|
||||
|
||||
if data.callback_name in team_callback_settings_obj.success_callback:
|
||||
raise ProxyException(
|
||||
message=f"callback_name = {data.callback_name} already exists in failure_callback, for team_id = {team_id}. \n Existing failure_callback = {team_callback_settings_obj.success_callback}",
|
||||
code=status.HTTP_400_BAD_REQUEST,
|
||||
type=ProxyErrorTypes.bad_request_error,
|
||||
param="callback_name",
|
||||
)
|
||||
|
||||
team_callback_settings_obj.success_callback.append(data.callback_name)
|
||||
elif data.callback_type == "failure":
|
||||
if team_callback_settings_obj.failure_callback is None:
|
||||
team_callback_settings_obj.failure_callback = []
|
||||
|
||||
if data.callback_name in team_callback_settings_obj.failure_callback:
|
||||
raise ProxyException(
|
||||
message=f"callback_name = {data.callback_name} already exists in failure_callback, for team_id = {team_id}. \n Existing failure_callback = {team_callback_settings_obj.failure_callback}",
|
||||
code=status.HTTP_400_BAD_REQUEST,
|
||||
type=ProxyErrorTypes.bad_request_error,
|
||||
param="callback_name",
|
||||
)
|
||||
team_callback_settings_obj.failure_callback.append(data.callback_name)
|
||||
elif data.callback_type == "success_and_failure":
|
||||
if team_callback_settings_obj.success_callback is None:
|
||||
team_callback_settings_obj.success_callback = []
|
||||
if team_callback_settings_obj.failure_callback is None:
|
||||
team_callback_settings_obj.failure_callback = []
|
||||
if data.callback_name in team_callback_settings_obj.success_callback:
|
||||
raise ProxyException(
|
||||
message=f"callback_name = {data.callback_name} already exists in success_callback, for team_id = {team_id}. \n Existing success_callback = {team_callback_settings_obj.success_callback}",
|
||||
code=status.HTTP_400_BAD_REQUEST,
|
||||
type=ProxyErrorTypes.bad_request_error,
|
||||
param="callback_name",
|
||||
)
|
||||
|
||||
if data.callback_name in team_callback_settings_obj.failure_callback:
|
||||
raise ProxyException(
|
||||
message=f"callback_name = {data.callback_name} already exists in failure_callback, for team_id = {team_id}. \n Existing failure_callback = {team_callback_settings_obj.failure_callback}",
|
||||
code=status.HTTP_400_BAD_REQUEST,
|
||||
type=ProxyErrorTypes.bad_request_error,
|
||||
param="callback_name",
|
||||
)
|
||||
|
||||
team_callback_settings_obj.success_callback.append(data.callback_name)
|
||||
team_callback_settings_obj.failure_callback.append(data.callback_name)
|
||||
for var, value in data.callback_vars.items():
|
||||
if team_callback_settings_obj.callback_vars is None:
|
||||
team_callback_settings_obj.callback_vars = {}
|
||||
team_callback_settings_obj.callback_vars[var] = value
|
||||
|
||||
team_callback_settings_obj_dict = team_callback_settings_obj.model_dump()
|
||||
|
||||
team_metadata["callback_settings"] = team_callback_settings_obj_dict
|
||||
team_metadata_json = json.dumps(team_metadata) # update team_metadata
|
||||
|
||||
new_team_row = await prisma_client.db.litellm_teamtable.update(
|
||||
where={"team_id": team_id}, data={"metadata": team_metadata_json} # type: ignore
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"data": new_team_row,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.add_team_callbacks(): Exception occured - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
if isinstance(e, HTTPException):
|
||||
raise ProxyException(
|
||||
message=getattr(e, "detail", f"Internal Server Error({str(e)})"),
|
||||
type=ProxyErrorTypes.internal_server_error.value,
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
|
||||
)
|
||||
elif isinstance(e, ProxyException):
|
||||
raise e
|
||||
raise ProxyException(
|
||||
message="Internal Server Error, " + str(e),
|
||||
type=ProxyErrorTypes.internal_server_error.value,
|
||||
param=getattr(e, "param", "None"),
|
||||
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/team/{team_id:path}/callback",
|
||||
tags=["team management"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
@management_endpoint_wrapper
|
||||
async def get_team_callbacks(
|
||||
http_request: Request,
|
||||
team_id: str,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
):
|
||||
"""
|
||||
Get the success/failure callbacks and variables for a team
|
||||
|
||||
Example curl:
|
||||
```
|
||||
curl -X GET 'http://localhost:4000/team/dbe2f686-a686-4896-864a-4c3924458709/callback' \
|
||||
-H 'Authorization: Bearer sk-1234'
|
||||
```
|
||||
|
||||
This will return the callback settings for the team with id dbe2f686-a686-4896-864a-4c3924458709
|
||||
|
||||
Returns {
|
||||
"status": "success",
|
||||
"data": {
|
||||
"team_id": team_id,
|
||||
"success_callbacks": team_callback_settings_obj.success_callback,
|
||||
"failure_callbacks": team_callback_settings_obj.failure_callback,
|
||||
"callback_vars": team_callback_settings_obj.callback_vars,
|
||||
},
|
||||
}
|
||||
"""
|
||||
try:
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
if prisma_client is None:
|
||||
raise HTTPException(status_code=500, detail={"error": "No db connected"})
|
||||
|
||||
# Check if team_id exists
|
||||
_existing_team = await prisma_client.get_data(
|
||||
team_id=team_id, table_name="team", query_type="find_unique"
|
||||
)
|
||||
if _existing_team is None:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail={"error": f"Team id = {team_id} does not exist."},
|
||||
)
|
||||
|
||||
# Retrieve team callback settings from metadata
|
||||
team_metadata = _existing_team.metadata
|
||||
team_callback_settings = team_metadata.get("callback_settings", {})
|
||||
|
||||
# Convert to TeamCallbackMetadata object for consistent structure
|
||||
team_callback_settings_obj = TeamCallbackMetadata(**team_callback_settings)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"data": {
|
||||
"team_id": team_id,
|
||||
"success_callbacks": team_callback_settings_obj.success_callback,
|
||||
"failure_callbacks": team_callback_settings_obj.failure_callback,
|
||||
"callback_vars": team_callback_settings_obj.callback_vars,
|
||||
},
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.get_team_callbacks(): Exception occurred - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
if isinstance(e, HTTPException):
|
||||
raise ProxyException(
|
||||
message=getattr(e, "detail", f"Internal Server Error({str(e)})"),
|
||||
type=ProxyErrorTypes.internal_server_error.value,
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
|
||||
)
|
||||
elif isinstance(e, ProxyException):
|
||||
raise e
|
||||
raise ProxyException(
|
||||
message="Internal Server Error, " + str(e),
|
||||
type=ProxyErrorTypes.internal_server_error.value,
|
||||
param=getattr(e, "param", "None"),
|
||||
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
)
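
Not part of the commit, but for orientation: together with the POST handler registered earlier in this file, the endpoint above lets a team attach logging callbacks and read them back. A minimal client-side sketch follows, assuming the proxy runs on http://localhost:4000, that `sk-1234` is a valid admin key, that the POST route lives at the same `/team/{team_id}/callback` path, and that the Langfuse variable names are placeholders:

```python
# Sketch only: URL, keys, team id, and callback_vars below are placeholders.
import requests

BASE = "http://localhost:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}  # hypothetical admin key
team_id = "dbe2f686-a686-4896-864a-4c3924458709"  # team id from the docstring example

# Register a success callback plus the variables it needs for this team
resp = requests.post(
    f"{BASE}/team/{team_id}/callback",
    headers=HEADERS,
    json={
        "callback_name": "langfuse",
        "callback_type": "success",
        "callback_vars": {
            "langfuse_public_key": "pk-...",  # placeholder
            "langfuse_secret_key": "sk-...",  # placeholder
        },
    },
)
print(resp.json())  # expected: {"status": "success", "data": <updated team row>}

# Read the settings back via the GET endpoint shown above
print(requests.get(f"{BASE}/team/{team_id}/callback", headers=HEADERS).json())
```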
@@ -363,6 +363,7 @@ async def update_team(
        # set the budget_reset_at in DB
        updated_kv["budget_reset_at"] = reset_at

    updated_kv = prisma_client.jsonify_object(data=updated_kv)
    team_row: Optional[
        LiteLLM_TeamTable
    ] = await prisma_client.db.litellm_teamtable.update(
@@ -1,10 +1,15 @@
model_list:
  - model_name: gpt-4
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: fireworks-llama-v3-70b-instruct
    litellm_params:
      model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
      api_key: "os.environ/FIREWORKS_AI_API_KEY"

router_settings:
  enable_tag_filtering: True # 👈 Key Change
  api_key: "os.environ/FIREWORKS"
general_settings:
  master_key: sk-1234

litellm_settings:
  callbacks: ["arize"]
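
As an aside (not in the diff): a config like this can be smoke-tested by pointing any OpenAI-compatible client at the proxy; with `callbacks: ["arize"]` set, a successful request should also be logged to Arize. A minimal sketch, assuming the proxy was started with this file on localhost:4000 and the `master_key` above:

```python
# Sketch only: assumes a locally running LiteLLM proxy started with the config above.
from openai import OpenAI

client = OpenAI(
    api_key="sk-1234",                 # proxy master key from general_settings
    base_url="http://localhost:4000",  # the LiteLLM proxy, not api.openai.com
)

resp = client.chat.completions.create(
    model="gpt-4",  # model_name from model_list; the proxy routes it to openai/fake
    messages=[{"role": "user", "content": "ping"}],
)
print(resp.choices[0].message.content)
```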
@@ -170,6 +170,9 @@ from litellm.proxy.management_endpoints.key_management_endpoints import (
from litellm.proxy.management_endpoints.key_management_endpoints import (
    router as key_management_router,
)
from litellm.proxy.management_endpoints.team_callback_endpoints import (
    router as team_callback_router,
)
from litellm.proxy.management_endpoints.team_endpoints import router as team_router
from litellm.proxy.openai_files_endpoints.files_endpoints import (
    router as openai_files_router,

@@ -9457,3 +9460,4 @@ app.include_router(analytics_router)
app.include_router(debugging_endpoints_router)
app.include_router(ui_crud_endpoints_router)
app.include_router(openai_files_router)
app.include_router(team_callback_router)
22 litellm/proxy/tests/test_anthropic_sdk.py (Normal file)
@@ -0,0 +1,22 @@
import os

from anthropic import Anthropic

client = Anthropic(
    # This is the default and can be omitted
    base_url="http://localhost:4000",
    # this is a litellm proxy key :) - not a real anthropic key
    api_key="sk-s4xN1IiLTCytwtZFJaYQrA",
)

message = client.messages.create(
    max_tokens=1024,
    messages=[
        {
            "role": "user",
            "content": "Hello, Claude",
        }
    ],
    model="claude-3-opus-20240229",
)
print(message.content)
@@ -25,7 +25,7 @@ from typing_extensions import overload
import litellm
import litellm.litellm_core_utils
import litellm.litellm_core_utils.litellm_logging
from litellm import EmbeddingResponse, ImageResponse, ModelResponse
from litellm import EmbeddingResponse, ImageResponse, ModelResponse, get_litellm_params
from litellm._logging import verbose_proxy_logger
from litellm._service_logger import ServiceLogging, ServiceTypes
from litellm.caching import DualCache, RedisCache

@@ -50,7 +50,7 @@ from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
from litellm.proxy.hooks.parallel_request_limiter import (
    _PROXY_MaxParallelRequestsHandler,
)
from litellm.types.utils import CallTypes
from litellm.types.utils import CallTypes, LoggedLiteLLMParams

if TYPE_CHECKING:
    from opentelemetry.trace import Span as _Span
@@ -602,14 +602,20 @@ class ProxyLogging:
        if litellm_logging_obj is not None:
            ## UPDATE LOGGING INPUT
            _optional_params = {}
            _litellm_params = {}

            litellm_param_keys = LoggedLiteLLMParams.__annotations__.keys()
            for k, v in request_data.items():
                if k != "model" and k != "user" and k != "litellm_params":
                if k in litellm_param_keys:
                    _litellm_params[k] = v
                elif k != "model" and k != "user":
                    _optional_params[k] = v

            litellm_logging_obj.update_environment_variables(
                model=request_data.get("model", ""),
                user=request_data.get("user", ""),
                optional_params=_optional_params,
                litellm_params=request_data.get("litellm_params", {}),
                litellm_params=_litellm_params,
            )

            input: Union[list, str, dict] = ""
@@ -1313,8 +1319,10 @@ class PrismaClient:
                    t.tpm_limit AS team_tpm_limit,
                    t.rpm_limit AS team_rpm_limit,
                    t.models AS team_models,
                    t.metadata AS team_metadata,
                    t.blocked AS team_blocked,
                    t.team_alias AS team_alias,
                    t.metadata AS team_metadata,
                    tm.spend AS team_member_spend,
                    m.aliases as team_model_aliases
                FROM "LiteLLM_VerificationToken" AS v
29 litellm/tests/test_arize_ai.py (Normal file)
@@ -0,0 +1,29 @@
import asyncio
import logging
import os
import time

import pytest
from dotenv import load_dotenv
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

import litellm
from litellm._logging import verbose_logger
from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig

load_dotenv()
import logging


@pytest.mark.asyncio()
async def test_async_otel_callback():
    litellm.set_verbose = True
    litellm.success_callback = ["arize"]

    await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi test from local arize"}],
        mock_response="hello",
        temperature=0.1,
        user="OTEL_USER",
    )
@@ -234,6 +234,7 @@ class CompletionCustomHandler(
            )
            assert isinstance(kwargs["optional_params"], dict)
            assert isinstance(kwargs["litellm_params"], dict)
            assert isinstance(kwargs["litellm_params"]["metadata"], Optional[dict])
            assert isinstance(kwargs["start_time"], (datetime, type(None)))
            assert isinstance(kwargs["stream"], bool)
            assert isinstance(kwargs["user"], (str, type(None)))
@@ -64,6 +64,30 @@ async def test_content_policy_exception_azure():
        pytest.fail(f"An exception occurred - {str(e)}")


@pytest.mark.asyncio
async def test_content_policy_exception_openai():
    try:
        # this is only a test - we needed some way to invoke the exception :(
        litellm.set_verbose = True
        response = await litellm.acompletion(
            model="gpt-3.5-turbo-0613",
            stream=True,
            messages=[
                {"role": "user", "content": "Gimme the lyrics to Don't Stop Me Now"}
            ],
        )
        async for chunk in response:
            print(chunk)
    except litellm.ContentPolicyViolationError as e:
        print("caught a content policy violation error! Passed")
        print("exception", e)
        assert e.llm_provider == "openai"
        pass
    except Exception as e:
        print()
        pytest.fail(f"An exception occurred - {str(e)}")


# Test 1: Context Window Errors
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.parametrize("model", exception_models)
@@ -19,7 +19,7 @@ import pytest

import litellm
from litellm.proxy._types import LiteLLMRoutes
from litellm.proxy.auth.auth_utils import is_openai_route
from litellm.proxy.auth.auth_utils import is_llm_api_route
from litellm.proxy.proxy_server import app

# Configure logging

@@ -77,8 +77,8 @@ def test_routes_on_litellm_proxy():
        ("/v1/non_existent_endpoint", False),
    ],
)
def test_is_openai_route(route: str, expected: bool):
    assert is_openai_route(route) == expected
def test_is_llm_api_route(route: str, expected: bool):
    assert is_llm_api_route(route) == expected


# Test case for routes that are similar but should return False

@@ -91,5 +91,10 @@ def test_is_openai_route(route: str, expected: bool):
        "/engines/model/invalid/completions",
    ],
)
def test_is_openai_route_similar_but_false(route: str):
    assert is_openai_route(route) == False
def test_is_llm_api_route_similar_but_false(route: str):
    assert is_llm_api_route(route) == False


def test_anthropic_api_routes():
    # allow non proxy admins to call anthropic api routes
    assert is_llm_api_route(route="/v1/messages") is True
@@ -173,6 +173,63 @@ def test_chat_completion(mock_acompletion, client_no_auth):
        pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")


@mock_patch_acompletion()
@pytest.mark.asyncio
async def test_team_disable_guardrails(mock_acompletion, client_no_auth):
    """
    If a team is not allowed to turn guardrails on/off, raise a 403 Forbidden error
    when the team makes a request on `/key/generate` or `/chat/completions`.
    """
    import asyncio
    import json
    import time

    from fastapi import HTTPException, Request
    from starlette.datastructures import URL

    from litellm.proxy._types import LiteLLM_TeamTable, ProxyException, UserAPIKeyAuth
    from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
    from litellm.proxy.proxy_server import hash_token, user_api_key_cache

    _team_id = "1234"
    user_key = "sk-12345678"

    valid_token = UserAPIKeyAuth(
        team_id=_team_id,
        team_blocked=True,
        token=hash_token(user_key),
        last_refreshed_at=time.time(),
    )
    await asyncio.sleep(1)
    team_obj = LiteLLM_TeamTable(
        team_id=_team_id,
        blocked=False,
        last_refreshed_at=time.time(),
        metadata={"guardrails": {"modify_guardrails": False}},
    )
    user_api_key_cache.set_cache(key=hash_token(user_key), value=valid_token)
    user_api_key_cache.set_cache(key="team_id:{}".format(_team_id), value=team_obj)

    setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    setattr(litellm.proxy.proxy_server, "prisma_client", "hello-world")

    request = Request(scope={"type": "http"})
    request._url = URL(url="/chat/completions")

    body = {"metadata": {"guardrails": {"hide_secrets": False}}}
    json_bytes = json.dumps(body).encode("utf-8")

    request._body = json_bytes

    try:
        await user_api_key_auth(request=request, api_key="Bearer " + user_key)
        pytest.fail("Expected to raise 403 forbidden error.")
    except ProxyException as e:
        assert e.code == 403


from litellm.tests.test_custom_callback_input import CompletionCustomHandler
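
For illustration only (not part of the commit): the request shape this test guards against, seen from a plain HTTP client. It assumes a locally running proxy and a team API key whose team metadata contains `{"guardrails": {"modify_guardrails": False}}`; the URL and key are placeholders.

```python
# Sketch only: a team that may not modify guardrails tries to override one per-request.
import requests

resp = requests.post(
    "http://localhost:4000/chat/completions",
    headers={"Authorization": "Bearer sk-12345678"},  # hypothetical team key
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "hi"}],
        # attempting to toggle a guardrail from the request body
        "metadata": {"guardrails": {"hide_secrets": False}},
    },
)
print(resp.status_code)  # expected: 403, matching the assertion in the test above
```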
@@ -1029,3 +1029,22 @@ class GenericImageParsingChunk(TypedDict):
class ResponseFormatChunk(TypedDict, total=False):
    type: Required[Literal["json_object", "text"]]
    response_schema: dict


class LoggedLiteLLMParams(TypedDict, total=False):
    force_timeout: Optional[float]
    custom_llm_provider: Optional[str]
    api_base: Optional[str]
    litellm_call_id: Optional[str]
    model_alias_map: Optional[dict]
    metadata: Optional[dict]
    model_info: Optional[dict]
    proxy_server_request: Optional[dict]
    acompletion: Optional[bool]
    preset_cache_key: Optional[str]
    no_log: Optional[bool]
    input_cost_per_second: Optional[float]
    input_cost_per_token: Optional[float]
    output_cost_per_token: Optional[float]
    output_cost_per_second: Optional[float]
    cooldown_time: Optional[float]
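
The keys of this TypedDict are what the `ProxyLogging` change earlier in this diff reads via `LoggedLiteLLMParams.__annotations__.keys()` to decide which request fields get logged as litellm params rather than optional params. A standalone sketch of that partitioning (trimmed to a few keys, with a made-up request dict):

```python
# Sketch of the partitioning logic used in ProxyLogging above; data here is invented.
from typing import Optional, TypedDict


class LoggedLiteLLMParams(TypedDict, total=False):
    api_base: Optional[str]
    metadata: Optional[dict]
    no_log: Optional[bool]  # trimmed to three keys for brevity


request_data = {
    "model": "gpt-3.5-turbo",
    "user": "u-123",
    "temperature": 0.2,                 # -> optional_params
    "metadata": {"team_id": "1234"},    # -> litellm_params
    "api_base": "https://example.com",  # -> litellm_params
}

litellm_param_keys = LoggedLiteLLMParams.__annotations__.keys()
_litellm_params, _optional_params = {}, {}
for k, v in request_data.items():
    if k in litellm_param_keys:
        _litellm_params[k] = v
    elif k != "model" and k != "user":
        _optional_params[k] = v

print(_litellm_params)   # {'metadata': {'team_id': '1234'}, 'api_base': 'https://example.com'}
print(_optional_params)  # {'temperature': 0.2}
```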
@@ -158,6 +158,7 @@ from typing import (
    Tuple,
    Union,
    cast,
    get_args,
)

from .caching import Cache
@@ -405,7 +406,6 @@ def function_setup(
        # Pop the async items from input_callback in reverse order to avoid index issues
        for index in reversed(removed_async_items):
            litellm.input_callback.pop(index)

    if len(litellm.success_callback) > 0:
        removed_async_items = []
        for index, callback in enumerate(litellm.success_callback):  # type: ignore

@@ -417,9 +417,9 @@ def function_setup(
                # we only support async dynamo db logging for acompletion/aembedding since that's used on proxy
                litellm._async_success_callback.append(callback)
                removed_async_items.append(index)
            elif callback == "langsmith":
            elif callback in litellm._known_custom_logger_compatible_callbacks:
                callback_class = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(  # type: ignore
                    callback, internal_usage_cache=None, llm_router=None
                    callback, internal_usage_cache=None, llm_router=None  # type: ignore
                )

                # don't double add a callback
@@ -8808,11 +8808,14 @@ class CustomStreamWrapper:
                        str_line.choices[0].content_filter_result
                    )
                else:
                    error_message = "Azure Response={}".format(
                        str(dict(str_line))
                    error_message = "{} Response={}".format(
                        self.custom_llm_provider, str(dict(str_line))
                    )
                raise litellm.AzureOpenAIError(
                    status_code=400, message=error_message

                raise litellm.ContentPolicyViolationError(
                    message=error_message,
                    llm_provider=self.custom_llm_provider,
                    model=self.model,
                )

            # checking for logprobs
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.41.26"
version = "1.41.27"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"

@@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.41.26"
version = "1.41.27"
version_files = [
    "pyproject.toml:^version"
]