Merge branch 'main' into litellm_fix_pass_through_endpoints

Krish Dholakia 2024-08-14 14:59:38 -07:00 committed by GitHub
commit aa2267bddb
33 changed files with 965 additions and 173 deletions

View file

@ -125,6 +125,7 @@ jobs:
pip install tiktoken
pip install aiohttp
pip install click
pip install "boto3==1.34.34"
pip install jinja2
pip install tokenizers
pip install openai
@ -287,6 +288,7 @@ jobs:
pip install "pytest==7.3.1"
pip install "pytest-mock==3.12.0"
pip install "pytest-asyncio==0.21.1"
pip install "boto3==1.34.34"
pip install mypy
pip install pyarrow
pip install numpydoc

View file

@ -62,6 +62,11 @@ COPY --from=builder /wheels/ /wheels/
RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
# Generate prisma client
ENV PRISMA_BINARY_CACHE_DIR=/app/prisma
RUN mkdir -p /.cache
RUN chmod -R 777 /.cache
RUN pip install nodejs-bin
RUN pip install prisma
RUN prisma generate
RUN chmod +x entrypoint.sh

View file

@ -62,6 +62,11 @@ RUN pip install PyJWT --no-cache-dir
RUN chmod +x build_admin_ui.sh && ./build_admin_ui.sh
# Generate prisma client
ENV PRISMA_BINARY_CACHE_DIR=/app/prisma
RUN mkdir -p /.cache
RUN chmod -R 777 /.cache
RUN pip install nodejs-bin
RUN pip install prisma
RUN prisma generate
RUN chmod +x entrypoint.sh

View file

@ -84,17 +84,20 @@ from litellm import completion
# add to env var
os.environ["OPENAI_API_KEY"] = ""
messages = [{"role": "user", "content": "List 5 cookie recipes"}]
messages = [{"role": "user", "content": "List 5 important events in the XIX century"}]
class CalendarEvent(BaseModel):
name: str
date: str
participants: list[str]
class EventsList(BaseModel):
events: list[CalendarEvent]
resp = completion(
model="gpt-4o-2024-08-06",
messages=messages,
response_format=CalendarEvent
response_format=EventsList
)
print("Received={}".format(resp))

View file

@ -705,6 +705,29 @@ docker run ghcr.io/berriai/litellm:main-latest \
Provide an ssl certificate when starting litellm proxy server
### 3. Providing LiteLLM config.yaml file as a s3 Object/url
Use this if you cannot mount a config file on your deployment service (e.g. AWS Fargate, Railway, etc.)
LiteLLM Proxy will read your config.yaml from an S3 bucket.
Set the following .env vars:
```shell
LITELLM_CONFIG_BUCKET_NAME="litellm-proxy" # your bucket name on s3
LITELLM_CONFIG_BUCKET_OBJECT_KEY="litellm_proxy_config.yaml" # object key on s3
```
Start the litellm proxy with these env vars set - litellm will read your config from S3
```shell
docker run --name litellm-proxy \
-e DATABASE_URL=<database_url> \
-e LITELLM_CONFIG_BUCKET_NAME=<bucket_name> \
-e LITELLM_CONFIG_BUCKET_OBJECT_KEY="<object_key>" \
-p 4000:4000 \
ghcr.io/berriai/litellm-database:main-latest
```
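If the config isn't in the bucket yet, a minimal sketch for uploading it with boto3 (the helper name and default paths below are illustrative, not part of LiteLLM):
```python
# Sketch: upload your local config.yaml to S3 so the proxy can read it at startup.
import boto3

def upload_proxy_config(
    local_path: str = "litellm_proxy_config.yaml",   # illustrative default
    bucket_name: str = "litellm-proxy",              # matches LITELLM_CONFIG_BUCKET_NAME
    object_key: str = "litellm_proxy_config.yaml",   # matches LITELLM_CONFIG_BUCKET_OBJECT_KEY
) -> None:
    s3_client = boto3.client("s3")  # credentials resolved by boto3 (env vars, IAM role, etc.)
    s3_client.upload_file(local_path, bucket_name, object_key)

if __name__ == "__main__":
    upload_proxy_config()
```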
## Platform-specific Guide
<Tabs>

View file

@ -17,7 +17,7 @@ model_list:
## Get Model Information - `/model/info`
Retrieve detailed information about each model listed in the `/model/info` endpoint, including descriptions from the `config.yaml` file, and additional model info (e.g. max tokens, cost per input token, etc.) pulled the model_info you set and the litellm model cost map. Sensitive details like API keys are excluded for security purposes.
Retrieve detailed information about each model listed in the `/model/info` endpoint, including descriptions from the `config.yaml` file, and additional model info (e.g. max tokens, cost per input token, etc.) pulled from the model_info you set and the [litellm model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json). Sensitive details like API keys are excluded for security purposes.
<Tabs
defaultValue="curl"
@ -35,14 +35,10 @@ curl -X GET "http://0.0.0.0:4000/model/info" \
## Add a New Model
Add a new model to the list in the `config.yaml` by providing the model parameters. This allows you to update the model list without restarting the proxy.
Add a new model to the proxy via the `/model/new` API, to add models without restarting the proxy.
<Tabs
defaultValue="curl"
values={[
{ label: 'cURL', value: 'curl', },
]}>
<TabItem value="curl">
<Tabs>
<TabItem value="API">
```bash
curl -X POST "http://0.0.0.0:4000/model/new" \
@ -50,6 +46,21 @@ curl -X POST "http://0.0.0.0:4000/model/new" \
-H "Content-Type: application/json" \
-d '{ "model_name": "azure-gpt-turbo", "litellm_params": {"model": "azure/gpt-3.5-turbo", "api_key": "os.environ/AZURE_API_KEY", "api_base": "my-azure-api-base"} }'
```
</TabItem>
<TabItem value="Yaml">
```yaml
model_list:
- model_name: gpt-3.5-turbo ### RECEIVED MODEL NAME ### `openai.chat.completions.create(model="gpt-3.5-turbo",...)`
litellm_params: # all params accepted by litellm.completion() - https://github.com/BerriAI/litellm/blob/9b46ec05b02d36d6e4fb5c32321e51e7f56e4a6e/litellm/types/router.py#L297
model: azure/gpt-turbo-small-eu ### MODEL NAME sent to `litellm.completion()` ###
api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
api_key: "os.environ/AZURE_API_KEY_EU" # does os.getenv("AZURE_API_KEY_EU")
rpm: 6 # [OPTIONAL] Rate limit for this deployment: in requests per minute (rpm)
model_info:
my_custom_key: my_custom_value # additional model metadata
```
</TabItem>
</Tabs>
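For reference, the same `/model/new` request from Python (a minimal sketch using the `requests` library; the proxy URL and master key are placeholders):
```python
# Sketch: add a model through the proxy's /model/new endpoint without a restart.
import requests

resp = requests.post(
    "http://0.0.0.0:4000/model/new",
    headers={
        "Authorization": "Bearer sk-1234",  # your proxy master key
        "Content-Type": "application/json",
    },
    json={
        "model_name": "azure-gpt-turbo",
        "litellm_params": {
            "model": "azure/gpt-3.5-turbo",
            "api_key": "os.environ/AZURE_API_KEY",
            "api_base": "my-azure-api-base",
        },
    },
    timeout=30,
)
resp.raise_for_status()
print(resp.json())
```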
@ -86,3 +97,82 @@ Keep in mind that as both endpoints are in [BETA], you may need to visit the ass
- Add a New Model: [Issue #964](https://github.com/BerriAI/litellm/issues/964)
Feedback on the beta endpoints is valuable and helps improve the API for all users.
## Add Additional Model Information
If you want to add a display name, description, and labels for models, use `model_info:`
```yaml
model_list:
- model_name: "gpt-4"
litellm_params:
model: "gpt-4"
api_key: "os.environ/OPENAI_API_KEY"
model_info: # 👈 KEY CHANGE
my_custom_key: "my_custom_value"
```
### Usage
1. Add additional information to the model
```yaml
model_list:
- model_name: "gpt-4"
litellm_params:
model: "gpt-4"
api_key: "os.environ/OPENAI_API_KEY"
model_info: # 👈 KEY CHANGE
my_custom_key: "my_custom_value"
```
2. Call with `/model/info`
Use a key with access to the model `gpt-4`. (A Python version of this call is sketched after the expected response below.)
```bash
curl -L -X GET 'http://0.0.0.0:4000/v1/model/info' \
-H 'Authorization: Bearer LITELLM_KEY'
```
3. **Expected Response**
Returned `model_info = Your custom model_info + (if exists) LITELLM MODEL INFO`
[**How LiteLLM Model Info is found**](https://github.com/BerriAI/litellm/blob/9b46ec05b02d36d6e4fb5c32321e51e7f56e4a6e/litellm/proxy/proxy_server.py#L7460)
[Tell us how this can be improved!](https://github.com/BerriAI/litellm/issues)
```bash
{
"data": [
{
"model_name": "gpt-4",
"litellm_params": {
"model": "gpt-4"
},
"model_info": {
"id": "e889baacd17f591cce4c63639275ba5e8dc60765d6c553e6ee5a504b19e50ddc",
"db_model": false,
"my_custom_key": "my_custom_value", # 👈 CUSTOM INFO
"key": "gpt-4", # 👈 KEY in LiteLLM MODEL INFO/COST MAP - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
"max_tokens": 4096,
"max_input_tokens": 8192,
"max_output_tokens": 4096,
"input_cost_per_token": 3e-05,
"input_cost_per_character": null,
"input_cost_per_token_above_128k_tokens": null,
"output_cost_per_token": 6e-05,
"output_cost_per_character": null,
"output_cost_per_token_above_128k_tokens": null,
"output_cost_per_character_above_128k_tokens": null,
"output_vector_size": null,
"litellm_provider": "openai",
"mode": "chat"
}
},
]
}
```
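The same lookup from Python, for reference (a minimal sketch with the `requests` library; the URL and key are placeholders):
```python
# Sketch: fetch /v1/model/info and read back the custom model_info fields.
import requests

resp = requests.get(
    "http://0.0.0.0:4000/v1/model/info",
    headers={"Authorization": "Bearer LITELLM_KEY"},  # key with access to gpt-4
    timeout=30,
)
resp.raise_for_status()
for deployment in resp.json()["data"]:
    print(deployment["model_name"], deployment["model_info"].get("my_custom_key"))
```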

View file

@ -72,15 +72,15 @@ http://localhost:4000/metrics
| Metric Name | Description |
|----------------------|--------------------------------------|
| `deployment_state` | The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage. |
| `litellm_deployment_state` | The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage. |
| `litellm_remaining_requests_metric` | Track `x-ratelimit-remaining-requests` returned from LLM API Deployment |
| `litellm_remaining_tokens` | Track `x-ratelimit-remaining-tokens` return from LLM API Deployment |
| `llm_deployment_success_responses` | Total number of successful LLM API calls for deployment |
| `llm_deployment_failure_responses` | Total number of failed LLM API calls for deployment |
| `llm_deployment_total_requests` | Total number of LLM API calls for deployment - success + failure |
| `llm_deployment_latency_per_output_token` | Latency per output token for deployment |
| `llm_deployment_successful_fallbacks` | Number of successful fallback requests from primary model -> fallback model |
| `llm_deployment_failed_fallbacks` | Number of failed fallback requests from primary model -> fallback model |
| `litellm_deployment_success_responses` | Total number of successful LLM API calls for deployment |
| `litellm_deployment_failure_responses` | Total number of failed LLM API calls for deployment |
| `litellm_deployment_total_requests` | Total number of LLM API calls for deployment - success + failure |
| `litellm_deployment_latency_per_output_token` | Latency per output token for deployment |
| `litellm_deployment_successful_fallbacks` | Number of successful fallback requests from primary model -> fallback model |
| `litellm_deployment_failed_fallbacks` | Number of failed fallback requests from primary model -> fallback model |
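A quick way to confirm the renamed metrics are being emitted (a minimal sketch; assumes the proxy runs locally with the `prometheus` callback enabled):
```python
# Sketch: scrape the proxy's /metrics endpoint and print the renamed deployment metrics.
import requests

metrics_text = requests.get("http://localhost:4000/metrics", timeout=10).text
for line in metrics_text.splitlines():
    if line.startswith("litellm_deployment_"):
        print(line)
```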

View file

@ -151,7 +151,7 @@ const sidebars = {
},
{
type: "category",
label: "Chat Completions (litellm.completion)",
label: "Chat Completions (litellm.completion + PROXY)",
link: {
type: "generated-index",
title: "Chat Completions",

View file

@ -1,5 +1,6 @@
import json
import os
import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional, TypedDict, Union
@ -29,6 +30,8 @@ class GCSBucketPayload(TypedDict):
end_time: str
response_cost: Optional[float]
spend_log_metadata: str
exception: Optional[str]
log_event_type: Optional[str]
class GCSBucketLogger(CustomLogger):
@ -79,6 +82,7 @@ class GCSBucketLogger(CustomLogger):
logging_payload: GCSBucketPayload = await self.get_gcs_payload(
kwargs, response_obj, start_time_str, end_time_str
)
logging_payload["log_event_type"] = "successful_api_call"
json_logged_payload = json.dumps(logging_payload)
@ -103,7 +107,56 @@ class GCSBucketLogger(CustomLogger):
verbose_logger.error("GCS Bucket logging error: %s", str(e))
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
pass
from litellm.proxy.proxy_server import premium_user
if premium_user is not True:
raise ValueError(
f"GCS Bucket logging is a premium feature. Please upgrade to use it. {CommonProxyErrors.not_premium_user.value}"
)
try:
verbose_logger.debug(
"GCS Logger: async_log_failure_event logging kwargs: %s, response_obj: %s",
kwargs,
response_obj,
)
start_time_str = start_time.strftime("%Y-%m-%d %H:%M:%S")
end_time_str = end_time.strftime("%Y-%m-%d %H:%M:%S")
headers = await self.construct_request_headers()
logging_payload: GCSBucketPayload = await self.get_gcs_payload(
kwargs, response_obj, start_time_str, end_time_str
)
logging_payload["log_event_type"] = "failed_api_call"
_litellm_params = kwargs.get("litellm_params") or {}
metadata = _litellm_params.get("metadata") or {}
json_logged_payload = json.dumps(logging_payload)
# Get the current date
current_date = datetime.now().strftime("%Y-%m-%d")
# Modify the object_name to include the date-based folder
object_name = f"{current_date}/failure-{uuid.uuid4().hex}"
if "gcs_log_id" in metadata:
object_name = metadata["gcs_log_id"]
response = await self.async_httpx_client.post(
headers=headers,
url=f"https://storage.googleapis.com/upload/storage/v1/b/{self.BUCKET_NAME}/o?uploadType=media&name={object_name}",
data=json_logged_payload,
)
if response.status_code != 200:
verbose_logger.error("GCS Bucket logging error: %s", str(response.text))
verbose_logger.debug("GCS Bucket response %s", response)
verbose_logger.debug("GCS Bucket status code %s", response.status_code)
verbose_logger.debug("GCS Bucket response.text %s", response.text)
except Exception as e:
verbose_logger.error("GCS Bucket logging error: %s", str(e))
async def construct_request_headers(self) -> Dict[str, str]:
from litellm import vertex_chat_completion
@ -139,10 +192,19 @@ class GCSBucketLogger(CustomLogger):
optional_params=kwargs.get("optional_params", None),
)
response_dict = {}
if response_obj:
response_dict = convert_litellm_response_object_to_dict(
response_obj=response_obj
)
exception_str = None
# Handle logging exception attributes
if "exception" in kwargs:
exception_str = kwargs.get("exception", "")
if not isinstance(exception_str, str):
exception_str = str(exception_str)
_spend_log_payload: SpendLogsPayload = get_logging_payload(
kwargs=kwargs,
response_obj=response_obj,
@ -156,8 +218,10 @@ class GCSBucketLogger(CustomLogger):
response_obj=response_dict,
start_time=start_time,
end_time=end_time,
spend_log_metadata=_spend_log_payload["metadata"],
spend_log_metadata=_spend_log_payload.get("metadata", ""),
response_cost=kwargs.get("response_cost", None),
exception=exception_str,
log_event_type=None,
)
return gcs_payload
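To pin the failure log to a predictable object name, the new code reads `gcs_log_id` from the request metadata; a minimal sketch (mirroring the test added in this PR; the import path and the `mock_response` error are assumptions for illustration):
```python
# Sketch: pin the GCS object name for a failed call via metadata["gcs_log_id"].
# The custom object name is only used by async_log_failure_event, i.e. when the call errors.
import uuid
import litellm
from litellm.integrations.gcs_bucket import GCSBucketLogger  # import path assumed

litellm.callbacks = [GCSBucketLogger()]
gcs_log_id = f"failure-test-{uuid.uuid4().hex}"

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "This is a test"}],
        metadata={"gcs_log_id": gcs_log_id},
        mock_response=litellm.BadRequestError(
            model="gpt-3.5-turbo",
            message="Invalid API key",
            llm_provider="openai",
        ),
    )
except Exception:
    pass  # the failure payload is written to GCS under gcs_log_id
```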

View file

@ -141,42 +141,42 @@ class PrometheusLogger(CustomLogger):
]
# Metric for deployment state
self.deployment_state = Gauge(
"deployment_state",
self.litellm_deployment_state = Gauge(
"litellm_deployment_state",
"LLM Deployment Analytics - The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage",
labelnames=_logged_llm_labels,
)
self.llm_deployment_success_responses = Counter(
name="llm_deployment_success_responses",
self.litellm_deployment_success_responses = Counter(
name="litellm_deployment_success_responses",
documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
labelnames=_logged_llm_labels,
)
self.llm_deployment_failure_responses = Counter(
name="llm_deployment_failure_responses",
self.litellm_deployment_failure_responses = Counter(
name="litellm_deployment_failure_responses",
documentation="LLM Deployment Analytics - Total number of failed LLM API calls via litellm",
labelnames=_logged_llm_labels,
)
self.llm_deployment_total_requests = Counter(
name="llm_deployment_total_requests",
self.litellm_deployment_total_requests = Counter(
name="litellm_deployment_total_requests",
documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
labelnames=_logged_llm_labels,
)
# Deployment Latency tracking
self.llm_deployment_latency_per_output_token = Histogram(
name="llm_deployment_latency_per_output_token",
self.litellm_deployment_latency_per_output_token = Histogram(
name="litellm_deployment_latency_per_output_token",
documentation="LLM Deployment Analytics - Latency per output token",
labelnames=_logged_llm_labels,
)
self.llm_deployment_successful_fallbacks = Counter(
"llm_deployment_successful_fallbacks",
self.litellm_deployment_successful_fallbacks = Counter(
"litellm_deployment_successful_fallbacks",
"LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model",
["primary_model", "fallback_model"],
)
self.llm_deployment_failed_fallbacks = Counter(
"llm_deployment_failed_fallbacks",
self.litellm_deployment_failed_fallbacks = Counter(
"litellm_deployment_failed_fallbacks",
"LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model",
["primary_model", "fallback_model"],
)
@ -358,14 +358,14 @@ class PrometheusLogger(CustomLogger):
api_provider=llm_provider,
)
self.llm_deployment_failure_responses.labels(
self.litellm_deployment_failure_responses.labels(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
api_provider=llm_provider,
).inc()
self.llm_deployment_total_requests.labels(
self.litellm_deployment_total_requests.labels(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
@ -438,14 +438,14 @@ class PrometheusLogger(CustomLogger):
api_provider=llm_provider,
)
self.llm_deployment_success_responses.labels(
self.litellm_deployment_success_responses.labels(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
api_provider=llm_provider,
).inc()
self.llm_deployment_total_requests.labels(
self.litellm_deployment_total_requests.labels(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
@ -475,7 +475,7 @@ class PrometheusLogger(CustomLogger):
latency_per_token = None
if output_tokens is not None and output_tokens > 0:
latency_per_token = _latency_seconds / output_tokens
self.llm_deployment_latency_per_output_token.labels(
self.litellm_deployment_latency_per_output_token.labels(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
@ -497,7 +497,7 @@ class PrometheusLogger(CustomLogger):
kwargs,
)
_new_model = kwargs.get("model")
self.llm_deployment_successful_fallbacks.labels(
self.litellm_deployment_successful_fallbacks.labels(
primary_model=original_model_group, fallback_model=_new_model
).inc()
@ -508,11 +508,11 @@ class PrometheusLogger(CustomLogger):
kwargs,
)
_new_model = kwargs.get("model")
self.llm_deployment_failed_fallbacks.labels(
self.litellm_deployment_failed_fallbacks.labels(
primary_model=original_model_group, fallback_model=_new_model
).inc()
def set_deployment_state(
def set_litellm_deployment_state(
self,
state: int,
litellm_model_name: str,
@ -520,7 +520,7 @@ class PrometheusLogger(CustomLogger):
api_base: str,
api_provider: str,
):
self.deployment_state.labels(
self.litellm_deployment_state.labels(
litellm_model_name, model_id, api_base, api_provider
).set(state)
@ -531,7 +531,7 @@ class PrometheusLogger(CustomLogger):
api_base: str,
api_provider: str,
):
self.set_deployment_state(
self.set_litellm_deployment_state(
0, litellm_model_name, model_id, api_base, api_provider
)
@ -542,7 +542,7 @@ class PrometheusLogger(CustomLogger):
api_base: str,
api_provider: str,
):
self.set_deployment_state(
self.set_litellm_deployment_state(
1, litellm_model_name, model_id, api_base, api_provider
)
@ -553,7 +553,7 @@ class PrometheusLogger(CustomLogger):
api_base: str,
api_provider: str,
):
self.set_deployment_state(
self.set_litellm_deployment_state(
2, litellm_model_name, model_id, api_base, api_provider
)

View file

@ -41,8 +41,8 @@ async def get_fallback_metric_from_prometheus():
"""
response_message = ""
relevant_metrics = [
"llm_deployment_successful_fallbacks_total",
"llm_deployment_failed_fallbacks_total",
"litellm_deployment_successful_fallbacks_total",
"litellm_deployment_failed_fallbacks_total",
]
for metric in relevant_metrics:
response_json = await get_metric_from_prometheus(

View file

@ -1055,8 +1055,8 @@ class BedrockLLM(BaseLLM):
},
)
raise BedrockError(
status_code=400,
message="Bedrock HTTPX: Unsupported provider={}, model={}".format(
status_code=404,
message="Bedrock HTTPX: Unknown provider={}, model={}".format(
provider, model
),
)

View file

@ -601,12 +601,13 @@ def ollama_embeddings(
):
return asyncio.run(
ollama_aembeddings(
api_base,
model,
prompts,
optional_params,
logging_obj,
model_response,
encoding,
api_base=api_base,
model=model,
prompts=prompts,
model_response=model_response,
optional_params=optional_params,
logging_obj=logging_obj,
encoding=encoding,
)
)

View file

@ -356,6 +356,7 @@ def ollama_completion_stream(url, api_key, data, logging_obj):
"json": data,
"method": "POST",
"timeout": litellm.request_timeout,
"follow_redirects": True
}
if api_key is not None:
_request["headers"] = {"Authorization": "Bearer {}".format(api_key)}

View file

@ -1701,12 +1701,12 @@ def cohere_messages_pt_v2(
assistant_tool_calls: List[ToolCallObject] = []
## MERGE CONSECUTIVE ASSISTANT CONTENT ##
while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
assistant_text = (
messages[msg_i].get("content") or ""
) # either string or none
if assistant_text:
assistant_content += assistant_text
if isinstance(messages[msg_i]["content"], list):
for m in messages[msg_i]["content"]:
if m.get("type", "") == "text":
assistant_content += m["text"]
else:
assistant_content += messages[msg_i]["content"]
if messages[msg_i].get(
"tool_calls", []
): # support assistant tool invoke conversion
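With this change, assistant messages may carry either a plain string or a list of text blocks; a minimal sketch of both shapes (model and message shapes taken from the updated Cohere test in this PR):
```python
# Sketch: assistant "content" as a string and as a list of {"type": "text", ...} blocks.
from litellm import completion

messages = [
    {"role": "system", "content": "You're a good bot"},
    {"role": "assistant", "content": [{"type": "text", "text": "2"}]},  # list form
    {"role": "assistant", "content": "3"},                              # string form
    {"role": "user", "content": "Hey"},
]
response = completion(model="command-r", messages=messages)
print(response)
```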

View file

@ -240,10 +240,10 @@ class TritonChatCompletion(BaseLLM):
handler = HTTPHandler()
if stream:
return self._handle_stream(
handler, api_base, data_for_triton, model, logging_obj
handler, api_base, json_data_for_triton, model, logging_obj
)
else:
response = handler.post(url=api_base, data=data_for_triton, headers=headers)
response = handler.post(url=api_base, data=json_data_for_triton, headers=headers)
return self._handle_response(
response, model_response, logging_obj, type_of_model=type_of_model
)

View file

@ -57,6 +57,18 @@
"supports_parallel_function_calling": true,
"supports_vision": true
},
"chatgpt-4o-latest": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000015,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
},
"gpt-4o-2024-05-13": {
"max_tokens": 4096,
"max_input_tokens": 128000,
@ -2062,7 +2074,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-5-sonnet@20240620": {
"max_tokens": 4096,
@ -2073,7 +2086,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-haiku@20240307": {
"max_tokens": 4096,
@ -2084,7 +2098,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-opus@20240229": {
"max_tokens": 4096,
@ -2095,7 +2110,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/meta/llama3-405b-instruct-maas": {
"max_tokens": 32000,
@ -4519,6 +4535,69 @@
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-3.1-70b-instruct": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000001,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-3.1-8b-instruct": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.0000002,
"output_cost_per_token": 0.0000002,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-3.1-sonar-huge-128k-online": {
"max_tokens": 127072,
"max_input_tokens": 127072,
"max_output_tokens": 127072,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000005,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-3.1-sonar-large-128k-online": {
"max_tokens": 127072,
"max_input_tokens": 127072,
"max_output_tokens": 127072,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000001,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-3.1-sonar-large-128k-chat": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000001,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-3.1-sonar-small-128k-chat": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.0000002,
"output_cost_per_token": 0.0000002,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-3.1-sonar-small-128k-online": {
"max_tokens": 127072,
"max_input_tokens": 127072,
"max_output_tokens": 127072,
"input_cost_per_token": 0.0000002,
"output_cost_per_token": 0.0000002,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/pplx-7b-chat": {
"max_tokens": 8192,
"max_input_tokens": 8192,

View file

@ -1,13 +1,7 @@
model_list:
- model_name: "*"
- model_name: "gpt-4"
litellm_params:
model: "*"
model: "gpt-4"
model_info:
my_custom_key: "my_custom_value"
general_settings:
master_key: sk-1234
pass_through_endpoints:
- path: "/api/public/ingestion" # route you want to add to LiteLLM Proxy Server
target: "https://us.cloud.langfuse.com/api/public/ingestion" # URL this route should forward
headers:
LANGFUSE_PUBLIC_KEY: "os.environ/LANGFUSE_PUBLIC_KEY" # your langfuse account public key
LANGFUSE_SECRET_KEY: "os.environ/LANGFUSE_SECRET_KEY" # your langfuse account secret key

View file

@ -12,7 +12,7 @@ import json
import secrets
import traceback
from datetime import datetime, timedelta, timezone
from typing import Optional
from typing import Optional, Tuple
from uuid import uuid4
import fastapi
@ -125,7 +125,7 @@ async def user_api_key_auth(
# Check 2. FILTER IP ADDRESS
await check_if_request_size_is_safe(request=request)
is_valid_ip = _check_valid_ip(
is_valid_ip, passed_in_ip = _check_valid_ip(
allowed_ips=general_settings.get("allowed_ips", None),
use_x_forwarded_for=general_settings.get("use_x_forwarded_for", False),
request=request,
@ -134,7 +134,7 @@ async def user_api_key_auth(
if not is_valid_ip:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Access forbidden: IP address not allowed.",
detail=f"Access forbidden: IP address {passed_in_ip} not allowed.",
)
pass_through_endpoints: Optional[List[dict]] = general_settings.get(
@ -1251,12 +1251,12 @@ def _check_valid_ip(
allowed_ips: Optional[List[str]],
request: Request,
use_x_forwarded_for: Optional[bool] = False,
) -> bool:
) -> Tuple[bool, Optional[str]]:
"""
Returns if ip is allowed or not
"""
if allowed_ips is None: # if not set, assume true
return True
return True, None
# if general_settings.get("use_x_forwarded_for") is True then use x-forwarded-for
client_ip = None
@ -1267,9 +1267,9 @@ def _check_valid_ip(
# Check if IP address is allowed
if client_ip not in allowed_ips:
return False
return False, client_ip
return True
return True, client_ip
def get_api_key_from_custom_header(

View file

@ -0,0 +1,53 @@
import tempfile
import boto3
import yaml
from litellm._logging import verbose_proxy_logger
def get_file_contents_from_s3(bucket_name, object_key):
# v0 rely on boto3 for authentication - allowing boto3 to handle IAM credentials etc
from botocore.config import Config
from botocore.credentials import Credentials
from litellm.main import bedrock_converse_chat_completion
credentials: Credentials = bedrock_converse_chat_completion.get_credentials()
s3_client = boto3.client(
"s3",
aws_access_key_id=credentials.access_key,
aws_secret_access_key=credentials.secret_key,
aws_session_token=credentials.token, # Optional, if using temporary credentials
)
try:
verbose_proxy_logger.debug(
f"Retrieving {object_key} from S3 bucket: {bucket_name}"
)
response = s3_client.get_object(Bucket=bucket_name, Key=object_key)
verbose_proxy_logger.debug(f"Response: {response}")
# Read the file contents
file_contents = response["Body"].read().decode("utf-8")
verbose_proxy_logger.debug(f"File contents retrieved from S3")
# Create a temporary file with YAML extension
with tempfile.NamedTemporaryFile(delete=False, suffix=".yaml") as temp_file:
temp_file.write(file_contents.encode("utf-8"))
temp_file_path = temp_file.name
verbose_proxy_logger.debug(f"File stored temporarily at: {temp_file_path}")
# Load the YAML file content
with open(temp_file_path, "r") as yaml_file:
config = yaml.safe_load(yaml_file)
return config
except Exception as e:
verbose_proxy_logger.error(f"Error retrieving file contents: {str(e)}")
return None
# # Example usage
# bucket_name = 'litellm-proxy'
# object_key = 'litellm_proxy_config.yaml'
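A minimal usage sketch for the new helper (it mirrors the proxy_server change later in this diff, which reads the same env vars documented above):
```python
# Sketch: load the proxy config from S3 using the documented env vars.
import os

from litellm.proxy.common_utils.load_config_utils import get_file_contents_from_s3

config = get_file_contents_from_s3(
    bucket_name=os.environ["LITELLM_CONFIG_BUCKET_NAME"],
    object_key=os.environ["LITELLM_CONFIG_BUCKET_OBJECT_KEY"],
)
print(config)  # parsed YAML dict, or None if retrieval failed
```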

View file

@ -5,7 +5,12 @@ from fastapi import Request
import litellm
from litellm._logging import verbose_logger, verbose_proxy_logger
from litellm.proxy._types import CommonProxyErrors, TeamCallbackMetadata, UserAPIKeyAuth
from litellm.proxy._types import (
AddTeamCallback,
CommonProxyErrors,
TeamCallbackMetadata,
UserAPIKeyAuth,
)
from litellm.types.utils import SupportedCacheControls
if TYPE_CHECKING:
@ -59,6 +64,42 @@ def safe_add_api_version_from_query_params(data: dict, request: Request):
verbose_logger.error("error checking api version in query params: %s", str(e))
def convert_key_logging_metadata_to_callback(
data: AddTeamCallback, team_callback_settings_obj: Optional[TeamCallbackMetadata]
) -> TeamCallbackMetadata:
if team_callback_settings_obj is None:
team_callback_settings_obj = TeamCallbackMetadata()
if data.callback_type == "success":
if team_callback_settings_obj.success_callback is None:
team_callback_settings_obj.success_callback = []
if data.callback_name not in team_callback_settings_obj.success_callback:
team_callback_settings_obj.success_callback.append(data.callback_name)
elif data.callback_type == "failure":
if team_callback_settings_obj.failure_callback is None:
team_callback_settings_obj.failure_callback = []
if data.callback_name not in team_callback_settings_obj.failure_callback:
team_callback_settings_obj.failure_callback.append(data.callback_name)
elif data.callback_type == "success_and_failure":
if team_callback_settings_obj.success_callback is None:
team_callback_settings_obj.success_callback = []
if team_callback_settings_obj.failure_callback is None:
team_callback_settings_obj.failure_callback = []
if data.callback_name not in team_callback_settings_obj.success_callback:
team_callback_settings_obj.success_callback.append(data.callback_name)
if data.callback_name not in team_callback_settings_obj.failure_callback:
team_callback_settings_obj.failure_callback.append(data.callback_name)
for var, value in data.callback_vars.items():
if team_callback_settings_obj.callback_vars is None:
team_callback_settings_obj.callback_vars = {}
team_callback_settings_obj.callback_vars[var] = litellm.get_secret(value)
return team_callback_settings_obj
async def add_litellm_data_to_request(
data: dict,
request: Request,
@ -224,6 +265,7 @@ async def add_litellm_data_to_request(
} # add the team-specific configs to the completion call
# Team Callbacks controls
callback_settings_obj: Optional[TeamCallbackMetadata] = None
if user_api_key_dict.team_metadata is not None:
team_metadata = user_api_key_dict.team_metadata
if "callback_settings" in team_metadata:
@ -241,6 +283,18 @@ async def add_litellm_data_to_request(
}
}
"""
elif (
user_api_key_dict.metadata is not None
and "logging" in user_api_key_dict.metadata
):
for item in user_api_key_dict.metadata["logging"]:
callback_settings_obj = convert_key_logging_metadata_to_callback(
data=AddTeamCallback(**item),
team_callback_settings_obj=callback_settings_obj,
)
if callback_settings_obj is not None:
data["success_callback"] = callback_settings_obj.success_callback
data["failure_callback"] = callback_settings_obj.failure_callback

View file

@ -39,7 +39,4 @@ general_settings:
litellm_settings:
fallbacks: [{"gemini-1.5-pro-001": ["gpt-4o"]}]
success_callback: ["langfuse", "prometheus"]
langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"]
failure_callback: ["prometheus"]
cache: True
callbacks: ["gcs_bucket"]

View file

@ -151,6 +151,7 @@ from litellm.proxy.common_utils.http_parsing_utils import (
check_file_size_under_limit,
)
from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy
from litellm.proxy.common_utils.load_config_utils import get_file_contents_from_s3
from litellm.proxy.common_utils.openai_endpoint_utils import (
remove_sensitive_info_from_deployment,
)
@ -1402,6 +1403,17 @@ class ProxyConfig:
global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, proxy_budget_rescheduler_max_time, proxy_budget_rescheduler_min_time, ui_access_mode, litellm_master_key_hash, proxy_batch_write_at, disable_spend_logs, prompt_injection_detection_obj, redis_usage_cache, store_model_in_db, premium_user, open_telemetry_logger, health_check_details
# Load existing config
if os.environ.get("LITELLM_CONFIG_BUCKET_NAME") is not None:
bucket_name = os.environ.get("LITELLM_CONFIG_BUCKET_NAME")
object_key = os.environ.get("LITELLM_CONFIG_BUCKET_OBJECT_KEY")
verbose_proxy_logger.debug(
"bucket_name: %s, object_key: %s", bucket_name, object_key
)
config = get_file_contents_from_s3(
bucket_name=bucket_name, object_key=object_key
)
else:
# default to file
config = await self.get_config(config_file_path=config_file_path)
## PRINT YAML FOR CONFIRMING IT WORKS
printed_yaml = copy.deepcopy(config)
@ -2601,6 +2613,15 @@ async def startup_event():
)
else:
await initialize(**worker_config)
elif os.environ.get("LITELLM_CONFIG_BUCKET_NAME") is not None:
(
llm_router,
llm_model_list,
general_settings,
) = await proxy_config.load_config(
router=llm_router, config_file_path=worker_config
)
else:
# if not, assume it's a json string
worker_config = json.loads(os.getenv("WORKER_CONFIG"))

View file

@ -21,6 +21,8 @@ def get_logging_payload(
if kwargs is None:
kwargs = {}
if response_obj is None:
response_obj = {}
# standardize this function to be used across, s3, dynamoDB, langfuse logging
litellm_params = kwargs.get("litellm_params", {})
metadata = (

View file

@ -1159,8 +1159,8 @@ def test_bedrock_tools_pt_invalid_names():
assert result[1]["toolSpec"]["name"] == "another_invalid_name"
def test_bad_request_error():
with pytest.raises(litellm.BadRequestError):
def test_not_found_error():
with pytest.raises(litellm.NotFoundError):
completion(
model="bedrock/bad_model",
messages=[

View file

@ -3705,19 +3705,21 @@ def test_completion_anyscale_api():
# test_completion_anyscale_api()
@pytest.mark.skip(reason="flaky test, times out frequently")
# @pytest.mark.skip(reason="flaky test, times out frequently")
def test_completion_cohere():
try:
# litellm.set_verbose=True
messages = [
{"role": "system", "content": "You're a good bot"},
{"role": "assistant", "content": [{"text": "2", "type": "text"}]},
{"role": "assistant", "content": [{"text": "3", "type": "text"}]},
{
"role": "user",
"content": "Hey",
},
]
response = completion(
model="command-nightly",
model="command-r",
messages=messages,
)
print(response)

View file

@ -1,23 +1,27 @@
# What is this?
## Test to make sure function call response always works with json.loads() -> no extra parsing required. Relevant issue - https://github.com/BerriAI/litellm/issues/2654
import sys, os
import os
import sys
import traceback
from dotenv import load_dotenv
load_dotenv()
import os, io
import io
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
import json
import warnings
from litellm import completion
from typing import List
import pytest
import litellm
from litellm import completion
# Just a stub to keep the sample code simple
class Trade:
@ -78,6 +82,7 @@ def trade(model_name: str) -> List[Trade]:
},
}
try:
response = completion(
model_name,
[
@ -129,7 +134,8 @@ def trade(model_name: str) -> List[Trade]:
"function": {"name": tool_spec["function"]["name"]}, # type: ignore
},
)
except litellm.InternalServerError:
pass
calls = response.choices[0].message.tool_calls
trades = [trade for call in calls for trade in parse_call(call)]
return trades

View file

@ -147,6 +147,117 @@ async def test_basic_gcs_logger():
assert gcs_payload["response_cost"] > 0.0
assert gcs_payload["log_event_type"] == "successful_api_call"
gcs_payload["spend_log_metadata"] = json.loads(gcs_payload["spend_log_metadata"])
assert (
gcs_payload["spend_log_metadata"]["user_api_key"]
== "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b"
)
assert (
gcs_payload["spend_log_metadata"]["user_api_key_user_id"]
== "116544810872468347480"
)
# Delete Object from GCS
print("deleting object from GCS")
await gcs_logger.delete_gcs_object(object_name=object_name)
@pytest.mark.asyncio
async def test_basic_gcs_logger_failure():
load_vertex_ai_credentials()
gcs_logger = GCSBucketLogger()
print("GCSBucketLogger", gcs_logger)
gcs_log_id = f"failure-test-{uuid.uuid4().hex}"
litellm.callbacks = [gcs_logger]
try:
response = await litellm.acompletion(
model="gpt-3.5-turbo",
temperature=0.7,
messages=[{"role": "user", "content": "This is a test"}],
max_tokens=10,
user="ishaan-2",
mock_response=litellm.BadRequestError(
model="gpt-3.5-turbo",
message="Error: 400: Bad Request: Invalid API key, please check your API key and try again.",
llm_provider="openai",
),
metadata={
"gcs_log_id": gcs_log_id,
"tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"],
"user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
"user_api_key_alias": None,
"user_api_end_user_max_budget": None,
"litellm_api_version": "0.0.0",
"global_max_parallel_requests": None,
"user_api_key_user_id": "116544810872468347480",
"user_api_key_org_id": None,
"user_api_key_team_id": None,
"user_api_key_team_alias": None,
"user_api_key_metadata": {},
"requester_ip_address": "127.0.0.1",
"spend_logs_metadata": {"hello": "world"},
"headers": {
"content-type": "application/json",
"user-agent": "PostmanRuntime/7.32.3",
"accept": "*/*",
"postman-token": "92300061-eeaa-423b-a420-0b44896ecdc4",
"host": "localhost:4000",
"accept-encoding": "gzip, deflate, br",
"connection": "keep-alive",
"content-length": "163",
},
"endpoint": "http://localhost:4000/chat/completions",
"model_group": "gpt-3.5-turbo",
"deployment": "azure/chatgpt-v-2",
"model_info": {
"id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
"db_model": False,
},
"api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
"caching_groups": None,
"raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
},
)
except:
pass
await asyncio.sleep(5)
# Get the current date
current_date = datetime.now().strftime("%Y-%m-%d")
# Modify the object_name to include the date-based folder
object_name = gcs_log_id
print("object_name", object_name)
# Check if object landed on GCS
object_from_gcs = await gcs_logger.download_gcs_object(object_name=object_name)
print("object from gcs=", object_from_gcs)
# convert object_from_gcs from bytes to DICT
parsed_data = json.loads(object_from_gcs)
print("object_from_gcs as dict", parsed_data)
print("type of object_from_gcs", type(parsed_data))
gcs_payload = GCSBucketPayload(**parsed_data)
print("gcs_payload", gcs_payload)
assert gcs_payload["request_kwargs"]["model"] == "gpt-3.5-turbo"
assert gcs_payload["request_kwargs"]["messages"] == [
{"role": "user", "content": "This is a test"}
]
assert gcs_payload["response_cost"] == 0
assert gcs_payload["log_event_type"] == "failed_api_call"
gcs_payload["spend_log_metadata"] = json.loads(gcs_payload["spend_log_metadata"])
assert (

View file

@ -76,6 +76,6 @@ async def test_async_prometheus_success_logging():
print("metrics from prometheus", metrics)
assert metrics["litellm_requests_metric_total"] == 1.0
assert metrics["litellm_total_tokens_total"] == 30.0
assert metrics["llm_deployment_success_responses_total"] == 1.0
assert metrics["llm_deployment_total_requests_total"] == 1.0
assert metrics["llm_deployment_latency_per_output_token_bucket"] == 1.0
assert metrics["litellm_deployment_success_responses_total"] == 1.0
assert metrics["litellm_deployment_total_requests_total"] == 1.0
assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0

View file

@ -966,3 +966,203 @@ async def test_user_info_team_list(prisma_client):
pass
mock_client.assert_called()
@pytest.mark.skip(reason="Local test")
@pytest.mark.asyncio
async def test_add_callback_via_key(prisma_client):
"""
Test if callback specified in key, is used.
"""
global headers
import json
from fastapi import HTTPException, Request, Response
from starlette.datastructures import URL
from litellm.proxy.proxy_server import chat_completion
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
await litellm.proxy.proxy_server.prisma_client.connect()
litellm.set_verbose = True
try:
# Your test data
test_data = {
"model": "azure/chatgpt-v-2",
"messages": [
{"role": "user", "content": "write 1 sentence poem"},
],
"max_tokens": 10,
"mock_response": "Hello world",
"api_key": "my-fake-key",
}
request = Request(scope={"type": "http", "method": "POST", "headers": {}})
request._url = URL(url="/chat/completions")
json_bytes = json.dumps(test_data).encode("utf-8")
request._body = json_bytes
with patch.object(
litellm.litellm_core_utils.litellm_logging,
"LangFuseLogger",
new=MagicMock(),
) as mock_client:
resp = await chat_completion(
request=request,
fastapi_response=Response(),
user_api_key_dict=UserAPIKeyAuth(
metadata={
"logging": [
{
"callback_name": "langfuse", # 'otel', 'langfuse', 'lunary'
"callback_type": "success", # set, if required by integration - future improvement, have logging tools work for success + failure by default
"callback_vars": {
"langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY",
"langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY",
"langfuse_host": "https://us.cloud.langfuse.com",
},
}
]
}
),
)
print(resp)
mock_client.assert_called()
mock_client.return_value.log_event.assert_called()
args, kwargs = mock_client.return_value.log_event.call_args
kwargs = kwargs["kwargs"]
assert "user_api_key_metadata" in kwargs["litellm_params"]["metadata"]
assert (
"logging"
in kwargs["litellm_params"]["metadata"]["user_api_key_metadata"]
)
checked_keys = False
for item in kwargs["litellm_params"]["metadata"]["user_api_key_metadata"][
"logging"
]:
for k, v in item["callback_vars"].items():
print("k={}, v={}".format(k, v))
if "key" in k:
assert "os.environ" in v
checked_keys = True
assert checked_keys
except Exception as e:
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
@pytest.mark.asyncio
async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
import json
from fastapi import HTTPException, Request, Response
from starlette.datastructures import URL
from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
await litellm.proxy.proxy_server.prisma_client.connect()
proxy_config = getattr(litellm.proxy.proxy_server, "proxy_config")
request = Request(scope={"type": "http", "method": "POST", "headers": {}})
request._url = URL(url="/chat/completions")
test_data = {
"model": "azure/chatgpt-v-2",
"messages": [
{"role": "user", "content": "write 1 sentence poem"},
],
"max_tokens": 10,
"mock_response": "Hello world",
"api_key": "my-fake-key",
}
json_bytes = json.dumps(test_data).encode("utf-8")
request._body = json_bytes
data = {
"data": {
"model": "azure/chatgpt-v-2",
"messages": [{"role": "user", "content": "write 1 sentence poem"}],
"max_tokens": 10,
"mock_response": "Hello world",
"api_key": "my-fake-key",
},
"request": request,
"user_api_key_dict": UserAPIKeyAuth(
token=None,
key_name=None,
key_alias=None,
spend=0.0,
max_budget=None,
expires=None,
models=[],
aliases={},
config={},
user_id=None,
team_id=None,
max_parallel_requests=None,
metadata={
"logging": [
{
"callback_name": "langfuse",
"callback_type": "success",
"callback_vars": {
"langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY",
"langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY",
"langfuse_host": "https://us.cloud.langfuse.com",
},
}
]
},
tpm_limit=None,
rpm_limit=None,
budget_duration=None,
budget_reset_at=None,
allowed_cache_controls=[],
permissions={},
model_spend={},
model_max_budget={},
soft_budget_cooldown=False,
litellm_budget_table=None,
org_id=None,
team_spend=None,
team_alias=None,
team_tpm_limit=None,
team_rpm_limit=None,
team_max_budget=None,
team_models=[],
team_blocked=False,
soft_budget=None,
team_model_aliases=None,
team_member_spend=None,
team_metadata=None,
end_user_id=None,
end_user_tpm_limit=None,
end_user_rpm_limit=None,
end_user_max_budget=None,
last_refreshed_at=None,
api_key=None,
user_role=None,
allowed_model_region=None,
parent_otel_span=None,
),
"proxy_config": proxy_config,
"general_settings": {},
"version": "0.0.0",
}
new_data = await add_litellm_data_to_request(**data)
assert "success_callback" in new_data
assert new_data["success_callback"] == ["langfuse"]
assert "langfuse_public_key" in new_data
assert "langfuse_secret_key" in new_data

View file

@ -44,7 +44,7 @@ def test_check_valid_ip(
request = Request(client_ip)
assert _check_valid_ip(allowed_ips, request) == expected_result # type: ignore
assert _check_valid_ip(allowed_ips, request)[0] == expected_result # type: ignore
# test x-forwarder for is used when user has opted in
@ -72,7 +72,7 @@ def test_check_valid_ip_sent_with_x_forwarded_for(
request = Request(client_ip, headers={"X-Forwarded-For": client_ip})
assert _check_valid_ip(allowed_ips, request, use_x_forwarded_for=True) == expected_result # type: ignore
assert _check_valid_ip(allowed_ips, request, use_x_forwarded_for=True)[0] == expected_result # type: ignore
@pytest.mark.asyncio

View file

@ -57,6 +57,18 @@
"supports_parallel_function_calling": true,
"supports_vision": true
},
"chatgpt-4o-latest": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000015,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
},
"gpt-4o-2024-05-13": {
"max_tokens": 4096,
"max_input_tokens": 128000,
@ -2062,7 +2074,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-5-sonnet@20240620": {
"max_tokens": 4096,
@ -2073,7 +2086,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-haiku@20240307": {
"max_tokens": 4096,
@ -2084,7 +2098,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-opus@20240229": {
"max_tokens": 4096,
@ -2095,7 +2110,8 @@
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/meta/llama3-405b-instruct-maas": {
"max_tokens": 32000,
@ -4519,6 +4535,69 @@
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-3.1-70b-instruct": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000001,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-3.1-8b-instruct": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.0000002,
"output_cost_per_token": 0.0000002,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-3.1-sonar-huge-128k-online": {
"max_tokens": 127072,
"max_input_tokens": 127072,
"max_output_tokens": 127072,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000005,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-3.1-sonar-large-128k-online": {
"max_tokens": 127072,
"max_input_tokens": 127072,
"max_output_tokens": 127072,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000001,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-3.1-sonar-large-128k-chat": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000001,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-3.1-sonar-small-128k-chat": {
"max_tokens": 131072,
"max_input_tokens": 131072,
"max_output_tokens": 131072,
"input_cost_per_token": 0.0000002,
"output_cost_per_token": 0.0000002,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-3.1-sonar-small-128k-online": {
"max_tokens": 127072,
"max_input_tokens": 127072,
"max_output_tokens": 127072,
"input_cost_per_token": 0.0000002,
"output_cost_per_token": 0.0000002,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/pplx-7b-chat": {
"max_tokens": 8192,
"max_input_tokens": 8192,

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.43.9"
version = "1.43.12"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.43.9"
version = "1.43.12"
version_files = [
"pyproject.toml:^version"
]