Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-28 04:04:31 +00:00)

Merge branch 'main' into litellm_auth_fix

Commit ced4582ecb: 24 changed files with 483 additions and 59 deletions
@@ -11,7 +11,7 @@
 <p align="center">Call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, Groq etc.]
 <br>
 </p>
-<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise"target="_blank">Enterprise Tier</a></h4>
+<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server (LLM Gateway)</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise"target="_blank">Enterprise Tier</a></h4>
 <h4 align="center">
     <a href="https://pypi.org/project/litellm/" target="_blank">
         <img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
@@ -35,9 +35,9 @@ LiteLLM manages:
 - Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
 - [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']`
 - Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
-- Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
+- Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server (LLM Gateway)](https://docs.litellm.ai/docs/simple_proxy)

-[**Jump to OpenAI Proxy Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
+[**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
 [**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs)

 🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published.
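As a quick illustration of the consistent-output guarantee the README lines above describe, here is a minimal SDK sketch (not part of this commit; it assumes `litellm` is installed and `OPENAI_API_KEY` is set in the environment):

```python
# Minimal sketch of the SDK pattern described above (illustrative, not from this commit).
from litellm import completion

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
)

# Consistent output: text is always available at ['choices'][0]['message']['content']
print(response["choices"][0]["message"]["content"])
```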
@@ -134,7 +134,7 @@ litellm.success_callback = ["lunary", "langfuse", "athina", "helicone"] # log in
 response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
 ```

-# OpenAI Proxy - ([Docs](https://docs.litellm.ai/docs/simple_proxy))
+# LiteLLM Proxy Server (LLM Gateway) - ([Docs](https://docs.litellm.ai/docs/simple_proxy))

 Track spend + Load Balance across multiple projects

@@ -14,7 +14,7 @@ https://github.com/BerriAI/litellm

 ## How to use LiteLLM
 You can use litellm through either:
-1. [LiteLLM Proxy Server](#openai-proxy) - Server to call 100+ LLMs, load balance, cost tracking across projects
+1. [LiteLLM Proxy Server](#openai-proxy) - Server (LLM Gateway) to call 100+ LLMs, load balance, cost tracking across projects
 2. [LiteLLM python SDK](#basic-usage) - Python Client to call 100+ LLMs, load balance, cost tracking

 ### When to use LiteLLM Proxy Server
@@ -427,6 +427,105 @@ print(resp)
 ```


+### **Context Caching**
+
+Use Vertex AI Context Caching
+
+[**Relevant VertexAI Docs**](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-overview)
+
+<Tabs>
+
+<TabItem value="proxy" label="LiteLLM PROXY">
+
+1. Add model to config.yaml
+```yaml
+model_list:
+  # used for /chat/completions, /completions, /embeddings endpoints
+  - model_name: gemini-1.5-pro-001
+    litellm_params:
+      model: vertex_ai_beta/gemini-1.5-pro-001
+      vertex_project: "project-id"
+      vertex_location: "us-central1"
+      vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json
+
+# used for the /cachedContent and vertexAI native endpoints
+default_vertex_config:
+  vertex_project: "adroit-crow-413218"
+  vertex_location: "us-central1"
+  vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json
+
+```
+
+2. Start Proxy
+
+```
+$ litellm --config /path/to/config.yaml
+```
+
+3. Make Request!
+
+- First create a cachedContents object by calling the Vertex `cachedContents` endpoint. [VertexAI API Ref for cachedContents endpoint](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest). (LiteLLM proxy forwards the `/cachedContents` request to the VertexAI API)
+- Use the `cachedContents` object in your /chat/completions request to vertexAI
+
+```python
+import datetime
+import openai
+import httpx
+
+# Set Litellm proxy variables here
+LITELLM_BASE_URL = "http://0.0.0.0:4000"
+LITELLM_PROXY_API_KEY = "sk-1234"
+
+client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
+httpx_client = httpx.Client(timeout=30)
+
+################################
+# First create a cachedContents object
+# this request gets forwarded as is to: https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest
+print("creating cached content")
+create_cache = httpx_client.post(
+    url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
+    headers = {"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
+    json = {
+        "model": "gemini-1.5-pro-001",
+        "contents": [
+            {
+                "role": "user",
+                "parts": [{
+                    "text": "This is sample text to demonstrate explicit caching."*4000
+                }]
+            }
+        ],
+    }
+)
+print("response from create_cache", create_cache)
+create_cache_response = create_cache.json()
+print("json from create_cache", create_cache_response)
+cached_content_name = create_cache_response["name"]
+
+#################################
+# Use the `cachedContents` object in your /chat/completions
+response = client.chat.completions.create( # type: ignore
+    model="gemini-1.5-pro-001",
+    max_tokens=8192,
+    messages=[
+        {
+            "role": "user",
+            "content": "what is the sample text about?",
+        },
+    ],
+    temperature="0.7",
+    extra_body={"cached_content": cached_content_name}, # 👈 key change
+)
+
+print("response from proxy", response)
+
+```
+
+</TabItem>
+</Tabs>
+
 ## Pre-requisites
 * `pip install google-cloud-aiplatform` (pre-installed on proxy docker image)
 * Authentication:
@@ -5,7 +5,7 @@ import TabItem from '@theme/TabItem';
 # Quick Start
 Quick start CLI, Config, Docker

-LiteLLM Server manages:
+LiteLLM Server (LLM Gateway) manages:

 * **Unified Interface**: Calling 100+ LLMs [Huggingface/Bedrock/TogetherAI/etc.](#other-supported-models) in the OpenAI `ChatCompletions` & `Completions` format
 * **Cost tracking**: Authentication, Spend Tracking & Budgets [Virtual Keys](https://docs.litellm.ai/docs/proxy/virtual_keys)
@@ -1,5 +1,11 @@
 # [BETA] Vertex AI Endpoints

+:::tip
+
+Looking for the Unified API (OpenAI format) for VertexAI ? [Go here - using vertexAI with LiteLLM SDK or LiteLLM Proxy Server](../docs/providers/vertex.md)
+
+:::
+
 ## Supported API Endpoints

 - Gemini API
@@ -24,7 +24,7 @@ const sidebars = {
       link: {
         type: "generated-index",
         title: "💥 LiteLLM Proxy Server",
-        description: `OpenAI Proxy Server to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
+        description: `OpenAI Proxy Server (LLM Gateway) to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
         slug: "/simple_proxy",
       },
       items: [
@@ -261,6 +261,7 @@ default_user_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
 max_user_budget: Optional[float] = None
 max_internal_user_budget: Optional[float] = None
+internal_user_budget_duration: Optional[str] = None
 max_end_user_budget: Optional[float] = None
 #### REQUEST PRIORITIZATION ####
 priority_reservation: Optional[Dict[str, float]] = None
@@ -90,7 +90,13 @@ class ServiceLogging(CustomLogger):
             )

     async def init_prometheus_services_logger_if_none(self):
-        if self.prometheusServicesLogger is None:
+        """
+        initializes prometheusServicesLogger if it is None or no attribute exists on ServiceLogging Object
+
+        """
+        if not hasattr(self, "prometheusServicesLogger"):
+            self.prometheusServicesLogger = PrometheusServicesLogger()
+        elif self.prometheusServicesLogger is None:
             self.prometheusServicesLogger = self.prometheusServicesLogger()
         return

@@ -1,6 +1,9 @@
 # What is this?
 ## Helper utilities
-from typing import List, Literal, Optional, Tuple
+import os
+from typing import BinaryIO, List, Literal, Optional, Tuple
+
+from litellm._logging import verbose_logger


 def map_finish_reason(
@@ -83,3 +86,20 @@ def _get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None):
         return kwargs["litellm_parent_otel_span"]
     except:
         return None
+
+
+def get_file_check_sum(_file: BinaryIO):
+    """
+    Helper to safely get file checksum - used as a cache key
+    """
+    try:
+        file_descriptor = _file.fileno()
+        file_stat = os.fstat(file_descriptor)
+        file_size = str(file_stat.st_size)
+        file_checksum = _file.name + file_size
+        return file_checksum
+    except Exception as e:
+        verbose_logger.error(f"Error getting file_checksum: {(str(e))}")
+        file_checksum = _file.name
+        return file_checksum
+    return file_checksum
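A rough usage sketch of the `get_file_check_sum` helper added above (the audio file path is a placeholder; the import path follows the `litellm.litellm_core_utils.core_helpers` module this hunk edits):

```python
# Illustrative only: how the helper above derives a cache key for a transcription file.
from litellm.litellm_core_utils.core_helpers import get_file_check_sum

with open("speech.mp3", "rb") as audio_file:  # placeholder file path
    # Checksum is file name + size, e.g. "speech.mp312345"; falls back to the name alone on error.
    cache_key = get_file_check_sum(_file=audio_file)
    print(cache_key)
```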
@@ -287,6 +287,9 @@ class AnthropicConfig:
         if user_message is not None:
             new_messages.append(user_message)

+        if len(new_user_content_list) > 0:
+            new_messages.append({"role": "user", "content": new_user_content_list})
+
         if len(tool_message_list) > 0:
             new_messages.extend(tool_message_list)

@@ -278,6 +278,14 @@ class VertexFineTuningAPI(VertexLLM):
             url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
         elif "countTokens" in request_route:
             url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
+        elif "cachedContents" in request_route:
+            _model = request_data.get("model")
+            if _model is not None and "/publishers/google/models/" not in _model:
+                request_data["model"] = (
+                    f"projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{_model}"
+                )
+
+            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1beta1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
         else:
             raise ValueError(f"Unsupported Vertex AI request route: {request_route}")
         if self.async_handler is None:
@@ -1135,8 +1135,9 @@ def convert_to_anthropic_tool_result(message: dict) -> AnthropicMessagesToolResu
         return anthropic_tool_result
     if message["role"] == "function":
         content = message.get("content") # type: ignore
+        tool_call_id = message.get("tool_call_id") or str(uuid.uuid4())
         anthropic_tool_result = AnthropicMessagesToolResultParam(
-            type="tool_result", tool_use_id=str(uuid.uuid4()), content=content
+            type="tool_result", tool_use_id=tool_call_id, content=content
         )

         return anthropic_tool_result
@@ -881,6 +881,21 @@ class VertexLLM(BaseLLM):

         return self._credentials.token, self.project_id

+    def is_using_v1beta1_features(self, optional_params: dict) -> bool:
+        """
+        VertexAI only supports ContextCaching on v1beta1
+
+        use this helper to decide if request should be sent to v1 or v1beta1
+
+        Returns v1beta1 if context caching is enabled
+        Returns v1 in all other cases
+        """
+        if "cached_content" in optional_params:
+            return True
+        if "CachedContent" in optional_params:
+            return True
+        return False
+
     def _get_token_and_url(
         self,
         model: str,
@@ -891,6 +906,7 @@ class VertexLLM(BaseLLM):
         stream: Optional[bool],
         custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
         api_base: Optional[str],
+        should_use_v1beta1_features: Optional[bool] = False,
     ) -> Tuple[Optional[str], str]:
         """
         Internal function. Returns the token and url for the call.
@@ -920,12 +936,13 @@ class VertexLLM(BaseLLM):
             vertex_location = self.get_vertex_region(vertex_region=vertex_location)

             ### SET RUNTIME ENDPOINT ###
+            version = "v1beta1" if should_use_v1beta1_features is True else "v1"
             endpoint = "generateContent"
             if stream is True:
                 endpoint = "streamGenerateContent"
-                url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}?alt=sse"
+                url = f"https://{vertex_location}-aiplatform.googleapis.com/{version}/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}?alt=sse"
             else:
-                url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}"
+                url = f"https://{vertex_location}-aiplatform.googleapis.com/{version}/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}"

         if (
             api_base is not None
@@ -1055,6 +1072,9 @@ class VertexLLM(BaseLLM):
     ) -> Union[ModelResponse, CustomStreamWrapper]:
         stream: Optional[bool] = optional_params.pop("stream", None) # type: ignore

+        should_use_v1beta1_features = self.is_using_v1beta1_features(
+            optional_params=optional_params
+        )
         auth_header, url = self._get_token_and_url(
             model=model,
             gemini_api_key=gemini_api_key,
@@ -1064,6 +1084,7 @@ class VertexLLM(BaseLLM):
             stream=stream,
             custom_llm_provider=custom_llm_provider,
             api_base=api_base,
+            should_use_v1beta1_features=should_use_v1beta1_features,
         )

         ## TRANSFORMATION ##
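In short, a `cached_content` (or `CachedContent`) key in `optional_params` flips the Vertex call onto the `v1beta1` API version. A standalone sketch of that decision helper (the cached-content name is a placeholder):

```python
# Illustrative check of the routing helper added in the hunks above.
from litellm.llms.vertex_httpx import VertexLLM

vertex_llm = VertexLLM()

# Context-caching request -> served from the v1beta1 Vertex endpoint
assert vertex_llm.is_using_v1beta1_features(
    optional_params={"cached_content": "projects/my-project/locations/us-central1/cachedContents/123"}  # placeholder name
) is True

# Plain completion request -> stays on v1
assert vertex_llm.is_using_v1beta1_features(optional_params={"temperature": 0.1}) is False
```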
@@ -1,4 +1,8 @@
 model_list:
-  - model_name: "gpt-4"
+  - model_name: "claude-3-5-sonnet-20240620"
     litellm_params:
-      model: "gpt-4"
+      model: "claude-3-5-sonnet-20240620"
+
+litellm_settings:
+  max_internal_user_budget: 0.001
+  internal_user_budget_duration: "5m"
@@ -91,6 +91,10 @@ async def new_user(
     if litellm.max_internal_user_budget is not None:
         data_json["max_budget"] = litellm.max_internal_user_budget

+    if "budget_duration" in data_json and data_json["budget_duration"] is None:
+        if litellm.internal_user_budget_duration is not None:
+            data_json["budget_duration"] = litellm.internal_user_budget_duration
+
     response = await generate_key_helper_fn(request_type="user", **data_json)

     # Admin UI Logic
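Read together with the config hunk above, the intent is that new internal users inherit a default budget and reset window. A hedged sketch of that flow (the `data_json` dict here is a stand-in for the `/user/new` payload):

```python
# Illustrative sketch only: how the module-level settings feed the new_user defaults above.
import litellm

litellm.max_internal_user_budget = 0.001        # mirrors litellm_settings.max_internal_user_budget
litellm.internal_user_budget_duration = "5m"    # mirrors litellm_settings.internal_user_budget_duration

data_json = {"user_id": "my-internal-user", "budget_duration": None}  # stand-in request payload

if litellm.max_internal_user_budget is not None:
    data_json["max_budget"] = litellm.max_internal_user_budget
if data_json.get("budget_duration") is None and litellm.internal_user_budget_duration is not None:
    data_json["budget_duration"] = litellm.internal_user_budget_duration

print(data_json)  # {'user_id': 'my-internal-user', 'budget_duration': '5m', 'max_budget': 0.001}
```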
@@ -3,20 +3,14 @@ model_list:
     litellm_params:
       model: openai/fake
       api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railwaz.app/
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
   - model_name: fireworks-llama-v3-70b-instruct
     litellm_params:
       model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
       api_key: "os.environ/FIREWORKS"
-  # provider specific wildcard routing
-  - model_name: "anthropic/*"
+  - model_name: "*"
     litellm_params:
-      model: "anthropic/*"
-      api_key: os.environ/ANTHROPIC_API_KEY
-  - model_name: "groq/*"
-    litellm_params:
-      model: "groq/*"
-      api_key: os.environ/GROQ_API_KEY
+      model: "*"
   - model_name: "*"
     litellm_params:
       model: openai/*
@@ -25,37 +19,22 @@ model_list:
     litellm_params:
       model: mistral/mistral-small-latest
       api_key: "os.environ/MISTRAL_API_KEY"
-  - model_name: tts
+  - model_name: gemini-1.5-pro-001
     litellm_params:
-      model: openai/tts-1
-      api_key: "os.environ/OPENAI_API_KEY"
-    model_info:
-      mode: audio_speech
+      model: vertex_ai_beta/gemini-1.5-pro-001
+      vertex_project: "adroit-crow-413218"
+      vertex_location: "us-central1"
+      vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json"
+      # Add path to service account.json

-# for /files endpoints
-files_settings:
-  - custom_llm_provider: azure
-    api_base: https://exampleopenaiendpoint-production.up.railway.app
-    api_key: fake-key
-    api_version: "2023-03-15-preview"
-  - custom_llm_provider: openai
-    api_key: os.environ/OPENAI_API_KEY
+default_vertex_config:
+  vertex_project: "adroit-crow-413218"
+  vertex_location: "us-central1"
+  vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json

 general_settings:
   master_key: sk-1234
-  pass_through_endpoints:
-    - path: "/v1/rerank" # route you want to add to LiteLLM Proxy Server
-      target: "https://api.cohere.com/v1/rerank" # URL this route should forward requests to
-      headers: # headers to forward to this URL
-        content-type: application/json # (Optional) Extra Headers to pass to this endpoint
-        accept: application/json
-      forward_headers: True

 litellm_settings:
   callbacks: ["otel"] # 👈 KEY CHANGE
-  success_callback: ["prometheus"]
-  failure_callback: ["prometheus"]
@@ -5374,7 +5374,13 @@ async def anthropic_response(
     litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]

     global user_temperature, user_request_timeout, user_max_tokens, user_api_base
-    data: dict = {**anthropic_data, "adapter_id": "anthropic"}
+    body = await request.body()
+    body_str = body.decode()
+    try:
+        request_data: dict = ast.literal_eval(body_str)
+    except Exception:
+        request_data = json.loads(body_str)
+    data: dict = {**request_data, "adapter_id": "anthropic"}
     try:
         data["model"] = (
             general_settings.get("completion_model", None) # server default
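The two-step parse above accepts both Python-literal and strict-JSON request bodies. A standalone sketch of that fallback (sample payloads are made up):

```python
# Illustrative fallback: ast.literal_eval handles Python-literal bodies, json.loads handles JSON.
import ast
import json

def parse_body(body_str: str) -> dict:
    try:
        return ast.literal_eval(body_str)  # e.g. single quotes / True / None
    except Exception:
        return json.loads(body_str)        # e.g. double quotes / true / null

print(parse_body("{'model': 'claude-3-5-sonnet-20240620', 'stream': False}"))
print(parse_body('{"model": "claude-3-5-sonnet-20240620", "stream": false}'))
```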
litellm/proxy/tests/test_gemini_context_caching.py (new file, 54 lines)
@@ -0,0 +1,54 @@
+import datetime
+
+import httpx
+import openai
+
+# Set Litellm proxy variables here
+LITELLM_BASE_URL = "http://0.0.0.0:4000"
+LITELLM_PROXY_API_KEY = "sk-1234"
+
+client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
+httpx_client = httpx.Client(timeout=30)
+
+################################
+# First create a cachedContents object
+print("creating cached content")
+create_cache = httpx_client.post(
+    url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
+    headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
+    json={
+        "model": "gemini-1.5-pro-001",
+        "contents": [
+            {
+                "role": "user",
+                "parts": [
+                    {
+                        "text": "This is sample text to demonstrate explicit caching."
+                        * 4000
+                    }
+                ],
+            }
+        ],
+    },
+)
+print("response from create_cache", create_cache)
+create_cache_response = create_cache.json()
+print("json from create_cache", create_cache_response)
+cached_content_name = create_cache_response["name"]
+
+#################################
+# Use the `cachedContents` object in your /chat/completions
+response = client.chat.completions.create( # type: ignore
+    model="gemini-1.5-pro-001",
+    max_tokens=8192,
+    messages=[
+        {
+            "role": "user",
+            "content": "what is the sample text about?",
+        },
+    ],
+    temperature="0.7",
+    extra_body={"cached_content": cached_content_name}, # 👈 key change
+)
+
+print("response from proxy", response)
@@ -303,3 +303,30 @@ async def vertex_cancel_fine_tuning_job(
         return response
     except Exception as e:
         raise exception_handler(e) from e
+
+
+@router.post(
+    "/vertex-ai/cachedContents",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["Vertex AI endpoints"],
+)
+async def vertex_create_add_cached_content(
+    request: Request,
+    fastapi_response: Response,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    this is a pass through endpoint for the Vertex AI API. /cachedContents endpoint
+
+    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest
+
+    it uses the vertex ai credentials on the proxy and forwards to vertex ai api
+    """
+    try:
+        response = await execute_post_vertex_ai_request(
+            request=request,
+            route="/cachedContents",
+        )
+        return response
+    except Exception as e:
+        raise exception_handler(e) from e
@@ -1969,3 +1969,58 @@ def test_prompt_factory_nested():
         assert isinstance(
             message["parts"][0]["text"], str
         ), "'text' value not a string."
+
+
+def test_get_token_url():
+    from litellm.llms.vertex_httpx import VertexLLM
+
+    vertex_llm = VertexLLM()
+    vertex_ai_project = "adroit-crow-413218"
+    vertex_ai_location = "us-central1"
+    json_obj = get_vertex_ai_creds_json()
+    vertex_credentials = json.dumps(json_obj)
+
+    should_use_v1beta1_features = vertex_llm.is_using_v1beta1_features(
+        optional_params={"cached_content": "hi"}
+    )
+
+    assert should_use_v1beta1_features is True
+
+    _, url = vertex_llm._get_token_and_url(
+        vertex_project=vertex_ai_project,
+        vertex_location=vertex_ai_location,
+        vertex_credentials=vertex_credentials,
+        gemini_api_key="",
+        custom_llm_provider="vertex_ai_beta",
+        should_use_v1beta1_features=should_use_v1beta1_features,
+        api_base=None,
+        model="",
+        stream=False,
+    )
+
+    print("url=", url)
+
+    assert "/v1beta1/" in url
+
+    should_use_v1beta1_features = vertex_llm.is_using_v1beta1_features(
+        optional_params={"temperature": 0.1}
+    )
+
+    _, url = vertex_llm._get_token_and_url(
+        vertex_project=vertex_ai_project,
+        vertex_location=vertex_ai_location,
+        vertex_credentials=vertex_credentials,
+        gemini_api_key="",
+        custom_llm_provider="vertex_ai_beta",
+        should_use_v1beta1_features=should_use_v1beta1_features,
+        api_base=None,
+        model="",
+        stream=False,
+    )
+
+    print("url for normal request", url)
+
+    assert "v1beta1" not in url
+    assert "/v1/" in url
+
+    pass
@@ -183,3 +183,96 @@ async def test_anthropic_router_completion_e2e():
     assert isinstance(response, AnthropicResponse)

     assert response.model == "gpt-3.5-turbo"
+
+
+def test_anthropic_tool_calling_translation():
+    kwargs = {
+        "model": "claude-3-5-sonnet-20240620",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "Would development of a software platform be under ASC 350-40 or ASC 985?",
+                    }
+                ],
+            },
+            {
+                "role": "assistant",
+                "content": [
+                    {
+                        "type": "tool_use",
+                        "id": "37d6f703-cbcc-497d-95a1-2aa24a114adc",
+                        "name": "TaskPlanningTool",
+                        "input": {
+                            "completed_steps": [],
+                            "next_steps": [
+                                {
+                                    "tool_name": "AccountingResearchTool",
+                                    "description": "Research ASC 350-40 to understand its scope and applicability to software development.",
+                                },
+                                {
+                                    "tool_name": "AccountingResearchTool",
+                                    "description": "Research ASC 985 to understand its scope and applicability to software development.",
+                                },
+                                {
+                                    "tool_name": "AccountingResearchTool",
+                                    "description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development.",
+                                },
+                            ],
+                            "learnings": [],
+                            "potential_issues": [
+                                "The distinction between the two standards might not be clear-cut for all types of software development.",
+                                "There might be specific circumstances or details about the software platform that could affect which standard applies.",
+                            ],
+                            "missing_info": [
+                                "Specific details about the type of software platform being developed (e.g., for internal use or for sale).",
+                                "Whether the entity developing the software is also the end-user or if it's being developed for external customers.",
+                            ],
+                            "done": False,
+                            "required_formatting": None,
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": "eb7023b1-5ee8-43b8-b90f-ac5a23d37c31",
+                        "content": {
+                            "completed_steps": [],
+                            "next_steps": [
+                                {
+                                    "tool_name": "AccountingResearchTool",
+                                    "description": "Research ASC 350-40 to understand its scope and applicability to software development.",
+                                },
+                                {
+                                    "tool_name": "AccountingResearchTool",
+                                    "description": "Research ASC 985 to understand its scope and applicability to software development.",
+                                },
+                                {
+                                    "tool_name": "AccountingResearchTool",
+                                    "description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development.",
+                                },
+                            ],
+                            "formatting_step": None,
+                        },
+                    }
+                ],
+            },
+        ],
+    }
+
+    from litellm.adapters.anthropic_adapter import anthropic_adapter
+
+    translated_params = anthropic_adapter.translate_completion_input_params(
+        kwargs=kwargs
+    )
+
+    print(translated_params["messages"])
+
+    assert len(translated_params["messages"]) > 0
+    assert translated_params["messages"][1]["role"] == "user"
@@ -4405,6 +4405,3 @@ def test_moderation():
     output = response.results[0]
     print(output)
     return output
-
-
-# test_moderation()
@@ -219,3 +219,44 @@ def test_base64_image_input(url, expected_media_type):
     response = convert_to_anthropic_image_obj(openai_image_url=url)

     assert response["media_type"] == expected_media_type
+
+
+def test_anthropic_messages_tool_call():
+    messages = [
+        {
+            "role": "user",
+            "content": "Would development of a software platform be under ASC 350-40 or ASC 985?",
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_call_id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
+            "tool_calls": [
+                {
+                    "id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
+                    "function": {
+                        "arguments": '{"completed_steps": [], "next_steps": [{"tool_name": "AccountingResearchTool", "description": "Research ASC 350-40 to understand its scope and applicability to software development."}, {"tool_name": "AccountingResearchTool", "description": "Research ASC 985 to understand its scope and applicability to software development."}, {"tool_name": "AccountingResearchTool", "description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development."}], "learnings": [], "potential_issues": ["The distinction between the two standards might not be clear-cut for all types of software development.", "There might be specific circumstances or details about the software platform that could affect which standard applies."], "missing_info": ["Specific details about the type of software platform being developed (e.g., for internal use or for sale).", "Whether the entity developing the software is also the end-user or if it\'s being developed for external customers."], "done": false, "required_formatting": null}',
+                        "name": "TaskPlanningTool",
+                    },
+                    "type": "function",
+                }
+            ],
+        },
+        {
+            "role": "function",
+            "content": '{"completed_steps":[],"next_steps":[{"tool_name":"AccountingResearchTool","description":"Research ASC 350-40 to understand its scope and applicability to software development."},{"tool_name":"AccountingResearchTool","description":"Research ASC 985 to understand its scope and applicability to software development."},{"tool_name":"AccountingResearchTool","description":"Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development."}],"formatting_step":null}',
+            "name": "TaskPlanningTool",
+            "tool_call_id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
+        },
+    ]
+
+    translated_messages = anthropic_messages_pt(
+        messages, model="claude-3-sonnet-20240229", llm_provider="anthropic"
+    )
+
+    print(translated_messages)
+
+    assert (
+        translated_messages[-1]["content"][0]["tool_use_id"]
+        == "bc8cb4b6-88c4-4138-8993-3a9d9cd51656"
+    )
@@ -55,7 +55,10 @@ import litellm._service_logger # for storing API inputs, outputs, and metadata
 import litellm.litellm_core_utils
 import litellm.litellm_core_utils.json_validation_rule
 from litellm.caching import DualCache
-from litellm.litellm_core_utils.core_helpers import map_finish_reason
+from litellm.litellm_core_utils.core_helpers import (
+    get_file_check_sum,
+    map_finish_reason,
+)
 from litellm.litellm_core_utils.exception_mapping_utils import get_error_message
 from litellm.litellm_core_utils.llm_request_utils import _ensure_extra_body_is_safe
 from litellm.litellm_core_utils.redact_messages import (
@@ -557,12 +560,8 @@ def function_setup(
             or call_type == CallTypes.transcription.value
         ):
             _file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
-            file_name = getattr(_file_name, "name", "audio_file")
-            file_descriptor = _file_name.fileno()
-            file_stat = os.fstat(file_descriptor)
-            file_size = str(file_stat.st_size)
-
-            file_checksum = _file_name.name + file_size
+            file_checksum = get_file_check_sum(_file=_file_name)
+            file_name = _file_name.name
             if "metadata" in kwargs:
                 kwargs["metadata"]["file_checksum"] = file_checksum
             else: