Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)

Merge branch 'main' into litellm_auth_fix

Commit ced4582ecb

24 changed files with 483 additions and 59 deletions
@@ -11,7 +11,7 @@
<p align="center">Call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, Groq etc.]
<br>
</p>
-<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise"target="_blank">Enterprise Tier</a></h4>
+<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server (LLM Gateway)</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise"target="_blank">Enterprise Tier</a></h4>
<h4 align="center">
<a href="https://pypi.org/project/litellm/" target="_blank">
<img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">

@@ -35,9 +35,9 @@ LiteLLM manages:
- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
- [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']`
- Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
-- Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
+- Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server (LLM Gateway)](https://docs.litellm.ai/docs/simple_proxy)

-[**Jump to OpenAI Proxy Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
+[**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
[**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs)

🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published.

@@ -134,7 +134,7 @@ litellm.success_callback = ["lunary", "langfuse", "athina", "helicone"] # log in
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
```

-# OpenAI Proxy - ([Docs](https://docs.litellm.ai/docs/simple_proxy))
+# LiteLLM Proxy Server (LLM Gateway) - ([Docs](https://docs.litellm.ai/docs/simple_proxy))

Track spend + Load Balance across multiple projects
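For orientation (not part of the diff): a minimal sketch of the SDK pattern the README hunks above describe, showing that the same `completion()` call shape works across providers and that the text is always available at `['choices'][0]['message']['content']`. The API keys are placeholders.

```python
# Minimal sketch of the README's basic-usage pattern (keys are placeholders).
import os
from litellm import completion

os.environ["OPENAI_API_KEY"] = "sk-..."          # placeholder
os.environ["ANTHROPIC_API_KEY"] = "sk-ant-..."   # placeholder

messages = [{"role": "user", "content": "Hi 👋 - i'm openai"}]

# Same call, different providers - LiteLLM translates the request format.
openai_resp = completion(model="gpt-3.5-turbo", messages=messages)
claude_resp = completion(model="claude-3-5-sonnet-20240620", messages=messages)

# Consistent output: text is always at ['choices'][0]['message']['content']
print(openai_resp["choices"][0]["message"]["content"])
print(claude_resp["choices"][0]["message"]["content"])
```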
@@ -14,7 +14,7 @@ https://github.com/BerriAI/litellm

## How to use LiteLLM
You can use litellm through either:
-1. [LiteLLM Proxy Server](#openai-proxy) - Server to call 100+ LLMs, load balance, cost tracking across projects
+1. [LiteLLM Proxy Server](#openai-proxy) - Server (LLM Gateway) to call 100+ LLMs, load balance, cost tracking across projects
2. [LiteLLM python SDK](#basic-usage) - Python Client to call 100+ LLMs, load balance, cost tracking

### When to use LiteLLM Proxy Server
@@ -427,6 +427,105 @@ print(resp)
```


### **Context Caching**

Use Vertex AI Context Caching

[**Relevant VertexAI Docs**](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-overview)

<Tabs>

<TabItem value="proxy" label="LiteLLM PROXY">

1. Add model to config.yaml
```yaml
model_list:
  # used for /chat/completions, /completions, /embeddings endpoints
  - model_name: gemini-1.5-pro-001
    litellm_params:
      model: vertex_ai_beta/gemini-1.5-pro-001
      vertex_project: "project-id"
      vertex_location: "us-central1"
      vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json

# used for the /cachedContent and vertexAI native endpoints
default_vertex_config:
  vertex_project: "adroit-crow-413218"
  vertex_location: "us-central1"
  vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json

```

2. Start Proxy

```
$ litellm --config /path/to/config.yaml
```

3. Make Request!

- First create a cachedContents object by calling the Vertex `cachedContents` endpoint. [VertexAI API Ref for cachedContents endpoint](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest). (LiteLLM proxy forwards the `/cachedContents` request to the VertexAI API.)
- Use the `cachedContents` object in your /chat/completions request to vertexAI

```python
import datetime
import openai
import httpx

# Set Litellm proxy variables here
LITELLM_BASE_URL = "http://0.0.0.0:4000"
LITELLM_PROXY_API_KEY = "sk-1234"

client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
httpx_client = httpx.Client(timeout=30)

################################
# First create a cachedContents object
# this request gets forwarded as is to: https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest
print("creating cached content")
create_cache = httpx_client.post(
    url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
    headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
    json={
        "model": "gemini-1.5-pro-001",
        "contents": [
            {
                "role": "user",
                "parts": [{
                    "text": "This is sample text to demonstrate explicit caching." * 4000
                }]
            }
        ],
    },
)
print("response from create_cache", create_cache)
create_cache_response = create_cache.json()
print("json from create_cache", create_cache_response)
cached_content_name = create_cache_response["name"]

#################################
# Use the `cachedContents` object in your /chat/completions
response = client.chat.completions.create(  # type: ignore
    model="gemini-1.5-pro-001",
    max_tokens=8192,
    messages=[
        {
            "role": "user",
            "content": "what is the sample text about?",
        },
    ],
    temperature=0.7,
    extra_body={"cached_content": cached_content_name},  # 👈 key change
)

print("response from proxy", response)

```

</TabItem>
</Tabs>


## Pre-requisites
* `pip install google-cloud-aiplatform` (pre-installed on proxy docker image)
* Authentication:
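Illustrative only: the added docs cover the proxy flow; if calling Vertex through the LiteLLM SDK directly, the commit's `is_using_v1beta1_features` check suggests `cached_content` can be passed as an optional param on the completion call. The sketch below assumes that kwarg is forwarded unchanged to Vertex, so treat it as an assumption rather than the documented interface.

```python
# Hedged sketch: SDK-side variant of the proxy example above.
# Assumes `cached_content` is forwarded as a Vertex optional param,
# as the is_using_v1beta1_features() check added in this commit implies.
import litellm

cached_content_name = "<name returned by the /cachedContents call>"  # placeholder

response = litellm.completion(
    model="vertex_ai_beta/gemini-1.5-pro-001",
    messages=[{"role": "user", "content": "what is the sample text about?"}],
    max_tokens=8192,
    temperature=0.7,
    vertex_project="adroit-crow-413218",   # values taken from the config.yaml above
    vertex_location="us-central1",
    cached_content=cached_content_name,
)
print(response.choices[0].message.content)
```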
@@ -5,7 +5,7 @@ import TabItem from '@theme/TabItem';
# Quick Start
Quick start CLI, Config, Docker

-LiteLLM Server manages:
+LiteLLM Server (LLM Gateway) manages:

* **Unified Interface**: Calling 100+ LLMs [Huggingface/Bedrock/TogetherAI/etc.](#other-supported-models) in the OpenAI `ChatCompletions` & `Completions` format
* **Cost tracking**: Authentication, Spend Tracking & Budgets [Virtual Keys](https://docs.litellm.ai/docs/proxy/virtual_keys)
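Not part of the diff: a minimal end-to-end sketch of the quick-start flow described above, starting the gateway from a config file and calling it with the OpenAI SDK. The port, key, and model name mirror examples elsewhere in this commit and are placeholders for your own setup.

```python
# Quick-start sketch (placeholders throughout):
#   1) start the gateway:  litellm --config /path/to/config.yaml
#   2) call it with any OpenAI-compatible client:
import openai

client = openai.OpenAI(
    api_key="sk-1234",               # proxy master key / virtual key
    base_url="http://0.0.0.0:4000",  # default local proxy address
)

response = client.chat.completions.create(
    model="gemini-1.5-pro-001",  # any model_name defined in config.yaml
    messages=[{"role": "user", "content": "Hello from the quick start"}],
)
print(response.choices[0].message.content)
```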
@@ -1,5 +1,11 @@
# [BETA] Vertex AI Endpoints

:::tip

Looking for the Unified API (OpenAI format) for VertexAI ? [Go here - using vertexAI with LiteLLM SDK or LiteLLM Proxy Server](../docs/providers/vertex.md)

:::

## Supported API Endpoints

- Gemini API
@@ -24,7 +24,7 @@ const sidebars = {
      link: {
        type: "generated-index",
        title: "💥 LiteLLM Proxy Server",
-       description: `OpenAI Proxy Server to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
+       description: `OpenAI Proxy Server (LLM Gateway) to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
        slug: "/simple_proxy",
      },
      items: [
@@ -261,6 +261,7 @@ default_user_params: Optional[Dict] = None
default_team_settings: Optional[List] = None
max_user_budget: Optional[float] = None
max_internal_user_budget: Optional[float] = None
internal_user_budget_duration: Optional[str] = None
max_end_user_budget: Optional[float] = None
#### REQUEST PRIORITIZATION ####
priority_reservation: Optional[Dict[str, float]] = None
@@ -90,7 +90,13 @@ class ServiceLogging(CustomLogger):
        )

    async def init_prometheus_services_logger_if_none(self):
-       if self.prometheusServicesLogger is None:
+       """
+       initializes prometheusServicesLogger if it is None or no attribute exists on ServiceLogging Object
+
+       """
+       if not hasattr(self, "prometheusServicesLogger"):
            self.prometheusServicesLogger = PrometheusServicesLogger()
+       elif self.prometheusServicesLogger is None:
+           self.prometheusServicesLogger = self.prometheusServicesLogger()
        return

@@ -1,6 +1,9 @@
# What is this?
## Helper utilities
-from typing import List, Literal, Optional, Tuple
+import os
+from typing import BinaryIO, List, Literal, Optional, Tuple
+
+from litellm._logging import verbose_logger


def map_finish_reason(
@@ -83,3 +86,20 @@ def _get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None):
        return kwargs["litellm_parent_otel_span"]
    except:
        return None


def get_file_check_sum(_file: BinaryIO):
    """
    Helper to safely get file checksum - used as a cache key
    """
    try:
        file_descriptor = _file.fileno()
        file_stat = os.fstat(file_descriptor)
        file_size = str(file_stat.st_size)
        file_checksum = _file.name + file_size
        return file_checksum
    except Exception as e:
        verbose_logger.error(f"Error getting file_checksum: {(str(e))}")
        file_checksum = _file.name
        return file_checksum
    return file_checksum
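A small usage sketch for the new `get_file_check_sum` helper; the file path is a placeholder and the import path matches the one added to `utils.py` later in this diff. Note the "checksum" is just the file name plus its byte size, a cheap cache key rather than a cryptographic hash.

```python
# Usage sketch for the new helper; "sample_audio.mp3" is a placeholder path.
from litellm.litellm_core_utils.core_helpers import get_file_check_sum

with open("sample_audio.mp3", "rb") as audio_file:
    checksum = get_file_check_sum(_file=audio_file)

print("file_checksum:", checksum)  # e.g. "sample_audio.mp31048576" (name + size)
```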
@@ -287,6 +287,9 @@ class AnthropicConfig:
        if user_message is not None:
            new_messages.append(user_message)

        if len(new_user_content_list) > 0:
            new_messages.append({"role": "user", "content": new_user_content_list})

        if len(tool_message_list) > 0:
            new_messages.extend(tool_message_list)

@@ -278,6 +278,14 @@ class VertexFineTuningAPI(VertexLLM):
            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
        elif "countTokens" in request_route:
            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
        elif "cachedContents" in request_route:
            _model = request_data.get("model")
            if _model is not None and "/publishers/google/models/" not in _model:
                request_data["model"] = (
                    f"projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{_model}"
                )

            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1beta1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
        else:
            raise ValueError(f"Unsupported Vertex AI request route: {request_route}")
        if self.async_handler is None:
@@ -1135,8 +1135,9 @@ def convert_to_anthropic_tool_result(message: dict) -> AnthropicMessagesToolResu
        return anthropic_tool_result
    if message["role"] == "function":
        content = message.get("content")  # type: ignore
        tool_call_id = message.get("tool_call_id") or str(uuid.uuid4())
        anthropic_tool_result = AnthropicMessagesToolResultParam(
-           type="tool_result", tool_use_id=str(uuid.uuid4()), content=content
+           type="tool_result", tool_use_id=tool_call_id, content=content
        )

        return anthropic_tool_result
@@ -881,6 +881,21 @@ class VertexLLM(BaseLLM):

        return self._credentials.token, self.project_id

    def is_using_v1beta1_features(self, optional_params: dict) -> bool:
        """
        VertexAI only supports ContextCaching on v1beta1

        use this helper to decide if request should be sent to v1 or v1beta1

        Returns v1beta1 if context caching is enabled
        Returns v1 in all other cases
        """
        if "cached_content" in optional_params:
            return True
        if "CachedContent" in optional_params:
            return True
        return False

    def _get_token_and_url(
        self,
        model: str,
@@ -891,6 +906,7 @@
        stream: Optional[bool],
        custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
        api_base: Optional[str],
        should_use_v1beta1_features: Optional[bool] = False,
    ) -> Tuple[Optional[str], str]:
        """
        Internal function. Returns the token and url for the call.
@@ -920,12 +936,13 @@
            vertex_location = self.get_vertex_region(vertex_region=vertex_location)

            ### SET RUNTIME ENDPOINT ###
            version = "v1beta1" if should_use_v1beta1_features is True else "v1"
            endpoint = "generateContent"
            if stream is True:
                endpoint = "streamGenerateContent"
-               url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}?alt=sse"
+               url = f"https://{vertex_location}-aiplatform.googleapis.com/{version}/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}?alt=sse"
            else:
-               url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}"
+               url = f"https://{vertex_location}-aiplatform.googleapis.com/{version}/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}"

        if (
            api_base is not None
@@ -1055,6 +1072,9 @@
    ) -> Union[ModelResponse, CustomStreamWrapper]:
        stream: Optional[bool] = optional_params.pop("stream", None)  # type: ignore

        should_use_v1beta1_features = self.is_using_v1beta1_features(
            optional_params=optional_params
        )
        auth_header, url = self._get_token_and_url(
            model=model,
            gemini_api_key=gemini_api_key,
@@ -1064,6 +1084,7 @@
            stream=stream,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            should_use_v1beta1_features=should_use_v1beta1_features,
        )

        ## TRANSFORMATION ##
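A quick sketch of the new routing behavior, mirroring `test_get_token_url` further down in this diff: only requests carrying `cached_content` (or `CachedContent`) are sent to the `v1beta1` Vertex endpoint; everything else stays on `v1`.

```python
# Sketch of the v1 vs v1beta1 decision added in this commit
# (same assertions as test_get_token_url later in this diff).
from litellm.llms.vertex_httpx import VertexLLM

vertex_llm = VertexLLM()

# Context caching param present -> v1beta1
assert vertex_llm.is_using_v1beta1_features(optional_params={"cached_content": "my-cache"}) is True

# Plain request -> v1
assert vertex_llm.is_using_v1beta1_features(optional_params={"temperature": 0.1}) is False
```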
@@ -1,4 +1,8 @@
model_list:
-  - model_name: "gpt-4"
+  - model_name: "claude-3-5-sonnet-20240620"
    litellm_params:
-     model: "gpt-4"
+     model: "claude-3-5-sonnet-20240620"

litellm_settings:
  max_internal_user_budget: 0.001
  internal_user_budget_duration: "5m"
@@ -91,6 +91,10 @@ async def new_user(
    if litellm.max_internal_user_budget is not None:
        data_json["max_budget"] = litellm.max_internal_user_budget

    if "budget_duration" in data_json and data_json["budget_duration"] is None:
        if litellm.internal_user_budget_duration is not None:
            data_json["budget_duration"] = litellm.internal_user_budget_duration

    response = await generate_key_helper_fn(request_type="user", **data_json)

    # Admin UI Logic
@@ -3,20 +3,14 @@ model_list:
    litellm_params:
      model: openai/fake
      api_key: fake-key
-     api_base: https://exampleopenaiendpoint-production.up.railwaz.app/
+     api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: fireworks-llama-v3-70b-instruct
    litellm_params:
      model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
      api_key: "os.environ/FIREWORKS"
- # provider specific wildcard routing
- - model_name: "anthropic/*"
+ - model_name: "*"
    litellm_params:
-     model: "anthropic/*"
-     api_key: os.environ/ANTHROPIC_API_KEY
- - model_name: "groq/*"
-   litellm_params:
-     model: "groq/*"
-     api_key: os.environ/GROQ_API_KEY
+     model: "*"
  - model_name: "*"
    litellm_params:
      model: openai/*
@@ -25,37 +19,22 @@ model_list:
    litellm_params:
      model: mistral/mistral-small-latest
      api_key: "os.environ/MISTRAL_API_KEY"
- - model_name: tts
+ - model_name: gemini-1.5-pro-001
    litellm_params:
-     model: openai/tts-1
-     api_key: "os.environ/OPENAI_API_KEY"
-   model_info:
-     mode: audio_speech


- # for /files endpoints
- files_settings:
-   - custom_llm_provider: azure
-     api_base: https://exampleopenaiendpoint-production.up.railway.app
-     api_key: fake-key
-     api_version: "2023-03-15-preview"
-   - custom_llm_provider: openai
-     api_key: os.environ/OPENAI_API_KEY
+     model: vertex_ai_beta/gemini-1.5-pro-001
+     vertex_project: "adroit-crow-413218"
+     vertex_location: "us-central1"
+     vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json"
+     # Add path to service account.json

+ default_vertex_config:
+   vertex_project: "adroit-crow-413218"
+   vertex_location: "us-central1"
+   vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json


general_settings:
  master_key: sk-1234
  pass_through_endpoints:
    - path: "/v1/rerank" # route you want to add to LiteLLM Proxy Server
      target: "https://api.cohere.com/v1/rerank" # URL this route should forward requests to
      headers: # headers to forward to this URL
        content-type: application/json # (Optional) Extra Headers to pass to this endpoint
        accept: application/json
      forward_headers: True


litellm_settings:
  callbacks: ["otel"] # 👈 KEY CHANGE
  success_callback: ["prometheus"]
  failure_callback: ["prometheus"]
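For context on the wildcard entries above (`model_name: "*"` with `model: "*"` / `model: openai/*`): any model name without an explicit entry falls through to the wildcard entries. A minimal client-side sketch, with the master key and port taken from this config as placeholders and provider keys assumed to be set in the proxy's environment:

```python
# Client-side sketch for the wildcard routing entries in the config above.
# Assumes the proxy runs locally with master_key sk-1234 and provider API keys
# are available in the proxy's environment.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# Explicit entry in model_list
resp = client.chat.completions.create(
    model="gemini-1.5-pro-001",
    messages=[{"role": "user", "content": "hello"}],
)

# No explicit entry -> handled by the "*" wildcard entries
resp = client.chat.completions.create(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "hello"}],
)
print(resp.choices[0].message.content)
```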
@@ -5374,7 +5374,13 @@ async def anthropic_response(
    litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]

    global user_temperature, user_request_timeout, user_max_tokens, user_api_base
-   data: dict = {**anthropic_data, "adapter_id": "anthropic"}
+   body = await request.body()
+   body_str = body.decode()
+   try:
+       request_data: dict = ast.literal_eval(body_str)
+   except Exception:
+       request_data = json.loads(body_str)
+   data: dict = {**request_data, "adapter_id": "anthropic"}
    try:
        data["model"] = (
            general_settings.get("completion_model", None)  # server default
litellm/proxy/tests/test_gemini_context_caching.py (new file, 54 lines)
@@ -0,0 +1,54 @@
import datetime

import httpx
import openai

# Set Litellm proxy variables here
LITELLM_BASE_URL = "http://0.0.0.0:4000"
LITELLM_PROXY_API_KEY = "sk-1234"

client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
httpx_client = httpx.Client(timeout=30)

################################
# First create a cachedContents object
print("creating cached content")
create_cache = httpx_client.post(
    url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
    headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
    json={
        "model": "gemini-1.5-pro-001",
        "contents": [
            {
                "role": "user",
                "parts": [
                    {
                        "text": "This is sample text to demonstrate explicit caching."
                        * 4000
                    }
                ],
            }
        ],
    },
)
print("response from create_cache", create_cache)
create_cache_response = create_cache.json()
print("json from create_cache", create_cache_response)
cached_content_name = create_cache_response["name"]

#################################
# Use the `cachedContents` object in your /chat/completions
response = client.chat.completions.create(  # type: ignore
    model="gemini-1.5-pro-001",
    max_tokens=8192,
    messages=[
        {
            "role": "user",
            "content": "what is the sample text about?",
        },
    ],
    temperature=0.7,
    extra_body={"cached_content": cached_content_name},  # 👈 key change
)

print("response from proxy", response)
@@ -303,3 +303,30 @@ async def vertex_cancel_fine_tuning_job(
        return response
    except Exception as e:
        raise exception_handler(e) from e


@router.post(
    "/vertex-ai/cachedContents",
    dependencies=[Depends(user_api_key_auth)],
    tags=["Vertex AI endpoints"],
)
async def vertex_create_add_cached_content(
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    this is a pass through endpoint for the Vertex AI API. /cachedContents endpoint

    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest

    it uses the vertex ai credentials on the proxy and forwards to vertex ai api
    """
    try:
        response = await execute_post_vertex_ai_request(
            request=request,
            route="/cachedContents",
        )
        return response
    except Exception as e:
        raise exception_handler(e) from e
@@ -1969,3 +1969,58 @@ def test_prompt_factory_nested():
        assert isinstance(
            message["parts"][0]["text"], str
        ), "'text' value not a string."


def test_get_token_url():
    from litellm.llms.vertex_httpx import VertexLLM

    vertex_llm = VertexLLM()
    vertex_ai_project = "adroit-crow-413218"
    vertex_ai_location = "us-central1"
    json_obj = get_vertex_ai_creds_json()
    vertex_credentials = json.dumps(json_obj)

    should_use_v1beta1_features = vertex_llm.is_using_v1beta1_features(
        optional_params={"cached_content": "hi"}
    )

    assert should_use_v1beta1_features is True

    _, url = vertex_llm._get_token_and_url(
        vertex_project=vertex_ai_project,
        vertex_location=vertex_ai_location,
        vertex_credentials=vertex_credentials,
        gemini_api_key="",
        custom_llm_provider="vertex_ai_beta",
        should_use_v1beta1_features=should_use_v1beta1_features,
        api_base=None,
        model="",
        stream=False,
    )

    print("url=", url)

    assert "/v1beta1/" in url

    should_use_v1beta1_features = vertex_llm.is_using_v1beta1_features(
        optional_params={"temperature": 0.1}
    )

    _, url = vertex_llm._get_token_and_url(
        vertex_project=vertex_ai_project,
        vertex_location=vertex_ai_location,
        vertex_credentials=vertex_credentials,
        gemini_api_key="",
        custom_llm_provider="vertex_ai_beta",
        should_use_v1beta1_features=should_use_v1beta1_features,
        api_base=None,
        model="",
        stream=False,
    )

    print("url for normal request", url)

    assert "v1beta1" not in url
    assert "/v1/" in url

    pass
@@ -183,3 +183,96 @@ async def test_anthropic_router_completion_e2e():
    assert isinstance(response, AnthropicResponse)

    assert response.model == "gpt-3.5-turbo"


def test_anthropic_tool_calling_translation():
    kwargs = {
        "model": "claude-3-5-sonnet-20240620",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Would development of a software platform be under ASC 350-40 or ASC 985?",
                    }
                ],
            },
            {
                "role": "assistant",
                "content": [
                    {
                        "type": "tool_use",
                        "id": "37d6f703-cbcc-497d-95a1-2aa24a114adc",
                        "name": "TaskPlanningTool",
                        "input": {
                            "completed_steps": [],
                            "next_steps": [
                                {
                                    "tool_name": "AccountingResearchTool",
                                    "description": "Research ASC 350-40 to understand its scope and applicability to software development.",
                                },
                                {
                                    "tool_name": "AccountingResearchTool",
                                    "description": "Research ASC 985 to understand its scope and applicability to software development.",
                                },
                                {
                                    "tool_name": "AccountingResearchTool",
                                    "description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development.",
                                },
                            ],
                            "learnings": [],
                            "potential_issues": [
                                "The distinction between the two standards might not be clear-cut for all types of software development.",
                                "There might be specific circumstances or details about the software platform that could affect which standard applies.",
                            ],
                            "missing_info": [
                                "Specific details about the type of software platform being developed (e.g., for internal use or for sale).",
                                "Whether the entity developing the software is also the end-user or if it's being developed for external customers.",
                            ],
                            "done": False,
                            "required_formatting": None,
                        },
                    }
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "tool_use_id": "eb7023b1-5ee8-43b8-b90f-ac5a23d37c31",
                        "content": {
                            "completed_steps": [],
                            "next_steps": [
                                {
                                    "tool_name": "AccountingResearchTool",
                                    "description": "Research ASC 350-40 to understand its scope and applicability to software development.",
                                },
                                {
                                    "tool_name": "AccountingResearchTool",
                                    "description": "Research ASC 985 to understand its scope and applicability to software development.",
                                },
                                {
                                    "tool_name": "AccountingResearchTool",
                                    "description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development.",
                                },
                            ],
                            "formatting_step": None,
                        },
                    }
                ],
            },
        ],
    }

    from litellm.adapters.anthropic_adapter import anthropic_adapter

    translated_params = anthropic_adapter.translate_completion_input_params(
        kwargs=kwargs
    )

    print(translated_params["messages"])

    assert len(translated_params["messages"]) > 0
    assert translated_params["messages"][1]["role"] == "user"
@@ -4405,6 +4405,3 @@ def test_moderation():
    output = response.results[0]
    print(output)
    return output
-
-
-# test_moderation()
@@ -219,3 +219,44 @@ def test_base64_image_input(url, expected_media_type):
    response = convert_to_anthropic_image_obj(openai_image_url=url)

    assert response["media_type"] == expected_media_type


def test_anthropic_messages_tool_call():
    messages = [
        {
            "role": "user",
            "content": "Would development of a software platform be under ASC 350-40 or ASC 985?",
        },
        {
            "role": "assistant",
            "content": "",
            "tool_call_id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
            "tool_calls": [
                {
                    "id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
                    "function": {
                        "arguments": '{"completed_steps": [], "next_steps": [{"tool_name": "AccountingResearchTool", "description": "Research ASC 350-40 to understand its scope and applicability to software development."}, {"tool_name": "AccountingResearchTool", "description": "Research ASC 985 to understand its scope and applicability to software development."}, {"tool_name": "AccountingResearchTool", "description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development."}], "learnings": [], "potential_issues": ["The distinction between the two standards might not be clear-cut for all types of software development.", "There might be specific circumstances or details about the software platform that could affect which standard applies."], "missing_info": ["Specific details about the type of software platform being developed (e.g., for internal use or for sale).", "Whether the entity developing the software is also the end-user or if it\'s being developed for external customers."], "done": false, "required_formatting": null}',
                        "name": "TaskPlanningTool",
                    },
                    "type": "function",
                }
            ],
        },
        {
            "role": "function",
            "content": '{"completed_steps":[],"next_steps":[{"tool_name":"AccountingResearchTool","description":"Research ASC 350-40 to understand its scope and applicability to software development."},{"tool_name":"AccountingResearchTool","description":"Research ASC 985 to understand its scope and applicability to software development."},{"tool_name":"AccountingResearchTool","description":"Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development."}],"formatting_step":null}',
            "name": "TaskPlanningTool",
            "tool_call_id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
        },
    ]

    translated_messages = anthropic_messages_pt(
        messages, model="claude-3-sonnet-20240229", llm_provider="anthropic"
    )

    print(translated_messages)

    assert (
        translated_messages[-1]["content"][0]["tool_use_id"]
        == "bc8cb4b6-88c4-4138-8993-3a9d9cd51656"
    )
@@ -55,7 +55,10 @@ import litellm._service_logger  # for storing API inputs, outputs, and metadata
import litellm.litellm_core_utils
import litellm.litellm_core_utils.json_validation_rule
from litellm.caching import DualCache
-from litellm.litellm_core_utils.core_helpers import map_finish_reason
+from litellm.litellm_core_utils.core_helpers import (
+    get_file_check_sum,
+    map_finish_reason,
+)
from litellm.litellm_core_utils.exception_mapping_utils import get_error_message
from litellm.litellm_core_utils.llm_request_utils import _ensure_extra_body_is_safe
from litellm.litellm_core_utils.redact_messages import (
@@ -557,12 +560,8 @@ def function_setup(
            or call_type == CallTypes.transcription.value
        ):
            _file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
-           file_name = getattr(_file_name, "name", "audio_file")
-           file_descriptor = _file_name.fileno()
-           file_stat = os.fstat(file_descriptor)
-           file_size = str(file_stat.st_size)
-
-           file_checksum = _file_name.name + file_size
+           file_checksum = get_file_check_sum(_file=_file_name)
+           file_name = _file_name.name
            if "metadata" in kwargs:
                kwargs["metadata"]["file_checksum"] = file_checksum
            else: