Merge branch 'main' into litellm_auth_fix

Krish Dholakia 2024-08-08 17:14:16 -07:00 committed by GitHub
commit ced4582ecb
24 changed files with 483 additions and 59 deletions

View file

@ -11,7 +11,7 @@
<p align="center">Call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, Groq etc.]
<br>
</p>
<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise"target="_blank">Enterprise Tier</a></h4>
<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server (LLM Gateway)</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise"target="_blank">Enterprise Tier</a></h4>
<h4 align="center">
<a href="https://pypi.org/project/litellm/" target="_blank">
<img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
@ -35,9 +35,9 @@ LiteLLM manages:
- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
- [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']`
- Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
- Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
- Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server (LLM Gateway)](https://docs.litellm.ai/docs/simple_proxy)
[**Jump to OpenAI Proxy Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
[**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
[**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs)
🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12-hour load tests before being published.
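The Router bullet above has no snippet in this hunk; a minimal sketch of the retry/fallback pattern (deployment names, keys, and `api_base` values are placeholders):

```python
from litellm import Router

# Two deployments registered under the same alias; the Router retries/falls back between them
router = Router(model_list=[
    {
        "model_name": "gpt-3.5-turbo",  # alias used by callers
        "litellm_params": {"model": "azure/my-azure-deployment", "api_key": "<azure-key>", "api_base": "<azure-endpoint>"},
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "<openai-key>"},
    },
])

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋"}],
)
```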
@ -134,7 +134,7 @@ litellm.success_callback = ["lunary", "langfuse", "athina", "helicone"] # log in
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
```
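Following on from the snippet above, the "Consistent output" guarantee means the text can always be read from the same path regardless of provider; a minimal sketch, assuming `OPENAI_API_KEY` is set in the environment:

```python
from litellm import completion

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
)

# same access path for every provider
print(response["choices"][0]["message"]["content"])
```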
# OpenAI Proxy - ([Docs](https://docs.litellm.ai/docs/simple_proxy))
# LiteLLM Proxy Server (LLM Gateway) - ([Docs](https://docs.litellm.ai/docs/simple_proxy))
Track spend + Load Balance across multiple projects

View file

@ -14,7 +14,7 @@ https://github.com/BerriAI/litellm
## How to use LiteLLM
You can use litellm through either:
1. [LiteLLM Proxy Server](#openai-proxy) - Server to call 100+ LLMs, load balance, cost tracking across projects
1. [LiteLLM Proxy Server](#openai-proxy) - Server (LLM Gateway) to call 100+ LLMs, load balance, cost tracking across projects
2. [LiteLLM python SDK](#basic-usage) - Python Client to call 100+ LLMs, load balance, cost tracking
### When to use LiteLLM Proxy Server

View file

@ -427,6 +427,105 @@ print(resp)
```
### **Context Caching**
Use Vertex AI Context Caching
[**Relevant VertexAI Docs**](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-overview)
<Tabs>
<TabItem value="proxy" label="LiteLLM PROXY">
1. Add model to config.yaml
```yaml
model_list:
# used for /chat/completions, /completions, /embeddings endpoints
- model_name: gemini-1.5-pro-001
litellm_params:
model: vertex_ai_beta/gemini-1.5-pro-001
vertex_project: "project-id"
vertex_location: "us-central1"
vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json
# used for the /cachedContent and vertexAI native endpoints
default_vertex_config:
vertex_project: "adroit-crow-413218"
vertex_location: "us-central1"
vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json
```
2. Start Proxy
```shell
$ litellm --config /path/to/config.yaml
```
3. Make Request!
- First create a cachedContents object by calling the Vertex `cachedContents` endpoint. [VertexAI API Ref for cachedContents endpoint](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest). (The LiteLLM proxy forwards the `/cachedContents` request to the Vertex AI API.)
- Use the `cachedContents` object in your /chat/completions request to Vertex AI
```python
import datetime
import openai
import httpx
# Set Litellm proxy variables here
LITELLM_BASE_URL = "http://0.0.0.0:4000"
LITELLM_PROXY_API_KEY = "sk-1234"
client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
httpx_client = httpx.Client(timeout=30)
################################
# First create a cachedContents object
# this request gets forwarded as is to: https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest
print("creating cached content")
create_cache = httpx_client.post(
url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
headers = {"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
json = {
"model": "gemini-1.5-pro-001",
"contents": [
{
"role": "user",
"parts": [{
"text": "This is sample text to demonstrate explicit caching."*4000
}]
}
],
}
)
print("response from create_cache", create_cache)
create_cache_response = create_cache.json()
print("json from create_cache", create_cache_response)
cached_content_name = create_cache_response["name"]
#################################
# Use the `cachedContents` object in your /chat/completions
response = client.chat.completions.create( # type: ignore
model="gemini-1.5-pro-001",
max_tokens=8192,
messages=[
{
"role": "user",
"content": "what is the sample text about?",
},
],
temperature="0.7",
extra_body={"cached_content": cached_content_name}, # 👈 key change
)
print("response from proxy", response)
```
</TabItem>
</Tabs>
## Pre-requisites
* `pip install google-cloud-aiplatform` (pre-installed on proxy docker image)
* Authentication:

View file

@ -5,7 +5,7 @@ import TabItem from '@theme/TabItem';
# Quick Start
Quick start CLI, Config, Docker
LiteLLM Server manages:
LiteLLM Server (LLM Gateway) manages:
* **Unified Interface**: Calling 100+ LLMs [Huggingface/Bedrock/TogetherAI/etc.](#other-supported-models) in the OpenAI `ChatCompletions` & `Completions` format
* **Cost tracking**: Authentication, Spend Tracking & Budgets [Virtual Keys](https://docs.litellm.ai/docs/proxy/virtual_keys)

View file

@ -1,5 +1,11 @@
# [BETA] Vertex AI Endpoints
:::tip
Looking for the Unified API (OpenAI format) for Vertex AI? [Go here - using Vertex AI with LiteLLM SDK or LiteLLM Proxy Server](../docs/providers/vertex.md)
:::
## Supported API Endpoints
- Gemini API

View file

@ -24,7 +24,7 @@ const sidebars = {
link: {
type: "generated-index",
title: "💥 LiteLLM Proxy Server",
description: `OpenAI Proxy Server to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
description: `OpenAI Proxy Server (LLM Gateway) to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
slug: "/simple_proxy",
},
items: [

View file

@ -261,6 +261,7 @@ default_user_params: Optional[Dict] = None
default_team_settings: Optional[List] = None
max_user_budget: Optional[float] = None
max_internal_user_budget: Optional[float] = None
internal_user_budget_duration: Optional[str] = None
max_end_user_budget: Optional[float] = None
#### REQUEST PRIORITIZATION ####
priority_reservation: Optional[Dict[str, float]] = None

View file

@ -90,7 +90,13 @@ class ServiceLogging(CustomLogger):
)
async def init_prometheus_services_logger_if_none(self):
if self.prometheusServicesLogger is None:
"""
initializes prometheusServicesLogger if it is None or no attribute exists on ServiceLogging Object
"""
if not hasattr(self, "prometheusServicesLogger"):
self.prometheusServicesLogger = PrometheusServicesLogger()
elif self.prometheusServicesLogger is None:
self.prometheusServicesLogger = self.prometheusServicesLogger()
return

View file

@ -1,6 +1,9 @@
# What is this?
## Helper utilities
from typing import List, Literal, Optional, Tuple
import os
from typing import BinaryIO, List, Literal, Optional, Tuple
from litellm._logging import verbose_logger
def map_finish_reason(
@ -83,3 +86,20 @@ def _get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None):
return kwargs["litellm_parent_otel_span"]
except:
return None
def get_file_check_sum(_file: BinaryIO):
"""
Helper to safely get file checksum - used as a cache key
"""
try:
file_descriptor = _file.fileno()
file_stat = os.fstat(file_descriptor)
file_size = str(file_stat.st_size)
file_checksum = _file.name + file_size
return file_checksum
except Exception as e:
verbose_logger.error(f"Error getting file_checksum: {(str(e))}")
file_checksum = _file.name
return file_checksum
return file_checksum
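A short illustrative sketch of how this helper would be called (the file path is a placeholder; the returned key is the file name concatenated with its size in bytes):

```python
# hypothetical usage: build a cache key for an audio file before transcription
with open("sample.wav", "rb") as audio_file:
    cache_key = get_file_check_sum(_file=audio_file)
    # e.g. "sample.wav176400"; on any failure it falls back to just the file name
```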

View file

@ -287,6 +287,9 @@ class AnthropicConfig:
if user_message is not None:
new_messages.append(user_message)
if len(new_user_content_list) > 0:
new_messages.append({"role": "user", "content": new_user_content_list})
if len(tool_message_list) > 0:
new_messages.extend(tool_message_list)

View file

@ -278,6 +278,14 @@ class VertexFineTuningAPI(VertexLLM):
url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
elif "countTokens" in request_route:
url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
elif "cachedContents" in request_route:
_model = request_data.get("model")
if _model is not None and "/publishers/google/models/" not in _model:
request_data["model"] = (
f"projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{_model}"
)
url = f"https://{vertex_location}-aiplatform.googleapis.com/v1beta1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
else:
raise ValueError(f"Unsupported Vertex AI request route: {request_route}")
if self.async_handler is None:

View file

@ -1135,8 +1135,9 @@ def convert_to_anthropic_tool_result(message: dict) -> AnthropicMessagesToolResu
return anthropic_tool_result
if message["role"] == "function":
content = message.get("content") # type: ignore
tool_call_id = message.get("tool_call_id") or str(uuid.uuid4())
anthropic_tool_result = AnthropicMessagesToolResultParam(
type="tool_result", tool_use_id=str(uuid.uuid4()), content=content
type="tool_result", tool_use_id=tool_call_id, content=content
)
return anthropic_tool_result

View file

@ -881,6 +881,21 @@ class VertexLLM(BaseLLM):
return self._credentials.token, self.project_id
def is_using_v1beta1_features(self, optional_params: dict) -> bool:
"""
VertexAI only supports ContextCaching on v1beta1
use this helper to decide if request should be sent to v1 or v1beta1
Returns v1beta1 if context caching is enabled
Returns v1 in all other cases
"""
if "cached_content" in optional_params:
return True
if "CachedContent" in optional_params:
return True
return False
def _get_token_and_url(
self,
model: str,
@ -891,6 +906,7 @@ class VertexLLM(BaseLLM):
stream: Optional[bool],
custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
api_base: Optional[str],
should_use_v1beta1_features: Optional[bool] = False,
) -> Tuple[Optional[str], str]:
"""
Internal function. Returns the token and url for the call.
@ -920,12 +936,13 @@ class VertexLLM(BaseLLM):
vertex_location = self.get_vertex_region(vertex_region=vertex_location)
### SET RUNTIME ENDPOINT ###
version = "v1beta1" if should_use_v1beta1_features is True else "v1"
endpoint = "generateContent"
if stream is True:
endpoint = "streamGenerateContent"
url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}?alt=sse"
url = f"https://{vertex_location}-aiplatform.googleapis.com/{version}/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}?alt=sse"
else:
url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}"
url = f"https://{vertex_location}-aiplatform.googleapis.com/{version}/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}"
if (
api_base is not None
@ -1055,6 +1072,9 @@ class VertexLLM(BaseLLM):
) -> Union[ModelResponse, CustomStreamWrapper]:
stream: Optional[bool] = optional_params.pop("stream", None) # type: ignore
should_use_v1beta1_features = self.is_using_v1beta1_features(
optional_params=optional_params
)
auth_header, url = self._get_token_and_url(
model=model,
gemini_api_key=gemini_api_key,
@ -1064,6 +1084,7 @@ class VertexLLM(BaseLLM):
stream=stream,
custom_llm_provider=custom_llm_provider,
api_base=api_base,
should_use_v1beta1_features=should_use_v1beta1_features,
)
## TRANSFORMATION ##
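A small sketch of how the new helper drives version selection (it mirrors `test_get_token_url` added further down; the cached-content name is a placeholder):

```python
from litellm.llms.vertex_httpx import VertexLLM

vertex_llm = VertexLLM()

# context caching requested -> route the call to the v1beta1 endpoint
assert vertex_llm.is_using_v1beta1_features(
    optional_params={"cached_content": "projects/my-project/locations/us-central1/cachedContents/123"}
) is True

# plain request -> stays on v1
assert vertex_llm.is_using_v1beta1_features(optional_params={"temperature": 0.1}) is False
```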

View file

@ -1,4 +1,8 @@
model_list:
- model_name: "gpt-4"
- model_name: "claude-3-5-sonnet-20240620"
litellm_params:
model: "gpt-4"
model: "claude-3-5-sonnet-20240620"
litellm_settings:
max_internal_user_budget: 0.001
internal_user_budget_duration: "5m"
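For reference, the proxy applies `litellm_settings` as attributes on the `litellm` module, which is how the globals in the earlier hunk get populated; a rough Python equivalent (a sketch, values copied from the config above):

```python
import litellm

litellm.max_internal_user_budget = 0.001        # max budget (USD) per internal user
litellm.internal_user_budget_duration = "5m"    # assumed: budget window/reset interval of 5 minutes
```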

View file

@ -91,6 +91,10 @@ async def new_user(
if litellm.max_internal_user_budget is not None:
data_json["max_budget"] = litellm.max_internal_user_budget
if "budget_duration" in data_json and data_json["budget_duration"] is None:
if litellm.internal_user_budget_duration is not None:
data_json["budget_duration"] = litellm.internal_user_budget_duration
response = await generate_key_helper_fn(request_type="user", **data_json)
# Admin UI Logic

View file

@ -3,20 +3,14 @@ model_list:
litellm_params:
model: openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railwaz.app/
api_base: https://exampleopenaiendpoint-production.up.railway.app/
- model_name: fireworks-llama-v3-70b-instruct
litellm_params:
model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
api_key: "os.environ/FIREWORKS"
# provider specific wildcard routing
- model_name: "anthropic/*"
- model_name: "*"
litellm_params:
model: "anthropic/*"
api_key: os.environ/ANTHROPIC_API_KEY
- model_name: "groq/*"
litellm_params:
model: "groq/*"
api_key: os.environ/GROQ_API_KEY
model: "*"
- model_name: "*"
litellm_params:
model: openai/*
@ -25,37 +19,22 @@ model_list:
litellm_params:
model: mistral/mistral-small-latest
api_key: "os.environ/MISTRAL_API_KEY"
- model_name: tts
- model_name: gemini-1.5-pro-001
litellm_params:
model: openai/tts-1
api_key: "os.environ/OPENAI_API_KEY"
model_info:
mode: audio_speech
# for /files endpoints
files_settings:
- custom_llm_provider: azure
api_base: https://exampleopenaiendpoint-production.up.railway.app
api_key: fake-key
api_version: "2023-03-15-preview"
- custom_llm_provider: openai
api_key: os.environ/OPENAI_API_KEY
model: vertex_ai_beta/gemini-1.5-pro-001
vertex_project: "adroit-crow-413218"
vertex_location: "us-central1"
vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json"
# Add path to service account.json
default_vertex_config:
vertex_project: "adroit-crow-413218"
vertex_location: "us-central1"
vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json
general_settings:
master_key: sk-1234
pass_through_endpoints:
- path: "/v1/rerank" # route you want to add to LiteLLM Proxy Server
target: "https://api.cohere.com/v1/rerank" # URL this route should forward requests to
headers: # headers to forward to this URL
content-type: application/json # (Optional) Extra Headers to pass to this endpoint
accept: application/json
forward_headers: True
litellm_settings:
callbacks: ["otel"] # 👈 KEY CHANGE
success_callback: ["prometheus"]
failure_callback: ["prometheus"]

View file

@ -5374,7 +5374,13 @@ async def anthropic_response(
litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]
global user_temperature, user_request_timeout, user_max_tokens, user_api_base
data: dict = {**anthropic_data, "adapter_id": "anthropic"}
body = await request.body()
body_str = body.decode()
try:
request_data: dict = ast.literal_eval(body_str)
except Exception:
request_data = json.loads(body_str)
data: dict = {**request_data, "adapter_id": "anthropic"}
try:
data["model"] = (
general_settings.get("completion_model", None) # server default

View file

@ -0,0 +1,54 @@
import datetime
import httpx
import openai
# Set Litellm proxy variables here
LITELLM_BASE_URL = "http://0.0.0.0:4000"
LITELLM_PROXY_API_KEY = "sk-1234"
client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
httpx_client = httpx.Client(timeout=30)
################################
# First create a cachedContents object
print("creating cached content")
create_cache = httpx_client.post(
url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
json={
"model": "gemini-1.5-pro-001",
"contents": [
{
"role": "user",
"parts": [
{
"text": "This is sample text to demonstrate explicit caching."
* 4000
}
],
}
],
},
)
print("response from create_cache", create_cache)
create_cache_response = create_cache.json()
print("json from create_cache", create_cache_response)
cached_content_name = create_cache_response["name"]
#################################
# Use the `cachedContents` object in your /chat/completions
response = client.chat.completions.create( # type: ignore
model="gemini-1.5-pro-001",
max_tokens=8192,
messages=[
{
"role": "user",
"content": "what is the sample text about?",
},
],
temperature="0.7",
extra_body={"cached_content": cached_content_name}, # 👈 key change
)
print("response from proxy", response)

View file

@ -303,3 +303,30 @@ async def vertex_cancel_fine_tuning_job(
return response
except Exception as e:
raise exception_handler(e) from e
@router.post(
"/vertex-ai/cachedContents",
dependencies=[Depends(user_api_key_auth)],
tags=["Vertex AI endpoints"],
)
async def vertex_create_add_cached_content(
request: Request,
fastapi_response: Response,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
this is a pass through endpoint for the Vertex AI API. /cachedContents endpoint
Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest
it uses the vertex ai credentials on the proxy and forwards to vertex ai api
"""
try:
response = await execute_post_vertex_ai_request(
request=request,
route="/cachedContents",
)
return response
except Exception as e:
raise exception_handler(e) from e

View file

@ -1969,3 +1969,58 @@ def test_prompt_factory_nested():
assert isinstance(
message["parts"][0]["text"], str
), "'text' value not a string."
def test_get_token_url():
from litellm.llms.vertex_httpx import VertexLLM
vertex_llm = VertexLLM()
vertex_ai_project = "adroit-crow-413218"
vertex_ai_location = "us-central1"
json_obj = get_vertex_ai_creds_json()
vertex_credentials = json.dumps(json_obj)
should_use_v1beta1_features = vertex_llm.is_using_v1beta1_features(
optional_params={"cached_content": "hi"}
)
assert should_use_v1beta1_features is True
_, url = vertex_llm._get_token_and_url(
vertex_project=vertex_ai_project,
vertex_location=vertex_ai_location,
vertex_credentials=vertex_credentials,
gemini_api_key="",
custom_llm_provider="vertex_ai_beta",
should_use_v1beta1_features=should_use_v1beta1_features,
api_base=None,
model="",
stream=False,
)
print("url=", url)
assert "/v1beta1/" in url
should_use_v1beta1_features = vertex_llm.is_using_v1beta1_features(
optional_params={"temperature": 0.1}
)
_, url = vertex_llm._get_token_and_url(
vertex_project=vertex_ai_project,
vertex_location=vertex_ai_location,
vertex_credentials=vertex_credentials,
gemini_api_key="",
custom_llm_provider="vertex_ai_beta",
should_use_v1beta1_features=should_use_v1beta1_features,
api_base=None,
model="",
stream=False,
)
print("url for normal request", url)
assert "v1beta1" not in url
assert "/v1/" in url
pass

View file

@ -183,3 +183,96 @@ async def test_anthropic_router_completion_e2e():
assert isinstance(response, AnthropicResponse)
assert response.model == "gpt-3.5-turbo"
def test_anthropic_tool_calling_translation():
kwargs = {
"model": "claude-3-5-sonnet-20240620",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Would development of a software platform be under ASC 350-40 or ASC 985?",
}
],
},
{
"role": "assistant",
"content": [
{
"type": "tool_use",
"id": "37d6f703-cbcc-497d-95a1-2aa24a114adc",
"name": "TaskPlanningTool",
"input": {
"completed_steps": [],
"next_steps": [
{
"tool_name": "AccountingResearchTool",
"description": "Research ASC 350-40 to understand its scope and applicability to software development.",
},
{
"tool_name": "AccountingResearchTool",
"description": "Research ASC 985 to understand its scope and applicability to software development.",
},
{
"tool_name": "AccountingResearchTool",
"description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development.",
},
],
"learnings": [],
"potential_issues": [
"The distinction between the two standards might not be clear-cut for all types of software development.",
"There might be specific circumstances or details about the software platform that could affect which standard applies.",
],
"missing_info": [
"Specific details about the type of software platform being developed (e.g., for internal use or for sale).",
"Whether the entity developing the software is also the end-user or if it's being developed for external customers.",
],
"done": False,
"required_formatting": None,
},
}
],
},
{
"role": "user",
"content": [
{
"type": "tool_result",
"tool_use_id": "eb7023b1-5ee8-43b8-b90f-ac5a23d37c31",
"content": {
"completed_steps": [],
"next_steps": [
{
"tool_name": "AccountingResearchTool",
"description": "Research ASC 350-40 to understand its scope and applicability to software development.",
},
{
"tool_name": "AccountingResearchTool",
"description": "Research ASC 985 to understand its scope and applicability to software development.",
},
{
"tool_name": "AccountingResearchTool",
"description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development.",
},
],
"formatting_step": None,
},
}
],
},
],
}
from litellm.adapters.anthropic_adapter import anthropic_adapter
translated_params = anthropic_adapter.translate_completion_input_params(
kwargs=kwargs
)
print(translated_params["messages"])
assert len(translated_params["messages"]) > 0
assert translated_params["messages"][1]["role"] == "user"

View file

@ -4405,6 +4405,3 @@ def test_moderation():
output = response.results[0]
print(output)
return output
# test_moderation()

View file

@ -219,3 +219,44 @@ def test_base64_image_input(url, expected_media_type):
response = convert_to_anthropic_image_obj(openai_image_url=url)
assert response["media_type"] == expected_media_type
def test_anthropic_messages_tool_call():
messages = [
{
"role": "user",
"content": "Would development of a software platform be under ASC 350-40 or ASC 985?",
},
{
"role": "assistant",
"content": "",
"tool_call_id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
"tool_calls": [
{
"id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
"function": {
"arguments": '{"completed_steps": [], "next_steps": [{"tool_name": "AccountingResearchTool", "description": "Research ASC 350-40 to understand its scope and applicability to software development."}, {"tool_name": "AccountingResearchTool", "description": "Research ASC 985 to understand its scope and applicability to software development."}, {"tool_name": "AccountingResearchTool", "description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development."}], "learnings": [], "potential_issues": ["The distinction between the two standards might not be clear-cut for all types of software development.", "There might be specific circumstances or details about the software platform that could affect which standard applies."], "missing_info": ["Specific details about the type of software platform being developed (e.g., for internal use or for sale).", "Whether the entity developing the software is also the end-user or if it\'s being developed for external customers."], "done": false, "required_formatting": null}',
"name": "TaskPlanningTool",
},
"type": "function",
}
],
},
{
"role": "function",
"content": '{"completed_steps":[],"next_steps":[{"tool_name":"AccountingResearchTool","description":"Research ASC 350-40 to understand its scope and applicability to software development."},{"tool_name":"AccountingResearchTool","description":"Research ASC 985 to understand its scope and applicability to software development."},{"tool_name":"AccountingResearchTool","description":"Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development."}],"formatting_step":null}',
"name": "TaskPlanningTool",
"tool_call_id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
},
]
translated_messages = anthropic_messages_pt(
messages, model="claude-3-sonnet-20240229", llm_provider="anthropic"
)
print(translated_messages)
assert (
translated_messages[-1]["content"][0]["tool_use_id"]
== "bc8cb4b6-88c4-4138-8993-3a9d9cd51656"
)

View file

@ -55,7 +55,10 @@ import litellm._service_logger # for storing API inputs, outputs, and metadata
import litellm.litellm_core_utils
import litellm.litellm_core_utils.json_validation_rule
from litellm.caching import DualCache
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.litellm_core_utils.core_helpers import (
get_file_check_sum,
map_finish_reason,
)
from litellm.litellm_core_utils.exception_mapping_utils import get_error_message
from litellm.litellm_core_utils.llm_request_utils import _ensure_extra_body_is_safe
from litellm.litellm_core_utils.redact_messages import (
@ -557,12 +560,8 @@ def function_setup(
or call_type == CallTypes.transcription.value
):
_file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
file_name = getattr(_file_name, "name", "audio_file")
file_descriptor = _file_name.fileno()
file_stat = os.fstat(file_descriptor)
file_size = str(file_stat.st_size)
file_checksum = _file_name.name + file_size
file_checksum = get_file_check_sum(_file=_file_name)
file_name = _file_name.name
if "metadata" in kwargs:
kwargs["metadata"]["file_checksum"] = file_checksum
else: