LiteLLM Minor Fixes & Improvements (10/02/2024) (#6023)

* feat(together_ai/completion): handle together ai completion calls

* fix: handle list of int / list of list of int for text completion calls

* fix(utils.py): check if base model in bedrock converse model list

Fixes https://github.com/BerriAI/litellm/issues/6003

* test(test_optional_params.py): add unit tests for bedrock optional param mapping

Fixes https://github.com/BerriAI/litellm/issues/6003

* feat(utils.py): enable passing dummy tool call for anthropic/bedrock calls if tool_use blocks exist

Fixes https://github.com/BerriAI/litellm/issues/5388

* fixed an issue with tool use of Claude models with Anthropic and Bedrock (#6013)

* fix(utils.py): handle empty schema for anthropic/bedrock

Fixes https://github.com/BerriAI/litellm/issues/6012

* fix: fix linting errors

* fix: fix linting errors

* fix: fix linting errors

* fix(proxy_cli.py): fix import route for app + health checks path (#6026)

* (testing): Enable testing us.anthropic.claude-3-haiku-20240307-v1:0. (#6018)

* fix(proxy_cli.py): fix import route for app + health checks gettysburg.wav path

Fixes https://github.com/BerriAI/litellm/issues/5999

---------

Co-authored-by: David Manouchehri <david.manouchehri@ai.moda>

---------

Co-authored-by: Ved Patwardhan <54766411+vedpatwardhan@users.noreply.github.com>
Co-authored-by: David Manouchehri <david.manouchehri@ai.moda>
commit 14165d3648 (parent 8995ff49ae)
Krish Dholakia, 2024-10-02 22:00:28 -04:00, committed by GitHub
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
20 changed files with 443 additions and 125 deletions

@@ -0,0 +1,15 @@
from collections.abc import Iterable
from typing import List


def is_tokens_or_list_of_tokens(value: List):
    # Check if it's a list of integers (tokens)
    if isinstance(value, list) and all(isinstance(item, int) for item in value):
        return True
    # Check if it's a list of lists of integers (list of tokens)
    if isinstance(value, list) and all(
        isinstance(item, list) and all(isinstance(i, int) for i in item)
        for item in value
    ):
        return True
    return False
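
A quick usage sketch of the new helper (not part of the diff; the import path matches the one used by the TogetherAI transformation further down):

from litellm.llms.OpenAI.completion.utils import is_tokens_or_list_of_tokens

assert is_tokens_or_list_of_tokens([1, 2, 3])              # a single token array
assert is_tokens_or_list_of_tokens([[1, 2], [3, 4]])       # a list of token arrays
assert not is_tokens_or_list_of_tokens(["a text prompt"])  # strings are not tokens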

@@ -4,7 +4,7 @@ import os
import time
import traceback
import types
-from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union
+from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union, cast

import httpx
import openai
@@ -30,8 +30,10 @@ from litellm.utils import (

from ...types.llms.openai import *
from ..base import BaseLLM
+from ..prompt_templates.common_utils import convert_content_list_to_str
from ..prompt_templates.factory import custom_prompt, prompt_factory
from .common_utils import drop_params_from_unprocessable_entity_error
+from .completion.utils import is_tokens_or_list_of_tokens


class OpenAIError(Exception):
@@ -420,6 +422,35 @@ class OpenAITextCompletionConfig:
            and v is not None
        }

+    def _transform_prompt(
+        self,
+        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
+    ) -> AllPromptValues:
+        if len(messages) == 1:  # base case
+            message_content = messages[0].get("content")
+            if (
+                message_content
+                and isinstance(message_content, list)
+                and is_tokens_or_list_of_tokens(message_content)
+            ):
+                openai_prompt: AllPromptValues = cast(AllPromptValues, message_content)
+            else:
+                openai_prompt = ""
+                content = convert_content_list_to_str(
+                    cast(AllMessageValues, messages[0])
+                )
+                openai_prompt += content
+        else:
+            prompt_str_list: List[str] = []
+            for m in messages:
+                try:  # expect list of int / list of list of int to be a 1 message array only.
+                    content = convert_content_list_to_str(cast(AllMessageValues, m))
+                    prompt_str_list.append(content)
+                except Exception as e:
+                    raise e
+            openai_prompt = prompt_str_list
+        return openai_prompt
+
    def convert_to_chat_model_response_object(
        self,
        response_object: Optional[TextCompletionResponse] = None,
@@ -459,6 +490,7 @@ class OpenAITextCompletionConfig:
+

class OpenAIChatCompletion(BaseLLM):
    def __init__(self) -> None:
        super().__init__()
@@ -1466,7 +1498,9 @@ class OpenAIChatCompletion(BaseLLM):
        elif mode == "audio_transcription":
            # Get the current directory of the file being run
            pwd = os.path.dirname(os.path.realpath(__file__))
-            file_path = os.path.join(pwd, "../tests/gettysburg.wav")
+            file_path = os.path.join(
+                pwd, "../../../tests/gettysburg.wav"
+            )  # proxy address
            audio_file = open(file_path, "rb")
            completion = await client.audio.transcriptions.with_raw_response.create(
                file=audio_file,
@@ -1502,6 +1536,8 @@ class OpenAIChatCompletion(BaseLLM):

class OpenAITextCompletion(BaseLLM):
+    openai_text_completion_global_config = OpenAITextCompletionConfig()
+
    def __init__(self) -> None:
        super().__init__()
@@ -1518,7 +1554,7 @@ class OpenAITextCompletion(BaseLLM):
        model_response: ModelResponse,
        api_key: str,
        model: str,
-        messages: list,
+        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
        timeout: float,
        logging_obj: LiteLLMLoggingObj,
        optional_params: dict,
@@ -1531,24 +1567,18 @@ class OpenAITextCompletion(BaseLLM):
        organization: Optional[str] = None,
        headers: Optional[dict] = None,
    ):
-        super().completion()
        try:
            if headers is None:
                headers = self.validate_environment(api_key=api_key)
            if model is None or messages is None:
                raise OpenAIError(status_code=422, message="Missing model or messages")
-            if (
-                len(messages) > 0
-                and "content" in messages[0]
-                and isinstance(messages[0]["content"], list)
-            ):
-                prompt = messages[0]["content"]
-            else:
-                prompt = [message["content"] for message in messages]  # type: ignore
            # don't send max retries to the api, if set
+            prompt = self.openai_text_completion_global_config._transform_prompt(
+                messages
+            )
            data = {"model": model, "prompt": prompt, **optional_params}
            max_retries = data.pop("max_retries", 2)

            ## LOGGING
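
A hedged sketch of what the new `_transform_prompt` returns for the message shapes it accepts (behaviour read off the method body above; not part of the diff):

from litellm.llms.OpenAI.openai import OpenAITextCompletionConfig

config = OpenAITextCompletionConfig()

# a single message whose content is already tokens is passed through untouched
assert config._transform_prompt([{"role": "user", "content": [1, 2, 3]}]) == [1, 2, 3]

# a single text message is flattened to a plain string prompt
assert config._transform_prompt([{"role": "user", "content": "good morning"}]) == "good morning"

# multiple messages become a list of string prompts
assert config._transform_prompt(
    [{"role": "user", "content": "prompt one"}, {"role": "user", "content": "prompt two"}]
) == ["prompt one", "prompt two"]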

@@ -551,6 +551,8 @@ class AnthropicChatCompletion(BaseLLM):
            error_response = getattr(e, "response", None)
            if error_headers is None and error_response:
                error_headers = getattr(error_response, "headers", None)
+            if error_response and hasattr(error_response, "text"):
+                error_text = getattr(error_response, "text", error_text)
            raise AnthropicError(
                message=error_text,
                status_code=status_code,

@@ -6,11 +6,17 @@ from litellm.llms.prompt_templates.factory import anthropic_messages_pt
from litellm.types.llms.anthropic import (
    AnthropicMessageRequestBase,
    AnthropicMessagesRequest,
+    AnthropicMessagesTool,
    AnthropicMessagesToolChoice,
    AnthropicSystemMessageContent,
)
-from litellm.types.llms.openai import AllMessageValues, ChatCompletionSystemMessage
-from litellm.utils import has_tool_call_blocks
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    ChatCompletionSystemMessage,
+    ChatCompletionToolParam,
+    ChatCompletionToolParamFunctionChunk,
+)
+from litellm.utils import add_dummy_tool, has_tool_call_blocks

from ..common_utils import AnthropicError
@@ -146,8 +152,13 @@ class AnthropicConfig:
            and messages is not None
            and has_tool_call_blocks(messages)
        ):
-            raise litellm.UnsupportedParamsError(
-                message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
-                model="",
-                llm_provider="anthropic",
-            )
+            if litellm.modify_params:
+                optional_params["tools"] = add_dummy_tool(
+                    custom_llm_provider="bedrock_converse"
+                )
+            else:
+                raise litellm.UnsupportedParamsError(
+                    message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param OR set `litellm.modify_params = True` // `litellm_settings::modify_params: True` to add dummy tool to the request.",
+                    model="",
+                    llm_provider="anthropic",
+                )
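
A hedged usage sketch of the new `modify_params` behaviour (message shapes mirror the test log included later in this commit; the tool-call id is made up): with it enabled, a follow-up call that carries tool-call / tool-result blocks but no `tools=` param gets a dummy tool injected instead of raising `UnsupportedParamsError`.

import litellm
from litellm import completion

litellm.modify_params = True  # proxy equivalent: litellm_settings::modify_params: True

messages = [
    {"role": "user", "content": "What's the weather like in San Francisco?"},
    {  # assistant turn containing a tool_use block
        "role": "assistant",
        "content": "Let me check the weather.",
        "tool_calls": [
            {
                "id": "toolu_123",  # made-up id, for illustration only
                "type": "function",
                "function": {"name": "get_current_weather", "arguments": '{"location": "San Francisco"}'},
            }
        ],
    },
    {
        "role": "tool",
        "tool_call_id": "toolu_123",
        "name": "get_current_weather",
        "content": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}',
    },
]

# no tools= here; previously this raised UnsupportedParamsError on Anthropic/Bedrock
response = completion(model="claude-3-haiku-20240307", messages=messages)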
@@ -266,18 +277,23 @@ class AnthropicConfig:
        if "anthropic-beta" not in headers:
            # default to v1 of "anthropic-beta"
            headers["anthropic-beta"] = "tools-2024-05-16"

            anthropic_tools = []
            for tool in optional_params["tools"]:
                if "input_schema" in tool:  # assume in anthropic format
                    anthropic_tools.append(tool)
                else:  # assume openai tool call
                    new_tool = tool["function"]
-                    new_tool["input_schema"] = new_tool.pop("parameters")  # rename key
+                    parameters = new_tool.pop(
+                        "parameters",
+                        {
+                            "type": "object",
+                            "properties": {},
+                        },
+                    )
+                    new_tool["input_schema"] = parameters  # rename key
                    if "cache_control" in tool:
                        new_tool["cache_control"] = tool["cache_control"]
                    anthropic_tools.append(new_tool)

            optional_params["tools"] = anthropic_tools

        data = {
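
A minimal sketch of what the `parameters` default means for a tool defined without a schema (the tool shape is taken from the new test at the bottom of this commit):

# OpenAI-style tool with no "parameters" block; new_tool.pop("parameters") used to raise KeyError here
tool = {
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in New York",
    },
}

# after the transformation above, the Anthropic-format tool becomes roughly:
# {"name": "get_current_weather",
#  "description": "Get the current weather in New York",
#  "input_schema": {"type": "object", "properties": {}}}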

@@ -26,6 +26,7 @@ class AzureAIStudioConfig(OpenAIConfig):
    def _transform_messages(self, messages: List[AllMessageValues]) -> List:
        for message in messages:
-            message = convert_content_list_to_str(message=message)
+            texts = convert_content_list_to_str(message=message)
+            if texts:
+                message["content"] = texts
        return messages

@@ -22,7 +22,7 @@ from litellm.types.llms.openai import (
    ChatCompletionToolParamFunctionChunk,
)
from litellm.types.utils import ModelResponse, Usage
-from litellm.utils import CustomStreamWrapper, has_tool_call_blocks
+from litellm.utils import CustomStreamWrapper, add_dummy_tool, has_tool_call_blocks

from ...prompt_templates.factory import _bedrock_converse_messages_pt, _bedrock_tools_pt
from ..common_utils import BedrockError, get_bedrock_tool_name
@@ -213,10 +213,15 @@ class AmazonConverseConfig:
            and messages is not None
            and has_tool_call_blocks(messages)
        ):
-            raise litellm.UnsupportedParamsError(
-                message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
-                model="",
-                llm_provider="anthropic",
-            )
+            if litellm.modify_params:
+                optional_params["tools"] = add_dummy_tool(
+                    custom_llm_provider="bedrock_converse"
+                )
+            else:
+                raise litellm.UnsupportedParamsError(
+                    message="Bedrock doesn't support tool calling without `tools=` param specified. Pass `tools=` param OR set `litellm.modify_params = True` // `litellm_settings::modify_params: True` to add dummy tool to the request.",
+                    model="",
+                    llm_provider="bedrock",
+                )

        return optional_params

@@ -7,7 +7,7 @@ from typing import List

from litellm.types.llms.openai import AllMessageValues


-def convert_content_list_to_str(message: AllMessageValues) -> AllMessageValues:
+def convert_content_list_to_str(message: AllMessageValues) -> str:
    """
    - handles scenario where content is list and not string
    - content list is just text, and no images
@@ -26,7 +26,4 @@ def convert_content_list_to_str(message: AllMessageValues) -> AllMessageValues:
    elif message_content is not None and isinstance(message_content, str):
        texts = message_content

-    if texts:
-        message["content"] = texts
-
-    return message
+    return texts
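
For reference (not part of the diff), a hedged sketch of the helper's new contract: it now returns the flattened text instead of mutating and returning the message. The list-content case assumes the text-concatenation behaviour described in the docstring:

from litellm.llms.prompt_templates.common_utils import convert_content_list_to_str

# plain string content is returned as-is
assert convert_content_list_to_str({"role": "user", "content": "hi"}) == "hi"

# a content list of text parts is flattened to a single string (assumed from the docstring)
assert convert_content_list_to_str(
    {"role": "user", "content": [{"type": "text", "text": "hi"}]}
) == "hi"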

@@ -2554,7 +2554,10 @@ def _bedrock_tools_pt(tools: List) -> List[BedrockToolBlock]:
    """
    tool_block_list: List[BedrockToolBlock] = []
    for tool in tools:
-        parameters = tool.get("function", {}).get("parameters", None)
+        parameters = tool.get("function", {}).get("parameters", {
+            "type": "object",
+            "properties": {}
+        })
        name = tool.get("function", {}).get("name", "")

        # related issue: https://github.com/BerriAI/litellm/issues/5007

@@ -1,7 +0,0 @@
"""
Support for OpenAI's `/v1/completions` endpoint.

Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.

Docs: https://docs.together.ai/reference/completions-1
"""

@@ -0,0 +1,61 @@
"""
Support for OpenAI's `/v1/completions` endpoint.

Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.

Docs: https://docs.together.ai/reference/completions-1
"""

from typing import Any, Callable, List, Optional, Union

from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.types.llms.openai import AllMessageValues, OpenAITextCompletionUserMessage
from litellm.utils import ModelResponse

from ...OpenAI.openai import OpenAITextCompletion
from .transformation import TogetherAITextCompletionConfig

together_ai_text_completion_global_config = TogetherAITextCompletionConfig()


class TogetherAITextCompletion(OpenAITextCompletion):

    def completion(
        self,
        model_response: ModelResponse,
        api_key: str,
        model: str,
        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
        timeout: float,
        logging_obj: Logging,
        optional_params: dict,
        print_verbose: Optional[Callable[..., Any]] = None,
        api_base: Optional[str] = None,
        acompletion: bool = False,
        litellm_params=None,
        logger_fn=None,
        client=None,
        organization: Optional[str] = None,
        headers: Optional[dict] = None,
    ):
        prompt = together_ai_text_completion_global_config._transform_prompt(messages)

        message = OpenAITextCompletionUserMessage(role="user", content=prompt)
        new_messages = [message]

        return super().completion(
            model_response=model_response,
            api_key=api_key,
            model=model,
            messages=new_messages,
            timeout=timeout,
            logging_obj=logging_obj,
            optional_params=optional_params,
            print_verbose=print_verbose,
            api_base=api_base,
            acompletion=acompletion,
            litellm_params=litellm_params,
            logger_fn=logger_fn,
            client=client,
            organization=organization,
            headers=headers,
        )
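
A hedged end-to-end sketch (not part of the diff): with the routing change further down (the `custom_llm_provider == "together_ai"` branch), a Together AI text-completion request flows through this handler. The model name below is only illustrative.

import litellm

response = litellm.text_completion(
    model="together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",  # illustrative model id
    prompt="good morning",
    max_tokens=10,
)
print(response)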

@@ -0,0 +1,46 @@
"""
Translates calls from OpenAI's `/v1/completions` endpoint to TogetherAI's `/v1/completions` endpoint.

Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.

Docs: https://docs.together.ai/reference/completions-1
"""

from typing import List, Union, cast

from litellm.llms.OpenAI.completion.utils import is_tokens_or_list_of_tokens
from litellm.types.llms.openai import (
    AllMessageValues,
    AllPromptValues,
    OpenAITextCompletionUserMessage,
)

from ...OpenAI.openai import OpenAITextCompletionConfig


class TogetherAITextCompletionConfig(OpenAITextCompletionConfig):
    def _transform_prompt(
        self,
        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
    ) -> AllPromptValues:
        """
        TogetherAI expects a string prompt.
        """
        initial_prompt: AllPromptValues = super()._transform_prompt(messages)
        ## TOGETHER AI SPECIFIC VALIDATION ##
        if isinstance(initial_prompt, list) and is_tokens_or_list_of_tokens(
            value=initial_prompt
        ):
            raise ValueError("TogetherAI does not support integers as input")
        if (
            isinstance(initial_prompt, list)
            and len(initial_prompt) == 1
            and isinstance(initial_prompt[0], str)
        ):
            together_prompt = initial_prompt[0]
        elif isinstance(initial_prompt, list):
            raise ValueError("TogetherAI does not support multiple prompts.")
        else:
            together_prompt = cast(str, initial_prompt)

        return together_prompt
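
A hedged sketch of the validation above (not part of the diff):

from litellm.llms.together_ai.completion.transformation import (
    TogetherAITextCompletionConfig,
)

config = TogetherAITextCompletionConfig()

# a single text message collapses to the plain string Together AI expects
assert config._transform_prompt([{"role": "user", "content": "good morning"}]) == "good morning"

# token prompts and multi-prompt batches are rejected:
# config._transform_prompt([{"role": "user", "content": [1, 2, 3]}])                               -> ValueError
# config._transform_prompt([{"role": "user", "content": "a"}, {"role": "user", "content": "b"}])   -> ValueError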

@@ -112,6 +112,7 @@ from .llms.prompt_templates.factory import (
)
from .llms.sagemaker.sagemaker import SagemakerLLM
from .llms.text_completion_codestral import CodestralTextCompletion
+from .llms.together_ai.completion.handler import TogetherAITextCompletion
from .llms.triton import TritonChatCompletion
from .llms.vertex_ai_and_google_ai_studio import (
    vertex_ai_anthropic,
@@ -168,6 +169,7 @@ openai_o1_chat_completions = OpenAIO1ChatCompletion()
openai_audio_transcriptions = OpenAIAudioTranscription()
databricks_chat_completions = DatabricksChatCompletion()
groq_chat_completions = GroqChatCompletion()
+together_ai_text_completions = TogetherAITextCompletion()
azure_ai_chat_completions = AzureAIChatCompletion()
azure_ai_embedding = AzureAIEmbedding()
anthropic_chat_completions = AnthropicChatCompletion()
@@ -1285,6 +1287,23 @@ def completion(
            prompt = " ".join([message["content"] for message in messages])  # type: ignore

        ## COMPLETION CALL
+        if custom_llm_provider == "together_ai":
+            _response = together_ai_text_completions.completion(
+                model=model,
+                messages=messages,
+                model_response=model_response,
+                print_verbose=print_verbose,
+                api_key=api_key,
+                api_base=api_base,
+                acompletion=acompletion,
+                client=client,  # pass AsyncOpenAI, OpenAI client
+                logging_obj=logging,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                logger_fn=logger_fn,
+                timeout=timeout,  # type: ignore
+            )
+        else:
            _response = openai_text_completions.completion(
                model=model,
                messages=messages,

@@ -1,61 +1,7 @@
model_list:
-  - model_name: fake-claude-endpoint
-    litellm_params:
-      model: anthropic.claude-3-sonnet-20240229-v1:0
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-      aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
-      aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
-  - model_name: gemini-vision
-    litellm_params:
-      model: vertex_ai/gemini-1.0-pro-vision-001
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
-      vertex_project: "adroit-crow-413218"
-      vertex_location: "us-central1"
-  - model_name: fake-azure-endpoint
-    litellm_params:
-      model: openai/429
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-  - model_name: fake-openai-endpoint
-    litellm_params:
-      model: gpt-3.5-turbo
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-  - model_name: o1-preview
-    litellm_params:
-      model: o1-preview
-  - model_name: rerank-english-v3.0
-    litellm_params:
-      model: cohere/rerank-english-v3.0
-      api_key: os.environ/COHERE_API_KEY
-  - model_name: azure-rerank-english-v3.0
-    litellm_params:
-      model: azure_ai/rerank-english-v3.0
-      api_base: os.environ/AZURE_AI_COHERE_API_BASE
-      api_key: os.environ/AZURE_AI_COHERE_API_KEY
-  - model_name: "databricks/*"
-    litellm_params:
-      model: "databricks/*"
-      api_key: os.environ/DATABRICKS_API_KEY
-      api_base: os.environ/DATABRICKS_API_BASE
-  - model_name: "anthropic/*"
-    litellm_params:
-      model: "anthropic/*"
-  - model_name: "*"
-    litellm_params:
-      model: "openai/*"
-  - model_name: "fireworks_ai/*"
-    litellm_params:
-      model: "fireworks_ai/*"
-      configurable_clientside_auth_params: ["api_base"]
-  - model_name: "gemini-flash-experimental"
-    litellm_params:
-      model: "vertex_ai/gemini-flash-experimental"
-
-litellm_settings:
-  json_logs: true
-  cache: true
-  cache_params:
-    type: "redis"
-    # namespace: "litellm_caching"
-    ttl: 900
-  callbacks: ["batch_redis_requests"]
+  - model_name: whisper
+    litellm_params:
+      model: whisper-1
+      api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      mode: audio_transcription

@@ -673,6 +673,9 @@ def run_server(
        import litellm

+        # DO NOT DELETE - enables global variables to work across files
+        from litellm.proxy.proxy_server import app  # noqa
+
        if run_gunicorn is False and run_hypercorn is False:
            if ssl_certfile_path is not None and ssl_keyfile_path is not None:
                print(  # noqa

@@ -347,12 +347,20 @@ OpenAIMessageContent = Union[
    str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
]

+# The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.
+AllPromptValues = Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None]
+

class OpenAIChatCompletionUserMessage(TypedDict):
    role: Literal["user"]
    content: OpenAIMessageContent


+class OpenAITextCompletionUserMessage(TypedDict):
+    role: Literal["user"]
+    content: AllPromptValues
+
+
class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):
    cache_control: ChatCompletionCachedContent
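
For reference (not part of the diff), the prompt shapes the new `AllPromptValues` alias admits, per the comment above:

from litellm.types.llms.openai import AllPromptValues, OpenAITextCompletionUserMessage

p1: AllPromptValues = "a single string prompt"
p2: AllPromptValues = ["prompt one", "prompt two"]   # array of strings
p3: AllPromptValues = [1, 2, 3]                      # array of tokens
p4: AllPromptValues = [[1, 2], [3, 4]]               # array of token arrays

msg = OpenAITextCompletionUserMessage(role="user", content=p1)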

@@ -80,6 +80,7 @@ from litellm.types.llms.openai import (
    AllMessageValues,
    ChatCompletionNamedToolChoiceParam,
    ChatCompletionToolParam,
+    ChatCompletionToolParamFunctionChunk,
)
from litellm.types.utils import FileTypes  # type: ignore
from litellm.types.utils import (
@@ -3360,7 +3361,8 @@ def get_optional_params(
        supported_params = get_supported_openai_params(
            model=model, custom_llm_provider=custom_llm_provider
        )
-        if model in litellm.BEDROCK_CONVERSE_MODELS:
+        base_model = litellm.AmazonConverseConfig()._get_base_model(model)
+        if base_model in litellm.BEDROCK_CONVERSE_MODELS:
            _check_valid_arg(supported_params=supported_params)
            optional_params = litellm.AmazonConverseConfig().map_openai_params(
                model=model,
@@ -9255,3 +9257,24 @@ def process_response_headers(response_headers: Union[httpx.Headers, dict]) -> dict:
        **additional_headers,
    }
    return additional_headers
+
+
+def add_dummy_tool(custom_llm_provider: str) -> List[ChatCompletionToolParam]:
+    """
+    Prevent Anthropic from raising error when tool_use block exists but no tools are provided.
+
+    Relevent Issues: https://github.com/BerriAI/litellm/issues/5388, https://github.com/BerriAI/litellm/issues/5747
+    """
+    return [
+        ChatCompletionToolParam(
+            type="function",
+            function=ChatCompletionToolParamFunctionChunk(
+                name="dummy-tool",
+                description="This is a dummy tool call",  # provided to satisfy bedrock constraint.
+                parameters={
+                    "type": "object",
+                    "properties": {},
+                },
+            ),
+        )
+    ]
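
For reference (not part of the diff), roughly what the helper returns; the Anthropic request in the log further down carries the same tool as {"name": "dummy-tool", ..., "input_schema": {"type": "object", "properties": {}}}:

from litellm.utils import add_dummy_tool

tools = add_dummy_tool(custom_llm_provider="bedrock_converse")
# roughly: [{"type": "function",
#            "function": {"name": "dummy-tool",
#                         "description": "This is a dummy tool call",
#                         "parameters": {"type": "object", "properties": {}}}}]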

@@ -75,6 +75,24 @@ def test_bedrock_optional_params_embeddings():
    assert len(optional_params) == 0


+@pytest.mark.parametrize(
+    "model",
+    [
+        "us.anthropic.claude-3-haiku-20240307-v1:0",
+        "us.meta.llama3-2-11b-instruct-v1:0",
+        "anthropic.claude-3-haiku-20240307-v1:0",
+    ],
+)
+def test_bedrock_optional_params_completions(model):
+    litellm.drop_params = True
+    optional_params = get_optional_params(
+        model=model, max_tokens=10, temperature=0.1, custom_llm_provider="bedrock"
+    )
+    print(f"optional_params: {optional_params}")
+    assert len(optional_params) == 3
+    assert optional_params == {"maxTokens": 10, "stream": False, "temperature": 0.1}
+
+
@pytest.mark.parametrize(
    "model, expected_dimensions, dimensions_kwarg",
    [

tests/local_testing/log.txt (new file, 104 lines)

@@ -0,0 +1,104 @@
============================= test session starts ==============================
platform darwin -- Python 3.11.4, pytest-8.3.2, pluggy-1.5.0 -- /Users/krrishdholakia/Documents/litellm/myenv/bin/python3.11
cachedir: .pytest_cache
rootdir: /Users/krrishdholakia/Documents/litellm
configfile: pyproject.toml
plugins: asyncio-0.23.8, respx-0.21.1, anyio-4.6.0
asyncio: mode=Mode.STRICT
collecting ... collected 1 item
test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307] <module 'litellm' from '/Users/krrishdholakia/Documents/litellm/litellm/__init__.py'>
Request to litellm:
litellm.completion(model='claude-3-haiku-20240307', messages=[{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}], tools=[{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], tool_choice='auto')
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}}
optional_params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}}
SENT optional_params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}, 'max_tokens': 4096}
tool: {'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}

POST Request Sent from LiteLLM:
curl -X POST \
https://api.anthropic.com/v1/messages \
-H 'accept: *****' -H 'anthropic-version: *****' -H 'content-type: *****' -H 'x-api-key: sk-ant-api03-bJf1M8qp-JDptRcZRE5ve5efAfSIaL5u-SZ9vItIkvuFcV5cUsd********************************************' -H 'anthropic-beta: *****' \
-d '{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}]}], 'tools': [{'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'input_schema': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}], 'tool_choice': {'type': 'auto'}, 'max_tokens': 4096, 'model': 'claude-3-haiku-20240307'}'

_is_function_call: False
RAW RESPONSE:
{"id":"msg_01HRugqzL4WmcxMmbvDheTph","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"Okay, let's check the current weather in those three cities:"},{"type":"tool_use","id":"toolu_016U6G3kpxjHSiJLwVCrrScz","name":"get_current_weather","input":{"location":"San Francisco","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":379,"output_tokens":87}}
raw model_response: {"id":"msg_01HRugqzL4WmcxMmbvDheTph","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"Okay, let's check the current weather in those three cities:"},{"type":"tool_use","id":"toolu_016U6G3kpxjHSiJLwVCrrScz","name":"get_current_weather","input":{"location":"San Francisco","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":379,"output_tokens":87}}
Logging Details LiteLLM-Success Call: Cache_hit=None
Looking up model=claude-3-haiku-20240307 in model_cost_map
Looking up model=claude-3-haiku-20240307 in model_cost_map
Response
ModelResponse(id='chatcmpl-7222f6c2-962a-4776-8639-576723466cb7', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None))], created=1727897483, model='claude-3-haiku-20240307', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=87, prompt_tokens=379, total_tokens=466, completion_tokens_details=None))
length of tool calls 1
Expecting there to be 3 tool calls
tool_calls: [ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')]
Response message
Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None)
messages: [{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}, Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None), {'tool_call_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}]
Request to litellm:
litellm.completion(model='claude-3-haiku-20240307', messages=[{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}, Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None), {'tool_call_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}], temperature=0.2, seed=22, drop_params=True)
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}]}
optional_params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}]}
SENT optional_params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}], 'max_tokens': 4096}
tool: {'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}

POST Request Sent from LiteLLM:
curl -X POST \
https://api.anthropic.com/v1/messages \
-H 'accept: *****' -H 'anthropic-version: *****' -H 'content-type: *****' -H 'x-api-key: sk-ant-api03-bJf1M8qp-JDptRcZRE5ve5efAfSIaL5u-SZ9vItIkvuFcV5cUsd********************************************' -H 'anthropic-beta: *****' \
-d '{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}]}, {'role': 'assistant', 'content': [{'type': 'tool_use', 'id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'name': 'get_current_weather', 'input': {'location': 'San Francisco', 'unit': 'celsius'}}]}, {'role': 'user', 'content': [{'type': 'tool_result', 'tool_use_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}]}], 'temperature': 0.2, 'tools': [{'name': 'dummy-tool', 'description': '', 'input_schema': {'type': 'object', 'properties': {}}}], 'max_tokens': 4096, 'model': 'claude-3-haiku-20240307'}'

_is_function_call: False
RAW RESPONSE:
{"id":"msg_01Wp8NVScugz6yAGsmB5trpZ","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"The current weather in San Francisco is 72°F (22°C)."},{"type":"tool_use","id":"toolu_01HTXEYDX4MspM76STtJqs1n","name":"get_current_weather","input":{"location":"Tokyo","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":426,"output_tokens":90}}
raw model_response: {"id":"msg_01Wp8NVScugz6yAGsmB5trpZ","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"The current weather in San Francisco is 72°F (22°C)."},{"type":"tool_use","id":"toolu_01HTXEYDX4MspM76STtJqs1n","name":"get_current_weather","input":{"location":"Tokyo","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":426,"output_tokens":90}}
Logging Details LiteLLM-Success Call: Cache_hit=None
Looking up model=claude-3-haiku-20240307 in model_cost_map
Looking up model=claude-3-haiku-20240307 in model_cost_map
second response
ModelResponse(id='chatcmpl-c4ed5c25-ba7c-49e5-a6be-5720ab25fff0', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content='The current weather in San Francisco is 72°F (22°C).', role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "Tokyo", "unit": "celsius"}', name='get_current_weather'), id='toolu_01HTXEYDX4MspM76STtJqs1n', type='function')], function_call=None))], created=1727897484, model='claude-3-haiku-20240307', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=90, prompt_tokens=426, total_tokens=516, completion_tokens_details=None))
PASSED
=============================== warnings summary ===============================
../../myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284
/Users/krrishdholakia/Documents/litellm/myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning)
../../litellm/utils.py:17
/Users/krrishdholakia/Documents/litellm/litellm/utils.py:17: DeprecationWarning: 'imghdr' is deprecated and slated for removal in Python 3.13
import imghdr
../../litellm/utils.py:124
/Users/krrishdholakia/Documents/litellm/litellm/utils.py:124: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:
test_function_calling.py:56
/Users/krrishdholakia/Documents/litellm/tests/local_testing/test_function_calling.py:56: PytestUnknownMarkWarning: Unknown pytest.mark.flaky - is this a typo? You can register custom marks to avoid this warning - for details, see https://docs.pytest.org/en/stable/how-to/mark.html
@pytest.mark.flaky(retries=3, delay=1)
tests/local_testing/test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307]
tests/local_testing/test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307]
/Users/krrishdholakia/Documents/litellm/myenv/lib/python3.11/site-packages/httpx/_content.py:202: DeprecationWarning: Use 'content=<...>' to upload raw bytes/text content.
warnings.warn(message, DeprecationWarning)
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
======================== 1 passed, 6 warnings in 1.89s =========================

@@ -47,16 +47,17 @@ def get_current_weather(location, unit="fahrenheit"):
    [
        "gpt-3.5-turbo-1106",
        # "mistral/mistral-large-latest",
-        # "claude-3-haiku-20240307",
-        # "gemini/gemini-1.5-pro",
+        "claude-3-haiku-20240307",
+        "gemini/gemini-1.5-pro",
        "anthropic.claude-3-sonnet-20240229-v1:0",
-        "groq/llama3-8b-8192",
+        # "groq/llama3-8b-8192",
    ],
)
@pytest.mark.flaky(retries=3, delay=1)
def test_aaparallel_function_call(model):
    try:
        litellm.set_verbose = True
+        litellm.modify_params = True
        # Step 1: send the conversation and available functions to the model
        messages = [
            {
@@ -97,7 +98,6 @@ def test_aaparallel_function_call(model):
        response_message = response.choices[0].message
        tool_calls = response_message.tool_calls

-        print("length of tool calls", len(tool_calls))
        print("Expecting there to be 3 tool calls")
        assert (
            len(tool_calls) > 0
@@ -141,7 +141,7 @@ def test_aaparallel_function_call(model):
            messages=messages,
            temperature=0.2,
            seed=22,
-            tools=tools,
+            # tools=tools,
            drop_params=True,
        )  # get a new response from the model where it can see the function response
        print("second response\n", second_response)
@@ -445,3 +445,29 @@ def test_groq_parallel_function_call():
        print("second response\n", second_response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "anthropic.claude-3-sonnet-20240229-v1:0",
+        "claude-3-haiku-20240307",
+    ],
+)
+def test_anthropic_function_call_with_no_schema(model):
+    """
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/6012
+    """
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_current_weather",
+                "description": "Get the current weather in New York",
+            },
+        }
+    ]
+    messages = [
+        {"role": "user", "content": "What is the current temperature in New York?"}
+    ]
+    completion(model=model, messages=messages, tools=tools, tool_choice="auto")

@@ -4019,7 +4019,7 @@ def test_async_text_completion():
    asyncio.run(test_get_response())


-@pytest.mark.skip(reason="Skip flaky tgai test")
+@pytest.mark.flaky(retries=6, delay=1)
def test_async_text_completion_together_ai():
    litellm.set_verbose = True
    print("test_async_text_completion")
@@ -4032,6 +4032,8 @@ def test_async_text_completion_together_ai():
            max_tokens=10,
        )
        print(f"response: {response}")
+    except litellm.RateLimitError as e:
+        print(e)
    except litellm.Timeout as e:
        print(e)
    except Exception as e: