diff --git a/litellm/llms/OpenAI/completion/utils.py b/litellm/llms/OpenAI/completion/utils.py
new file mode 100644
index 000000000..096f69180
--- /dev/null
+++ b/litellm/llms/OpenAI/completion/utils.py
@@ -0,0 +1,15 @@
+from collections.abc import Iterable
+from typing import List
+
+
+def is_tokens_or_list_of_tokens(value: List):
+    # Check if it's a list of integers (tokens)
+    if isinstance(value, list) and all(isinstance(item, int) for item in value):
+        return True
+    # Check if it's a list of lists of integers (list of tokens)
+    if isinstance(value, list) and all(
+        isinstance(item, list) and all(isinstance(i, int) for i in item)
+        for item in value
+    ):
+        return True
+    return False
diff --git a/litellm/llms/OpenAI/openai.py b/litellm/llms/OpenAI/openai.py
index 012b641ef..86b290ab6 100644
--- a/litellm/llms/OpenAI/openai.py
+++ b/litellm/llms/OpenAI/openai.py
@@ -4,7 +4,7 @@ import os
 import time
 import traceback
 import types
-from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union
+from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union, cast
 
 import httpx
 import openai
@@ -30,8 +30,10 @@ from litellm.utils import (
 
 from ...types.llms.openai import *
 from ..base import BaseLLM
+from ..prompt_templates.common_utils import convert_content_list_to_str
 from ..prompt_templates.factory import custom_prompt, prompt_factory
 from .common_utils import drop_params_from_unprocessable_entity_error
+from .completion.utils import is_tokens_or_list_of_tokens
 
 
 class OpenAIError(Exception):
@@ -420,6 +422,35 @@ class OpenAITextCompletionConfig:
             and v is not None
         }
 
+    def _transform_prompt(
+        self,
+        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
+    ) -> AllPromptValues:
+        if len(messages) == 1:  # base case
+            message_content = messages[0].get("content")
+            if (
+                message_content
+                and isinstance(message_content, list)
+                and is_tokens_or_list_of_tokens(message_content)
+            ):
+                openai_prompt: AllPromptValues = cast(AllPromptValues, message_content)
+            else:
+                openai_prompt = ""
+                content = convert_content_list_to_str(
+                    cast(AllMessageValues, messages[0])
+                )
+                openai_prompt += content
+        else:
+            prompt_str_list: List[str] = []
+            for m in messages:
+                try:  # expect list of int/list of list of int to be a 1 message array only.
+                    content = convert_content_list_to_str(cast(AllMessageValues, m))
+                    prompt_str_list.append(content)
+                except Exception as e:
+                    raise e
+            openai_prompt = prompt_str_list
+        return openai_prompt
+
     def convert_to_chat_model_response_object(
         self,
         response_object: Optional[TextCompletionResponse] = None,
@@ -459,6 +490,7 @@ class OpenAITextCompletionConfig:
 
 
 class OpenAIChatCompletion(BaseLLM):
+
     def __init__(self) -> None:
         super().__init__()
 
@@ -1466,7 +1498,9 @@ class OpenAIChatCompletion(BaseLLM):
         elif mode == "audio_transcription":
            # Get the current directory of the file being run
            pwd = os.path.dirname(os.path.realpath(__file__))
-           file_path = os.path.join(pwd, "../tests/gettysburg.wav")
+           file_path = os.path.join(
+               pwd, "../../../tests/gettysburg.wav"
+           )  # proxy address
            audio_file = open(file_path, "rb")
            completion = await client.audio.transcriptions.with_raw_response.create(
                file=audio_file,
@@ -1502,6 +1536,8 @@ class OpenAIChatCompletion(BaseLLM):
 
 
 class OpenAITextCompletion(BaseLLM):
+    openai_text_completion_global_config = OpenAITextCompletionConfig()
+
     def __init__(self) -> None:
         super().__init__()
 
@@ -1518,7 +1554,7 @@ class OpenAITextCompletion(BaseLLM):
         model_response: ModelResponse,
         api_key: str,
         model: str,
-        messages: list,
+        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
        timeout: float,
        logging_obj: LiteLLMLoggingObj,
        optional_params: dict,
@@ -1531,24 +1567,18 @@ class OpenAITextCompletion(BaseLLM):
        organization: Optional[str] = None,
        headers: Optional[dict] = None,
    ):
-        super().completion()
        try:
            if headers is None:
                headers = self.validate_environment(api_key=api_key)
 
            if model is None or messages is None:
                raise OpenAIError(status_code=422, message="Missing model or messages")
 
-            if (
-                len(messages) > 0
-                and "content" in messages[0]
-                and isinstance(messages[0]["content"], list)
-            ):
-                prompt = messages[0]["content"]
-            else:
-                prompt = [message["content"] for message in messages]  # type: ignore
-            # don't send max retries to the api, if set
+            prompt = self.openai_text_completion_global_config._transform_prompt(
+                messages
+            )
+
            data = {"model": model, "prompt": prompt, **optional_params}
            max_retries = data.pop("max_retries", 2)
            ## LOGGING
 
diff --git a/litellm/llms/anthropic/chat/handler.py b/litellm/llms/anthropic/chat/handler.py
index 3c105999d..d5ad4d858 100644
--- a/litellm/llms/anthropic/chat/handler.py
+++ b/litellm/llms/anthropic/chat/handler.py
@@ -551,6 +551,8 @@ class AnthropicChatCompletion(BaseLLM):
             error_response = getattr(e, "response", None)
             if error_headers is None and error_response:
                 error_headers = getattr(error_response, "headers", None)
+            if error_response and hasattr(error_response, "text"):
+                error_text = getattr(error_response, "text", error_text)
             raise AnthropicError(
                 message=error_text,
                 status_code=status_code,
diff --git a/litellm/llms/anthropic/chat/transformation.py b/litellm/llms/anthropic/chat/transformation.py
index 2ca22db3b..a7beef699 100644
--- a/litellm/llms/anthropic/chat/transformation.py
+++ b/litellm/llms/anthropic/chat/transformation.py
@@ -6,11 +6,17 @@ from litellm.llms.prompt_templates.factory import anthropic_messages_pt
 from litellm.types.llms.anthropic import (
     AnthropicMessageRequestBase,
     AnthropicMessagesRequest,
+    AnthropicMessagesTool,
     AnthropicMessagesToolChoice,
     AnthropicSystemMessageContent,
 )
-from litellm.types.llms.openai import AllMessageValues, ChatCompletionSystemMessage
-from litellm.utils import has_tool_call_blocks
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    ChatCompletionSystemMessage,
+    ChatCompletionToolParam,
+    ChatCompletionToolParamFunctionChunk,
+)
+from litellm.utils import add_dummy_tool, has_tool_call_blocks
 
 from ..common_utils import AnthropicError
 
@@ -146,11 +152,16 @@ class AnthropicConfig:
             and messages is not None
             and has_tool_call_blocks(messages)
         ):
-            raise litellm.UnsupportedParamsError(
-                message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
-                model="",
-                llm_provider="anthropic",
-            )
+            if litellm.modify_params:
+                optional_params["tools"] = add_dummy_tool(
+                    custom_llm_provider="bedrock_converse"
+                )
+            else:
+                raise litellm.UnsupportedParamsError(
+                    message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param OR set `litellm.modify_params = True` // `litellm_settings::modify_params: True` to add dummy tool to the request.",
+                    model="",
+                    llm_provider="anthropic",
+                )
 
         return optional_params
 
@@ -266,18 +277,23 @@ class AnthropicConfig:
         if "anthropic-beta" not in headers:
             # default to v1 of "anthropic-beta"
             headers["anthropic-beta"] = "tools-2024-05-16"
-
         anthropic_tools = []
         for tool in optional_params["tools"]:
             if "input_schema" in tool:  # assume in anthropic format
                 anthropic_tools.append(tool)
             else:  # assume openai tool call
                 new_tool = tool["function"]
-                new_tool["input_schema"] = new_tool.pop("parameters")  # rename key
+                parameters = new_tool.pop(
+                    "parameters",
+                    {
+                        "type": "object",
+                        "properties": {},
+                    },
+                )
+                new_tool["input_schema"] = parameters  # rename key
                 if "cache_control" in tool:
                     new_tool["cache_control"] = tool["cache_control"]
                 anthropic_tools.append(new_tool)
-
         optional_params["tools"] = anthropic_tools
 
         data = {
diff --git a/litellm/llms/azure_ai/chat/transformation.py b/litellm/llms/azure_ai/chat/transformation.py
index 85f107eca..60d1e1cd0 100644
--- a/litellm/llms/azure_ai/chat/transformation.py
+++ b/litellm/llms/azure_ai/chat/transformation.py
@@ -26,6 +26,7 @@ class AzureAIStudioConfig(OpenAIConfig):
 
     def _transform_messages(self, messages: List[AllMessageValues]) -> List:
         for message in messages:
-            message = convert_content_list_to_str(message=message)
-
+            texts = convert_content_list_to_str(message=message)
+            if texts:
+                message["content"] = texts
         return messages
diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py
index 677a06cc5..d53b01ee0 100644
--- a/litellm/llms/bedrock/chat/converse_transformation.py
+++ b/litellm/llms/bedrock/chat/converse_transformation.py
@@ -22,7 +22,7 @@ from litellm.types.llms.openai import (
     ChatCompletionToolParamFunctionChunk,
 )
 from litellm.types.utils import ModelResponse, Usage
-from litellm.utils import CustomStreamWrapper, has_tool_call_blocks
+from litellm.utils import CustomStreamWrapper, add_dummy_tool, has_tool_call_blocks
 
 from ...prompt_templates.factory import _bedrock_converse_messages_pt, _bedrock_tools_pt
 from ..common_utils import BedrockError, get_bedrock_tool_name
@@ -213,11 +213,16 @@ class AmazonConverseConfig:
             and messages is not None
             and has_tool_call_blocks(messages)
         ):
-            raise litellm.UnsupportedParamsError(
-                message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
-                model="",
-                llm_provider="anthropic",
-            )
+            if litellm.modify_params:
+                optional_params["tools"] = add_dummy_tool(
+                    custom_llm_provider="bedrock_converse"
+                )
+            else:
+                raise litellm.UnsupportedParamsError(
+                    message="Bedrock doesn't support tool calling without `tools=` param specified. Pass `tools=` param OR set `litellm.modify_params = True` // `litellm_settings::modify_params: True` to add dummy tool to the request.",
+                    model="",
+                    llm_provider="bedrock",
+                )
         return optional_params
 
     def _transform_request(
diff --git a/litellm/llms/prompt_templates/common_utils.py b/litellm/llms/prompt_templates/common_utils.py
index e32ae3709..6a63697c4 100644
--- a/litellm/llms/prompt_templates/common_utils.py
+++ b/litellm/llms/prompt_templates/common_utils.py
@@ -7,7 +7,7 @@ from typing import List
 from litellm.types.llms.openai import AllMessageValues
 
 
-def convert_content_list_to_str(message: AllMessageValues) -> AllMessageValues:
+def convert_content_list_to_str(message: AllMessageValues) -> str:
     """
     - handles scenario where content is list and not string
     - content list is just text, and no images
@@ -26,7 +26,4 @@ def convert_content_list_to_str(message: AllMessageValues) -> AllMessageValues:
     elif message_content is not None and isinstance(message_content, str):
         texts = message_content
 
-    if texts:
-        message["content"] = texts
-
-    return message
+    return texts
diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 42f2e864b..1b3e093b2 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -2554,7 +2554,10 @@ def _bedrock_tools_pt(tools: List) -> List[BedrockToolBlock]:
     """
     tool_block_list: List[BedrockToolBlock] = []
     for tool in tools:
-        parameters = tool.get("function", {}).get("parameters", None)
+        parameters = tool.get("function", {}).get("parameters", {
+            "type": "object",
+            "properties": {}
+        })
         name = tool.get("function", {}).get("name", "")
 
         # related issue: https://github.com/BerriAI/litellm/issues/5007
diff --git a/litellm/llms/together_ai/completion.py b/litellm/llms/together_ai/completion.py
deleted file mode 100644
index 525c0411a..000000000
--- a/litellm/llms/together_ai/completion.py
+++ /dev/null
@@ -1,7 +0,0 @@
-"""
-Support for OpenAI's `/v1/completions` endpoint.
-
-Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.
-
-Docs: https://docs.together.ai/reference/completions-1
-"""
diff --git a/litellm/llms/together_ai/completion/handler.py b/litellm/llms/together_ai/completion/handler.py
new file mode 100644
index 000000000..fab2a39c5
--- /dev/null
+++ b/litellm/llms/together_ai/completion/handler.py
@@ -0,0 +1,61 @@
+"""
+Support for OpenAI's `/v1/completions` endpoint.
+
+Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.
+
+Docs: https://docs.together.ai/reference/completions-1
+"""
+
+from typing import Any, Callable, List, Optional, Union
+
+from litellm.litellm_core_utils.litellm_logging import Logging
+from litellm.types.llms.openai import AllMessageValues, OpenAITextCompletionUserMessage
+from litellm.utils import ModelResponse
+
+from ...OpenAI.openai import OpenAITextCompletion
+from .transformation import TogetherAITextCompletionConfig
+
+together_ai_text_completion_global_config = TogetherAITextCompletionConfig()
+
+
+class TogetherAITextCompletion(OpenAITextCompletion):
+
+    def completion(
+        self,
+        model_response: ModelResponse,
+        api_key: str,
+        model: str,
+        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
+        timeout: float,
+        logging_obj: Logging,
+        optional_params: dict,
+        print_verbose: Optional[Callable[..., Any]] = None,
+        api_base: Optional[str] = None,
+        acompletion: bool = False,
+        litellm_params=None,
+        logger_fn=None,
+        client=None,
+        organization: Optional[str] = None,
+        headers: Optional[dict] = None,
+    ):
+        prompt = together_ai_text_completion_global_config._transform_prompt(messages)
+
+        message = OpenAITextCompletionUserMessage(role="user", content=prompt)
+        new_messages = [message]
+        return super().completion(
+            model_response=model_response,
+            api_key=api_key,
+            model=model,
+            messages=new_messages,
+            timeout=timeout,
+            logging_obj=logging_obj,
+            optional_params=optional_params,
+            print_verbose=print_verbose,
+            api_base=api_base,
+            acompletion=acompletion,
+            litellm_params=litellm_params,
+            logger_fn=logger_fn,
+            client=client,
+            organization=organization,
+            headers=headers,
+        )
diff --git a/litellm/llms/together_ai/completion/transformation.py b/litellm/llms/together_ai/completion/transformation.py
new file mode 100644
index 000000000..65b9ad69b
--- /dev/null
+++ b/litellm/llms/together_ai/completion/transformation.py
@@ -0,0 +1,46 @@
+"""
+Translates calls from OpenAI's `/v1/completions` endpoint to TogetherAI's `/v1/completions` endpoint.
+
+Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.
+
+Docs: https://docs.together.ai/reference/completions-1
+"""
+
+from typing import List, Union, cast
+
+from litellm.llms.OpenAI.completion.utils import is_tokens_or_list_of_tokens
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    AllPromptValues,
+    OpenAITextCompletionUserMessage,
+)
+
+from ...OpenAI.openai import OpenAITextCompletionConfig
+
+
+class TogetherAITextCompletionConfig(OpenAITextCompletionConfig):
+    def _transform_prompt(
+        self,
+        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
+    ) -> AllPromptValues:
+        """
+        TogetherAI expects a string prompt.
+ """ + initial_prompt: AllPromptValues = super()._transform_prompt(messages) + ## TOGETHER AI SPECIFIC VALIDATION ## + if isinstance(initial_prompt, list) and is_tokens_or_list_of_tokens( + value=initial_prompt + ): + raise ValueError("TogetherAI does not support integers as input") + if ( + isinstance(initial_prompt, list) + and len(initial_prompt) == 1 + and isinstance(initial_prompt[0], str) + ): + together_prompt = initial_prompt[0] + elif isinstance(initial_prompt, list): + raise ValueError("TogetherAI does not support multiple prompts.") + else: + together_prompt = cast(str, initial_prompt) + + return together_prompt diff --git a/litellm/main.py b/litellm/main.py index ccc7cc1b5..dfcce3958 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -112,6 +112,7 @@ from .llms.prompt_templates.factory import ( ) from .llms.sagemaker.sagemaker import SagemakerLLM from .llms.text_completion_codestral import CodestralTextCompletion +from .llms.together_ai.completion.handler import TogetherAITextCompletion from .llms.triton import TritonChatCompletion from .llms.vertex_ai_and_google_ai_studio import ( vertex_ai_anthropic, @@ -168,6 +169,7 @@ openai_o1_chat_completions = OpenAIO1ChatCompletion() openai_audio_transcriptions = OpenAIAudioTranscription() databricks_chat_completions = DatabricksChatCompletion() groq_chat_completions = GroqChatCompletion() +together_ai_text_completions = TogetherAITextCompletion() azure_ai_chat_completions = AzureAIChatCompletion() azure_ai_embedding = AzureAIEmbedding() anthropic_chat_completions = AnthropicChatCompletion() @@ -1285,21 +1287,38 @@ def completion( prompt = " ".join([message["content"] for message in messages]) # type: ignore ## COMPLETION CALL - _response = openai_text_completions.completion( - model=model, - messages=messages, - model_response=model_response, - print_verbose=print_verbose, - api_key=api_key, - api_base=api_base, - acompletion=acompletion, - client=client, # pass AsyncOpenAI, OpenAI client - logging_obj=logging, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn, - timeout=timeout, # type: ignore - ) + if custom_llm_provider == "together_ai": + _response = together_ai_text_completions.completion( + model=model, + messages=messages, + model_response=model_response, + print_verbose=print_verbose, + api_key=api_key, + api_base=api_base, + acompletion=acompletion, + client=client, # pass AsyncOpenAI, OpenAI client + logging_obj=logging, + optional_params=optional_params, + litellm_params=litellm_params, + logger_fn=logger_fn, + timeout=timeout, # type: ignore + ) + else: + _response = openai_text_completions.completion( + model=model, + messages=messages, + model_response=model_response, + print_verbose=print_verbose, + api_key=api_key, + api_base=api_base, + acompletion=acompletion, + client=client, # pass AsyncOpenAI, OpenAI client + logging_obj=logging, + optional_params=optional_params, + litellm_params=litellm_params, + logger_fn=logger_fn, + timeout=timeout, # type: ignore + ) if ( optional_params.get("stream", False) is False diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index b8d0aadf8..b9dae49f1 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,61 +1,7 @@ model_list: - - model_name: fake-claude-endpoint + - model_name: whisper litellm_params: - model: anthropic.claude-3-sonnet-20240229-v1:0 - api_base: https://exampleopenaiendpoint-production.up.railway.app - aws_secret_access_key: 
-      aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
-      aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
-  - model_name: gemini-vision
-    litellm_params:
-      model: vertex_ai/gemini-1.0-pro-vision-001
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
-      vertex_project: "adroit-crow-413218"
-      vertex_location: "us-central1"
-  - model_name: fake-azure-endpoint
-    litellm_params:
-      model: openai/429
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-  - model_name: fake-openai-endpoint
-    litellm_params:
-      model: gpt-3.5-turbo
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-  - model_name: o1-preview
-    litellm_params:
-      model: o1-preview
-  - model_name: rerank-english-v3.0
-    litellm_params:
-      model: cohere/rerank-english-v3.0
-      api_key: os.environ/COHERE_API_KEY
-  - model_name: azure-rerank-english-v3.0
-    litellm_params:
-      model: azure_ai/rerank-english-v3.0
-      api_base: os.environ/AZURE_AI_COHERE_API_BASE
-      api_key: os.environ/AZURE_AI_COHERE_API_KEY
-  - model_name: "databricks/*"
-    litellm_params:
-      model: "databricks/*"
-      api_key: os.environ/DATABRICKS_API_KEY
-      api_base: os.environ/DATABRICKS_API_BASE
-  - model_name: "anthropic/*"
-    litellm_params:
-      model: "anthropic/*"
-  - model_name: "*"
-    litellm_params:
-      model: "openai/*"
-  - model_name: "fireworks_ai/*"
-    litellm_params:
-      model: "fireworks_ai/*"
-      configurable_clientside_auth_params: ["api_base"]
-  - model_name: "gemini-flash-experimental"
-    litellm_params:
-      model: "vertex_ai/gemini-flash-experimental"
-
-litellm_settings:
-  json_logs: true
-  cache: true
-  cache_params:
-    type: "redis"
-    # namespace: "litellm_caching"
-    ttl: 900
-  callbacks: ["batch_redis_requests"]
+      model: whisper-1
+      api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      mode: audio_transcription
\ No newline at end of file
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index 9de5ad7f2..9f889d2a2 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -673,6 +673,9 @@ def run_server(
 
         import litellm
 
+        # DO NOT DELETE - enables global variables to work across files
+        from litellm.proxy.proxy_server import app  # noqa
+
         if run_gunicorn is False and run_hypercorn is False:
             if ssl_certfile_path is not None and ssl_keyfile_path is not None:
                 print(  # noqa
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
index ee8336699..17eb89fd9 100644
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@@ -347,12 +347,20 @@ OpenAIMessageContent = Union[
     str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
 ]
 
+# The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.
+AllPromptValues = Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None]
+
 
 class OpenAIChatCompletionUserMessage(TypedDict):
     role: Literal["user"]
     content: OpenAIMessageContent
 
 
+class OpenAITextCompletionUserMessage(TypedDict):
+    role: Literal["user"]
+    content: AllPromptValues
+
+
 class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):
     cache_control: ChatCompletionCachedContent
 
diff --git a/litellm/utils.py b/litellm/utils.py
index 9d63e1151..b0417babd 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -80,6 +80,7 @@ from litellm.types.llms.openai import (
     AllMessageValues,
     ChatCompletionNamedToolChoiceParam,
     ChatCompletionToolParam,
+    ChatCompletionToolParamFunctionChunk,
 )
 from litellm.types.utils import FileTypes  # type: ignore
 from litellm.types.utils import (
@@ -3360,7 +3361,8 @@ def get_optional_params(
         supported_params = get_supported_openai_params(
             model=model, custom_llm_provider=custom_llm_provider
         )
-        if model in litellm.BEDROCK_CONVERSE_MODELS:
+        base_model = litellm.AmazonConverseConfig()._get_base_model(model)
+        if base_model in litellm.BEDROCK_CONVERSE_MODELS:
             _check_valid_arg(supported_params=supported_params)
             optional_params = litellm.AmazonConverseConfig().map_openai_params(
                 model=model,
@@ -9255,3 +9257,24 @@ def process_response_headers(response_headers: Union[httpx.Headers, dict]) -> di
         **additional_headers,
     }
     return additional_headers
+
+
+def add_dummy_tool(custom_llm_provider: str) -> List[ChatCompletionToolParam]:
+    """
+    Prevent Anthropic from raising error when tool_use block exists but no tools are provided.
+
+    Relevant Issues: https://github.com/BerriAI/litellm/issues/5388, https://github.com/BerriAI/litellm/issues/5747
+    """
+    return [
+        ChatCompletionToolParam(
+            type="function",
+            function=ChatCompletionToolParamFunctionChunk(
+                name="dummy-tool",
+                description="This is a dummy tool call",  # provided to satisfy bedrock constraint.
+                parameters={
+                    "type": "object",
+                    "properties": {},
+                },
+            ),
+        )
+    ]
diff --git a/tests/llm_translation/test_optional_params.py b/tests/llm_translation/test_optional_params.py
index e22e724d0..877880e3d 100644
--- a/tests/llm_translation/test_optional_params.py
+++ b/tests/llm_translation/test_optional_params.py
@@ -75,6 +75,24 @@ def test_bedrock_optional_params_embeddings():
     assert len(optional_params) == 0
 
 
+@pytest.mark.parametrize(
+    "model",
+    [
+        "us.anthropic.claude-3-haiku-20240307-v1:0",
+        "us.meta.llama3-2-11b-instruct-v1:0",
+        "anthropic.claude-3-haiku-20240307-v1:0",
+    ],
+)
+def test_bedrock_optional_params_completions(model):
+    litellm.drop_params = True
+    optional_params = get_optional_params(
+        model=model, max_tokens=10, temperature=0.1, custom_llm_provider="bedrock"
+    )
+    print(f"optional_params: {optional_params}")
+    assert len(optional_params) == 3
+    assert optional_params == {"maxTokens": 10, "stream": False, "temperature": 0.1}
+
+
 @pytest.mark.parametrize(
     "model, expected_dimensions, dimensions_kwarg",
     [
diff --git a/tests/local_testing/log.txt b/tests/local_testing/log.txt
new file mode 100644
index 000000000..9b8654df0
--- /dev/null
+++ b/tests/local_testing/log.txt
@@ -0,0 +1,104 @@
+============================= test session starts ==============================
+platform darwin -- Python 3.11.4, pytest-8.3.2, pluggy-1.5.0 -- /Users/krrishdholakia/Documents/litellm/myenv/bin/python3.11
+cachedir: .pytest_cache
+rootdir: /Users/krrishdholakia/Documents/litellm
+configfile: pyproject.toml
+plugins: asyncio-0.23.8, respx-0.21.1, anyio-4.6.0
+asyncio: mode=Mode.STRICT
+collecting ... collected 1 item
+
+test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307]
+
+
+Request to litellm:
+litellm.completion(model='claude-3-haiku-20240307', messages=[{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris?
- give me 3 responses"}], tools=[{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], tool_choice='auto') + + +SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False +Final returned optional params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}} +optional_params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}} +SENT optional_params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}, 'max_tokens': 4096} +tool: {'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}} + + +POST Request Sent from LiteLLM: +curl -X POST \ +https://api.anthropic.com/v1/messages \ +-H 'accept: *****' -H 'anthropic-version: *****' -H 'content-type: *****' -H 'x-api-key: sk-ant-api03-bJf1M8qp-JDptRcZRE5ve5efAfSIaL5u-SZ9vItIkvuFcV5cUsd********************************************' -H 'anthropic-beta: *****' \ +-d '{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': "What's the weather like in San Francisco, Tokyo, and Paris? 
- give me 3 responses"}]}], 'tools': [{'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'input_schema': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}], 'tool_choice': {'type': 'auto'}, 'max_tokens': 4096, 'model': 'claude-3-haiku-20240307'}' + + +_is_function_call: False +RAW RESPONSE: +{"id":"msg_01HRugqzL4WmcxMmbvDheTph","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"Okay, let's check the current weather in those three cities:"},{"type":"tool_use","id":"toolu_016U6G3kpxjHSiJLwVCrrScz","name":"get_current_weather","input":{"location":"San Francisco","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":379,"output_tokens":87}} + + +raw model_response: {"id":"msg_01HRugqzL4WmcxMmbvDheTph","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"Okay, let's check the current weather in those three cities:"},{"type":"tool_use","id":"toolu_016U6G3kpxjHSiJLwVCrrScz","name":"get_current_weather","input":{"location":"San Francisco","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":379,"output_tokens":87}} +Logging Details LiteLLM-Success Call: Cache_hit=None +Looking up model=claude-3-haiku-20240307 in model_cost_map +Looking up model=claude-3-haiku-20240307 in model_cost_map +Response + ModelResponse(id='chatcmpl-7222f6c2-962a-4776-8639-576723466cb7', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None))], created=1727897483, model='claude-3-haiku-20240307', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=87, prompt_tokens=379, total_tokens=466, completion_tokens_details=None)) +length of tool calls 1 +Expecting there to be 3 tool calls +tool_calls: [ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')] +Response message + Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None) +messages: [{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? 
- give me 3 responses"}, Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None), {'tool_call_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}] + + +Request to litellm: +litellm.completion(model='claude-3-haiku-20240307', messages=[{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}, Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None), {'tool_call_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}], temperature=0.2, seed=22, drop_params=True) + + +SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False +Final returned optional params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}]} +optional_params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}]} +SENT optional_params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}], 'max_tokens': 4096} +tool: {'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}} + + +POST Request Sent from LiteLLM: +curl -X POST \ +https://api.anthropic.com/v1/messages \ +-H 'accept: *****' -H 'anthropic-version: *****' -H 'content-type: *****' -H 'x-api-key: sk-ant-api03-bJf1M8qp-JDptRcZRE5ve5efAfSIaL5u-SZ9vItIkvuFcV5cUsd********************************************' -H 'anthropic-beta: *****' \ +-d '{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': "What's the weather like in San Francisco, Tokyo, and Paris? 
- give me 3 responses"}]}, {'role': 'assistant', 'content': [{'type': 'tool_use', 'id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'name': 'get_current_weather', 'input': {'location': 'San Francisco', 'unit': 'celsius'}}]}, {'role': 'user', 'content': [{'type': 'tool_result', 'tool_use_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}]}], 'temperature': 0.2, 'tools': [{'name': 'dummy-tool', 'description': '', 'input_schema': {'type': 'object', 'properties': {}}}], 'max_tokens': 4096, 'model': 'claude-3-haiku-20240307'}' + + +_is_function_call: False +RAW RESPONSE: +{"id":"msg_01Wp8NVScugz6yAGsmB5trpZ","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"The current weather in San Francisco is 72°F (22°C)."},{"type":"tool_use","id":"toolu_01HTXEYDX4MspM76STtJqs1n","name":"get_current_weather","input":{"location":"Tokyo","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":426,"output_tokens":90}} + + +raw model_response: {"id":"msg_01Wp8NVScugz6yAGsmB5trpZ","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"The current weather in San Francisco is 72°F (22°C)."},{"type":"tool_use","id":"toolu_01HTXEYDX4MspM76STtJqs1n","name":"get_current_weather","input":{"location":"Tokyo","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":426,"output_tokens":90}} +Logging Details LiteLLM-Success Call: Cache_hit=None +Looking up model=claude-3-haiku-20240307 in model_cost_map +Looking up model=claude-3-haiku-20240307 in model_cost_map +second response + ModelResponse(id='chatcmpl-c4ed5c25-ba7c-49e5-a6be-5720ab25fff0', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content='The current weather in San Francisco is 72°F (22°C).', role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "Tokyo", "unit": "celsius"}', name='get_current_weather'), id='toolu_01HTXEYDX4MspM76STtJqs1n', type='function')], function_call=None))], created=1727897484, model='claude-3-haiku-20240307', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=90, prompt_tokens=426, total_tokens=516, completion_tokens_details=None)) +PASSED + +=============================== warnings summary =============================== +../../myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284 + /Users/krrishdholakia/Documents/litellm/myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/ + warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning) + +../../litellm/utils.py:17 + /Users/krrishdholakia/Documents/litellm/litellm/utils.py:17: DeprecationWarning: 'imghdr' is deprecated and slated for removal in Python 3.13 + import imghdr + +../../litellm/utils.py:124 + /Users/krrishdholakia/Documents/litellm/litellm/utils.py:124: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice. 
+    with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:
+
+test_function_calling.py:56
+  /Users/krrishdholakia/Documents/litellm/tests/local_testing/test_function_calling.py:56: PytestUnknownMarkWarning: Unknown pytest.mark.flaky - is this a typo? You can register custom marks to avoid this warning - for details, see https://docs.pytest.org/en/stable/how-to/mark.html
+    @pytest.mark.flaky(retries=3, delay=1)
+
+tests/local_testing/test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307]
+tests/local_testing/test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307]
+  /Users/krrishdholakia/Documents/litellm/myenv/lib/python3.11/site-packages/httpx/_content.py:202: DeprecationWarning: Use 'content=<...>' to upload raw bytes/text content.
+    warnings.warn(message, DeprecationWarning)
+
+-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
+======================== 1 passed, 6 warnings in 1.89s =========================
diff --git a/tests/local_testing/test_function_calling.py b/tests/local_testing/test_function_calling.py
index d323325f4..0ba45692a 100644
--- a/tests/local_testing/test_function_calling.py
+++ b/tests/local_testing/test_function_calling.py
@@ -47,16 +47,17 @@ def get_current_weather(location, unit="fahrenheit"):
     [
         "gpt-3.5-turbo-1106",
         # "mistral/mistral-large-latest",
-        # "claude-3-haiku-20240307",
-        # "gemini/gemini-1.5-pro",
+        "claude-3-haiku-20240307",
+        "gemini/gemini-1.5-pro",
         "anthropic.claude-3-sonnet-20240229-v1:0",
-        "groq/llama3-8b-8192",
+        # "groq/llama3-8b-8192",
     ],
 )
 @pytest.mark.flaky(retries=3, delay=1)
 def test_aaparallel_function_call(model):
     try:
         litellm.set_verbose = True
+        litellm.modify_params = True
         # Step 1: send the conversation and available functions to the model
         messages = [
             {
@@ -97,7 +98,6 @@ def test_aaparallel_function_call(model):
 
         response_message = response.choices[0].message
         tool_calls = response_message.tool_calls
-        print("length of tool calls", len(tool_calls))
         print("Expecting there to be 3 tool calls")
         assert (
             len(tool_calls) > 0
@@ -141,7 +141,7 @@ def test_aaparallel_function_call(model):
             messages=messages,
             temperature=0.2,
             seed=22,
-            tools=tools,
+            # tools=tools,
             drop_params=True,
         )  # get a new response from the model where it can see the function response
         print("second response\n", second_response)
@@ -445,3 +445,29 @@ def test_groq_parallel_function_call():
             print("second response\n", second_response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "anthropic.claude-3-sonnet-20240229-v1:0",
+        "claude-3-haiku-20240307",
+    ],
+)
+def test_anthropic_function_call_with_no_schema(model):
+    """
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/6012
+    """
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_current_weather",
+                "description": "Get the current weather in New York",
+            },
+        }
+    ]
+    messages = [
+        {"role": "user", "content": "What is the current temperature in New York?"}
+    ]
+    completion(model=model, messages=messages, tools=tools, tool_choice="auto")
diff --git a/tests/local_testing/test_text_completion.py b/tests/local_testing/test_text_completion.py
index 8e6f6939a..c4d2305fc 100644
--- a/tests/local_testing/test_text_completion.py
+++ b/tests/local_testing/test_text_completion.py
@@ -4019,7 +4019,7 @@ def test_async_text_completion():
     asyncio.run(test_get_response())
 
 
-@pytest.mark.skip(reason="Skip flaky tgai test")
+@pytest.mark.flaky(retries=6, delay=1)
 def test_async_text_completion_together_ai():
     litellm.set_verbose = True
     print("test_async_text_completion")
@@ -4032,6 +4032,8 @@ def test_async_text_completion_together_ai():
                 max_tokens=10,
             )
             print(f"response: {response}")
+        except litellm.RateLimitError as e:
+            print(e)
         except litellm.Timeout as e:
             print(e)
         except Exception as e:
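
Editor's note: the following is an illustrative sketch, not part of the patch above. It mirrors the prompt-shape handling that the new `is_tokens_or_list_of_tokens()` and `OpenAITextCompletionConfig._transform_prompt()` implement, using plain Python so it runs without litellm installed. The names `looks_like_tokens`, `transform_prompt`, and `PromptValue` are local to this example and are not litellm APIs.

from typing import List, Union

PromptValue = Union[str, List[str], List[int], List[List[int]]]


def looks_like_tokens(value: list) -> bool:
    # a single tokenized prompt: list of ints
    if all(isinstance(item, int) for item in value):
        return True
    # a batch of tokenized prompts: list of lists of ints
    return all(
        isinstance(item, list) and all(isinstance(i, int) for i in item)
        for item in value
    )


def transform_prompt(messages: List[dict]) -> PromptValue:
    if len(messages) == 1:
        content = messages[0].get("content")
        # token arrays are passed through untouched
        if isinstance(content, list) and looks_like_tokens(content):
            return content
        # a list of content parts collapses to its text pieces
        if isinstance(content, list):
            return "".join(
                part.get("text", "") for part in content if isinstance(part, dict)
            )
        return str(content or "")
    # multiple messages collapse to a list of prompt strings
    return [str(m.get("content", "")) for m in messages]


print(transform_prompt([{"role": "user", "content": "good morning"}]))  # 'good morning'
print(transform_prompt([{"role": "user", "content": [1, 2, 3]}]))       # [1, 2, 3]
print(transform_prompt([{"role": "user", "content": "a"},
                        {"role": "user", "content": "b"}]))             # ['a', 'b']

Under these assumptions, the TogetherAI subclass then rejects token-array and multi-prompt inputs and unwraps a single-element string list, which is why the handler can always wrap the result back into one user message before delegating to the OpenAI-compatible completion path.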