LiteLLM Minor Fixes & Improvements (10/02/2024) (#6023)
* feat(together_ai/completion): handle Together AI completion calls
* fix: handle list of int / list of list of int for text completion calls
* fix(utils.py): check if the base model is in the Bedrock Converse model list. Fixes https://github.com/BerriAI/litellm/issues/6003
* test(test_optional_params.py): add unit tests for Bedrock optional param mapping. Fixes https://github.com/BerriAI/litellm/issues/6003
* feat(utils.py): enable passing a dummy tool call for Anthropic/Bedrock calls if tool_use blocks exist. Fixes https://github.com/BerriAI/litellm/issues/5388
* fix: fixed an issue with tool use of Claude models on Anthropic and Bedrock (#6013)
* fix(utils.py): handle empty tool schema for Anthropic/Bedrock. Fixes https://github.com/BerriAI/litellm/issues/6012
* fix: fix linting errors
* fix(proxy_cli.py): fix import route for app + health checks path (#6026)
* (testing): enable testing us.anthropic.claude-3-haiku-20240307-v1:0 (#6018)
* fix(proxy_cli.py): fix import route for app + health checks gettysburg.wav. Fixes https://github.com/BerriAI/litellm/issues/5999

Co-authored-by: David Manouchehri <david.manouchehri@ai.moda>
Co-authored-by: Ved Patwardhan <54766411+vedpatwardhan@users.noreply.github.com>
This commit is contained in:
parent 8995ff49ae
commit 14165d3648
20 changed files with 443 additions and 125 deletions
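For context, the dummy-tool behavior described in the commit message looks roughly like this from the caller's side. This is a sketch only: the model, message contents, and tool-call id are illustrative (adapted from the test log further down), and an Anthropic API key is assumed in the environment.

    import litellm

    # The conversation already contains a tool call and its result, but the
    # follow-up request passes no `tools=` param. With modify_params enabled,
    # LiteLLM injects a dummy tool so Anthropic/Bedrock accept the request
    # instead of erroring (issues #5388 / #6012).
    litellm.modify_params = True

    messages = [
        {"role": "user", "content": "What's the weather like in San Francisco?"},
        {
            "role": "assistant",
            "content": "Let me check the current weather.",
            "tool_calls": [
                {
                    "id": "toolu_016U6G3kpxjHSiJLwVCrrScz",  # illustrative id
                    "type": "function",
                    "function": {
                        "name": "get_current_weather",
                        "arguments": '{"location": "San Francisco", "unit": "celsius"}',
                    },
                }
            ],
        },
        {
            "tool_call_id": "toolu_016U6G3kpxjHSiJLwVCrrScz",
            "role": "tool",
            "name": "get_current_weather",
            "content": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}',
        },
    ]

    response = litellm.completion(
        model="claude-3-haiku-20240307",
        messages=messages,
        temperature=0.2,
        drop_params=True,
    )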
litellm/llms/OpenAI/completion/utils.py (new file, 15 lines)

@@ -0,0 +1,15 @@
from collections.abc import Iterable
from typing import List


def is_tokens_or_list_of_tokens(value: List):
    # Check if it's a list of integers (tokens)
    if isinstance(value, list) and all(isinstance(item, int) for item in value):
        return True
    # Check if it's a list of lists of integers (list of tokens)
    if isinstance(value, list) and all(
        isinstance(item, list) and all(isinstance(i, int) for i in item)
        for item in value
    ):
        return True
    return False
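A quick sanity check of the helper above (illustrative values, not taken from the test suite):

    assert is_tokens_or_list_of_tokens([1, 2, 3]) is True            # a single list of tokens
    assert is_tokens_or_list_of_tokens([[1, 2], [3, 4]]) is True     # a list of token lists
    assert is_tokens_or_list_of_tokens(["hello", "world"]) is False  # plain strings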
@@ -4,7 +4,7 @@ import os
 import time
 import traceback
 import types
-from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union
+from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union, cast

 import httpx
 import openai
@@ -30,8 +30,10 @@ from litellm.utils import (

 from ...types.llms.openai import *
 from ..base import BaseLLM
+from ..prompt_templates.common_utils import convert_content_list_to_str
 from ..prompt_templates.factory import custom_prompt, prompt_factory
 from .common_utils import drop_params_from_unprocessable_entity_error
+from .completion.utils import is_tokens_or_list_of_tokens


 class OpenAIError(Exception):
@@ -420,6 +422,35 @@ class OpenAITextCompletionConfig:
             and v is not None
         }

+    def _transform_prompt(
+        self,
+        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
+    ) -> AllPromptValues:
+        if len(messages) == 1:  # base case
+            message_content = messages[0].get("content")
+            if (
+                message_content
+                and isinstance(message_content, list)
+                and is_tokens_or_list_of_tokens(message_content)
+            ):
+                openai_prompt: AllPromptValues = cast(AllPromptValues, message_content)
+            else:
+                openai_prompt = ""
+                content = convert_content_list_to_str(
+                    cast(AllMessageValues, messages[0])
+                )
+                openai_prompt += content
+        else:
+            prompt_str_list: List[str] = []
+            for m in messages:
+                try:  # expect list of int/list of list of int to be a 1 message array only.
+                    content = convert_content_list_to_str(cast(AllMessageValues, m))
+                    prompt_str_list.append(content)
+                except Exception as e:
+                    raise e
+            openai_prompt = prompt_str_list
+        return openai_prompt
+
     def convert_to_chat_model_response_object(
         self,
         response_object: Optional[TextCompletionResponse] = None,
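A small sketch of the two paths _transform_prompt takes (inputs are illustrative; the import path matches the one used elsewhere in this diff):

    from litellm.llms.OpenAI.openai import OpenAITextCompletionConfig

    config = OpenAITextCompletionConfig()

    # A single message whose content is already a token list is passed through untouched.
    config._transform_prompt([{"role": "user", "content": [1, 2, 3]}])
    # -> [1, 2, 3]

    # Plain-text messages are flattened into a list of prompt strings.
    config._transform_prompt(
        [{"role": "user", "content": "Hello"}, {"role": "user", "content": "World"}]
    )
    # -> ["Hello", "World"]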
@@ -459,6 +490,7 @@ class OpenAITextCompletionConfig:


 class OpenAIChatCompletion(BaseLLM):

     def __init__(self) -> None:
         super().__init__()

@@ -1466,7 +1498,9 @@ class OpenAIChatCompletion(BaseLLM):
         elif mode == "audio_transcription":
             # Get the current directory of the file being run
             pwd = os.path.dirname(os.path.realpath(__file__))
-            file_path = os.path.join(pwd, "../tests/gettysburg.wav")
+            file_path = os.path.join(
+                pwd, "../../../tests/gettysburg.wav"
+            )  # proxy address
             audio_file = open(file_path, "rb")
             completion = await client.audio.transcriptions.with_raw_response.create(
                 file=audio_file,
@@ -1502,6 +1536,8 @@ class OpenAIChatCompletion(BaseLLM):


 class OpenAITextCompletion(BaseLLM):
+    openai_text_completion_global_config = OpenAITextCompletionConfig()
+
     def __init__(self) -> None:
         super().__init__()

@@ -1518,7 +1554,7 @@ class OpenAITextCompletion(BaseLLM):
         model_response: ModelResponse,
         api_key: str,
         model: str,
-        messages: list,
+        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
         timeout: float,
         logging_obj: LiteLLMLoggingObj,
         optional_params: dict,
@@ -1531,24 +1567,18 @@ class OpenAITextCompletion(BaseLLM):
         organization: Optional[str] = None,
         headers: Optional[dict] = None,
     ):
         super().completion()
         try:
             if headers is None:
                 headers = self.validate_environment(api_key=api_key)
             if model is None or messages is None:
                 raise OpenAIError(status_code=422, message="Missing model or messages")

-            if (
-                len(messages) > 0
-                and "content" in messages[0]
-                and isinstance(messages[0]["content"], list)
-            ):
-                prompt = messages[0]["content"]
-            else:
-                prompt = [message["content"] for message in messages]  # type: ignore

             # don't send max retries to the api, if set

+            prompt = self.openai_text_completion_global_config._transform_prompt(
+                messages
+            )

             data = {"model": model, "prompt": prompt, **optional_params}
             max_retries = data.pop("max_retries", 2)
             ## LOGGING
@@ -551,6 +551,8 @@ class AnthropicChatCompletion(BaseLLM):
             error_response = getattr(e, "response", None)
             if error_headers is None and error_response:
                 error_headers = getattr(error_response, "headers", None)
+            if error_response and hasattr(error_response, "text"):
+                error_text = getattr(error_response, "text", error_text)
             raise AnthropicError(
                 message=error_text,
                 status_code=status_code,
@@ -6,11 +6,17 @@ from litellm.llms.prompt_templates.factory import anthropic_messages_pt
 from litellm.types.llms.anthropic import (
     AnthropicMessageRequestBase,
     AnthropicMessagesRequest,
     AnthropicMessagesTool,
     AnthropicMessagesToolChoice,
     AnthropicSystemMessageContent,
 )
-from litellm.types.llms.openai import AllMessageValues, ChatCompletionSystemMessage
-from litellm.utils import has_tool_call_blocks
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    ChatCompletionSystemMessage,
+    ChatCompletionToolParam,
+    ChatCompletionToolParamFunctionChunk,
+)
+from litellm.utils import add_dummy_tool, has_tool_call_blocks

 from ..common_utils import AnthropicError

@@ -146,11 +152,16 @@ class AnthropicConfig:
             and messages is not None
             and has_tool_call_blocks(messages)
         ):
-            raise litellm.UnsupportedParamsError(
-                message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
-                model="",
-                llm_provider="anthropic",
-            )
+            if litellm.modify_params:
+                optional_params["tools"] = add_dummy_tool(
+                    custom_llm_provider="bedrock_converse"
+                )
+            else:
+                raise litellm.UnsupportedParamsError(
+                    message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param OR set `litellm.modify_params = True` // `litellm_settings::modify_params: True` to add dummy tool to the request.",
+                    model="",
+                    llm_provider="anthropic",
+                )

         return optional_params

@@ -266,18 +277,23 @@ class AnthropicConfig:
         if "anthropic-beta" not in headers:
             # default to v1 of "anthropic-beta"
             headers["anthropic-beta"] = "tools-2024-05-16"

         anthropic_tools = []
         for tool in optional_params["tools"]:
             if "input_schema" in tool:  # assume in anthropic format
                 anthropic_tools.append(tool)
             else:  # assume openai tool call
                 new_tool = tool["function"]
-                new_tool["input_schema"] = new_tool.pop("parameters")  # rename key
+                parameters = new_tool.pop(
+                    "parameters",
+                    {
+                        "type": "object",
+                        "properties": {},
+                    },
+                )
+                new_tool["input_schema"] = parameters  # rename key
                 if "cache_control" in tool:
                     new_tool["cache_control"] = tool["cache_control"]
                 anthropic_tools.append(new_tool)

         optional_params["tools"] = anthropic_tools

         data = {
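The effect of the new empty-schema fallback, sketched with the no-parameters tool shape that test_anthropic_function_call_with_no_schema (added later in this diff) exercises:

    tool = {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in New York",
            # no "parameters" key: previously new_tool.pop("parameters") raised
            # a KeyError; it now falls back to an empty object schema
        },
    }
    # After the mapping above, the Anthropic-format tool becomes roughly:
    # {"name": "get_current_weather",
    #  "description": "Get the current weather in New York",
    #  "input_schema": {"type": "object", "properties": {}}}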
@@ -26,6 +26,7 @@ class AzureAIStudioConfig(OpenAIConfig):

     def _transform_messages(self, messages: List[AllMessageValues]) -> List:
         for message in messages:
-            message = convert_content_list_to_str(message=message)
+            texts = convert_content_list_to_str(message=message)
+            if texts:
+                message["content"] = texts
         return messages

@@ -22,7 +22,7 @@ from litellm.types.llms.openai import (
     ChatCompletionToolParamFunctionChunk,
 )
 from litellm.types.utils import ModelResponse, Usage
-from litellm.utils import CustomStreamWrapper, has_tool_call_blocks
+from litellm.utils import CustomStreamWrapper, add_dummy_tool, has_tool_call_blocks

 from ...prompt_templates.factory import _bedrock_converse_messages_pt, _bedrock_tools_pt
 from ..common_utils import BedrockError, get_bedrock_tool_name
@@ -213,11 +213,16 @@ class AmazonConverseConfig:
             and messages is not None
             and has_tool_call_blocks(messages)
         ):
-            raise litellm.UnsupportedParamsError(
-                message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
-                model="",
-                llm_provider="anthropic",
-            )
+            if litellm.modify_params:
+                optional_params["tools"] = add_dummy_tool(
+                    custom_llm_provider="bedrock_converse"
+                )
+            else:
+                raise litellm.UnsupportedParamsError(
+                    message="Bedrock doesn't support tool calling without `tools=` param specified. Pass `tools=` param OR set `litellm.modify_params = True` // `litellm_settings::modify_params: True` to add dummy tool to the request.",
+                    model="",
+                    llm_provider="bedrock",
+                )
         return optional_params

     def _transform_request(
@@ -7,7 +7,7 @@ from typing import List
 from litellm.types.llms.openai import AllMessageValues


-def convert_content_list_to_str(message: AllMessageValues) -> AllMessageValues:
+def convert_content_list_to_str(message: AllMessageValues) -> str:
     """
     - handles scenario where content is list and not string
     - content list is just text, and no images

@@ -26,7 +26,4 @@ def convert_content_list_to_str(message: AllMessageValues) -> AllMessageValues:
     elif message_content is not None and isinstance(message_content, str):
         texts = message_content

-    if texts:
-        message["content"] = texts
-
-    return message
+    return texts
@@ -2554,7 +2554,10 @@ def _bedrock_tools_pt(tools: List) -> List[BedrockToolBlock]:
     """
     tool_block_list: List[BedrockToolBlock] = []
     for tool in tools:
-        parameters = tool.get("function", {}).get("parameters", None)
+        parameters = tool.get("function", {}).get("parameters", {
+            "type": "object",
+            "properties": {}
+        })
         name = tool.get("function", {}).get("name", "")

         # related issue: https://github.com/BerriAI/litellm/issues/5007
@@ -1,7 +0,0 @@
-"""
-Support for OpenAI's `/v1/completions` endpoint.
-
-Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.
-
-Docs: https://docs.together.ai/reference/completions-1
-"""
litellm/llms/together_ai/completion/handler.py (new file, 61 lines)

@@ -0,0 +1,61 @@
"""
Support for OpenAI's `/v1/completions` endpoint.

Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.

Docs: https://docs.together.ai/reference/completions-1
"""

from typing import Any, Callable, List, Optional, Union

from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.types.llms.openai import AllMessageValues, OpenAITextCompletionUserMessage
from litellm.utils import ModelResponse

from ...OpenAI.openai import OpenAITextCompletion
from .transformation import TogetherAITextCompletionConfig

together_ai_text_completion_global_config = TogetherAITextCompletionConfig()


class TogetherAITextCompletion(OpenAITextCompletion):

    def completion(
        self,
        model_response: ModelResponse,
        api_key: str,
        model: str,
        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
        timeout: float,
        logging_obj: Logging,
        optional_params: dict,
        print_verbose: Optional[Callable[..., Any]] = None,
        api_base: Optional[str] = None,
        acompletion: bool = False,
        litellm_params=None,
        logger_fn=None,
        client=None,
        organization: Optional[str] = None,
        headers: Optional[dict] = None,
    ):
        prompt = together_ai_text_completion_global_config._transform_prompt(messages)

        message = OpenAITextCompletionUserMessage(role="user", content=prompt)
        new_messages = [message]
        return super().completion(
            model_response=model_response,
            api_key=api_key,
            model=model,
            messages=new_messages,
            timeout=timeout,
            logging_obj=logging_obj,
            optional_params=optional_params,
            print_verbose=print_verbose,
            api_base=api_base,
            acompletion=acompletion,
            litellm_params=litellm_params,
            logger_fn=logger_fn,
            client=client,
            organization=organization,
            headers=headers,
        )
litellm/llms/together_ai/completion/transformation.py (new file, 46 lines)

@@ -0,0 +1,46 @@
"""
Translates calls from OpenAI's `/v1/completions` endpoint to TogetherAI's `/v1/completions` endpoint.

Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.

Docs: https://docs.together.ai/reference/completions-1
"""

from typing import List, Union, cast

from litellm.llms.OpenAI.completion.utils import is_tokens_or_list_of_tokens
from litellm.types.llms.openai import (
    AllMessageValues,
    AllPromptValues,
    OpenAITextCompletionUserMessage,
)

from ...OpenAI.openai import OpenAITextCompletionConfig


class TogetherAITextCompletionConfig(OpenAITextCompletionConfig):
    def _transform_prompt(
        self,
        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
    ) -> AllPromptValues:
        """
        TogetherAI expects a string prompt.
        """
        initial_prompt: AllPromptValues = super()._transform_prompt(messages)
        ## TOGETHER AI SPECIFIC VALIDATION ##
        if isinstance(initial_prompt, list) and is_tokens_or_list_of_tokens(
            value=initial_prompt
        ):
            raise ValueError("TogetherAI does not support integers as input")
        if (
            isinstance(initial_prompt, list)
            and len(initial_prompt) == 1
            and isinstance(initial_prompt[0], str)
        ):
            together_prompt = initial_prompt[0]
        elif isinstance(initial_prompt, list):
            raise ValueError("TogetherAI does not support multiple prompts.")
        else:
            together_prompt = cast(str, initial_prompt)

        return together_prompt
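A rough illustration of the Together AI specific validation above (inputs are made up):

    config = TogetherAITextCompletionConfig()

    config._transform_prompt([{"role": "user", "content": "Say hi"}])
    # -> "Say hi" (a single text message collapses to a plain string)

    config._transform_prompt([{"role": "user", "content": [1, 2, 3]}])
    # -> ValueError: TogetherAI does not support integers as input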
@@ -112,6 +112,7 @@ from .llms.prompt_templates.factory import (
 )
 from .llms.sagemaker.sagemaker import SagemakerLLM
 from .llms.text_completion_codestral import CodestralTextCompletion
+from .llms.together_ai.completion.handler import TogetherAITextCompletion
 from .llms.triton import TritonChatCompletion
 from .llms.vertex_ai_and_google_ai_studio import (
     vertex_ai_anthropic,

@@ -168,6 +169,7 @@ openai_o1_chat_completions = OpenAIO1ChatCompletion()
 openai_audio_transcriptions = OpenAIAudioTranscription()
 databricks_chat_completions = DatabricksChatCompletion()
 groq_chat_completions = GroqChatCompletion()
+together_ai_text_completions = TogetherAITextCompletion()
 azure_ai_chat_completions = AzureAIChatCompletion()
 azure_ai_embedding = AzureAIEmbedding()
 anthropic_chat_completions = AnthropicChatCompletion()
@@ -1285,21 +1287,38 @@ def completion(
                 prompt = " ".join([message["content"] for message in messages])  # type: ignore

             ## COMPLETION CALL
-            _response = openai_text_completions.completion(
-                model=model,
-                messages=messages,
-                model_response=model_response,
-                print_verbose=print_verbose,
-                api_key=api_key,
-                api_base=api_base,
-                acompletion=acompletion,
-                client=client,  # pass AsyncOpenAI, OpenAI client
-                logging_obj=logging,
-                optional_params=optional_params,
-                litellm_params=litellm_params,
-                logger_fn=logger_fn,
-                timeout=timeout,  # type: ignore
-            )
+            if custom_llm_provider == "together_ai":
+                _response = together_ai_text_completions.completion(
+                    model=model,
+                    messages=messages,
+                    model_response=model_response,
+                    print_verbose=print_verbose,
+                    api_key=api_key,
+                    api_base=api_base,
+                    acompletion=acompletion,
+                    client=client,  # pass AsyncOpenAI, OpenAI client
+                    logging_obj=logging,
+                    optional_params=optional_params,
+                    litellm_params=litellm_params,
+                    logger_fn=logger_fn,
+                    timeout=timeout,  # type: ignore
+                )
+            else:
+                _response = openai_text_completions.completion(
+                    model=model,
+                    messages=messages,
+                    model_response=model_response,
+                    print_verbose=print_verbose,
+                    api_key=api_key,
+                    api_base=api_base,
+                    acompletion=acompletion,
+                    client=client,  # pass AsyncOpenAI, OpenAI client
+                    logging_obj=logging,
+                    optional_params=optional_params,
+                    litellm_params=litellm_params,
+                    logger_fn=logger_fn,
+                    timeout=timeout,  # type: ignore
+                )

             if (
                 optional_params.get("stream", False) is False
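With this routing in place, Together AI text-completion calls go through the new handler. A minimal usage sketch (the model name is illustrative and a TOGETHERAI_API_KEY is assumed in the environment):

    import litellm

    response = litellm.text_completion(
        model="together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
        prompt="What is the capital of France?",
        max_tokens=10,
    )
    print(response.choices[0].text)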
@@ -1,61 +1,7 @@
 model_list:
-  - model_name: fake-claude-endpoint
+  - model_name: whisper
     litellm_params:
-      model: anthropic.claude-3-sonnet-20240229-v1:0
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-      aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
-      aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
-  - model_name: gemini-vision
-    litellm_params:
-      model: vertex_ai/gemini-1.0-pro-vision-001
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
-      vertex_project: "adroit-crow-413218"
-      vertex_location: "us-central1"
-  - model_name: fake-azure-endpoint
-    litellm_params:
-      model: openai/429
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-  - model_name: fake-openai-endpoint
-    litellm_params:
-      model: gpt-3.5-turbo
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-  - model_name: o1-preview
-    litellm_params:
-      model: o1-preview
-  - model_name: rerank-english-v3.0
-    litellm_params:
-      model: cohere/rerank-english-v3.0
-      api_key: os.environ/COHERE_API_KEY
-  - model_name: azure-rerank-english-v3.0
-    litellm_params:
-      model: azure_ai/rerank-english-v3.0
-      api_base: os.environ/AZURE_AI_COHERE_API_BASE
-      api_key: os.environ/AZURE_AI_COHERE_API_KEY
-  - model_name: "databricks/*"
-    litellm_params:
-      model: "databricks/*"
-      api_key: os.environ/DATABRICKS_API_KEY
-      api_base: os.environ/DATABRICKS_API_BASE
-  - model_name: "anthropic/*"
-    litellm_params:
-      model: "anthropic/*"
-  - model_name: "*"
-    litellm_params:
-      model: "openai/*"
-  - model_name: "fireworks_ai/*"
-    litellm_params:
-      model: "fireworks_ai/*"
-      configurable_clientside_auth_params: ["api_base"]
-  - model_name: "gemini-flash-experimental"
-    litellm_params:
-      model: "vertex_ai/gemini-flash-experimental"
-
-litellm_settings:
-  json_logs: true
-  cache: true
-  cache_params:
-    type: "redis"
-    # namespace: "litellm_caching"
-    ttl: 900
-  callbacks: ["batch_redis_requests"]
+      model: whisper-1
+      api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      mode: audio_transcription
@@ -673,6 +673,9 @@ def run_server(

         import litellm

+        # DO NOT DELETE - enables global variables to work across files
+        from litellm.proxy.proxy_server import app  # noqa
+
         if run_gunicorn is False and run_hypercorn is False:
             if ssl_certfile_path is not None and ssl_keyfile_path is not None:
                 print(  # noqa
@@ -347,12 +347,20 @@ OpenAIMessageContent = Union[
     str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
 ]

+# The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.
+AllPromptValues = Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None]
+
+
 class OpenAIChatCompletionUserMessage(TypedDict):
     role: Literal["user"]
     content: OpenAIMessageContent


+class OpenAITextCompletionUserMessage(TypedDict):
+    role: Literal["user"]
+    content: AllPromptValues
+
+
 class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):
     cache_control: ChatCompletionCachedContent

@@ -80,6 +80,7 @@ from litellm.types.llms.openai import (
     AllMessageValues,
     ChatCompletionNamedToolChoiceParam,
     ChatCompletionToolParam,
     ChatCompletionToolParamFunctionChunk,
 )
 from litellm.types.utils import FileTypes  # type: ignore
 from litellm.types.utils import (
@@ -3360,7 +3361,8 @@ def get_optional_params(
         supported_params = get_supported_openai_params(
             model=model, custom_llm_provider=custom_llm_provider
         )
-        if model in litellm.BEDROCK_CONVERSE_MODELS:
+        base_model = litellm.AmazonConverseConfig()._get_base_model(model)
+        if base_model in litellm.BEDROCK_CONVERSE_MODELS:
             _check_valid_arg(supported_params=supported_params)
             optional_params = litellm.AmazonConverseConfig().map_openai_params(
                 model=model,
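Why the base-model lookup matters: cross-region inference profiles such as the us.* models are not listed in litellm.BEDROCK_CONVERSE_MODELS themselves, but their base models are. A sketch, assuming _get_base_model strips the region prefix (which is what the new unit test below relies on):

    import litellm

    base_model = litellm.AmazonConverseConfig()._get_base_model(
        "us.anthropic.claude-3-haiku-20240307-v1:0"
    )
    # -> "anthropic.claude-3-haiku-20240307-v1:0", which is in
    # litellm.BEDROCK_CONVERSE_MODELS, so the Converse param mapping
    # (maxTokens, temperature, ...) is applied instead of being skipped.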
@@ -9255,3 +9257,24 @@ def process_response_headers(response_headers: Union[httpx.Headers, dict]) -> di
         **additional_headers,
     }
     return additional_headers
+
+
+def add_dummy_tool(custom_llm_provider: str) -> List[ChatCompletionToolParam]:
+    """
+    Prevent Anthropic from raising error when tool_use block exists but no tools are provided.
+
+    Relevent Issues: https://github.com/BerriAI/litellm/issues/5388, https://github.com/BerriAI/litellm/issues/5747
+    """
+    return [
+        ChatCompletionToolParam(
+            type="function",
+            function=ChatCompletionToolParamFunctionChunk(
+                name="dummy-tool",
+                description="This is a dummy tool call",  # provided to satisfy bedrock constraint.
+                parameters={
+                    "type": "object",
+                    "properties": {},
+                },
+            ),
+        )
+    ]
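The injected tool is inert by design; calling the helper returns a single no-op tool definition:

    add_dummy_tool(custom_llm_provider="bedrock_converse")
    # -> [{"type": "function",
    #      "function": {"name": "dummy-tool",
    #                   "description": "This is a dummy tool call",
    #                   "parameters": {"type": "object", "properties": {}}}}]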
@@ -75,6 +75,24 @@ def test_bedrock_optional_params_embeddings():
     assert len(optional_params) == 0


+@pytest.mark.parametrize(
+    "model",
+    [
+        "us.anthropic.claude-3-haiku-20240307-v1:0",
+        "us.meta.llama3-2-11b-instruct-v1:0",
+        "anthropic.claude-3-haiku-20240307-v1:0",
+    ],
+)
+def test_bedrock_optional_params_completions(model):
+    litellm.drop_params = True
+    optional_params = get_optional_params(
+        model=model, max_tokens=10, temperature=0.1, custom_llm_provider="bedrock"
+    )
+    print(f"optional_params: {optional_params}")
+    assert len(optional_params) == 3
+    assert optional_params == {"maxTokens": 10, "stream": False, "temperature": 0.1}
+
+
 @pytest.mark.parametrize(
     "model, expected_dimensions, dimensions_kwarg",
     [
tests/local_testing/log.txt (new file, 104 lines)

@@ -0,0 +1,104 @@
============================= test session starts ==============================
platform darwin -- Python 3.11.4, pytest-8.3.2, pluggy-1.5.0 -- /Users/krrishdholakia/Documents/litellm/myenv/bin/python3.11
cachedir: .pytest_cache
rootdir: /Users/krrishdholakia/Documents/litellm
configfile: pyproject.toml
plugins: asyncio-0.23.8, respx-0.21.1, anyio-4.6.0
asyncio: mode=Mode.STRICT
collecting ... collected 1 item

test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307] <module 'litellm' from '/Users/krrishdholakia/Documents/litellm/litellm/__init__.py'>


Request to litellm:
litellm.completion(model='claude-3-haiku-20240307', messages=[{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}], tools=[{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], tool_choice='auto')


SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}}
optional_params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}}
SENT optional_params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}, 'max_tokens': 4096}
tool: {'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}


POST Request Sent from LiteLLM:
curl -X POST \
https://api.anthropic.com/v1/messages \
-H 'accept: *****' -H 'anthropic-version: *****' -H 'content-type: *****' -H 'x-api-key: sk-ant-api03-bJf1M8qp-JDptRcZRE5ve5efAfSIaL5u-SZ9vItIkvuFcV5cUsd********************************************' -H 'anthropic-beta: *****' \
-d '{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}]}], 'tools': [{'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'input_schema': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}], 'tool_choice': {'type': 'auto'}, 'max_tokens': 4096, 'model': 'claude-3-haiku-20240307'}'


_is_function_call: False
RAW RESPONSE:
{"id":"msg_01HRugqzL4WmcxMmbvDheTph","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"Okay, let's check the current weather in those three cities:"},{"type":"tool_use","id":"toolu_016U6G3kpxjHSiJLwVCrrScz","name":"get_current_weather","input":{"location":"San Francisco","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":379,"output_tokens":87}}


raw model_response: {"id":"msg_01HRugqzL4WmcxMmbvDheTph","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"Okay, let's check the current weather in those three cities:"},{"type":"tool_use","id":"toolu_016U6G3kpxjHSiJLwVCrrScz","name":"get_current_weather","input":{"location":"San Francisco","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":379,"output_tokens":87}}
Logging Details LiteLLM-Success Call: Cache_hit=None
Looking up model=claude-3-haiku-20240307 in model_cost_map
Looking up model=claude-3-haiku-20240307 in model_cost_map
Response
ModelResponse(id='chatcmpl-7222f6c2-962a-4776-8639-576723466cb7', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None))], created=1727897483, model='claude-3-haiku-20240307', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=87, prompt_tokens=379, total_tokens=466, completion_tokens_details=None))
length of tool calls 1
Expecting there to be 3 tool calls
tool_calls: [ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')]
Response message
Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None)
messages: [{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}, Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None), {'tool_call_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}]


Request to litellm:
litellm.completion(model='claude-3-haiku-20240307', messages=[{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}, Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None), {'tool_call_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}], temperature=0.2, seed=22, drop_params=True)


SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}]}
optional_params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}]}
SENT optional_params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}], 'max_tokens': 4096}
tool: {'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}


POST Request Sent from LiteLLM:
curl -X POST \
https://api.anthropic.com/v1/messages \
-H 'accept: *****' -H 'anthropic-version: *****' -H 'content-type: *****' -H 'x-api-key: sk-ant-api03-bJf1M8qp-JDptRcZRE5ve5efAfSIaL5u-SZ9vItIkvuFcV5cUsd********************************************' -H 'anthropic-beta: *****' \
-d '{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}]}, {'role': 'assistant', 'content': [{'type': 'tool_use', 'id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'name': 'get_current_weather', 'input': {'location': 'San Francisco', 'unit': 'celsius'}}]}, {'role': 'user', 'content': [{'type': 'tool_result', 'tool_use_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}]}], 'temperature': 0.2, 'tools': [{'name': 'dummy-tool', 'description': '', 'input_schema': {'type': 'object', 'properties': {}}}], 'max_tokens': 4096, 'model': 'claude-3-haiku-20240307'}'


_is_function_call: False
RAW RESPONSE:
{"id":"msg_01Wp8NVScugz6yAGsmB5trpZ","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"The current weather in San Francisco is 72°F (22°C)."},{"type":"tool_use","id":"toolu_01HTXEYDX4MspM76STtJqs1n","name":"get_current_weather","input":{"location":"Tokyo","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":426,"output_tokens":90}}


raw model_response: {"id":"msg_01Wp8NVScugz6yAGsmB5trpZ","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"The current weather in San Francisco is 72°F (22°C)."},{"type":"tool_use","id":"toolu_01HTXEYDX4MspM76STtJqs1n","name":"get_current_weather","input":{"location":"Tokyo","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":426,"output_tokens":90}}
Logging Details LiteLLM-Success Call: Cache_hit=None
Looking up model=claude-3-haiku-20240307 in model_cost_map
Looking up model=claude-3-haiku-20240307 in model_cost_map
second response
ModelResponse(id='chatcmpl-c4ed5c25-ba7c-49e5-a6be-5720ab25fff0', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content='The current weather in San Francisco is 72°F (22°C).', role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "Tokyo", "unit": "celsius"}', name='get_current_weather'), id='toolu_01HTXEYDX4MspM76STtJqs1n', type='function')], function_call=None))], created=1727897484, model='claude-3-haiku-20240307', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=90, prompt_tokens=426, total_tokens=516, completion_tokens_details=None))
PASSED

=============================== warnings summary ===============================
../../myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284
  /Users/krrishdholakia/Documents/litellm/myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
    warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning)

../../litellm/utils.py:17
  /Users/krrishdholakia/Documents/litellm/litellm/utils.py:17: DeprecationWarning: 'imghdr' is deprecated and slated for removal in Python 3.13
    import imghdr

../../litellm/utils.py:124
  /Users/krrishdholakia/Documents/litellm/litellm/utils.py:124: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
    with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:

test_function_calling.py:56
  /Users/krrishdholakia/Documents/litellm/tests/local_testing/test_function_calling.py:56: PytestUnknownMarkWarning: Unknown pytest.mark.flaky - is this a typo? You can register custom marks to avoid this warning - for details, see https://docs.pytest.org/en/stable/how-to/mark.html
    @pytest.mark.flaky(retries=3, delay=1)

tests/local_testing/test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307]
tests/local_testing/test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307]
  /Users/krrishdholakia/Documents/litellm/myenv/lib/python3.11/site-packages/httpx/_content.py:202: DeprecationWarning: Use 'content=<...>' to upload raw bytes/text content.
    warnings.warn(message, DeprecationWarning)

-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
======================== 1 passed, 6 warnings in 1.89s =========================
@@ -47,16 +47,17 @@ def get_current_weather(location, unit="fahrenheit"):
     [
         "gpt-3.5-turbo-1106",
         # "mistral/mistral-large-latest",
-        # "claude-3-haiku-20240307",
-        # "gemini/gemini-1.5-pro",
+        "claude-3-haiku-20240307",
+        "gemini/gemini-1.5-pro",
         "anthropic.claude-3-sonnet-20240229-v1:0",
-        "groq/llama3-8b-8192",
+        # "groq/llama3-8b-8192",
     ],
 )
 @pytest.mark.flaky(retries=3, delay=1)
 def test_aaparallel_function_call(model):
     try:
         litellm.set_verbose = True
+        litellm.modify_params = True
         # Step 1: send the conversation and available functions to the model
         messages = [
             {
@@ -97,7 +98,6 @@ def test_aaparallel_function_call(model):
         response_message = response.choices[0].message
         tool_calls = response_message.tool_calls

         print("length of tool calls", len(tool_calls))
         print("Expecting there to be 3 tool calls")
         assert (
             len(tool_calls) > 0
@@ -141,7 +141,7 @@ def test_aaparallel_function_call(model):
             messages=messages,
             temperature=0.2,
             seed=22,
-            tools=tools,
+            # tools=tools,
             drop_params=True,
         )  # get a new response from the model where it can see the function response
         print("second response\n", second_response)
@@ -445,3 +445,29 @@ def test_groq_parallel_function_call():
         print("second response\n", second_response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "anthropic.claude-3-sonnet-20240229-v1:0",
+        "claude-3-haiku-20240307",
+    ],
+)
+def test_anthropic_function_call_with_no_schema(model):
+    """
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/6012
+    """
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_current_weather",
+                "description": "Get the current weather in New York",
+            },
+        }
+    ]
+    messages = [
+        {"role": "user", "content": "What is the current temperature in New York?"}
+    ]
+    completion(model=model, messages=messages, tools=tools, tool_choice="auto")
@@ -4019,7 +4019,7 @@ def test_async_text_completion():
     asyncio.run(test_get_response())


-@pytest.mark.skip(reason="Skip flaky tgai test")
+@pytest.mark.flaky(retries=6, delay=1)
 def test_async_text_completion_together_ai():
     litellm.set_verbose = True
     print("test_async_text_completion")

@@ -4032,6 +4032,8 @@ def test_async_text_completion_together_ai():
             max_tokens=10,
         )
         print(f"response: {response}")
+    except litellm.RateLimitError as e:
+        print(e)
     except litellm.Timeout as e:
         print(e)
     except Exception as e: