forked from phoenix/litellm-mirror
LiteLLM Minor Fixes & Improvements (09/27/2024) (#5938)
* fix(langfuse.py): prevent double logging requester metadata
  Fixes https://github.com/BerriAI/litellm/issues/5935
* build(model_prices_and_context_window.json): add mistral pixtral cost tracking
  Closes https://github.com/BerriAI/litellm/issues/5837
* handle streaming for azure ai studio error
* [Perf Proxy] parallel request limiter - use one cache update call (#5932)
* fix parallel request limiter - use one cache update call
* ci/cd run again
* run ci/cd again
* use docker username password
* fix config.yml
* fix config
* fix config
* fix config.yml
* ci/cd run again
* use correct typing for batch set cache
* fix async_set_cache_pipeline
* fix only check user id tpm / rpm limits when limits set
* fix test_openai_azure_embedding_with_oidc_and_cf
* fix(groq/chat/transformation.py): Fixes https://github.com/BerriAI/litellm/issues/5839
* feat(anthropic/chat.py): return 'retry-after' headers from anthropic
  Fixes https://github.com/BerriAI/litellm/issues/4387
* feat: raise validation error if message has tool calls without passing `tools` param for anthropic/bedrock
  Closes https://github.com/BerriAI/litellm/issues/5747
* [Feature]#5940, add max_workers parameter for the batch_completion (#5947)
* handle streaming for azure ai studio error
* bump: version 1.48.2 → 1.48.3
* docs(data_security.md): add legal/compliance faq's
  Make it easier for companies to use litellm
* docs: resolve imports
* [Feature]#5940, add max_workers parameter for the batch_completion method

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com>
Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>

* fix(converse_transformation.py): fix default message value
* fix(utils.py): fix get_model_info to handle finetuned models
  Fixes issue for standard logging payloads, where model_map_value was null for finetuned openai models
* fix(litellm_pre_call_utils.py): add debug statement for data sent after updating with team/key callbacks
* fix: fix linting errors
* fix(anthropic/chat/handler.py): fix cache creation input tokens
* fix(exception_mapping_utils.py): fix missing imports
* fix(anthropic/chat/handler.py): fix usage block translation
* test: fix test
* test: fix tests
* style(types/utils.py): trigger new build
* test: fix test

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Jose Alberto Arango Sanchez <jose.arangos@udea.edu.co>
Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>
parent 754981a78f
commit 0b30e212da
35 changed files with 3657 additions and 2820 deletions
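A quick usage sketch for the new max_workers option mentioned in the commit message (a minimal, hedged example: the parameter name comes from the message above; the exact signature, default, and model name below are assumptions):

    # illustrative only - batch_completion fans requests out over a thread pool;
    # max_workers (added in this commit) is assumed to cap that pool size
    import litellm

    responses = litellm.batch_completion(
        model="gpt-3.5-turbo",  # any configured model
        messages=[
            [{"role": "user", "content": f"question {i}"}] for i in range(10)
        ],
        max_workers=4,
    )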
@@ -89,6 +89,7 @@ retry = True
 ### AUTH ###
 api_key: Optional[str] = None
 openai_key: Optional[str] = None
+groq_key: Optional[str] = None
 databricks_key: Optional[str] = None
 azure_key: Optional[str] = None
 anthropic_key: Optional[str] = None
@@ -892,7 +893,11 @@ ALL_LITELLM_RESPONSE_TYPES = [
 from .types.utils import ImageObject
 from .llms.custom_llm import CustomLLM
 from .llms.huggingface_restapi import HuggingfaceConfig
-from .llms.anthropic.chat import AnthropicConfig
+from .llms.anthropic.chat.handler import AnthropicConfig
+from .llms.anthropic.experimental_pass_through.transformation import (
+    AnthropicExperimentalPassThroughConfig,
+)
+from .llms.groq.stt.transformation import GroqSTTConfig
 from .llms.anthropic.completion import AnthropicTextConfig
 from .llms.databricks.chat import DatabricksConfig, DatabricksEmbeddingConfig
 from .llms.predibase import PredibaseConfig
@@ -962,8 +967,8 @@ from .llms.OpenAI.openai import (
     OpenAITextCompletionConfig,
     MistralEmbeddingConfig,
     DeepInfraConfig,
-    GroqConfig,
 )
+from .llms.groq.chat.transformation import GroqChatConfig
 from .llms.azure_ai.chat.transformation import AzureAIStudioConfig
 from .llms.mistral.mistral_chat_transformation import MistralConfig
 from .llms.OpenAI.chat.o1_transformation import (
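For orientation, the Groq configuration no longer lives in the OpenAI module; after this commit the chat and speech-to-text configs are imported from their own package (paths taken from the two added import lines above):

    # import paths as introduced in this commit (see the __init__.py hunk above)
    from litellm.llms.groq.chat.transformation import GroqChatConfig
    from litellm.llms.groq.stt.transformation import GroqSTTConfig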
@@ -34,7 +34,7 @@ class AnthropicAdapter(CustomLogger):
         """
         request_body = AnthropicMessagesRequest(**kwargs)  # type: ignore

-        translated_body = litellm.AnthropicConfig().translate_anthropic_to_openai(
+        translated_body = litellm.AnthropicExperimentalPassThroughConfig().translate_anthropic_to_openai(
             anthropic_message_request=request_body
         )

@@ -44,7 +44,7 @@ class AnthropicAdapter(CustomLogger):
         self, response: litellm.ModelResponse
     ) -> Optional[AnthropicResponse]:

-        return litellm.AnthropicConfig().translate_openai_response_to_anthropic(
+        return litellm.AnthropicExperimentalPassThroughConfig().translate_openai_response_to_anthropic(
             response=response
         )

@@ -99,7 +99,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
             if chunk == "None" or chunk is None:
                 raise Exception

-            processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic(
+            processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic(
                 response=chunk
             )
             if (
@@ -163,7 +163,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
             async for chunk in self.completion_stream:
                 if chunk == "None" or chunk is None:
                     raise Exception
-                processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic(
+                processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic(
                     response=chunk
                 )
                 if (
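Since the adapter now calls the renamed config class, a minimal round-trip sketch (class and method names come from the hunks above; the request body below is illustrative):

    # hedged sketch - translate an Anthropic /v1/messages style request into the
    # OpenAI chat format via the renamed pass-through config
    import litellm
    from litellm.types.llms.anthropic import AnthropicMessagesRequest

    request_body = AnthropicMessagesRequest(  # type: ignore
        model="claude-3-haiku-20240307",      # illustrative model name
        max_tokens=256,
        messages=[{"role": "user", "content": "Hello"}],
    )
    openai_request = litellm.AnthropicExperimentalPassThroughConfig().translate_anthropic_to_openai(
        anthropic_message_request=request_body
    )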
@@ -601,7 +601,7 @@ class LangFuseLogger:
                 "input": input if not mask_input else "redacted-by-litellm",
                 "output": output if not mask_output else "redacted-by-litellm",
                 "usage": usage,
-                "metadata": clean_metadata,
+                "metadata": log_requester_metadata(clean_metadata),
                 "level": level,
                 "version": clean_metadata.pop("version", None),
             }
@@ -768,3 +768,15 @@ def log_provider_specific_information_as_span(
             name="vertex_ai_grounding_metadata",
             input=vertex_ai_grounding_metadata,
         )
+
+
+def log_requester_metadata(clean_metadata: dict):
+    returned_metadata = {}
+    requester_metadata = clean_metadata.get("requester_metadata") or {}
+    for k, v in clean_metadata.items():
+        if k not in requester_metadata:
+            returned_metadata[k] = v
+
+    returned_metadata.update({"requester_metadata": requester_metadata})
+
+    return returned_metadata
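For reference, a small self-contained sketch of what the new helper does (the function body is copied from the hunk above; the sample metadata is made up for illustration):

    # illustrative only - keys the caller already sent under "requester_metadata"
    # are no longer duplicated at the top level of the Langfuse trace metadata
    def log_requester_metadata(clean_metadata: dict):
        returned_metadata = {}
        requester_metadata = clean_metadata.get("requester_metadata") or {}
        for k, v in clean_metadata.items():
            if k not in requester_metadata:
                returned_metadata[k] = v
        returned_metadata.update({"requester_metadata": requester_metadata})
        return returned_metadata

    clean_metadata = {
        "user_api_key_alias": "my-key",
        "trace_id": "abc-123",
        "requester_metadata": {"trace_id": "abc-123"},
    }
    print(log_requester_metadata(clean_metadata))
    # {'user_api_key_alias': 'my-key', 'requester_metadata': {'trace_id': 'abc-123'}}
    # "trace_id" is reported once, which is what prevents the double logging in issue #5935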
(File diff suppressed because it is too large; not shown here.)
@@ -1015,9 +1015,8 @@ class Logging:
                        != langFuseLogger.public_key
                    )
                    or (
-                       self.langfuse_public_key is not None
-                       and self.langfuse_public_key
-                       != langFuseLogger.public_key
+                       self.langfuse_secret is not None
+                       and self.langfuse_secret != langFuseLogger.secret_key
                    )
                    or (
                        self.langfuse_host is not None
@@ -1045,7 +1044,6 @@ class Logging:
                    service_name="langfuse",
                    logging_obj=temp_langfuse_logger,
                )
-
                if temp_langfuse_logger is not None:
                    _response = temp_langfuse_logger.log_event(
                        kwargs=kwargs,
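In isolation, the corrected check now compares each dynamically-passed Langfuse credential against the matching field on the cached logger (previously the public key was compared twice and the secret key never was). A hedged sketch using the variable names from the hunk above:

    # illustrative condition only - rebuild the Langfuse client when any dynamically
    # passed credential differs from the cached logger's values
    credentials_changed = (
        (self.langfuse_public_key is not None
         and self.langfuse_public_key != langFuseLogger.public_key)
        or (self.langfuse_secret is not None
            and self.langfuse_secret != langFuseLogger.secret_key)
        # the host is compared the same way (full clause not shown in this hunk)
    )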
@@ -220,104 +220,6 @@ class DeepInfraConfig:
         return optional_params


-class GroqConfig:
-    """
-    Reference: https://deepinfra.com/docs/advanced/openai_api
-
-    The class `DeepInfra` provides configuration for the DeepInfra's Chat Completions API interface. Below are the parameters:
-    """
-
-    frequency_penalty: Optional[int] = None
-    function_call: Optional[Union[str, dict]] = None
-    functions: Optional[list] = None
-    logit_bias: Optional[dict] = None
-    max_tokens: Optional[int] = None
-    n: Optional[int] = None
-    presence_penalty: Optional[int] = None
-    stop: Optional[Union[str, list]] = None
-    temperature: Optional[int] = None
-    top_p: Optional[int] = None
-    response_format: Optional[dict] = None
-    tools: Optional[list] = None
-    tool_choice: Optional[Union[str, dict]] = None
-
-    def __init__(
-        self,
-        frequency_penalty: Optional[int] = None,
-        function_call: Optional[Union[str, dict]] = None,
-        functions: Optional[list] = None,
-        logit_bias: Optional[dict] = None,
-        max_tokens: Optional[int] = None,
-        n: Optional[int] = None,
-        presence_penalty: Optional[int] = None,
-        stop: Optional[Union[str, list]] = None,
-        temperature: Optional[int] = None,
-        top_p: Optional[int] = None,
-        response_format: Optional[dict] = None,
-        tools: Optional[list] = None,
-        tool_choice: Optional[Union[str, dict]] = None,
-    ) -> None:
-        locals_ = locals().copy()
-        for key, value in locals_.items():
-            if key != "self" and value is not None:
-                setattr(self.__class__, key, value)
-
-    @classmethod
-    def get_config(cls):
-        return {
-            k: v
-            for k, v in cls.__dict__.items()
-            if not k.startswith("__")
-            and not isinstance(
-                v,
-                (
-                    types.FunctionType,
-                    types.BuiltinFunctionType,
-                    classmethod,
-                    staticmethod,
-                ),
-            )
-            and v is not None
-        }
-
-    def get_supported_openai_params_stt(self):
-        return [
-            "prompt",
-            "response_format",
-            "temperature",
-            "language",
-        ]
-
-    def get_supported_openai_response_formats_stt(self) -> List[str]:
-        return ["json", "verbose_json", "text"]
-
-    def map_openai_params_stt(
-        self,
-        non_default_params: dict,
-        optional_params: dict,
-        model: str,
-        drop_params: bool,
-    ) -> dict:
-        response_formats = self.get_supported_openai_response_formats_stt()
-        for param, value in non_default_params.items():
-            if param == "response_format":
-                if value in response_formats:
-                    optional_params[param] = value
-                else:
-                    if litellm.drop_params is True or drop_params is True:
-                        pass
-                    else:
-                        raise litellm.utils.UnsupportedParamsError(
-                            message="Groq doesn't support response_format={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
-                                value
-                            ),
-                            status_code=400,
-                        )
-            else:
-                optional_params[param] = value
-        return optional_params
-
-
 class OpenAIConfig:
     """
     Reference: https://platform.openai.com/docs/api-reference/chat/create
litellm/llms/anthropic/chat/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
+from .handler import AnthropicChatCompletion, ModelResponseIterator
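Since the chat module is now a package, downstream code can import the pieces as follows (paths taken from the new files in this commit; shown only for orientation, not part of the diff):

    from litellm.llms.anthropic.chat import AnthropicChatCompletion, ModelResponseIterator
    from litellm.llms.anthropic.chat.transformation import AnthropicConfig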
@@ -71,12 +71,19 @@ from litellm.types.llms.openai import (
     ChatCompletionToolParamFunctionChunk,
     ChatCompletionUsageBlock,
     ChatCompletionUserMessage,
+    OpenAIMessageContent,
 )
 from litellm.types.utils import Choices, GenericStreamingChunk
 from litellm.utils import CustomStreamWrapper, ModelResponse, Usage

-from ..base import BaseLLM
-from ..prompt_templates.factory import custom_prompt, prompt_factory
+from ...base import BaseLLM
+from ...prompt_templates.factory import (
+    anthropic_messages_pt,
+    custom_prompt,
+    prompt_factory,
+)
+from ..common_utils import AnthropicError
+from .transformation import AnthropicConfig


 class AnthropicConstants(Enum):
@ -86,558 +93,6 @@ class AnthropicConstants(Enum):
|
||||||
# constants from https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/_constants.py
|
# constants from https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/_constants.py
|
||||||
|
|
||||||
|
|
||||||
class AnthropicError(Exception):
|
|
||||||
def __init__(self, status_code: int, message):
|
|
||||||
self.status_code = status_code
|
|
||||||
self.message: str = message
|
|
||||||
self.request = httpx.Request(
|
|
||||||
method="POST", url="https://api.anthropic.com/v1/messages"
|
|
||||||
)
|
|
||||||
self.response = httpx.Response(status_code=status_code, request=self.request)
|
|
||||||
super().__init__(
|
|
||||||
self.message
|
|
||||||
) # Call the base class constructor with the parameters it needs
|
|
||||||
|
|
||||||
|
|
||||||
class AnthropicConfig:
|
|
||||||
"""
|
|
||||||
Reference: https://docs.anthropic.com/claude/reference/messages_post
|
|
||||||
|
|
||||||
to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
|
|
||||||
"""
|
|
||||||
|
|
||||||
max_tokens: Optional[int] = (
|
|
||||||
4096 # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default)
|
|
||||||
)
|
|
||||||
stop_sequences: Optional[list] = None
|
|
||||||
temperature: Optional[int] = None
|
|
||||||
top_p: Optional[int] = None
|
|
||||||
top_k: Optional[int] = None
|
|
||||||
metadata: Optional[dict] = None
|
|
||||||
system: Optional[str] = None
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
max_tokens: Optional[
|
|
||||||
int
|
|
||||||
] = 4096, # You can pass in a value yourself or use the default value 4096
|
|
||||||
stop_sequences: Optional[list] = None,
|
|
||||||
temperature: Optional[int] = None,
|
|
||||||
top_p: Optional[int] = None,
|
|
||||||
top_k: Optional[int] = None,
|
|
||||||
metadata: Optional[dict] = None,
|
|
||||||
system: Optional[str] = None,
|
|
||||||
) -> None:
|
|
||||||
locals_ = locals()
|
|
||||||
for key, value in locals_.items():
|
|
||||||
if key != "self" and value is not None:
|
|
||||||
setattr(self.__class__, key, value)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_config(cls):
|
|
||||||
return {
|
|
||||||
k: v
|
|
||||||
for k, v in cls.__dict__.items()
|
|
||||||
if not k.startswith("__")
|
|
||||||
and not isinstance(
|
|
||||||
v,
|
|
||||||
(
|
|
||||||
types.FunctionType,
|
|
||||||
types.BuiltinFunctionType,
|
|
||||||
classmethod,
|
|
||||||
staticmethod,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
and v is not None
|
|
||||||
}
|
|
||||||
|
|
||||||
def get_supported_openai_params(self):
|
|
||||||
return [
|
|
||||||
"stream",
|
|
||||||
"stop",
|
|
||||||
"temperature",
|
|
||||||
"top_p",
|
|
||||||
"max_tokens",
|
|
||||||
"max_completion_tokens",
|
|
||||||
"tools",
|
|
||||||
"tool_choice",
|
|
||||||
"extra_headers",
|
|
||||||
]
|
|
||||||
|
|
||||||
def get_cache_control_headers(self) -> dict:
|
|
||||||
return {
|
|
||||||
"anthropic-version": "2023-06-01",
|
|
||||||
"anthropic-beta": "prompt-caching-2024-07-31",
|
|
||||||
}
|
|
||||||
|
|
||||||
def map_openai_params(self, non_default_params: dict, optional_params: dict):
|
|
||||||
for param, value in non_default_params.items():
|
|
||||||
if param == "max_tokens":
|
|
||||||
optional_params["max_tokens"] = value
|
|
||||||
if param == "max_completion_tokens":
|
|
||||||
optional_params["max_tokens"] = value
|
|
||||||
if param == "tools":
|
|
||||||
optional_params["tools"] = value
|
|
||||||
if param == "tool_choice":
|
|
||||||
_tool_choice: Optional[AnthropicMessagesToolChoice] = None
|
|
||||||
if value == "auto":
|
|
||||||
_tool_choice = {"type": "auto"}
|
|
||||||
elif value == "required":
|
|
||||||
_tool_choice = {"type": "any"}
|
|
||||||
elif isinstance(value, dict):
|
|
||||||
_tool_choice = {"type": "tool", "name": value["function"]["name"]}
|
|
||||||
|
|
||||||
if _tool_choice is not None:
|
|
||||||
optional_params["tool_choice"] = _tool_choice
|
|
||||||
if param == "stream" and value == True:
|
|
||||||
optional_params["stream"] = value
|
|
||||||
if param == "stop":
|
|
||||||
if isinstance(value, str):
|
|
||||||
if (
|
|
||||||
value == "\n"
|
|
||||||
) and litellm.drop_params == True: # anthropic doesn't allow whitespace characters as stop-sequences
|
|
||||||
continue
|
|
||||||
value = [value]
|
|
||||||
elif isinstance(value, list):
|
|
||||||
new_v = []
|
|
||||||
for v in value:
|
|
||||||
if (
|
|
||||||
v == "\n"
|
|
||||||
) and litellm.drop_params == True: # anthropic doesn't allow whitespace characters as stop-sequences
|
|
||||||
continue
|
|
||||||
new_v.append(v)
|
|
||||||
if len(new_v) > 0:
|
|
||||||
value = new_v
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
optional_params["stop_sequences"] = value
|
|
||||||
if param == "temperature":
|
|
||||||
optional_params["temperature"] = value
|
|
||||||
if param == "top_p":
|
|
||||||
optional_params["top_p"] = value
|
|
||||||
return optional_params
|
|
||||||
|
|
||||||
def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
|
|
||||||
"""
|
|
||||||
Return if {"cache_control": ..} in message content block
|
|
||||||
|
|
||||||
Used to check if anthropic prompt caching headers need to be set.
|
|
||||||
"""
|
|
||||||
for message in messages:
|
|
||||||
if message["content"] is not None and isinstance(message["content"], list):
|
|
||||||
for content in message["content"]:
|
|
||||||
if "cache_control" in content:
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
def translate_system_message(
|
|
||||||
self, messages: List[AllMessageValues]
|
|
||||||
) -> List[AnthropicSystemMessageContent]:
|
|
||||||
system_prompt_indices = []
|
|
||||||
anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
|
|
||||||
for idx, message in enumerate(messages):
|
|
||||||
if message["role"] == "system":
|
|
||||||
valid_content: bool = False
|
|
||||||
system_message_block = ChatCompletionSystemMessage(**message)
|
|
||||||
if isinstance(system_message_block["content"], str):
|
|
||||||
anthropic_system_message_content = AnthropicSystemMessageContent(
|
|
||||||
type="text",
|
|
||||||
text=system_message_block["content"],
|
|
||||||
)
|
|
||||||
if "cache_control" in system_message_block:
|
|
||||||
anthropic_system_message_content["cache_control"] = (
|
|
||||||
system_message_block["cache_control"]
|
|
||||||
)
|
|
||||||
anthropic_system_message_list.append(
|
|
||||||
anthropic_system_message_content
|
|
||||||
)
|
|
||||||
valid_content = True
|
|
||||||
elif isinstance(message["content"], list):
|
|
||||||
for _content in message["content"]:
|
|
||||||
anthropic_system_message_content = (
|
|
||||||
AnthropicSystemMessageContent(
|
|
||||||
type=_content.get("type"),
|
|
||||||
text=_content.get("text"),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
if "cache_control" in _content:
|
|
||||||
anthropic_system_message_content["cache_control"] = (
|
|
||||||
_content["cache_control"]
|
|
||||||
)
|
|
||||||
|
|
||||||
anthropic_system_message_list.append(
|
|
||||||
anthropic_system_message_content
|
|
||||||
)
|
|
||||||
valid_content = True
|
|
||||||
|
|
||||||
if valid_content:
|
|
||||||
system_prompt_indices.append(idx)
|
|
||||||
if len(system_prompt_indices) > 0:
|
|
||||||
for idx in reversed(system_prompt_indices):
|
|
||||||
messages.pop(idx)
|
|
||||||
|
|
||||||
return anthropic_system_message_list
|
|
||||||
|
|
||||||
### FOR [BETA] `/v1/messages` endpoint support
|
|
||||||
|
|
||||||
def translatable_anthropic_params(self) -> List:
|
|
||||||
"""
|
|
||||||
Which anthropic params, we need to translate to the openai format.
|
|
||||||
"""
|
|
||||||
return ["messages", "metadata", "system", "tool_choice", "tools"]
|
|
||||||
|
|
||||||
def translate_anthropic_messages_to_openai(
|
|
||||||
self,
|
|
||||||
messages: List[
|
|
||||||
Union[
|
|
||||||
AnthropicMessagesUserMessageParam,
|
|
||||||
AnthopicMessagesAssistantMessageParam,
|
|
||||||
]
|
|
||||||
],
|
|
||||||
) -> List:
|
|
||||||
new_messages: List[AllMessageValues] = []
|
|
||||||
for m in messages:
|
|
||||||
user_message: Optional[ChatCompletionUserMessage] = None
|
|
||||||
tool_message_list: List[ChatCompletionToolMessage] = []
|
|
||||||
new_user_content_list: List[
|
|
||||||
Union[ChatCompletionTextObject, ChatCompletionImageObject]
|
|
||||||
] = []
|
|
||||||
## USER MESSAGE ##
|
|
||||||
if m["role"] == "user":
|
|
||||||
## translate user message
|
|
||||||
if isinstance(m["content"], str):
|
|
||||||
user_message = ChatCompletionUserMessage(
|
|
||||||
role="user", content=m["content"]
|
|
||||||
)
|
|
||||||
elif isinstance(m["content"], list):
|
|
||||||
for content in m["content"]:
|
|
||||||
if content["type"] == "text":
|
|
||||||
text_obj = ChatCompletionTextObject(
|
|
||||||
type="text", text=content["text"]
|
|
||||||
)
|
|
||||||
new_user_content_list.append(text_obj)
|
|
||||||
elif content["type"] == "image":
|
|
||||||
image_url = ChatCompletionImageUrlObject(
|
|
||||||
url=f"data:{content['type']};base64,{content['source']}"
|
|
||||||
)
|
|
||||||
image_obj = ChatCompletionImageObject(
|
|
||||||
type="image_url", image_url=image_url
|
|
||||||
)
|
|
||||||
|
|
||||||
new_user_content_list.append(image_obj)
|
|
||||||
elif content["type"] == "tool_result":
|
|
||||||
if "content" not in content:
|
|
||||||
tool_result = ChatCompletionToolMessage(
|
|
||||||
role="tool",
|
|
||||||
tool_call_id=content["tool_use_id"],
|
|
||||||
content="",
|
|
||||||
)
|
|
||||||
tool_message_list.append(tool_result)
|
|
||||||
elif isinstance(content["content"], str):
|
|
||||||
tool_result = ChatCompletionToolMessage(
|
|
||||||
role="tool",
|
|
||||||
tool_call_id=content["tool_use_id"],
|
|
||||||
content=content["content"],
|
|
||||||
)
|
|
||||||
tool_message_list.append(tool_result)
|
|
||||||
elif isinstance(content["content"], list):
|
|
||||||
for c in content["content"]:
|
|
||||||
if c["type"] == "text":
|
|
||||||
tool_result = ChatCompletionToolMessage(
|
|
||||||
role="tool",
|
|
||||||
tool_call_id=content["tool_use_id"],
|
|
||||||
content=c["text"],
|
|
||||||
)
|
|
||||||
tool_message_list.append(tool_result)
|
|
||||||
elif c["type"] == "image":
|
|
||||||
image_str = (
|
|
||||||
f"data:{c['type']};base64,{c['source']}"
|
|
||||||
)
|
|
||||||
tool_result = ChatCompletionToolMessage(
|
|
||||||
role="tool",
|
|
||||||
tool_call_id=content["tool_use_id"],
|
|
||||||
content=image_str,
|
|
||||||
)
|
|
||||||
tool_message_list.append(tool_result)
|
|
||||||
|
|
||||||
if user_message is not None:
|
|
||||||
new_messages.append(user_message)
|
|
||||||
|
|
||||||
if len(new_user_content_list) > 0:
|
|
||||||
new_messages.append({"role": "user", "content": new_user_content_list}) # type: ignore
|
|
||||||
|
|
||||||
if len(tool_message_list) > 0:
|
|
||||||
new_messages.extend(tool_message_list)
|
|
||||||
|
|
||||||
## ASSISTANT MESSAGE ##
|
|
||||||
assistant_message_str: Optional[str] = None
|
|
||||||
tool_calls: List[ChatCompletionAssistantToolCall] = []
|
|
||||||
if m["role"] == "assistant":
|
|
||||||
if isinstance(m["content"], str):
|
|
||||||
assistant_message_str = m["content"]
|
|
||||||
elif isinstance(m["content"], list):
|
|
||||||
for content in m["content"]:
|
|
||||||
if content["type"] == "text":
|
|
||||||
if assistant_message_str is None:
|
|
||||||
assistant_message_str = content["text"]
|
|
||||||
else:
|
|
||||||
assistant_message_str += content["text"]
|
|
||||||
elif content["type"] == "tool_use":
|
|
||||||
function_chunk = ChatCompletionToolCallFunctionChunk(
|
|
||||||
name=content["name"],
|
|
||||||
arguments=json.dumps(content["input"]),
|
|
||||||
)
|
|
||||||
|
|
||||||
tool_calls.append(
|
|
||||||
ChatCompletionAssistantToolCall(
|
|
||||||
id=content["id"],
|
|
||||||
type="function",
|
|
||||||
function=function_chunk,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if assistant_message_str is not None or len(tool_calls) > 0:
|
|
||||||
assistant_message = ChatCompletionAssistantMessage(
|
|
||||||
role="assistant",
|
|
||||||
content=assistant_message_str,
|
|
||||||
)
|
|
||||||
if len(tool_calls) > 0:
|
|
||||||
assistant_message["tool_calls"] = tool_calls
|
|
||||||
new_messages.append(assistant_message)
|
|
||||||
|
|
||||||
return new_messages
|
|
||||||
|
|
||||||
def translate_anthropic_tool_choice_to_openai(
|
|
||||||
self, tool_choice: AnthropicMessagesToolChoice
|
|
||||||
) -> ChatCompletionToolChoiceValues:
|
|
||||||
if tool_choice["type"] == "any":
|
|
||||||
return "required"
|
|
||||||
elif tool_choice["type"] == "auto":
|
|
||||||
return "auto"
|
|
||||||
elif tool_choice["type"] == "tool":
|
|
||||||
tc_function_param = ChatCompletionToolChoiceFunctionParam(
|
|
||||||
name=tool_choice.get("name", "")
|
|
||||||
)
|
|
||||||
return ChatCompletionToolChoiceObjectParam(
|
|
||||||
type="function", function=tc_function_param
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise ValueError(
|
|
||||||
"Incompatible tool choice param submitted - {}".format(tool_choice)
|
|
||||||
)
|
|
||||||
|
|
||||||
def translate_anthropic_tools_to_openai(
|
|
||||||
self, tools: List[AnthropicMessagesTool]
|
|
||||||
) -> List[ChatCompletionToolParam]:
|
|
||||||
new_tools: List[ChatCompletionToolParam] = []
|
|
||||||
for tool in tools:
|
|
||||||
function_chunk = ChatCompletionToolParamFunctionChunk(
|
|
||||||
name=tool["name"],
|
|
||||||
parameters=tool["input_schema"],
|
|
||||||
)
|
|
||||||
if "description" in tool:
|
|
||||||
function_chunk["description"] = tool["description"]
|
|
||||||
new_tools.append(
|
|
||||||
ChatCompletionToolParam(type="function", function=function_chunk)
|
|
||||||
)
|
|
||||||
|
|
||||||
return new_tools
|
|
||||||
|
|
||||||
def translate_anthropic_to_openai(
|
|
||||||
self, anthropic_message_request: AnthropicMessagesRequest
|
|
||||||
) -> ChatCompletionRequest:
|
|
||||||
"""
|
|
||||||
This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format.
|
|
||||||
"""
|
|
||||||
new_messages: List[AllMessageValues] = []
|
|
||||||
|
|
||||||
## CONVERT ANTHROPIC MESSAGES TO OPENAI
|
|
||||||
new_messages = self.translate_anthropic_messages_to_openai(
|
|
||||||
messages=anthropic_message_request["messages"]
|
|
||||||
)
|
|
||||||
## ADD SYSTEM MESSAGE TO MESSAGES
|
|
||||||
if "system" in anthropic_message_request:
|
|
||||||
new_messages.insert(
|
|
||||||
0,
|
|
||||||
ChatCompletionSystemMessage(
|
|
||||||
role="system", content=anthropic_message_request["system"]
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
new_kwargs: ChatCompletionRequest = {
|
|
||||||
"model": anthropic_message_request["model"],
|
|
||||||
"messages": new_messages,
|
|
||||||
}
|
|
||||||
## CONVERT METADATA (user_id)
|
|
||||||
if "metadata" in anthropic_message_request:
|
|
||||||
if "user_id" in anthropic_message_request["metadata"]:
|
|
||||||
new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
|
|
||||||
|
|
||||||
# Pass litellm proxy specific metadata
|
|
||||||
if "litellm_metadata" in anthropic_message_request:
|
|
||||||
# metadata will be passed to litellm.acompletion(), it's a litellm_param
|
|
||||||
new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata")
|
|
||||||
|
|
||||||
## CONVERT TOOL CHOICE
|
|
||||||
if "tool_choice" in anthropic_message_request:
|
|
||||||
new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
|
|
||||||
tool_choice=anthropic_message_request["tool_choice"]
|
|
||||||
)
|
|
||||||
## CONVERT TOOLS
|
|
||||||
if "tools" in anthropic_message_request:
|
|
||||||
new_kwargs["tools"] = self.translate_anthropic_tools_to_openai(
|
|
||||||
tools=anthropic_message_request["tools"]
|
|
||||||
)
|
|
||||||
|
|
||||||
translatable_params = self.translatable_anthropic_params()
|
|
||||||
for k, v in anthropic_message_request.items():
|
|
||||||
if k not in translatable_params: # pass remaining params as is
|
|
||||||
new_kwargs[k] = v # type: ignore
|
|
||||||
|
|
||||||
return new_kwargs
|
|
||||||
|
|
||||||
def _translate_openai_content_to_anthropic(
|
|
||||||
self, choices: List[Choices]
|
|
||||||
) -> List[
|
|
||||||
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
|
|
||||||
]:
|
|
||||||
new_content: List[
|
|
||||||
Union[
|
|
||||||
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
|
|
||||||
]
|
|
||||||
] = []
|
|
||||||
for choice in choices:
|
|
||||||
if (
|
|
||||||
choice.message.tool_calls is not None
|
|
||||||
and len(choice.message.tool_calls) > 0
|
|
||||||
):
|
|
||||||
for tool_call in choice.message.tool_calls:
|
|
||||||
new_content.append(
|
|
||||||
AnthropicResponseContentBlockToolUse(
|
|
||||||
type="tool_use",
|
|
||||||
id=tool_call.id,
|
|
||||||
name=tool_call.function.name or "",
|
|
||||||
input=json.loads(tool_call.function.arguments),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
elif choice.message.content is not None:
|
|
||||||
new_content.append(
|
|
||||||
AnthropicResponseContentBlockText(
|
|
||||||
type="text", text=choice.message.content
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return new_content
|
|
||||||
|
|
||||||
def _translate_openai_finish_reason_to_anthropic(
|
|
||||||
self, openai_finish_reason: str
|
|
||||||
) -> AnthropicFinishReason:
|
|
||||||
if openai_finish_reason == "stop":
|
|
||||||
return "end_turn"
|
|
||||||
elif openai_finish_reason == "length":
|
|
||||||
return "max_tokens"
|
|
||||||
elif openai_finish_reason == "tool_calls":
|
|
||||||
return "tool_use"
|
|
||||||
return "end_turn"
|
|
||||||
|
|
||||||
def translate_openai_response_to_anthropic(
|
|
||||||
self, response: litellm.ModelResponse
|
|
||||||
) -> AnthropicResponse:
|
|
||||||
## translate content block
|
|
||||||
anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore
|
|
||||||
## extract finish reason
|
|
||||||
anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic(
|
|
||||||
openai_finish_reason=response.choices[0].finish_reason # type: ignore
|
|
||||||
)
|
|
||||||
# extract usage
|
|
||||||
usage: litellm.Usage = getattr(response, "usage")
|
|
||||||
anthropic_usage = AnthropicResponseUsageBlock(
|
|
||||||
input_tokens=usage.prompt_tokens or 0,
|
|
||||||
output_tokens=usage.completion_tokens or 0,
|
|
||||||
)
|
|
||||||
translated_obj = AnthropicResponse(
|
|
||||||
id=response.id,
|
|
||||||
type="message",
|
|
||||||
role="assistant",
|
|
||||||
model=response.model or "unknown-model",
|
|
||||||
stop_sequence=None,
|
|
||||||
usage=anthropic_usage,
|
|
||||||
content=anthropic_content,
|
|
||||||
stop_reason=anthropic_finish_reason,
|
|
||||||
)
|
|
||||||
|
|
||||||
return translated_obj
|
|
||||||
|
|
||||||
def _translate_streaming_openai_chunk_to_anthropic(
|
|
||||||
self, choices: List[OpenAIStreamingChoice]
|
|
||||||
) -> Tuple[
|
|
||||||
Literal["text_delta", "input_json_delta"],
|
|
||||||
Union[ContentTextBlockDelta, ContentJsonBlockDelta],
|
|
||||||
]:
|
|
||||||
text: str = ""
|
|
||||||
partial_json: Optional[str] = None
|
|
||||||
for choice in choices:
|
|
||||||
if choice.delta.content is not None:
|
|
||||||
text += choice.delta.content
|
|
||||||
elif choice.delta.tool_calls is not None:
|
|
||||||
partial_json = ""
|
|
||||||
for tool in choice.delta.tool_calls:
|
|
||||||
if (
|
|
||||||
tool.function is not None
|
|
||||||
and tool.function.arguments is not None
|
|
||||||
):
|
|
||||||
partial_json += tool.function.arguments
|
|
||||||
|
|
||||||
if partial_json is not None:
|
|
||||||
return "input_json_delta", ContentJsonBlockDelta(
|
|
||||||
type="input_json_delta", partial_json=partial_json
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)
|
|
||||||
|
|
||||||
def translate_streaming_openai_response_to_anthropic(
|
|
||||||
self, response: litellm.ModelResponse
|
|
||||||
) -> Union[ContentBlockDelta, MessageBlockDelta]:
|
|
||||||
## base case - final chunk w/ finish reason
|
|
||||||
if response.choices[0].finish_reason is not None:
|
|
||||||
delta = MessageDelta(
|
|
||||||
stop_reason=self._translate_openai_finish_reason_to_anthropic(
|
|
||||||
response.choices[0].finish_reason
|
|
||||||
),
|
|
||||||
)
|
|
||||||
if getattr(response, "usage", None) is not None:
|
|
||||||
litellm_usage_chunk: Optional[litellm.Usage] = response.usage # type: ignore
|
|
||||||
elif (
|
|
||||||
hasattr(response, "_hidden_params")
|
|
||||||
and "usage" in response._hidden_params
|
|
||||||
):
|
|
||||||
litellm_usage_chunk = response._hidden_params["usage"]
|
|
||||||
else:
|
|
||||||
litellm_usage_chunk = None
|
|
||||||
if litellm_usage_chunk is not None:
|
|
||||||
usage_delta = UsageDelta(
|
|
||||||
input_tokens=litellm_usage_chunk.prompt_tokens or 0,
|
|
||||||
output_tokens=litellm_usage_chunk.completion_tokens or 0,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
usage_delta = UsageDelta(input_tokens=0, output_tokens=0)
|
|
||||||
return MessageBlockDelta(
|
|
||||||
type="message_delta", delta=delta, usage=usage_delta
|
|
||||||
)
|
|
||||||
(
|
|
||||||
type_of_content,
|
|
||||||
content_block_delta,
|
|
||||||
) = self._translate_streaming_openai_chunk_to_anthropic(
|
|
||||||
choices=response.choices # type: ignore
|
|
||||||
)
|
|
||||||
return ContentBlockDelta(
|
|
||||||
type="content_block_delta",
|
|
||||||
index=response.choices[0].index,
|
|
||||||
delta=content_block_delta,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# makes headers for API call
|
# makes headers for API call
|
||||||
def validate_environment(
|
def validate_environment(
|
||||||
api_key, user_headers, model, messages: List[AllMessageValues]
|
api_key, user_headers, model, messages: List[AllMessageValues]
|
||||||
|
@@ -684,8 +139,14 @@ async def make_call(
            api_base, headers=headers, data=data, stream=True, timeout=timeout
        )
    except httpx.HTTPStatusError as e:
+        error_headers = getattr(e, "headers", None)
+        error_response = getattr(e, "response", None)
+        if error_headers is None and error_response:
+            error_headers = getattr(error_response, "headers", None)
        raise AnthropicError(
-            status_code=e.response.status_code, message=await e.response.aread()
+            status_code=e.response.status_code,
+            message=await e.response.aread(),
+            headers=error_headers,
        )
    except Exception as e:
        for exception in litellm.LITELLM_EXCEPTION_TYPES:
@@ -726,8 +187,14 @@ def make_sync_call(
            api_base, headers=headers, data=data, stream=True, timeout=timeout
        )
    except httpx.HTTPStatusError as e:
+        error_headers = getattr(e, "headers", None)
+        error_response = getattr(e, "response", None)
+        if error_headers is None and error_response:
+            error_headers = getattr(error_response, "headers", None)
        raise AnthropicError(
-            status_code=e.response.status_code, message=e.response.read()
+            status_code=e.response.status_code,
+            message=e.response.read(),
+            headers=error_headers,
        )
    except Exception as e:
        for exception in litellm.LITELLM_EXCEPTION_TYPES:
@@ -736,7 +203,12 @@ def make_sync_call(
        raise AnthropicError(status_code=500, message=str(e))

    if response.status_code != 200:
-        raise AnthropicError(status_code=response.status_code, message=response.read())
+        response_headers = getattr(response, "headers", None)
+        raise AnthropicError(
+            status_code=response.status_code,
+            message=response.read(),
+            headers=response_headers,
+        )

    completion_stream = ModelResponseIterator(
        streaming_response=response.iter_lines(), sync_stream=True
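These hunks are what surface Anthropic's response headers (for example retry-after) to callers instead of dropping them. A hedged sketch of the same pattern in isolation (the absolute import path for AnthropicError is inferred from the relative import in the handler hunk; the helper function itself is illustrative):

    # illustrative only - the call sites above now construct AnthropicError with the
    # provider's response headers, so values such as "retry-after" travel up with the
    # exception instead of being discarded
    import httpx

    from litellm.llms.anthropic.common_utils import AnthropicError  # path inferred from the imports hunk

    def raise_with_headers(e: httpx.HTTPStatusError) -> None:
        error_headers = getattr(e, "headers", None) or getattr(e.response, "headers", None)
        raise AnthropicError(
            status_code=e.response.status_code,
            message=e.response.read(),
            headers=error_headers,
        )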
@@ -763,7 +235,7 @@ class AnthropicChatCompletion(BaseLLM):
        response: Union[requests.Response, httpx.Response],
        model_response: ModelResponse,
        stream: bool,
-        logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
+        logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,  # type: ignore
        optional_params: dict,
        api_key: str,
        data: Union[dict, str],
@ -772,6 +244,14 @@ class AnthropicChatCompletion(BaseLLM):
|
||||||
encoding,
|
encoding,
|
||||||
json_mode: bool,
|
json_mode: bool,
|
||||||
) -> ModelResponse:
|
) -> ModelResponse:
|
||||||
|
_hidden_params = {}
|
||||||
|
_response_headers = dict(response.headers)
|
||||||
|
if _response_headers is not None:
|
||||||
|
llm_response_headers = {
|
||||||
|
"{}-{}".format("llm_provider", k): v
|
||||||
|
for k, v in _response_headers.items()
|
||||||
|
}
|
||||||
|
_hidden_params["additional_headers"] = llm_response_headers
|
||||||
## LOGGING
|
## LOGGING
|
||||||
logging_obj.post_call(
|
logging_obj.post_call(
|
||||||
input=messages,
|
input=messages,
|
||||||
|
@ -783,14 +263,21 @@ class AnthropicChatCompletion(BaseLLM):
|
||||||
## RESPONSE OBJECT
|
## RESPONSE OBJECT
|
||||||
try:
|
try:
|
||||||
completion_response = response.json()
|
completion_response = response.json()
|
||||||
except:
|
except Exception as e:
|
||||||
|
response_headers = getattr(response, "headers", None)
|
||||||
raise AnthropicError(
|
raise AnthropicError(
|
||||||
message=response.text, status_code=response.status_code
|
message="Unable to get json response - {}, Original Response: {}".format(
|
||||||
|
str(e), response.text
|
||||||
|
),
|
||||||
|
status_code=response.status_code,
|
||||||
|
headers=response_headers,
|
||||||
)
|
)
|
||||||
if "error" in completion_response:
|
if "error" in completion_response:
|
||||||
|
response_headers = getattr(response, "headers", None)
|
||||||
raise AnthropicError(
|
raise AnthropicError(
|
||||||
message=str(completion_response["error"]),
|
message=str(completion_response["error"]),
|
||||||
status_code=response.status_code,
|
status_code=response.status_code,
|
||||||
|
headers=response_headers,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
text_content = ""
|
text_content = ""
|
||||||
|
@ -856,6 +343,8 @@ class AnthropicChatCompletion(BaseLLM):
|
||||||
if "cache_read_input_tokens" in _usage:
|
if "cache_read_input_tokens" in _usage:
|
||||||
usage["cache_read_input_tokens"] = _usage["cache_read_input_tokens"]
|
usage["cache_read_input_tokens"] = _usage["cache_read_input_tokens"]
|
||||||
setattr(model_response, "usage", usage) # type: ignore
|
setattr(model_response, "usage", usage) # type: ignore
|
||||||
|
|
||||||
|
model_response._hidden_params = _hidden_params
|
||||||
return model_response
|
return model_response
|
||||||
|
|
||||||
async def acompletion_stream_function(
|
async def acompletion_stream_function(
|
||||||
|
@ -919,9 +408,9 @@ class AnthropicChatCompletion(BaseLLM):
|
||||||
litellm_params=None,
|
litellm_params=None,
|
||||||
logger_fn=None,
|
logger_fn=None,
|
||||||
headers={},
|
headers={},
|
||||||
client=None,
|
client: Optional[AsyncHTTPHandler] = None,
|
||||||
) -> Union[ModelResponse, CustomStreamWrapper]:
|
) -> Union[ModelResponse, CustomStreamWrapper]:
|
||||||
async_handler = get_async_httpx_client(
|
async_handler = client or get_async_httpx_client(
|
||||||
llm_provider=litellm.LlmProviders.ANTHROPIC
|
llm_provider=litellm.LlmProviders.ANTHROPIC
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -937,7 +426,17 @@ class AnthropicChatCompletion(BaseLLM):
|
||||||
original_response=str(e),
|
original_response=str(e),
|
||||||
additional_args={"complete_input_dict": data},
|
additional_args={"complete_input_dict": data},
|
||||||
)
|
)
|
||||||
raise e
|
status_code = getattr(e, "status_code", 500)
|
||||||
|
error_headers = getattr(e, "headers", None)
|
||||||
|
error_text = getattr(e, "text", str(e))
|
||||||
|
error_response = getattr(e, "response", None)
|
||||||
|
if error_headers is None and error_response:
|
||||||
|
error_headers = getattr(error_response, "headers", None)
|
||||||
|
raise AnthropicError(
|
||||||
|
message=error_text,
|
||||||
|
status_code=status_code,
|
||||||
|
headers=error_headers,
|
||||||
|
)
|
||||||
|
|
||||||
return self._process_response(
|
return self._process_response(
|
||||||
model=model,
|
model=model,
|
||||||
|
@ -977,73 +476,18 @@ class AnthropicChatCompletion(BaseLLM):
|
||||||
_is_function_call = False
|
_is_function_call = False
|
||||||
messages = copy.deepcopy(messages)
|
messages = copy.deepcopy(messages)
|
||||||
optional_params = copy.deepcopy(optional_params)
|
optional_params = copy.deepcopy(optional_params)
|
||||||
if model in custom_prompt_dict:
|
|
||||||
# check if the model has a registered custom prompt
|
|
||||||
model_prompt_details = custom_prompt_dict[model]
|
|
||||||
prompt = custom_prompt(
|
|
||||||
role_dict=model_prompt_details["roles"],
|
|
||||||
initial_prompt_value=model_prompt_details["initial_prompt_value"],
|
|
||||||
final_prompt_value=model_prompt_details["final_prompt_value"],
|
|
||||||
messages=messages,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Separate system prompt from rest of message
|
|
||||||
anthropic_system_message_list = AnthropicConfig().translate_system_message(
|
|
||||||
messages=messages
|
|
||||||
)
|
|
||||||
# Handling anthropic API Prompt Caching
|
|
||||||
if len(anthropic_system_message_list) > 0:
|
|
||||||
optional_params["system"] = anthropic_system_message_list
|
|
||||||
# Format rest of message according to anthropic guidelines
|
|
||||||
try:
|
|
||||||
messages = prompt_factory(
|
|
||||||
model=model, messages=messages, custom_llm_provider="anthropic"
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
raise AnthropicError(
|
|
||||||
status_code=400,
|
|
||||||
message="{}\nReceived Messages={}".format(str(e), messages),
|
|
||||||
) # don't use verbose_logger.exception, if exception is raised
|
|
||||||
|
|
||||||
## Load Config
|
|
||||||
config = litellm.AnthropicConfig.get_config()
|
|
||||||
for k, v in config.items():
|
|
||||||
if (
|
|
||||||
k not in optional_params
|
|
||||||
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
|
|
||||||
optional_params[k] = v
|
|
||||||
|
|
||||||
## Handle Tool Calling
|
|
||||||
if "tools" in optional_params:
|
|
||||||
_is_function_call = True
|
|
||||||
if "anthropic-beta" not in headers:
|
|
||||||
# default to v1 of "anthropic-beta"
|
|
||||||
headers["anthropic-beta"] = "tools-2024-05-16"
|
|
||||||
|
|
||||||
anthropic_tools = []
|
|
||||||
for tool in optional_params["tools"]:
|
|
||||||
if "input_schema" in tool: # assume in anthropic format
|
|
||||||
anthropic_tools.append(tool)
|
|
||||||
else: # assume openai tool call
|
|
||||||
new_tool = tool["function"]
|
|
||||||
new_tool["input_schema"] = new_tool.pop("parameters") # rename key
|
|
||||||
if "cache_control" in tool:
|
|
||||||
new_tool["cache_control"] = tool["cache_control"]
|
|
||||||
anthropic_tools.append(new_tool)
|
|
||||||
|
|
||||||
optional_params["tools"] = anthropic_tools
|
|
||||||
|
|
||||||
stream = optional_params.pop("stream", None)
|
stream = optional_params.pop("stream", None)
|
||||||
is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
|
|
||||||
json_mode: bool = optional_params.pop("json_mode", False)
|
json_mode: bool = optional_params.pop("json_mode", False)
|
||||||
|
is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
|
||||||
|
|
||||||
data = {
|
data = AnthropicConfig()._transform_request(
|
||||||
"messages": messages,
|
model=model,
|
||||||
**optional_params,
|
messages=messages,
|
||||||
}
|
optional_params=optional_params,
|
||||||
|
headers=headers,
|
||||||
if is_vertex_request is False:
|
_is_function_call=_is_function_call,
|
||||||
data["model"] = model
|
is_vertex_request=is_vertex_request,
|
||||||
|
)
|
||||||
|
|
||||||
## LOGGING
|
## LOGGING
|
||||||
logging_obj.pre_call(
|
logging_obj.pre_call(
|
||||||
|
@ -1136,12 +580,25 @@ class AnthropicChatCompletion(BaseLLM):
|
||||||
client = HTTPHandler(timeout=timeout) # type: ignore
|
client = HTTPHandler(timeout=timeout) # type: ignore
|
||||||
else:
|
else:
|
||||||
client = client
|
client = client
|
||||||
|
|
||||||
|
try:
|
||||||
response = client.post(
|
response = client.post(
|
||||||
api_base, headers=headers, data=json.dumps(data), timeout=timeout
|
api_base,
|
||||||
|
headers=headers,
|
||||||
|
data=json.dumps(data),
|
||||||
|
timeout=timeout,
|
||||||
)
|
)
|
||||||
if response.status_code != 200:
|
except Exception as e:
|
||||||
|
status_code = getattr(e, "status_code", 500)
|
||||||
|
error_headers = getattr(e, "headers", None)
|
||||||
|
error_text = getattr(e, "text", str(e))
|
||||||
|
error_response = getattr(e, "response", None)
|
||||||
|
if error_headers is None and error_response:
|
||||||
|
error_headers = getattr(error_response, "headers", None)
|
||||||
raise AnthropicError(
|
raise AnthropicError(
|
||||||
status_code=response.status_code, message=response.text
|
message=error_text,
|
||||||
|
status_code=status_code,
|
||||||
|
headers=error_headers,
|
||||||
)
|
)
|
||||||
|
|
||||||
return self._process_response(
|
return self._process_response(
|
||||||
|
@ -1151,7 +608,7 @@ class AnthropicChatCompletion(BaseLLM):
|
||||||
stream=stream,
|
stream=stream,
|
||||||
logging_obj=logging_obj,
|
logging_obj=logging_obj,
|
||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
data=data,
|
data=data, # type: ignore
|
||||||
messages=messages,
|
messages=messages,
|
||||||
print_verbose=print_verbose,
|
print_verbose=print_verbose,
|
||||||
optional_params=optional_params,
|
optional_params=optional_params,
|
||||||
|
@ -1192,7 +649,7 @@ class ModelResponseIterator:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _handle_usage(
|
def _handle_usage(
|
||||||
self, anthropic_usage_chunk: dict
|
self, anthropic_usage_chunk: Union[dict, UsageDelta]
|
||||||
) -> AnthropicChatCompletionUsageBlock:
|
) -> AnthropicChatCompletionUsageBlock:
|
||||||
special_fields = ["input_tokens", "output_tokens"]
|
special_fields = ["input_tokens", "output_tokens"]
|
||||||
|
|
||||||
|
@ -1203,15 +660,19 @@ class ModelResponseIterator:
|
||||||
+ anthropic_usage_chunk.get("output_tokens", 0),
|
+ anthropic_usage_chunk.get("output_tokens", 0),
|
||||||
)
|
)
|
||||||
|
|
||||||
if "cache_creation_input_tokens" in anthropic_usage_chunk:
|
cache_creation_input_tokens = anthropic_usage_chunk.get(
|
||||||
usage_block["cache_creation_input_tokens"] = anthropic_usage_chunk[
|
|
||||||
"cache_creation_input_tokens"
|
"cache_creation_input_tokens"
|
||||||
]
|
)
|
||||||
|
if cache_creation_input_tokens is not None and isinstance(
|
||||||
|
cache_creation_input_tokens, int
|
||||||
|
):
|
||||||
|
usage_block["cache_creation_input_tokens"] = cache_creation_input_tokens
|
||||||
|
|
||||||
if "cache_read_input_tokens" in anthropic_usage_chunk:
|
cache_read_input_tokens = anthropic_usage_chunk.get("cache_read_input_tokens")
|
||||||
usage_block["cache_read_input_tokens"] = anthropic_usage_chunk[
|
if cache_read_input_tokens is not None and isinstance(
|
||||||
"cache_read_input_tokens"
|
cache_read_input_tokens, int
|
||||||
]
|
):
|
||||||
|
usage_block["cache_read_input_tokens"] = cache_read_input_tokens
|
||||||
|
|
||||||
return usage_block
|
return usage_block
|
||||||
|
|
||||||
|
@ -1313,6 +774,7 @@ class ModelResponseIterator:
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
message_start_block = MessageStartBlock(**chunk) # type: ignore
|
message_start_block = MessageStartBlock(**chunk) # type: ignore
|
||||||
|
if "usage" in message_start_block["message"]:
|
||||||
usage = self._handle_usage(
|
usage = self._handle_usage(
|
||||||
anthropic_usage_chunk=message_start_block["message"]["usage"]
|
anthropic_usage_chunk=message_start_block["message"]["usage"]
|
||||||
)
|
)
|
289
litellm/llms/anthropic/chat/transformation.py
Normal file
289
litellm/llms/anthropic/chat/transformation.py
Normal file
|
@ -0,0 +1,289 @@
|
||||||
|
import types
|
||||||
|
from typing import List, Literal, Optional, Tuple, Union
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm.llms.prompt_templates.factory import anthropic_messages_pt
|
||||||
|
from litellm.types.llms.anthropic import (
|
||||||
|
AnthropicMessageRequestBase,
|
||||||
|
AnthropicMessagesRequest,
|
||||||
|
AnthropicMessagesToolChoice,
|
||||||
|
AnthropicSystemMessageContent,
|
||||||
|
)
|
||||||
|
from litellm.types.llms.openai import AllMessageValues, ChatCompletionSystemMessage
|
||||||
|
from litellm.utils import has_tool_call_blocks
|
||||||
|
|
||||||
|
from ..common_utils import AnthropicError
|
||||||
|
|
||||||
|
|
||||||
|
class AnthropicConfig:
|
||||||
|
"""
|
||||||
|
Reference: https://docs.anthropic.com/claude/reference/messages_post
|
||||||
|
|
||||||
|
to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
|
||||||
|
"""
|
||||||
|
|
||||||
|
max_tokens: Optional[int] = (
|
||||||
|
4096 # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default)
|
||||||
|
)
|
||||||
|
stop_sequences: Optional[list] = None
|
||||||
|
temperature: Optional[int] = None
|
||||||
|
top_p: Optional[int] = None
|
||||||
|
top_k: Optional[int] = None
|
||||||
|
metadata: Optional[dict] = None
|
||||||
|
system: Optional[str] = None
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
max_tokens: Optional[
|
||||||
|
int
|
||||||
|
] = 4096, # You can pass in a value yourself or use the default value 4096
|
||||||
|
stop_sequences: Optional[list] = None,
|
||||||
|
temperature: Optional[int] = None,
|
||||||
|
top_p: Optional[int] = None,
|
||||||
|
top_k: Optional[int] = None,
|
||||||
|
metadata: Optional[dict] = None,
|
||||||
|
system: Optional[str] = None,
|
||||||
|
) -> None:
|
||||||
|
locals_ = locals()
|
||||||
|
for key, value in locals_.items():
|
||||||
|
if key != "self" and value is not None:
|
||||||
|
setattr(self.__class__, key, value)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_config(cls):
|
||||||
|
return {
|
||||||
|
k: v
|
||||||
|
for k, v in cls.__dict__.items()
|
||||||
|
if not k.startswith("__")
|
||||||
|
and not isinstance(
|
||||||
|
v,
|
||||||
|
(
|
||||||
|
types.FunctionType,
|
||||||
|
types.BuiltinFunctionType,
|
||||||
|
classmethod,
|
||||||
|
staticmethod,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
and v is not None
|
||||||
|
}
|
||||||
|
|
||||||
|
def get_supported_openai_params(self):
|
||||||
|
return [
|
||||||
|
"stream",
|
||||||
|
"stop",
|
||||||
|
"temperature",
|
||||||
|
"top_p",
|
||||||
|
"max_tokens",
|
||||||
|
"max_completion_tokens",
|
||||||
|
"tools",
|
||||||
|
"tool_choice",
|
||||||
|
"extra_headers",
|
||||||
|
]
|
||||||
|
|
||||||
|
def get_cache_control_headers(self) -> dict:
|
||||||
|
return {
|
||||||
|
"anthropic-version": "2023-06-01",
|
||||||
|
"anthropic-beta": "prompt-caching-2024-07-31",
|
||||||
|
}
|
||||||
|
|
||||||
|
def map_openai_params(
|
||||||
|
self,
|
||||||
|
non_default_params: dict,
|
||||||
|
optional_params: dict,
|
||||||
|
messages: Optional[List[AllMessageValues]] = None,
|
||||||
|
):
|
||||||
|
for param, value in non_default_params.items():
|
||||||
|
if param == "max_tokens":
|
||||||
|
optional_params["max_tokens"] = value
|
||||||
|
if param == "max_completion_tokens":
|
||||||
|
optional_params["max_tokens"] = value
|
||||||
|
if param == "tools":
|
||||||
|
optional_params["tools"] = value
|
||||||
|
if param == "tool_choice":
|
||||||
|
_tool_choice: Optional[AnthropicMessagesToolChoice] = None
|
||||||
|
if value == "auto":
|
||||||
|
_tool_choice = {"type": "auto"}
|
||||||
|
elif value == "required":
|
||||||
|
_tool_choice = {"type": "any"}
|
||||||
|
elif isinstance(value, dict):
|
||||||
|
_tool_choice = {"type": "tool", "name": value["function"]["name"]}
|
||||||
|
|
||||||
|
if _tool_choice is not None:
|
||||||
|
optional_params["tool_choice"] = _tool_choice
|
||||||
|
if param == "stream" and value is True:
|
||||||
|
optional_params["stream"] = value
|
||||||
|
if param == "stop":
|
||||||
|
if isinstance(value, str):
|
||||||
|
if (
|
||||||
|
value == "\n"
|
||||||
|
) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
|
||||||
|
continue
|
||||||
|
value = [value]
|
||||||
|
elif isinstance(value, list):
|
||||||
|
new_v = []
|
||||||
|
for v in value:
|
||||||
|
if (
|
||||||
|
v == "\n"
|
||||||
|
) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
|
||||||
|
continue
|
||||||
|
new_v.append(v)
|
||||||
|
if len(new_v) > 0:
|
||||||
|
value = new_v
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
optional_params["stop_sequences"] = value
|
||||||
|
if param == "temperature":
|
||||||
|
optional_params["temperature"] = value
|
||||||
|
if param == "top_p":
|
||||||
|
optional_params["top_p"] = value
|
||||||
|
|
||||||
|
## VALIDATE REQUEST
|
||||||
|
"""
|
||||||
|
Anthropic doesn't support tool calling without `tools=` param specified.
|
||||||
|
"""
|
||||||
|
if (
|
||||||
|
"tools" not in non_default_params
|
||||||
|
and messages is not None
|
||||||
|
and has_tool_call_blocks(messages)
|
||||||
|
):
|
||||||
|
raise litellm.UnsupportedParamsError(
|
||||||
|
message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
|
||||||
|
model="",
|
||||||
|
llm_provider="anthropic",
|
||||||
|
)
|
||||||
|
|
||||||
|
return optional_params
|
||||||
|
|
||||||
|
def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
|
||||||
|
"""
|
||||||
|
Return if {"cache_control": ..} in message content block
|
||||||
|
|
||||||
|
Used to check if anthropic prompt caching headers need to be set.
|
||||||
|
"""
|
||||||
|
for message in messages:
|
||||||
|
_message_content = message.get("content")
|
||||||
|
if _message_content is not None and isinstance(_message_content, list):
|
||||||
|
for content in _message_content:
|
||||||
|
if "cache_control" in content:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
def translate_system_message(
|
||||||
|
self, messages: List[AllMessageValues]
|
||||||
|
) -> List[AnthropicSystemMessageContent]:
|
||||||
|
"""
|
||||||
|
Translate system message to anthropic format.
|
||||||
|
|
||||||
|
Removes system message from the original list and returns a new list of anthropic system message content.
|
||||||
|
"""
|
||||||
|
system_prompt_indices = []
|
||||||
|
anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
|
||||||
|
for idx, message in enumerate(messages):
|
||||||
|
if message["role"] == "system":
|
||||||
|
valid_content: bool = False
|
||||||
|
system_message_block = ChatCompletionSystemMessage(**message)
|
||||||
|
if isinstance(system_message_block["content"], str):
|
||||||
|
anthropic_system_message_content = AnthropicSystemMessageContent(
|
||||||
|
type="text",
|
||||||
|
text=system_message_block["content"],
|
||||||
|
)
|
||||||
|
if "cache_control" in system_message_block:
|
||||||
|
anthropic_system_message_content["cache_control"] = (
|
||||||
|
system_message_block["cache_control"]
|
||||||
|
)
|
||||||
|
anthropic_system_message_list.append(
|
||||||
|
anthropic_system_message_content
|
||||||
|
)
|
||||||
|
valid_content = True
|
||||||
|
elif isinstance(message["content"], list):
|
||||||
|
for _content in message["content"]:
|
||||||
|
anthropic_system_message_content = (
|
||||||
|
AnthropicSystemMessageContent(
|
||||||
|
type=_content.get("type"),
|
||||||
|
text=_content.get("text"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if "cache_control" in _content:
|
||||||
|
anthropic_system_message_content["cache_control"] = (
|
||||||
|
_content["cache_control"]
|
||||||
|
)
|
||||||
|
|
||||||
|
anthropic_system_message_list.append(
|
||||||
|
anthropic_system_message_content
|
||||||
|
)
|
||||||
|
valid_content = True
|
||||||
|
|
||||||
|
if valid_content:
|
||||||
|
system_prompt_indices.append(idx)
|
||||||
|
if len(system_prompt_indices) > 0:
|
||||||
|
for idx in reversed(system_prompt_indices):
|
||||||
|
messages.pop(idx)
|
||||||
|
|
||||||
|
return anthropic_system_message_list
|
||||||
|
|
||||||
|
def _transform_request(
|
||||||
|
self,
|
||||||
|
model: str,
|
||||||
|
messages: List[AllMessageValues],
|
||||||
|
optional_params: dict,
|
||||||
|
headers: dict,
|
||||||
|
_is_function_call: bool,
|
||||||
|
is_vertex_request: bool,
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
Translate messages to anthropic format.
|
||||||
|
"""
|
||||||
|
# Separate system prompt from rest of message
|
||||||
|
anthropic_system_message_list = self.translate_system_message(messages=messages)
|
||||||
|
# Handling anthropic API Prompt Caching
|
||||||
|
if len(anthropic_system_message_list) > 0:
|
||||||
|
optional_params["system"] = anthropic_system_message_list
|
||||||
|
# Format rest of message according to anthropic guidelines
|
||||||
|
try:
|
||||||
|
anthropic_messages = anthropic_messages_pt(
|
||||||
|
model=model,
|
||||||
|
messages=messages,
|
||||||
|
llm_provider="anthropic",
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
raise AnthropicError(
|
||||||
|
status_code=400,
|
||||||
|
message="{}\nReceived Messages={}".format(str(e), messages),
|
||||||
|
) # don't use verbose_logger.exception, if exception is raised
|
||||||
|
|
||||||
|
## Load Config
|
||||||
|
config = litellm.AnthropicConfig.get_config()
|
||||||
|
for k, v in config.items():
|
||||||
|
if (
|
||||||
|
k not in optional_params
|
||||||
|
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||||
|
optional_params[k] = v
|
||||||
|
|
||||||
|
## Handle Tool Calling
|
||||||
|
if "tools" in optional_params:
|
||||||
|
_is_function_call = True
|
||||||
|
if "anthropic-beta" not in headers:
|
||||||
|
# default to v1 of "anthropic-beta"
|
||||||
|
headers["anthropic-beta"] = "tools-2024-05-16"
|
||||||
|
|
||||||
|
anthropic_tools = []
|
||||||
|
for tool in optional_params["tools"]:
|
||||||
|
if "input_schema" in tool: # assume in anthropic format
|
||||||
|
anthropic_tools.append(tool)
|
||||||
|
else: # assume openai tool call
|
||||||
|
new_tool = tool["function"]
|
||||||
|
new_tool["input_schema"] = new_tool.pop("parameters") # rename key
|
||||||
|
if "cache_control" in tool:
|
||||||
|
new_tool["cache_control"] = tool["cache_control"]
|
||||||
|
anthropic_tools.append(new_tool)
|
||||||
|
|
||||||
|
optional_params["tools"] = anthropic_tools
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"messages": anthropic_messages,
|
||||||
|
**optional_params,
|
||||||
|
}
|
||||||
|
if not is_vertex_request:
|
||||||
|
data["model"] = model
|
||||||
|
return data
|
26
litellm/llms/anthropic/common_utils.py
Normal file
26
litellm/llms/anthropic/common_utils.py
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
"""
|
||||||
|
This file contains common utils for anthropic calls.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
|
||||||
|
class AnthropicError(Exception):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
status_code: int,
|
||||||
|
message,
|
||||||
|
headers: Optional[httpx.Headers] = None,
|
||||||
|
):
|
||||||
|
self.status_code = status_code
|
||||||
|
self.message: str = message
|
||||||
|
self.headers = headers
|
||||||
|
self.request = httpx.Request(
|
||||||
|
method="POST", url="https://api.anthropic.com/v1/messages"
|
||||||
|
)
|
||||||
|
self.response = httpx.Response(status_code=status_code, request=self.request)
|
||||||
|
super().__init__(
|
||||||
|
self.message
|
||||||
|
) # Call the base class constructor with the parameters it needs
|
|
@ -0,0 +1,425 @@
|
||||||
|
import json
|
||||||
|
import types
|
||||||
|
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
|
||||||
|
|
||||||
|
from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm.types.llms.anthropic import (
|
||||||
|
AnthopicMessagesAssistantMessageParam,
|
||||||
|
AnthropicChatCompletionUsageBlock,
|
||||||
|
AnthropicFinishReason,
|
||||||
|
AnthropicMessagesRequest,
|
||||||
|
AnthropicMessagesTool,
|
||||||
|
AnthropicMessagesToolChoice,
|
||||||
|
AnthropicMessagesUserMessageParam,
|
||||||
|
AnthropicResponse,
|
||||||
|
AnthropicResponseContentBlockText,
|
||||||
|
AnthropicResponseContentBlockToolUse,
|
||||||
|
AnthropicResponseUsageBlock,
|
||||||
|
AnthropicSystemMessageContent,
|
||||||
|
ContentBlockDelta,
|
||||||
|
ContentBlockStart,
|
||||||
|
ContentBlockStop,
|
||||||
|
ContentJsonBlockDelta,
|
||||||
|
ContentTextBlockDelta,
|
||||||
|
MessageBlockDelta,
|
||||||
|
MessageDelta,
|
||||||
|
MessageStartBlock,
|
||||||
|
UsageDelta,
|
||||||
|
)
|
||||||
|
from litellm.types.llms.openai import (
|
||||||
|
AllMessageValues,
|
||||||
|
ChatCompletionAssistantMessage,
|
||||||
|
ChatCompletionAssistantToolCall,
|
||||||
|
ChatCompletionImageObject,
|
||||||
|
ChatCompletionImageUrlObject,
|
||||||
|
ChatCompletionRequest,
|
||||||
|
ChatCompletionResponseMessage,
|
||||||
|
ChatCompletionSystemMessage,
|
||||||
|
ChatCompletionTextObject,
|
||||||
|
ChatCompletionToolCallChunk,
|
||||||
|
ChatCompletionToolCallFunctionChunk,
|
||||||
|
ChatCompletionToolChoiceFunctionParam,
|
||||||
|
ChatCompletionToolChoiceObjectParam,
|
||||||
|
ChatCompletionToolChoiceValues,
|
||||||
|
ChatCompletionToolMessage,
|
||||||
|
ChatCompletionToolParam,
|
||||||
|
ChatCompletionToolParamFunctionChunk,
|
||||||
|
ChatCompletionUsageBlock,
|
||||||
|
ChatCompletionUserMessage,
|
||||||
|
OpenAIMessageContent,
|
||||||
|
)
|
||||||
|
from litellm.types.utils import Choices, GenericStreamingChunk
|
||||||
|
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
|
||||||
|
|
||||||
|
from ...base import BaseLLM
|
||||||
|
from ...prompt_templates.factory import (
|
||||||
|
anthropic_messages_pt,
|
||||||
|
custom_prompt,
|
||||||
|
prompt_factory,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AnthropicExperimentalPassThroughConfig:
|
||||||
|
def __init__(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
### FOR [BETA] `/v1/messages` endpoint support
|
||||||
|
|
||||||
|
def translatable_anthropic_params(self) -> List:
|
||||||
|
"""
|
||||||
|
Which anthropic params, we need to translate to the openai format.
|
||||||
|
"""
|
||||||
|
return ["messages", "metadata", "system", "tool_choice", "tools"]
|
||||||
|
|
||||||
|
def translate_anthropic_messages_to_openai(
|
||||||
|
self,
|
||||||
|
messages: List[
|
||||||
|
Union[
|
||||||
|
AnthropicMessagesUserMessageParam,
|
||||||
|
AnthopicMessagesAssistantMessageParam,
|
||||||
|
]
|
||||||
|
],
|
||||||
|
) -> List:
|
||||||
|
new_messages: List[AllMessageValues] = []
|
||||||
|
for m in messages:
|
||||||
|
user_message: Optional[ChatCompletionUserMessage] = None
|
||||||
|
tool_message_list: List[ChatCompletionToolMessage] = []
|
||||||
|
new_user_content_list: List[
|
||||||
|
Union[ChatCompletionTextObject, ChatCompletionImageObject]
|
||||||
|
] = []
|
||||||
|
## USER MESSAGE ##
|
||||||
|
if m["role"] == "user":
|
||||||
|
## translate user message
|
||||||
|
message_content = m.get("content")
|
||||||
|
if message_content and isinstance(message_content, str):
|
||||||
|
user_message = ChatCompletionUserMessage(
|
||||||
|
role="user", content=message_content
|
||||||
|
)
|
||||||
|
elif message_content and isinstance(message_content, list):
|
||||||
|
for content in message_content:
|
||||||
|
if content["type"] == "text":
|
||||||
|
text_obj = ChatCompletionTextObject(
|
||||||
|
type="text", text=content["text"]
|
||||||
|
)
|
||||||
|
new_user_content_list.append(text_obj)
|
||||||
|
elif content["type"] == "image":
|
||||||
|
image_url = ChatCompletionImageUrlObject(
|
||||||
|
url=f"data:{content['type']};base64,{content['source']}"
|
||||||
|
)
|
||||||
|
image_obj = ChatCompletionImageObject(
|
||||||
|
type="image_url", image_url=image_url
|
||||||
|
)
|
||||||
|
|
||||||
|
new_user_content_list.append(image_obj)
|
||||||
|
elif content["type"] == "tool_result":
|
||||||
|
if "content" not in content:
|
||||||
|
tool_result = ChatCompletionToolMessage(
|
||||||
|
role="tool",
|
||||||
|
tool_call_id=content["tool_use_id"],
|
||||||
|
content="",
|
||||||
|
)
|
||||||
|
tool_message_list.append(tool_result)
|
||||||
|
elif isinstance(content["content"], str):
|
||||||
|
tool_result = ChatCompletionToolMessage(
|
||||||
|
role="tool",
|
||||||
|
tool_call_id=content["tool_use_id"],
|
||||||
|
content=content["content"],
|
||||||
|
)
|
||||||
|
tool_message_list.append(tool_result)
|
||||||
|
elif isinstance(content["content"], list):
|
||||||
|
for c in content["content"]:
|
||||||
|
if c["type"] == "text":
|
||||||
|
tool_result = ChatCompletionToolMessage(
|
||||||
|
role="tool",
|
||||||
|
tool_call_id=content["tool_use_id"],
|
||||||
|
content=c["text"],
|
||||||
|
)
|
||||||
|
tool_message_list.append(tool_result)
|
||||||
|
elif c["type"] == "image":
|
||||||
|
image_str = (
|
||||||
|
f"data:{c['type']};base64,{c['source']}"
|
||||||
|
)
|
||||||
|
tool_result = ChatCompletionToolMessage(
|
||||||
|
role="tool",
|
||||||
|
tool_call_id=content["tool_use_id"],
|
||||||
|
content=image_str,
|
||||||
|
)
|
||||||
|
tool_message_list.append(tool_result)
|
||||||
|
|
||||||
|
if user_message is not None:
|
||||||
|
new_messages.append(user_message)
|
||||||
|
|
||||||
|
if len(new_user_content_list) > 0:
|
||||||
|
new_messages.append({"role": "user", "content": new_user_content_list}) # type: ignore
|
||||||
|
|
||||||
|
if len(tool_message_list) > 0:
|
||||||
|
new_messages.extend(tool_message_list)
|
||||||
|
|
||||||
|
## ASSISTANT MESSAGE ##
|
||||||
|
assistant_message_str: Optional[str] = None
|
||||||
|
tool_calls: List[ChatCompletionAssistantToolCall] = []
|
||||||
|
if m["role"] == "assistant":
|
||||||
|
if isinstance(m["content"], str):
|
||||||
|
assistant_message_str = m["content"]
|
||||||
|
elif isinstance(m["content"], list):
|
||||||
|
for content in m["content"]:
|
||||||
|
if content["type"] == "text":
|
||||||
|
if assistant_message_str is None:
|
||||||
|
assistant_message_str = content["text"]
|
||||||
|
else:
|
||||||
|
assistant_message_str += content["text"]
|
||||||
|
elif content["type"] == "tool_use":
|
||||||
|
function_chunk = ChatCompletionToolCallFunctionChunk(
|
||||||
|
name=content["name"],
|
||||||
|
arguments=json.dumps(content["input"]),
|
||||||
|
)
|
||||||
|
|
||||||
|
tool_calls.append(
|
||||||
|
ChatCompletionAssistantToolCall(
|
||||||
|
id=content["id"],
|
||||||
|
type="function",
|
||||||
|
function=function_chunk,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if assistant_message_str is not None or len(tool_calls) > 0:
|
||||||
|
assistant_message = ChatCompletionAssistantMessage(
|
||||||
|
role="assistant",
|
||||||
|
content=assistant_message_str,
|
||||||
|
)
|
||||||
|
if len(tool_calls) > 0:
|
||||||
|
assistant_message["tool_calls"] = tool_calls
|
||||||
|
new_messages.append(assistant_message)
|
||||||
|
|
||||||
|
return new_messages
|
||||||
|
|
||||||
|
def translate_anthropic_tool_choice_to_openai(
|
||||||
|
self, tool_choice: AnthropicMessagesToolChoice
|
||||||
|
) -> ChatCompletionToolChoiceValues:
|
||||||
|
if tool_choice["type"] == "any":
|
||||||
|
return "required"
|
||||||
|
elif tool_choice["type"] == "auto":
|
||||||
|
return "auto"
|
||||||
|
elif tool_choice["type"] == "tool":
|
||||||
|
tc_function_param = ChatCompletionToolChoiceFunctionParam(
|
||||||
|
name=tool_choice.get("name", "")
|
||||||
|
)
|
||||||
|
return ChatCompletionToolChoiceObjectParam(
|
||||||
|
type="function", function=tc_function_param
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
"Incompatible tool choice param submitted - {}".format(tool_choice)
|
||||||
|
)
|
||||||
|
|
||||||
|
def translate_anthropic_tools_to_openai(
|
||||||
|
self, tools: List[AnthropicMessagesTool]
|
||||||
|
) -> List[ChatCompletionToolParam]:
|
||||||
|
new_tools: List[ChatCompletionToolParam] = []
|
||||||
|
for tool in tools:
|
||||||
|
function_chunk = ChatCompletionToolParamFunctionChunk(
|
||||||
|
name=tool["name"],
|
||||||
|
parameters=tool["input_schema"],
|
||||||
|
)
|
||||||
|
if "description" in tool:
|
||||||
|
function_chunk["description"] = tool["description"]
|
||||||
|
new_tools.append(
|
||||||
|
ChatCompletionToolParam(type="function", function=function_chunk)
|
||||||
|
)
|
||||||
|
|
||||||
|
return new_tools
|
||||||
|
|
||||||
|
def translate_anthropic_to_openai(
|
||||||
|
self, anthropic_message_request: AnthropicMessagesRequest
|
||||||
|
) -> ChatCompletionRequest:
|
||||||
|
"""
|
||||||
|
This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format.
|
||||||
|
"""
|
||||||
|
new_messages: List[AllMessageValues] = []
|
||||||
|
|
||||||
|
## CONVERT ANTHROPIC MESSAGES TO OPENAI
|
||||||
|
new_messages = self.translate_anthropic_messages_to_openai(
|
||||||
|
messages=anthropic_message_request["messages"]
|
||||||
|
)
|
||||||
|
## ADD SYSTEM MESSAGE TO MESSAGES
|
||||||
|
if "system" in anthropic_message_request:
|
||||||
|
new_messages.insert(
|
||||||
|
0,
|
||||||
|
ChatCompletionSystemMessage(
|
||||||
|
role="system", content=anthropic_message_request["system"]
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
new_kwargs: ChatCompletionRequest = {
|
||||||
|
"model": anthropic_message_request["model"],
|
||||||
|
"messages": new_messages,
|
||||||
|
}
|
||||||
|
## CONVERT METADATA (user_id)
|
||||||
|
if "metadata" in anthropic_message_request:
|
||||||
|
if "user_id" in anthropic_message_request["metadata"]:
|
||||||
|
new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
|
||||||
|
|
||||||
|
# Pass litellm proxy specific metadata
|
||||||
|
if "litellm_metadata" in anthropic_message_request:
|
||||||
|
# metadata will be passed to litellm.acompletion(), it's a litellm_param
|
||||||
|
new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata")
|
||||||
|
|
||||||
|
## CONVERT TOOL CHOICE
|
||||||
|
if "tool_choice" in anthropic_message_request:
|
||||||
|
new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
|
||||||
|
tool_choice=anthropic_message_request["tool_choice"]
|
||||||
|
)
|
||||||
|
## CONVERT TOOLS
|
||||||
|
if "tools" in anthropic_message_request:
|
||||||
|
new_kwargs["tools"] = self.translate_anthropic_tools_to_openai(
|
||||||
|
tools=anthropic_message_request["tools"]
|
||||||
|
)
|
||||||
|
|
||||||
|
translatable_params = self.translatable_anthropic_params()
|
||||||
|
for k, v in anthropic_message_request.items():
|
||||||
|
if k not in translatable_params: # pass remaining params as is
|
||||||
|
new_kwargs[k] = v # type: ignore
|
||||||
|
|
||||||
|
return new_kwargs
|
||||||
|
|
||||||
|
def _translate_openai_content_to_anthropic(
|
||||||
|
self, choices: List[Choices]
|
||||||
|
) -> List[
|
||||||
|
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
|
||||||
|
]:
|
||||||
|
new_content: List[
|
||||||
|
Union[
|
||||||
|
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
|
||||||
|
]
|
||||||
|
] = []
|
||||||
|
for choice in choices:
|
||||||
|
if (
|
||||||
|
choice.message.tool_calls is not None
|
||||||
|
and len(choice.message.tool_calls) > 0
|
||||||
|
):
|
||||||
|
for tool_call in choice.message.tool_calls:
|
||||||
|
new_content.append(
|
||||||
|
AnthropicResponseContentBlockToolUse(
|
||||||
|
type="tool_use",
|
||||||
|
id=tool_call.id,
|
||||||
|
name=tool_call.function.name or "",
|
||||||
|
input=json.loads(tool_call.function.arguments),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
elif choice.message.content is not None:
|
||||||
|
new_content.append(
|
||||||
|
AnthropicResponseContentBlockText(
|
||||||
|
type="text", text=choice.message.content
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
return new_content
|
||||||
|
|
||||||
|
def _translate_openai_finish_reason_to_anthropic(
|
||||||
|
self, openai_finish_reason: str
|
||||||
|
) -> AnthropicFinishReason:
|
||||||
|
if openai_finish_reason == "stop":
|
||||||
|
return "end_turn"
|
||||||
|
elif openai_finish_reason == "length":
|
||||||
|
return "max_tokens"
|
||||||
|
elif openai_finish_reason == "tool_calls":
|
||||||
|
return "tool_use"
|
||||||
|
return "end_turn"
|
||||||
|
|
||||||
|
def translate_openai_response_to_anthropic(
|
||||||
|
self, response: litellm.ModelResponse
|
||||||
|
) -> AnthropicResponse:
|
||||||
|
## translate content block
|
||||||
|
anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore
|
||||||
|
## extract finish reason
|
||||||
|
anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic(
|
||||||
|
openai_finish_reason=response.choices[0].finish_reason # type: ignore
|
||||||
|
)
|
||||||
|
# extract usage
|
||||||
|
usage: litellm.Usage = getattr(response, "usage")
|
||||||
|
anthropic_usage = AnthropicResponseUsageBlock(
|
||||||
|
input_tokens=usage.prompt_tokens or 0,
|
||||||
|
output_tokens=usage.completion_tokens or 0,
|
||||||
|
)
|
||||||
|
translated_obj = AnthropicResponse(
|
||||||
|
id=response.id,
|
||||||
|
type="message",
|
||||||
|
role="assistant",
|
||||||
|
model=response.model or "unknown-model",
|
||||||
|
stop_sequence=None,
|
||||||
|
usage=anthropic_usage,
|
||||||
|
content=anthropic_content,
|
||||||
|
stop_reason=anthropic_finish_reason,
|
||||||
|
)
|
||||||
|
|
||||||
|
return translated_obj
|
||||||
|
|
||||||
|
def _translate_streaming_openai_chunk_to_anthropic(
|
||||||
|
self, choices: List[OpenAIStreamingChoice]
|
||||||
|
) -> Tuple[
|
||||||
|
Literal["text_delta", "input_json_delta"],
|
||||||
|
Union[ContentTextBlockDelta, ContentJsonBlockDelta],
|
||||||
|
]:
|
||||||
|
text: str = ""
|
||||||
|
partial_json: Optional[str] = None
|
||||||
|
for choice in choices:
|
||||||
|
if choice.delta.content is not None:
|
||||||
|
text += choice.delta.content
|
||||||
|
elif choice.delta.tool_calls is not None:
|
||||||
|
partial_json = ""
|
||||||
|
for tool in choice.delta.tool_calls:
|
||||||
|
if (
|
||||||
|
tool.function is not None
|
||||||
|
and tool.function.arguments is not None
|
||||||
|
):
|
||||||
|
partial_json += tool.function.arguments
|
||||||
|
|
||||||
|
if partial_json is not None:
|
||||||
|
return "input_json_delta", ContentJsonBlockDelta(
|
||||||
|
type="input_json_delta", partial_json=partial_json
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)
|
||||||
|
|
||||||
|
def translate_streaming_openai_response_to_anthropic(
|
||||||
|
self, response: litellm.ModelResponse
|
||||||
|
) -> Union[ContentBlockDelta, MessageBlockDelta]:
|
||||||
|
## base case - final chunk w/ finish reason
|
||||||
|
if response.choices[0].finish_reason is not None:
|
||||||
|
delta = MessageDelta(
|
||||||
|
stop_reason=self._translate_openai_finish_reason_to_anthropic(
|
||||||
|
response.choices[0].finish_reason
|
||||||
|
),
|
||||||
|
)
|
||||||
|
if getattr(response, "usage", None) is not None:
|
||||||
|
litellm_usage_chunk: Optional[litellm.Usage] = response.usage # type: ignore
|
||||||
|
elif (
|
||||||
|
hasattr(response, "_hidden_params")
|
||||||
|
and "usage" in response._hidden_params
|
||||||
|
):
|
||||||
|
litellm_usage_chunk = response._hidden_params["usage"]
|
||||||
|
else:
|
||||||
|
litellm_usage_chunk = None
|
||||||
|
if litellm_usage_chunk is not None:
|
||||||
|
usage_delta = UsageDelta(
|
||||||
|
input_tokens=litellm_usage_chunk.prompt_tokens or 0,
|
||||||
|
output_tokens=litellm_usage_chunk.completion_tokens or 0,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
usage_delta = UsageDelta(input_tokens=0, output_tokens=0)
|
||||||
|
return MessageBlockDelta(
|
||||||
|
type="message_delta", delta=delta, usage=usage_delta
|
||||||
|
)
|
||||||
|
(
|
||||||
|
type_of_content,
|
||||||
|
content_block_delta,
|
||||||
|
) = self._translate_streaming_openai_chunk_to_anthropic(
|
||||||
|
choices=response.choices # type: ignore
|
||||||
|
)
|
||||||
|
return ContentBlockDelta(
|
||||||
|
type="content_block_delta",
|
||||||
|
index=response.choices[0].index,
|
||||||
|
delta=content_block_delta,
|
||||||
|
)
|
|
@ -22,7 +22,7 @@ from litellm.types.llms.openai import (
|
||||||
ChatCompletionToolParamFunctionChunk,
|
ChatCompletionToolParamFunctionChunk,
|
||||||
)
|
)
|
||||||
from litellm.types.utils import ModelResponse, Usage
|
from litellm.types.utils import ModelResponse, Usage
|
||||||
from litellm.utils import CustomStreamWrapper
|
from litellm.utils import CustomStreamWrapper, has_tool_call_blocks
|
||||||
|
|
||||||
from ...prompt_templates.factory import _bedrock_converse_messages_pt, _bedrock_tools_pt
|
from ...prompt_templates.factory import _bedrock_converse_messages_pt, _bedrock_tools_pt
|
||||||
from ..common_utils import BedrockError, get_bedrock_tool_name
|
from ..common_utils import BedrockError, get_bedrock_tool_name
|
||||||
|
@ -136,6 +136,7 @@ class AmazonConverseConfig:
|
||||||
non_default_params: dict,
|
non_default_params: dict,
|
||||||
optional_params: dict,
|
optional_params: dict,
|
||||||
drop_params: bool,
|
drop_params: bool,
|
||||||
|
messages: Optional[List[AllMessageValues]] = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
for param, value in non_default_params.items():
|
for param, value in non_default_params.items():
|
||||||
if param == "response_format":
|
if param == "response_format":
|
||||||
|
@ -202,6 +203,21 @@ class AmazonConverseConfig:
|
||||||
)
|
)
|
||||||
if _tool_choice_value is not None:
|
if _tool_choice_value is not None:
|
||||||
optional_params["tool_choice"] = _tool_choice_value
|
optional_params["tool_choice"] = _tool_choice_value
|
||||||
|
|
||||||
|
## VALIDATE REQUEST
|
||||||
|
"""
|
||||||
|
Bedrock doesn't support tool calling without `tools=` param specified.
|
||||||
|
"""
|
||||||
|
if (
|
||||||
|
"tools" not in non_default_params
|
||||||
|
and messages is not None
|
||||||
|
and has_tool_call_blocks(messages)
|
||||||
|
):
|
||||||
|
raise litellm.UnsupportedParamsError(
|
||||||
|
message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
|
||||||
|
model="",
|
||||||
|
llm_provider="anthropic",
|
||||||
|
)
|
||||||
return optional_params
|
return optional_params
|
||||||
|
|
||||||
def _transform_request(
|
def _transform_request(
|
||||||
|
|
60
litellm/llms/groq/chat/handler.py
Normal file
60
litellm/llms/groq/chat/handler.py
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
"""
|
||||||
|
Handles the chat completion request for groq
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any, Callable, Optional, Union
|
||||||
|
|
||||||
|
from httpx._config import Timeout
|
||||||
|
|
||||||
|
from litellm.utils import ModelResponse
|
||||||
|
|
||||||
|
from ...groq.chat.transformation import GroqChatConfig
|
||||||
|
from ...OpenAI.openai import OpenAIChatCompletion
|
||||||
|
|
||||||
|
|
||||||
|
class GroqChatCompletion(OpenAIChatCompletion):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
super().__init__(**kwargs)
|
||||||
|
|
||||||
|
def completion(
|
||||||
|
self,
|
||||||
|
model_response: ModelResponse,
|
||||||
|
timeout: Union[float, Timeout],
|
||||||
|
optional_params: dict,
|
||||||
|
logging_obj: Any,
|
||||||
|
model: Optional[str] = None,
|
||||||
|
messages: Optional[list] = None,
|
||||||
|
print_verbose: Optional[Callable[..., Any]] = None,
|
||||||
|
api_key: Optional[str] = None,
|
||||||
|
api_base: Optional[str] = None,
|
||||||
|
acompletion: bool = False,
|
||||||
|
litellm_params=None,
|
||||||
|
logger_fn=None,
|
||||||
|
headers: Optional[dict] = None,
|
||||||
|
custom_prompt_dict: dict = {},
|
||||||
|
client=None,
|
||||||
|
organization: Optional[str] = None,
|
||||||
|
custom_llm_provider: Optional[str] = None,
|
||||||
|
drop_params: Optional[bool] = None,
|
||||||
|
):
|
||||||
|
messages = GroqChatConfig()._transform_messages(messages) # type: ignore
|
||||||
|
return super().completion(
|
||||||
|
model_response,
|
||||||
|
timeout,
|
||||||
|
optional_params,
|
||||||
|
logging_obj,
|
||||||
|
model,
|
||||||
|
messages,
|
||||||
|
print_verbose,
|
||||||
|
api_key,
|
||||||
|
api_base,
|
||||||
|
acompletion,
|
||||||
|
litellm_params,
|
||||||
|
logger_fn,
|
||||||
|
headers,
|
||||||
|
custom_prompt_dict,
|
||||||
|
client,
|
||||||
|
organization,
|
||||||
|
custom_llm_provider,
|
||||||
|
drop_params,
|
||||||
|
)
|
88
litellm/llms/groq/chat/transformation.py
Normal file
88
litellm/llms/groq/chat/transformation.py
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
"""
|
||||||
|
Translate from OpenAI's `/v1/chat/completions` to Groq's `/v1/chat/completions`
|
||||||
|
"""
|
||||||
|
|
||||||
|
import types
|
||||||
|
from typing import List, Optional, Union
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage
|
||||||
|
|
||||||
|
from ...OpenAI.chat.gpt_transformation import OpenAIGPTConfig
|
||||||
|
|
||||||
|
|
||||||
|
class GroqChatConfig(OpenAIGPTConfig):
|
||||||
|
|
||||||
|
frequency_penalty: Optional[int] = None
|
||||||
|
function_call: Optional[Union[str, dict]] = None
|
||||||
|
functions: Optional[list] = None
|
||||||
|
logit_bias: Optional[dict] = None
|
||||||
|
max_tokens: Optional[int] = None
|
||||||
|
n: Optional[int] = None
|
||||||
|
presence_penalty: Optional[int] = None
|
||||||
|
stop: Optional[Union[str, list]] = None
|
||||||
|
temperature: Optional[int] = None
|
||||||
|
top_p: Optional[int] = None
|
||||||
|
response_format: Optional[dict] = None
|
||||||
|
tools: Optional[list] = None
|
||||||
|
tool_choice: Optional[Union[str, dict]] = None
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
frequency_penalty: Optional[int] = None,
|
||||||
|
function_call: Optional[Union[str, dict]] = None,
|
||||||
|
functions: Optional[list] = None,
|
||||||
|
logit_bias: Optional[dict] = None,
|
||||||
|
max_tokens: Optional[int] = None,
|
||||||
|
n: Optional[int] = None,
|
||||||
|
presence_penalty: Optional[int] = None,
|
||||||
|
stop: Optional[Union[str, list]] = None,
|
||||||
|
temperature: Optional[int] = None,
|
||||||
|
top_p: Optional[int] = None,
|
||||||
|
response_format: Optional[dict] = None,
|
||||||
|
tools: Optional[list] = None,
|
||||||
|
tool_choice: Optional[Union[str, dict]] = None,
|
||||||
|
) -> None:
|
||||||
|
locals_ = locals().copy()
|
||||||
|
for key, value in locals_.items():
|
||||||
|
if key != "self" and value is not None:
|
||||||
|
setattr(self.__class__, key, value)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_config(cls):
|
||||||
|
return {
|
||||||
|
k: v
|
||||||
|
for k, v in cls.__dict__.items()
|
||||||
|
if not k.startswith("__")
|
||||||
|
and not isinstance(
|
||||||
|
v,
|
||||||
|
(
|
||||||
|
types.FunctionType,
|
||||||
|
types.BuiltinFunctionType,
|
||||||
|
classmethod,
|
||||||
|
staticmethod,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
and v is not None
|
||||||
|
}
|
||||||
|
|
||||||
|
def _transform_messages(self, messages: List[AllMessageValues]) -> List:
|
||||||
|
for idx, message in enumerate(messages):
|
||||||
|
"""
|
||||||
|
1. Don't pass 'null' function_call assistant message to groq - https://github.com/BerriAI/litellm/issues/5839
|
||||||
|
"""
|
||||||
|
if isinstance(message, BaseModel):
|
||||||
|
_message = message.model_dump()
|
||||||
|
else:
|
||||||
|
_message = message
|
||||||
|
assistant_message = _message.get("role") == "assistant"
|
||||||
|
if assistant_message:
|
||||||
|
new_message = ChatCompletionAssistantMessage(role="assistant")
|
||||||
|
for k, v in _message.items():
|
||||||
|
if v is not None:
|
||||||
|
new_message[k] = v # type: ignore
|
||||||
|
messages[idx] = new_message
|
||||||
|
|
||||||
|
return messages
|
101
litellm/llms/groq/stt/transformation.py
Normal file
101
litellm/llms/groq/stt/transformation.py
Normal file
|
@ -0,0 +1,101 @@
|
||||||
|
"""
|
||||||
|
Translate from OpenAI's `/v1/audio/transcriptions` to Groq's `/v1/audio/transcriptions`
|
||||||
|
"""
|
||||||
|
|
||||||
|
import types
|
||||||
|
from typing import List, Optional, Union
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
|
||||||
|
class GroqSTTConfig:
|
||||||
|
|
||||||
|
frequency_penalty: Optional[int] = None
|
||||||
|
function_call: Optional[Union[str, dict]] = None
|
||||||
|
functions: Optional[list] = None
|
||||||
|
logit_bias: Optional[dict] = None
|
||||||
|
max_tokens: Optional[int] = None
|
||||||
|
n: Optional[int] = None
|
||||||
|
presence_penalty: Optional[int] = None
|
||||||
|
stop: Optional[Union[str, list]] = None
|
||||||
|
temperature: Optional[int] = None
|
||||||
|
top_p: Optional[int] = None
|
||||||
|
response_format: Optional[dict] = None
|
||||||
|
tools: Optional[list] = None
|
||||||
|
tool_choice: Optional[Union[str, dict]] = None
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
frequency_penalty: Optional[int] = None,
|
||||||
|
function_call: Optional[Union[str, dict]] = None,
|
||||||
|
functions: Optional[list] = None,
|
||||||
|
logit_bias: Optional[dict] = None,
|
||||||
|
max_tokens: Optional[int] = None,
|
||||||
|
n: Optional[int] = None,
|
||||||
|
presence_penalty: Optional[int] = None,
|
||||||
|
stop: Optional[Union[str, list]] = None,
|
||||||
|
temperature: Optional[int] = None,
|
||||||
|
top_p: Optional[int] = None,
|
||||||
|
response_format: Optional[dict] = None,
|
||||||
|
tools: Optional[list] = None,
|
||||||
|
tool_choice: Optional[Union[str, dict]] = None,
|
||||||
|
) -> None:
|
||||||
|
locals_ = locals().copy()
|
||||||
|
for key, value in locals_.items():
|
||||||
|
if key != "self" and value is not None:
|
||||||
|
setattr(self.__class__, key, value)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_config(cls):
|
||||||
|
return {
|
||||||
|
k: v
|
||||||
|
for k, v in cls.__dict__.items()
|
||||||
|
if not k.startswith("__")
|
||||||
|
and not isinstance(
|
||||||
|
v,
|
||||||
|
(
|
||||||
|
types.FunctionType,
|
||||||
|
types.BuiltinFunctionType,
|
||||||
|
classmethod,
|
||||||
|
staticmethod,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
and v is not None
|
||||||
|
}
|
||||||
|
|
||||||
|
def get_supported_openai_params_stt(self):
|
||||||
|
return [
|
||||||
|
"prompt",
|
||||||
|
"response_format",
|
||||||
|
"temperature",
|
||||||
|
"language",
|
||||||
|
]
|
||||||
|
|
||||||
|
def get_supported_openai_response_formats_stt(self) -> List[str]:
|
||||||
|
return ["json", "verbose_json", "text"]
|
||||||
|
|
||||||
|
def map_openai_params_stt(
|
||||||
|
self,
|
||||||
|
non_default_params: dict,
|
||||||
|
optional_params: dict,
|
||||||
|
model: str,
|
||||||
|
drop_params: bool,
|
||||||
|
) -> dict:
|
||||||
|
response_formats = self.get_supported_openai_response_formats_stt()
|
||||||
|
for param, value in non_default_params.items():
|
||||||
|
if param == "response_format":
|
||||||
|
if value in response_formats:
|
||||||
|
optional_params[param] = value
|
||||||
|
else:
|
||||||
|
if litellm.drop_params is True or drop_params is True:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise litellm.utils.UnsupportedParamsError(
|
||||||
|
message="Groq doesn't support response_format={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
|
||||||
|
value
|
||||||
|
),
|
||||||
|
status_code=400,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
optional_params[param] = value
|
||||||
|
return optional_params
|
|
@ -276,7 +276,7 @@ def completion(
|
||||||
|
|
||||||
from anthropic import AnthropicVertex
|
from anthropic import AnthropicVertex
|
||||||
|
|
||||||
from litellm.llms.anthropic.chat import AnthropicChatCompletion
|
from litellm.llms.anthropic.chat.handler import AnthropicChatCompletion
|
||||||
from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
|
from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
|
||||||
VertexLLM,
|
VertexLLM,
|
||||||
)
|
)
|
||||||
|
@ -367,7 +367,7 @@ async def async_completion(
|
||||||
|
|
||||||
if client is None:
|
if client is None:
|
||||||
vertex_ai_client = AsyncAnthropicVertex(
|
vertex_ai_client = AsyncAnthropicVertex(
|
||||||
project_id=vertex_project, region=vertex_location, access_token=access_token
|
project_id=vertex_project, region=vertex_location, access_token=access_token # type: ignore
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
vertex_ai_client = client
|
vertex_ai_client = client
|
||||||
|
@ -438,7 +438,7 @@ async def async_streaming(
|
||||||
|
|
||||||
if client is None:
|
if client is None:
|
||||||
vertex_ai_client = AsyncAnthropicVertex(
|
vertex_ai_client = AsyncAnthropicVertex(
|
||||||
project_id=vertex_project, region=vertex_location, access_token=access_token
|
project_id=vertex_project, region=vertex_location, access_token=access_token # type: ignore
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
vertex_ai_client = client
|
vertex_ai_client = client
|
||||||
|
|
|
@ -96,6 +96,7 @@ from .llms.cohere import completion as cohere_completion # type: ignore
|
||||||
from .llms.cohere import embed as cohere_embed
|
from .llms.cohere import embed as cohere_embed
|
||||||
from .llms.custom_llm import CustomLLM, custom_chat_llm_router
|
from .llms.custom_llm import CustomLLM, custom_chat_llm_router
|
||||||
from .llms.databricks.chat import DatabricksChatCompletion
|
from .llms.databricks.chat import DatabricksChatCompletion
|
||||||
|
from .llms.groq.chat.handler import GroqChatCompletion
|
||||||
from .llms.huggingface_restapi import Huggingface
|
from .llms.huggingface_restapi import Huggingface
|
||||||
from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
|
from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
|
||||||
from .llms.OpenAI.chat.o1_handler import OpenAIO1ChatCompletion
|
from .llms.OpenAI.chat.o1_handler import OpenAIO1ChatCompletion
|
||||||
|
@ -168,6 +169,7 @@ openai_text_completions = OpenAITextCompletion()
|
||||||
openai_o1_chat_completions = OpenAIO1ChatCompletion()
|
openai_o1_chat_completions = OpenAIO1ChatCompletion()
|
||||||
openai_audio_transcriptions = OpenAIAudioTranscription()
|
openai_audio_transcriptions = OpenAIAudioTranscription()
|
||||||
databricks_chat_completions = DatabricksChatCompletion()
|
databricks_chat_completions = DatabricksChatCompletion()
|
||||||
|
groq_chat_completions = GroqChatCompletion()
|
||||||
azure_ai_chat_completions = AzureAIChatCompletion()
|
azure_ai_chat_completions = AzureAIChatCompletion()
|
||||||
azure_ai_embedding = AzureAIEmbedding()
|
azure_ai_embedding = AzureAIEmbedding()
|
||||||
anthropic_chat_completions = AnthropicChatCompletion()
|
anthropic_chat_completions = AnthropicChatCompletion()
|
||||||
|
@ -958,6 +960,7 @@ def completion(
|
||||||
extra_headers=extra_headers,
|
extra_headers=extra_headers,
|
||||||
api_version=api_version,
|
api_version=api_version,
|
||||||
parallel_tool_calls=parallel_tool_calls,
|
parallel_tool_calls=parallel_tool_calls,
|
||||||
|
messages=messages,
|
||||||
**non_default_params,
|
**non_default_params,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -1318,13 +1321,56 @@ def completion(
|
||||||
additional_args={"headers": headers},
|
additional_args={"headers": headers},
|
||||||
)
|
)
|
||||||
response = _response
|
response = _response
|
||||||
|
elif custom_llm_provider == "groq":
|
||||||
|
api_base = (
|
||||||
|
api_base # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there
|
||||||
|
or litellm.api_base
|
||||||
|
or get_secret("GROQ_API_BASE")
|
||||||
|
or "https://api.groq.com/openai/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
# set API KEY
|
||||||
|
api_key = (
|
||||||
|
api_key
|
||||||
|
or litellm.api_key # for deepinfra/perplexity/anyscale/friendliai we check in get_llm_provider and pass in the api key from there
|
||||||
|
or litellm.groq_key
|
||||||
|
or get_secret("GROQ_API_KEY")
|
||||||
|
)
|
||||||
|
|
||||||
|
headers = headers or litellm.headers
|
||||||
|
|
||||||
|
## LOAD CONFIG - if set
|
||||||
|
config = litellm.GroqChatConfig.get_config()
|
||||||
|
for k, v in config.items():
|
||||||
|
if (
|
||||||
|
k not in optional_params
|
||||||
|
): # completion(top_k=3) > openai_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||||
|
optional_params[k] = v
|
||||||
|
|
||||||
|
response = groq_chat_completions.completion(
|
||||||
|
model=model,
|
||||||
|
messages=messages,
|
||||||
|
headers=headers,
|
||||||
|
model_response=model_response,
|
||||||
|
print_verbose=print_verbose,
|
||||||
|
api_key=api_key,
|
||||||
|
api_base=api_base,
|
||||||
|
acompletion=acompletion,
|
||||||
|
logging_obj=logging,
|
||||||
|
optional_params=optional_params,
|
||||||
|
litellm_params=litellm_params,
|
||||||
|
logger_fn=logger_fn,
|
||||||
|
timeout=timeout, # type: ignore
|
||||||
|
custom_prompt_dict=custom_prompt_dict,
|
||||||
|
client=client, # pass AsyncOpenAI, OpenAI client
|
||||||
|
organization=organization,
|
||||||
|
custom_llm_provider=custom_llm_provider,
|
||||||
|
)
|
||||||
elif (
|
elif (
|
||||||
model in litellm.open_ai_chat_completion_models
|
model in litellm.open_ai_chat_completion_models
|
||||||
or custom_llm_provider == "custom_openai"
|
or custom_llm_provider == "custom_openai"
|
||||||
or custom_llm_provider == "deepinfra"
|
or custom_llm_provider == "deepinfra"
|
||||||
or custom_llm_provider == "perplexity"
|
or custom_llm_provider == "perplexity"
|
||||||
or custom_llm_provider == "groq"
|
|
||||||
or custom_llm_provider == "nvidia_nim"
|
or custom_llm_provider == "nvidia_nim"
|
||||||
or custom_llm_provider == "cerebras"
|
or custom_llm_provider == "cerebras"
|
||||||
or custom_llm_provider == "sambanova"
|
or custom_llm_provider == "sambanova"
|
||||||
|
@ -1431,6 +1477,7 @@ def completion(
|
||||||
original_response=response,
|
original_response=response,
|
||||||
additional_args={"headers": headers},
|
additional_args={"headers": headers},
|
||||||
)
|
)
|
||||||
|
|
||||||
elif (
|
elif (
|
||||||
"replicate" in model
|
"replicate" in model
|
||||||
or custom_llm_provider == "replicate"
|
or custom_llm_provider == "replicate"
|
||||||
|
@ -2933,6 +2980,7 @@ def batch_completion(
|
||||||
deployment_id=None,
|
deployment_id=None,
|
||||||
request_timeout: Optional[int] = None,
|
request_timeout: Optional[int] = None,
|
||||||
timeout: Optional[int] = 600,
|
timeout: Optional[int] = 600,
|
||||||
|
max_workers:Optional[int]= 100,
|
||||||
# Optional liteLLM function params
|
# Optional liteLLM function params
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
|
@ -2956,6 +3004,7 @@ def batch_completion(
|
||||||
user (str, optional): The user string for generating completions. Defaults to "".
|
user (str, optional): The user string for generating completions. Defaults to "".
|
||||||
deployment_id (optional): The deployment ID for generating completions. Defaults to None.
|
deployment_id (optional): The deployment ID for generating completions. Defaults to None.
|
||||||
request_timeout (int, optional): The request timeout for generating completions. Defaults to None.
|
request_timeout (int, optional): The request timeout for generating completions. Defaults to None.
|
||||||
|
max_workers (int,optional): The maximum number of threads to use for parallel processing.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
list: A list of completion results.
|
list: A list of completion results.
|
||||||
|
@ -3001,7 +3050,7 @@ def batch_completion(
|
||||||
for i in range(0, len(lst), n):
|
for i in range(0, len(lst), n):
|
||||||
yield lst[i : i + n]
|
yield lst[i : i + n]
|
||||||
|
|
||||||
with ThreadPoolExecutor(max_workers=100) as executor:
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||||
for sub_batch in chunks(batch_messages, 100):
|
for sub_batch in chunks(batch_messages, 100):
|
||||||
for message_list in sub_batch:
|
for message_list in sub_batch:
|
||||||
kwargs_modified = args.copy()
|
kwargs_modified = args.copy()
|
||||||
|
|
|
@ -1173,6 +1173,18 @@
|
||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_assistant_prefill": true
|
"supports_assistant_prefill": true
|
||||||
},
|
},
|
||||||
|
"mistral/pixtral-12b-2409": {
|
||||||
|
"max_tokens": 128000,
|
||||||
|
"max_input_tokens": 128000,
|
||||||
|
"max_output_tokens": 128000,
|
||||||
|
"input_cost_per_token": 0.00000015,
|
||||||
|
"output_cost_per_token": 0.00000015,
|
||||||
|
"litellm_provider": "mistral",
|
||||||
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_assistant_prefill": true,
|
||||||
|
"supports_vision": true
|
||||||
|
},
|
||||||
"mistral/open-mistral-7b": {
|
"mistral/open-mistral-7b": {
|
||||||
"max_tokens": 8191,
|
"max_tokens": 8191,
|
||||||
"max_input_tokens": 32000,
|
"max_input_tokens": 32000,
|
||||||
|
|
|
@ -760,7 +760,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
|
||||||
|
|
||||||
return _user_id_rate_limits.model_dump()
|
return _user_id_rate_limits.model_dump()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.exception(
|
verbose_proxy_logger.debug(
|
||||||
"Parallel Request Limiter: Error getting user object", str(e)
|
"Parallel Request Limiter: Error getting user object", str(e)
|
||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -389,6 +389,9 @@ async def add_litellm_data_to_request(
|
||||||
user_api_key_dict=user_api_key_dict,
|
user_api_key_dict=user_api_key_dict,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
verbose_proxy_logger.debug(
|
||||||
|
f"[PROXY]returned data from litellm_pre_call_utils: {data}"
|
||||||
|
)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1466,9 +1466,6 @@ class PrismaClient:
|
||||||
):
|
):
|
||||||
args_passed_in = locals()
|
args_passed_in = locals()
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
verbose_proxy_logger.debug(
|
|
||||||
f"PrismaClient: get_data - args_passed_in: {args_passed_in}"
|
|
||||||
)
|
|
||||||
hashed_token: Optional[str] = None
|
hashed_token: Optional[str] = None
|
||||||
try:
|
try:
|
||||||
response: Any = None
|
response: Any = None
|
||||||
|
|
|
@ -1224,3 +1224,14 @@ def test_langfuse_prompt_type(prompt):
|
||||||
_add_prompt_to_generation_params(
|
_add_prompt_to_generation_params(
|
||||||
generation_params=generation_params, clean_metadata=clean_metadata
|
generation_params=generation_params, clean_metadata=clean_metadata
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_langfuse_logging_metadata():
|
||||||
|
from litellm.integrations.langfuse import log_requester_metadata
|
||||||
|
|
||||||
|
metadata = {"key": "value", "requester_metadata": {"key": "value"}}
|
||||||
|
|
||||||
|
got_metadata = log_requester_metadata(clean_metadata=metadata)
|
||||||
|
expected_metadata = {"requester_metadata": {"key": "value"}}
|
||||||
|
|
||||||
|
assert expected_metadata == got_metadata
|
||||||
|
|
|
@ -61,6 +61,7 @@ async def test_litellm_anthropic_prompt_caching_tools():
|
||||||
}
|
}
|
||||||
|
|
||||||
mock_response.json = return_val
|
mock_response.json = return_val
|
||||||
|
mock_response.headers = {"key": "value"}
|
||||||
|
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
with patch(
|
with patch(
|
||||||
|
@ -466,6 +467,7 @@ async def test_litellm_anthropic_prompt_caching_system():
|
||||||
}
|
}
|
||||||
|
|
||||||
mock_response.json = return_val
|
mock_response.json = return_val
|
||||||
|
mock_response.headers = {"key": "value"}
|
||||||
|
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
with patch(
|
with patch(
|
||||||
|
|
|
@ -24,7 +24,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
|
||||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
||||||
from litellm.llms.prompt_templates.factory import anthropic_messages_pt
|
from litellm.llms.prompt_templates.factory import anthropic_messages_pt
|
||||||
|
|
||||||
# litellm.num_retries = 3
|
# litellm.num_retries=3
|
||||||
|
|
||||||
litellm.cache = None
|
litellm.cache = None
|
||||||
litellm.success_callback = []
|
litellm.success_callback = []
|
||||||
|
|
|
@ -1173,7 +1173,12 @@ def test_turn_off_message_logging():
|
||||||
##### VALID JSON ######
|
##### VALID JSON ######
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("model", ["gpt-3.5-turbo", "azure/chatgpt-v-2"])
|
@pytest.mark.parametrize(
|
||||||
|
"model",
|
||||||
|
[
|
||||||
|
"ft:gpt-3.5-turbo:my-org:custom_suffix:id"
|
||||||
|
], # "gpt-3.5-turbo", "azure/chatgpt-v-2",
|
||||||
|
)
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"turn_off_message_logging",
|
"turn_off_message_logging",
|
||||||
[
|
[
|
||||||
|
@ -1200,7 +1205,7 @@ def test_standard_logging_payload(model, turn_off_message_logging):
|
||||||
_ = litellm.completion(
|
_ = litellm.completion(
|
||||||
model=model,
|
model=model,
|
||||||
messages=[{"role": "user", "content": "Hey, how's it going?"}],
|
messages=[{"role": "user", "content": "Hey, how's it going?"}],
|
||||||
# mock_response="Going well!",
|
mock_response="Going well!",
|
||||||
)
|
)
|
||||||
|
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
|
@ -7,6 +7,8 @@ from typing import Any
|
||||||
|
|
||||||
from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError
|
from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError
|
||||||
|
|
||||||
|
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
||||||
|
|
||||||
sys.path.insert(
|
sys.path.insert(
|
||||||
0, os.path.abspath("../..")
|
0, os.path.abspath("../..")
|
||||||
) # Adds the parent directory to the system path
|
) # Adds the parent directory to the system path
|
||||||
|
@ -884,6 +886,42 @@ def _pre_call_utils(
|
||||||
return data, original_function, mapped_target
|
return data, original_function, mapped_target
|
||||||
|
|
||||||
|
|
||||||
|
def _pre_call_utils_httpx(
|
||||||
|
call_type: str,
|
||||||
|
data: dict,
|
||||||
|
client: Union[HTTPHandler, AsyncHTTPHandler],
|
||||||
|
sync_mode: bool,
|
||||||
|
streaming: Optional[bool],
|
||||||
|
):
|
||||||
|
mapped_target: Any = client.client
|
||||||
|
if call_type == "embedding":
|
||||||
|
data["input"] = "Hello world!"
|
||||||
|
|
||||||
|
if sync_mode:
|
||||||
|
original_function = litellm.embedding
|
||||||
|
else:
|
||||||
|
original_function = litellm.aembedding
|
||||||
|
elif call_type == "chat_completion":
|
||||||
|
data["messages"] = [{"role": "user", "content": "Hello world"}]
|
||||||
|
if streaming is True:
|
||||||
|
data["stream"] = True
|
||||||
|
|
||||||
|
if sync_mode:
|
||||||
|
original_function = litellm.completion
|
||||||
|
else:
|
||||||
|
original_function = litellm.acompletion
|
||||||
|
elif call_type == "completion":
|
||||||
|
data["prompt"] = "Hello world"
|
||||||
|
if streaming is True:
|
||||||
|
data["stream"] = True
|
||||||
|
if sync_mode:
|
||||||
|
original_function = litellm.text_completion
|
||||||
|
else:
|
||||||
|
original_function = litellm.atext_completion
|
||||||
|
|
||||||
|
return data, original_function, mapped_target
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"sync_mode",
|
"sync_mode",
|
||||||
[True, False],
|
[True, False],
|
||||||
|
@ -1006,3 +1044,111 @@ async def test_exception_with_headers(sync_mode, provider, model, call_type, str
|
||||||
if exception_raised is False:
|
if exception_raised is False:
|
||||||
print(resp)
|
print(resp)
|
||||||
assert exception_raised
|
assert exception_raised
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"sync_mode",
|
||||||
|
[True, False],
|
||||||
|
)
|
||||||
|
@pytest.mark.parametrize("streaming", [True, False])
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"provider, model, call_type",
|
||||||
|
[
|
||||||
|
("anthropic", "claude-3-haiku-20240307", "chat_completion"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_exception_with_headers_httpx(
|
||||||
|
sync_mode, provider, model, call_type, streaming
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
User feedback: litellm says "No deployments available for selected model, Try again in 60 seconds"
|
||||||
|
but Azure says to retry in at most 9s
|
||||||
|
|
||||||
|
```
|
||||||
|
{"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"}
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
print(f"Received args: {locals()}")
|
||||||
|
import openai
|
||||||
|
|
||||||
|
if sync_mode:
|
||||||
|
client = HTTPHandler()
|
||||||
|
else:
|
||||||
|
client = AsyncHTTPHandler()
|
||||||
|
|
||||||
|
data = {"model": model}
|
||||||
|
data, original_function, mapped_target = _pre_call_utils_httpx(
|
||||||
|
call_type=call_type,
|
||||||
|
data=data,
|
||||||
|
client=client,
|
||||||
|
sync_mode=sync_mode,
|
||||||
|
streaming=streaming,
|
||||||
|
)
|
||||||
|
|
||||||
|
cooldown_time = 30.0
|
||||||
|
|
||||||
|
def _return_exception(*args, **kwargs):
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
from httpx import Headers, HTTPStatusError, Request, Response
|
||||||
|
|
||||||
|
# Create the Request object
|
||||||
|
request = Request("POST", "http://0.0.0.0:9000/chat/completions")
|
||||||
|
|
||||||
|
# Create the Response object with the necessary headers and status code
|
||||||
|
response = Response(
|
||||||
|
status_code=429,
|
||||||
|
headers=Headers(
|
||||||
|
{
|
||||||
|
"date": "Sat, 21 Sep 2024 22:56:53 GMT",
|
||||||
|
"server": "uvicorn",
|
||||||
|
"retry-after": "30",
|
||||||
|
"content-length": "30",
|
||||||
|
"content-type": "application/json",
|
||||||
|
}
|
||||||
|
),
|
||||||
|
request=request,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create and raise the HTTPStatusError exception
|
||||||
|
raise HTTPStatusError(
|
||||||
|
message="Error code: 429 - Rate Limit Error!",
|
||||||
|
request=request,
|
||||||
|
response=response,
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch.object(
|
||||||
|
mapped_target,
|
||||||
|
"send",
|
||||||
|
side_effect=_return_exception,
|
||||||
|
):
|
||||||
|
new_retry_after_mock_client = MagicMock(return_value=-1)
|
||||||
|
|
||||||
|
litellm.utils._get_retry_after_from_exception_header = (
|
||||||
|
new_retry_after_mock_client
|
||||||
|
)
|
||||||
|
|
||||||
|
exception_raised = False
|
||||||
|
try:
|
||||||
|
if sync_mode:
|
||||||
|
resp = original_function(**data, client=client)
|
||||||
|
if streaming:
|
||||||
|
for chunk in resp:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
resp = await original_function(**data, client=client)
|
||||||
|
|
||||||
|
if streaming:
|
||||||
|
async for chunk in resp:
|
||||||
|
continue
|
||||||
|
|
||||||
|
except litellm.RateLimitError as e:
|
||||||
|
exception_raised = True
|
||||||
|
assert e.litellm_response_headers is not None
|
||||||
|
print("e.litellm_response_headers", e.litellm_response_headers)
|
||||||
|
assert int(e.litellm_response_headers["retry-after"]) == cooldown_time
|
||||||
|
|
||||||
|
if exception_raised is False:
|
||||||
|
print(resp)
|
||||||
|
assert exception_raised
|
||||||
|
|
|
@ -45,11 +45,12 @@ def get_current_weather(location, unit="fahrenheit"):
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"model",
|
"model",
|
||||||
[
|
[
|
||||||
# "gpt-3.5-turbo-1106",
|
"gpt-3.5-turbo-1106",
|
||||||
# "mistral/mistral-large-latest",
|
# "mistral/mistral-large-latest",
|
||||||
# "claude-3-haiku-20240307",
|
# "claude-3-haiku-20240307",
|
||||||
# "gemini/gemini-1.5-pro",
|
# "gemini/gemini-1.5-pro",
|
||||||
"anthropic.claude-3-sonnet-20240229-v1:0",
|
"anthropic.claude-3-sonnet-20240229-v1:0",
|
||||||
|
"groq/llama3-8b-8192",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
@pytest.mark.flaky(retries=3, delay=1)
|
@pytest.mark.flaky(retries=3, delay=1)
|
||||||
|
@@ -154,6 +155,105 @@ def test_aaparallel_function_call(model):
 # test_parallel_function_call()
+
+
+from litellm.types.utils import ChatCompletionMessageToolCall, Function, Message
+
+
+@pytest.mark.parametrize(
+    "model, provider",
+    [
+        (
+            "anthropic.claude-3-sonnet-20240229-v1:0",
+            "bedrock",
+        ),
+        ("claude-3-haiku-20240307", "anthropic"),
+    ],
+)
+@pytest.mark.parametrize(
+    "messages, expected_error_msg",
+    [
+        (
+            [
+                {
+                    "role": "user",
+                    "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
+                },
+                Message(
+                    content="Here are the current weather conditions for San Francisco, Tokyo, and Paris:",
+                    role="assistant",
+                    tool_calls=[
+                        ChatCompletionMessageToolCall(
+                            index=1,
+                            function=Function(
+                                arguments='{"location": "San Francisco, CA", "unit": "fahrenheit"}',
+                                name="get_current_weather",
+                            ),
+                            id="tooluse_Jj98qn6xQlOP_PiQr-w9iA",
+                            type="function",
+                        )
+                    ],
+                    function_call=None,
+                ),
+                {
+                    "tool_call_id": "tooluse_Jj98qn6xQlOP_PiQr-w9iA",
+                    "role": "tool",
+                    "name": "get_current_weather",
+                    "content": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}',
+                },
+            ],
+            True,
+        ),
+        (
+            [
+                {
+                    "role": "user",
+                    "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
+                }
+            ],
+            False,
+        ),
+    ],
+)
+def test_parallel_function_call_anthropic_error_msg(
+    model, provider, messages, expected_error_msg
+):
+    """
+    Anthropic doesn't support tool calling without the `tools=` param specified.
+
+    Ensure this error is thrown when the `tools=` param is not specified but tool call requests are made.
+
+    Reference Issues: https://github.com/BerriAI/litellm/issues/5747, https://github.com/BerriAI/litellm/issues/5388
+    """
+    try:
+        litellm.set_verbose = True
+
+        messages = messages
+
+        if expected_error_msg:
+            with pytest.raises(litellm.UnsupportedParamsError) as e:
+                second_response = litellm.completion(
+                    model=model,
+                    messages=messages,
+                    temperature=0.2,
+                    seed=22,
+                    drop_params=True,
+                )  # get a new response from the model where it can see the function response
+                print("second response\n", second_response)
+        else:
+            second_response = litellm.completion(
+                model=model,
+                messages=messages,
+                temperature=0.2,
+                seed=22,
+                drop_params=True,
+            )  # get a new response from the model where it can see the function response
+            print("second response\n", second_response)
+    except litellm.InternalServerError as e:
+        print(e)
+    except litellm.RateLimitError as e:
+        print(e)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 def test_parallel_function_call_stream():
     try:
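The new test expects `litellm.UnsupportedParamsError` when assistant `tool_calls` appear in the conversation but no `tools=` definition is passed. A hedged sketch of the corresponding well-formed call, where a matching tool schema is supplied; the schema and message list below are illustrative, not copied from the diff:

```python
import litellm

# Illustrative tool schema matching the `get_current_weather` calls used in the test above.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string"},
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]

messages = [
    {"role": "user", "content": "What's the weather like in San Francisco?"}
    # ...plus the assistant tool_calls / tool result entries from the conversation above.
]

response = litellm.completion(
    model="claude-3-haiku-20240307",
    messages=messages,
    tools=tools,  # supplying the schema avoids UnsupportedParamsError
)
```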
@@ -62,3 +62,9 @@ def test_get_model_info_shows_supports_prompt_caching():
     info = litellm.get_model_info("deepseek/deepseek-chat")
     print("info", info)
     assert info.get("supports_prompt_caching") is True
+
+
+def test_get_model_info_finetuned_models():
+    info = litellm.get_model_info("ft:gpt-3.5-turbo:my-org:custom_suffix:id")
+    print("info", info)
+    assert info["input_cost_per_token"] == 0.000003
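The new test expects `get_model_info` to resolve a fine-tuned model id to its base model's pricing. A rough sketch of that kind of normalization; the helper name and logic are an assumption, not copied from litellm/utils.py:

```python
def base_model_for_lookup(model: str) -> str:
    # "ft:gpt-3.5-turbo:my-org:custom_suffix:id" -> "gpt-3.5-turbo"
    if model.startswith("ft:"):
        return model.split(":")[1]
    return model


assert base_model_for_lookup("ft:gpt-3.5-turbo:my-org:custom_suffix:id") == "gpt-3.5-turbo"
```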
@@ -18,13 +18,13 @@ class AnthropicMessagesTool(TypedDict, total=False):


 class AnthropicMessagesTextParam(TypedDict, total=False):
-    type: Literal["text"]
-    text: str
+    type: Required[Literal["text"]]
+    text: Required[str]
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


 class AnthropicMessagesToolUseParam(TypedDict):
-    type: Literal["tool_use"]
+    type: Required[Literal["tool_use"]]
     id: str
     name: str
     input: dict
@@ -58,8 +58,8 @@ class AnthropicImageParamSource(TypedDict):


 class AnthropicMessagesImageParam(TypedDict, total=False):
-    type: Literal["image"]
-    source: AnthropicImageParamSource
+    type: Required[Literal["image"]]
+    source: Required[AnthropicImageParamSource]
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
@@ -102,16 +102,13 @@ class AnthropicSystemMessageContent(TypedDict, total=False):
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


-class AnthropicMessagesRequest(TypedDict, total=False):
-    model: Required[str]
-    messages: Required[
-        List[
-            Union[
-                AnthropicMessagesUserMessageParam,
-                AnthopicMessagesAssistantMessageParam,
-            ]
-        ]
-    ]
+AllAnthropicMessageValues = Union[
+    AnthropicMessagesUserMessageParam, AnthopicMessagesAssistantMessageParam
+]
+
+
+class AnthropicMessageRequestBase(TypedDict, total=False):
+    messages: Required[List[AllAnthropicMessageValues]]
     max_tokens: Required[int]
     metadata: AnthropicMetadata
     stop_sequences: List[str]
@@ -123,6 +120,9 @@ class AnthropicMessagesRequest(TypedDict, total=False):
     top_k: int
     top_p: float


+class AnthropicMessagesRequest(AnthropicMessageRequestBase, total=False):
+    model: Required[str]
     # litellm param - used for tracking litellm proxy metadata in the request
     litellm_metadata: dict
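A small usage sketch for the refactored request TypedDicts; the field values are examples, and it assumes `AnthropicMessagesUserMessageParam` accepts plain-string `content`:

```python
# Illustrative only: building a typed payload with the classes from this diff.
request: AnthropicMessagesRequest = {
    "model": "claude-3-haiku-20240307",
    "messages": [
        {"role": "user", "content": "Hey, how's it going?"},  # assumed string-content form
    ],
    "max_tokens": 256,
}
```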
@@ -291,9 +291,9 @@ class AnthropicResponse(BaseModel):
     """Billing and rate-limit usage."""


-class AnthropicChatCompletionUsageBlock(TypedDict, total=False):
-    prompt_tokens: Required[int]
-    completion_tokens: Required[int]
-    total_tokens: Required[int]
+from .openai import ChatCompletionUsageBlock
+
+
+class AnthropicChatCompletionUsageBlock(ChatCompletionUsageBlock, total=False):
     cache_creation_input_tokens: int
     cache_read_input_tokens: int
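After this change the Anthropic usage block inherits the OpenAI-style token counts instead of redeclaring them. An illustrative value; the field names come from the diff, the numbers are made up:

```python
usage: AnthropicChatCompletionUsageBlock = {
    "prompt_tokens": 10,        # inherited from ChatCompletionUsageBlock
    "completion_tokens": 5,     # inherited
    "total_tokens": 15,         # inherited
    "cache_creation_input_tokens": 0,  # Anthropic-specific
    "cache_read_input_tokens": 0,      # Anthropic-specific
}
```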
@@ -343,11 +343,14 @@ class ChatCompletionImageObject(TypedDict):
     image_url: Union[str, ChatCompletionImageUrlObject]


+OpenAIMessageContent = Union[
+    str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
+]
+
+
 class OpenAIChatCompletionUserMessage(TypedDict):
     role: Literal["user"]
-    content: Union[
-        str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
-    ]
+    content: OpenAIMessageContent


 class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):
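The new `OpenAIMessageContent` alias covers both a plain string and a list of text/image parts. Two illustrative messages that should satisfy the alias, assuming the usual OpenAI-style part shapes; the image URL is a placeholder:

```python
text_only: OpenAIChatCompletionUserMessage = {
    "role": "user",
    "content": "Describe this image.",
}

with_image: OpenAIChatCompletionUserMessage = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
    ],
}
```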
@@ -7,7 +7,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union
 from openai._models import BaseModel as OpenAIObject
 from openai.types.audio.transcription_create_params import FileTypes  # type: ignore
 from openai.types.completion_usage import CompletionTokensDetails, CompletionUsage
-from pydantic import ConfigDict, Field, PrivateAttr
+from pydantic import ConfigDict, PrivateAttr
 from typing_extensions import Callable, Dict, Required, TypedDict, override

 from ..litellm_core_utils.core_helpers import map_finish_reason
litellm/utils.py (2,113 changed lines): file diff suppressed because it is too large.
@@ -1173,6 +1173,18 @@
         "supports_function_calling": true,
         "supports_assistant_prefill": true
     },
+    "mistral/pixtral-12b-2409": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true,
+        "supports_vision": true
+    },
     "mistral/open-mistral-7b": {
         "max_tokens": 8191,
         "max_input_tokens": 32000,
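Illustrative arithmetic for the new `mistral/pixtral-12b-2409` entry; the helper below is hypothetical, not a litellm API:

```python
INPUT_COST_PER_TOKEN = 0.00000015
OUTPUT_COST_PER_TOKEN = 0.00000015


def pixtral_call_cost(prompt_tokens: int, completion_tokens: int) -> float:
    # Cost in USD from the per-token prices added above.
    return prompt_tokens * INPUT_COST_PER_TOKEN + completion_tokens * OUTPUT_COST_PER_TOKEN


# e.g. 1,000 prompt tokens + 500 completion tokens -> 0.000225 USD
print(pixtral_call_cost(1000, 500))
```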
@@ -25,7 +25,12 @@ from unittest.mock import MagicMock, patch
 import pytest

 import litellm
-from litellm import AnthropicConfig, Router, adapter_completion
+from litellm import (
+    AnthropicConfig,
+    Router,
+    adapter_completion,
+    AnthropicExperimentalPassThroughConfig,
+)
 from litellm.adapters.anthropic_adapter import anthropic_adapter
 from litellm.types.llms.anthropic import AnthropicResponse
@@ -33,7 +38,7 @@ from litellm.types.llms.anthropic import AnthropicResponse
 def test_anthropic_completion_messages_translation():
     messages = [{"role": "user", "content": "Hey, how's it going?"}]

-    translated_messages = AnthropicConfig().translate_anthropic_messages_to_openai(messages=messages)  # type: ignore
+    translated_messages = AnthropicExperimentalPassThroughConfig().translate_anthropic_messages_to_openai(messages=messages)  # type: ignore

     assert translated_messages == [{"role": "user", "content": "Hey, how's it going?"}]
@@ -5,7 +5,11 @@ import pytest
 import sys
 from typing import Any, Dict, List
 from unittest.mock import MagicMock, Mock, patch
+import os
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
 import litellm
 from litellm.exceptions import BadRequestError
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler