Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
LiteLLM Minor Fixes & Improvements (09/27/2024) (#5938)
* fix(langfuse.py): prevent double logging requester metadata
  Fixes https://github.com/BerriAI/litellm/issues/5935
* build(model_prices_and_context_window.json): add mistral pixtral cost tracking
  Closes https://github.com/BerriAI/litellm/issues/5837
* handle streaming for azure ai studio error
* [Perf Proxy] parallel request limiter - use one cache update call (#5932)
* fix parallel request limiter - use one cache update call
* ci/cd run again
* run ci/cd again
* use docker username password
* fix config.yml
* fix config
* fix config
* fix config.yml
* ci/cd run again
* use correct typing for batch set cache
* fix async_set_cache_pipeline
* fix only check user id tpm / rpm limits when limits set
* fix test_openai_azure_embedding_with_oidc_and_cf
* fix(groq/chat/transformation.py): Fixes https://github.com/BerriAI/litellm/issues/5839
* feat(anthropic/chat.py): return 'retry-after' headers from anthropic
  Fixes https://github.com/BerriAI/litellm/issues/4387
* feat: raise validation error if message has tool calls without passing `tools` param for anthropic/bedrock
  Closes https://github.com/BerriAI/litellm/issues/5747
* [Feature]#5940, add max_workers parameter for the batch_completion (#5947)
* handle streaming for azure ai studio error
* bump: version 1.48.2 → 1.48.3
* docs(data_security.md): add legal/compliance faq's
  Make it easier for companies to use litellm
* docs: resolve imports
* [Feature]#5940, add max_workers parameter for the batch_completion method
---------
Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com>
Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>
* fix(converse_transformation.py): fix default message value
* fix(utils.py): fix get_model_info to handle finetuned models
  Fixes issue for standard logging payloads, where model_map_value was null for finetuned openai models
* fix(litellm_pre_call_utils.py): add debug statement for data sent after updating with team/key callbacks
* fix: fix linting errors
* fix(anthropic/chat/handler.py): fix cache creation input tokens
* fix(exception_mapping_utils.py): fix missing imports
* fix(anthropic/chat/handler.py): fix usage block translation
* test: fix test
* test: fix tests
* style(types/utils.py): trigger new build
* test: fix test
---------
Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Jose Alberto Arango Sanchez <jose.arangos@udea.edu.co>
Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>
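Of the changes listed above, the new `max_workers` argument on `batch_completion` is the most direct API addition. A minimal usage sketch follows; the keyword name comes from the commit message, and the exact signature is an assumption since that file's diff is not shown below.

import litellm

# Hypothetical example: run two chat requests concurrently, capping the worker
# threads with the new `max_workers` parameter (name taken from the commit message).
responses = litellm.batch_completion(
    model="gpt-3.5-turbo",
    messages=[
        [{"role": "user", "content": "Say hello"}],
        [{"role": "user", "content": "Say goodbye"}],
    ],
    max_workers=2,
)
for response in responses:
    print(response.choices[0].message.content)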
This commit is contained in:
parent 754981a78f
commit 0b30e212da
35 changed files with 3657 additions and 2820 deletions
@@ -89,6 +89,7 @@ retry = True
### AUTH ###
api_key: Optional[str] = None
openai_key: Optional[str] = None
groq_key: Optional[str] = None
databricks_key: Optional[str] = None
azure_key: Optional[str] = None
anthropic_key: Optional[str] = None
@@ -892,7 +893,11 @@ ALL_LITELLM_RESPONSE_TYPES = [
from .types.utils import ImageObject
from .llms.custom_llm import CustomLLM
from .llms.huggingface_restapi import HuggingfaceConfig
from .llms.anthropic.chat import AnthropicConfig
from .llms.anthropic.chat.handler import AnthropicConfig
from .llms.anthropic.experimental_pass_through.transformation import (
    AnthropicExperimentalPassThroughConfig,
)
from .llms.groq.stt.transformation import GroqSTTConfig
from .llms.anthropic.completion import AnthropicTextConfig
from .llms.databricks.chat import DatabricksConfig, DatabricksEmbeddingConfig
from .llms.predibase import PredibaseConfig
@@ -962,8 +967,8 @@ from .llms.OpenAI.openai import (
    OpenAITextCompletionConfig,
    MistralEmbeddingConfig,
    DeepInfraConfig,
    GroqConfig,
)
from .llms.groq.chat.transformation import GroqChatConfig
from .llms.azure_ai.chat.transformation import AzureAIStudioConfig
from .llms.mistral.mistral_chat_transformation import MistralConfig
from .llms.OpenAI.chat.o1_transformation import (
@@ -34,7 +34,7 @@ class AnthropicAdapter(CustomLogger):
        """
        request_body = AnthropicMessagesRequest(**kwargs)  # type: ignore

        translated_body = litellm.AnthropicConfig().translate_anthropic_to_openai(
        translated_body = litellm.AnthropicExperimentalPassThroughConfig().translate_anthropic_to_openai(
            anthropic_message_request=request_body
        )
@@ -44,7 +44,7 @@ class AnthropicAdapter(CustomLogger):
        self, response: litellm.ModelResponse
    ) -> Optional[AnthropicResponse]:

        return litellm.AnthropicConfig().translate_openai_response_to_anthropic(
        return litellm.AnthropicExperimentalPassThroughConfig().translate_openai_response_to_anthropic(
            response=response
        )
@@ -99,7 +99,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
            if chunk == "None" or chunk is None:
                raise Exception

            processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic(
            processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic(
                response=chunk
            )
            if (
@@ -163,7 +163,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
            async for chunk in self.completion_stream:
                if chunk == "None" or chunk is None:
                    raise Exception
                processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic(
                processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic(
                    response=chunk
                )
                if (
@@ -601,7 +601,7 @@ class LangFuseLogger:
                "input": input if not mask_input else "redacted-by-litellm",
                "output": output if not mask_output else "redacted-by-litellm",
                "usage": usage,
                "metadata": clean_metadata,
                "metadata": log_requester_metadata(clean_metadata),
                "level": level,
                "version": clean_metadata.pop("version", None),
            }
@@ -768,3 +768,15 @@ def log_provider_specific_information_as_span(
            name="vertex_ai_grounding_metadata",
            input=vertex_ai_grounding_metadata,
        )


def log_requester_metadata(clean_metadata: dict):
    returned_metadata = {}
    requester_metadata = clean_metadata.get("requester_metadata") or {}
    for k, v in clean_metadata.items():
        if k not in requester_metadata:
            returned_metadata[k] = v

    returned_metadata.update({"requester_metadata": requester_metadata})

    return returned_metadata
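A small illustration of what the new helper does (made-up metadata, not taken from the diff): keys that also appear under "requester_metadata" are kept only inside that nested block, which is what the "prevent double logging requester metadata" fix above refers to.

# Hypothetical input/output for log_requester_metadata (defined above):
clean_metadata = {
    "user_api_key_alias": "my-key",
    "trace_id": "abc-123",
    "requester_metadata": {"trace_id": "abc-123"},
}
print(log_requester_metadata(clean_metadata))
# {'user_api_key_alias': 'my-key', 'requester_metadata': {'trace_id': 'abc-123'}}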
File diff suppressed because it is too large.
@@ -1015,9 +1015,8 @@ class Logging:
                        != langFuseLogger.public_key
                    )
                    or (
                        self.langfuse_public_key is not None
                        and self.langfuse_public_key
                        != langFuseLogger.public_key
                        self.langfuse_secret is not None
                        and self.langfuse_secret != langFuseLogger.secret_key
                    )
                    or (
                        self.langfuse_host is not None
@@ -1045,7 +1044,6 @@ class Logging:
                        service_name="langfuse",
                        logging_obj=temp_langfuse_logger,
                    )

                if temp_langfuse_logger is not None:
                    _response = temp_langfuse_logger.log_event(
                        kwargs=kwargs,
@ -220,104 +220,6 @@ class DeepInfraConfig:
|
|||
return optional_params
|
||||
|
||||
|
||||
class GroqConfig:
|
||||
"""
|
||||
Reference: https://deepinfra.com/docs/advanced/openai_api
|
||||
|
||||
The class `DeepInfra` provides configuration for the DeepInfra's Chat Completions API interface. Below are the parameters:
|
||||
"""
|
||||
|
||||
frequency_penalty: Optional[int] = None
|
||||
function_call: Optional[Union[str, dict]] = None
|
||||
functions: Optional[list] = None
|
||||
logit_bias: Optional[dict] = None
|
||||
max_tokens: Optional[int] = None
|
||||
n: Optional[int] = None
|
||||
presence_penalty: Optional[int] = None
|
||||
stop: Optional[Union[str, list]] = None
|
||||
temperature: Optional[int] = None
|
||||
top_p: Optional[int] = None
|
||||
response_format: Optional[dict] = None
|
||||
tools: Optional[list] = None
|
||||
tool_choice: Optional[Union[str, dict]] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
frequency_penalty: Optional[int] = None,
|
||||
function_call: Optional[Union[str, dict]] = None,
|
||||
functions: Optional[list] = None,
|
||||
logit_bias: Optional[dict] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
n: Optional[int] = None,
|
||||
presence_penalty: Optional[int] = None,
|
||||
stop: Optional[Union[str, list]] = None,
|
||||
temperature: Optional[int] = None,
|
||||
top_p: Optional[int] = None,
|
||||
response_format: Optional[dict] = None,
|
||||
tools: Optional[list] = None,
|
||||
tool_choice: Optional[Union[str, dict]] = None,
|
||||
) -> None:
|
||||
locals_ = locals().copy()
|
||||
for key, value in locals_.items():
|
||||
if key != "self" and value is not None:
|
||||
setattr(self.__class__, key, value)
|
||||
|
||||
@classmethod
|
||||
def get_config(cls):
|
||||
return {
|
||||
k: v
|
||||
for k, v in cls.__dict__.items()
|
||||
if not k.startswith("__")
|
||||
and not isinstance(
|
||||
v,
|
||||
(
|
||||
types.FunctionType,
|
||||
types.BuiltinFunctionType,
|
||||
classmethod,
|
||||
staticmethod,
|
||||
),
|
||||
)
|
||||
and v is not None
|
||||
}
|
||||
|
||||
def get_supported_openai_params_stt(self):
|
||||
return [
|
||||
"prompt",
|
||||
"response_format",
|
||||
"temperature",
|
||||
"language",
|
||||
]
|
||||
|
||||
def get_supported_openai_response_formats_stt(self) -> List[str]:
|
||||
return ["json", "verbose_json", "text"]
|
||||
|
||||
def map_openai_params_stt(
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
) -> dict:
|
||||
response_formats = self.get_supported_openai_response_formats_stt()
|
||||
for param, value in non_default_params.items():
|
||||
if param == "response_format":
|
||||
if value in response_formats:
|
||||
optional_params[param] = value
|
||||
else:
|
||||
if litellm.drop_params is True or drop_params is True:
|
||||
pass
|
||||
else:
|
||||
raise litellm.utils.UnsupportedParamsError(
|
||||
message="Groq doesn't support response_format={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
|
||||
value
|
||||
),
|
||||
status_code=400,
|
||||
)
|
||||
else:
|
||||
optional_params[param] = value
|
||||
return optional_params
|
||||
|
||||
|
||||
class OpenAIConfig:
|
||||
"""
|
||||
Reference: https://platform.openai.com/docs/api-reference/chat/create
|
||||
|
|
litellm/llms/anthropic/chat/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from .handler import AnthropicChatCompletion, ModelResponseIterator
@ -71,12 +71,19 @@ from litellm.types.llms.openai import (
|
|||
ChatCompletionToolParamFunctionChunk,
|
||||
ChatCompletionUsageBlock,
|
||||
ChatCompletionUserMessage,
|
||||
OpenAIMessageContent,
|
||||
)
|
||||
from litellm.types.utils import Choices, GenericStreamingChunk
|
||||
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
|
||||
|
||||
from ..base import BaseLLM
|
||||
from ..prompt_templates.factory import custom_prompt, prompt_factory
|
||||
from ...base import BaseLLM
|
||||
from ...prompt_templates.factory import (
|
||||
anthropic_messages_pt,
|
||||
custom_prompt,
|
||||
prompt_factory,
|
||||
)
|
||||
from ..common_utils import AnthropicError
|
||||
from .transformation import AnthropicConfig
|
||||
|
||||
|
||||
class AnthropicConstants(Enum):
|
||||
|
@ -86,558 +93,6 @@ class AnthropicConstants(Enum):
|
|||
# constants from https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/_constants.py
|
||||
|
||||
|
||||
class AnthropicError(Exception):
|
||||
def __init__(self, status_code: int, message):
|
||||
self.status_code = status_code
|
||||
self.message: str = message
|
||||
self.request = httpx.Request(
|
||||
method="POST", url="https://api.anthropic.com/v1/messages"
|
||||
)
|
||||
self.response = httpx.Response(status_code=status_code, request=self.request)
|
||||
super().__init__(
|
||||
self.message
|
||||
) # Call the base class constructor with the parameters it needs
|
||||
|
||||
|
||||
class AnthropicConfig:
|
||||
"""
|
||||
Reference: https://docs.anthropic.com/claude/reference/messages_post
|
||||
|
||||
to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
|
||||
"""
|
||||
|
||||
max_tokens: Optional[int] = (
|
||||
4096 # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default)
|
||||
)
|
||||
stop_sequences: Optional[list] = None
|
||||
temperature: Optional[int] = None
|
||||
top_p: Optional[int] = None
|
||||
top_k: Optional[int] = None
|
||||
metadata: Optional[dict] = None
|
||||
system: Optional[str] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
max_tokens: Optional[
|
||||
int
|
||||
] = 4096, # You can pass in a value yourself or use the default value 4096
|
||||
stop_sequences: Optional[list] = None,
|
||||
temperature: Optional[int] = None,
|
||||
top_p: Optional[int] = None,
|
||||
top_k: Optional[int] = None,
|
||||
metadata: Optional[dict] = None,
|
||||
system: Optional[str] = None,
|
||||
) -> None:
|
||||
locals_ = locals()
|
||||
for key, value in locals_.items():
|
||||
if key != "self" and value is not None:
|
||||
setattr(self.__class__, key, value)
|
||||
|
||||
@classmethod
|
||||
def get_config(cls):
|
||||
return {
|
||||
k: v
|
||||
for k, v in cls.__dict__.items()
|
||||
if not k.startswith("__")
|
||||
and not isinstance(
|
||||
v,
|
||||
(
|
||||
types.FunctionType,
|
||||
types.BuiltinFunctionType,
|
||||
classmethod,
|
||||
staticmethod,
|
||||
),
|
||||
)
|
||||
and v is not None
|
||||
}
|
||||
|
||||
def get_supported_openai_params(self):
|
||||
return [
|
||||
"stream",
|
||||
"stop",
|
||||
"temperature",
|
||||
"top_p",
|
||||
"max_tokens",
|
||||
"max_completion_tokens",
|
||||
"tools",
|
||||
"tool_choice",
|
||||
"extra_headers",
|
||||
]
|
||||
|
||||
def get_cache_control_headers(self) -> dict:
|
||||
return {
|
||||
"anthropic-version": "2023-06-01",
|
||||
"anthropic-beta": "prompt-caching-2024-07-31",
|
||||
}
|
||||
|
||||
def map_openai_params(self, non_default_params: dict, optional_params: dict):
|
||||
for param, value in non_default_params.items():
|
||||
if param == "max_tokens":
|
||||
optional_params["max_tokens"] = value
|
||||
if param == "max_completion_tokens":
|
||||
optional_params["max_tokens"] = value
|
||||
if param == "tools":
|
||||
optional_params["tools"] = value
|
||||
if param == "tool_choice":
|
||||
_tool_choice: Optional[AnthropicMessagesToolChoice] = None
|
||||
if value == "auto":
|
||||
_tool_choice = {"type": "auto"}
|
||||
elif value == "required":
|
||||
_tool_choice = {"type": "any"}
|
||||
elif isinstance(value, dict):
|
||||
_tool_choice = {"type": "tool", "name": value["function"]["name"]}
|
||||
|
||||
if _tool_choice is not None:
|
||||
optional_params["tool_choice"] = _tool_choice
|
||||
if param == "stream" and value == True:
|
||||
optional_params["stream"] = value
|
||||
if param == "stop":
|
||||
if isinstance(value, str):
|
||||
if (
|
||||
value == "\n"
|
||||
) and litellm.drop_params == True: # anthropic doesn't allow whitespace characters as stop-sequences
|
||||
continue
|
||||
value = [value]
|
||||
elif isinstance(value, list):
|
||||
new_v = []
|
||||
for v in value:
|
||||
if (
|
||||
v == "\n"
|
||||
) and litellm.drop_params == True: # anthropic doesn't allow whitespace characters as stop-sequences
|
||||
continue
|
||||
new_v.append(v)
|
||||
if len(new_v) > 0:
|
||||
value = new_v
|
||||
else:
|
||||
continue
|
||||
optional_params["stop_sequences"] = value
|
||||
if param == "temperature":
|
||||
optional_params["temperature"] = value
|
||||
if param == "top_p":
|
||||
optional_params["top_p"] = value
|
||||
return optional_params
|
||||
|
||||
def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
|
||||
"""
|
||||
Return if {"cache_control": ..} in message content block
|
||||
|
||||
Used to check if anthropic prompt caching headers need to be set.
|
||||
"""
|
||||
for message in messages:
|
||||
if message["content"] is not None and isinstance(message["content"], list):
|
||||
for content in message["content"]:
|
||||
if "cache_control" in content:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def translate_system_message(
|
||||
self, messages: List[AllMessageValues]
|
||||
) -> List[AnthropicSystemMessageContent]:
|
||||
system_prompt_indices = []
|
||||
anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
|
||||
for idx, message in enumerate(messages):
|
||||
if message["role"] == "system":
|
||||
valid_content: bool = False
|
||||
system_message_block = ChatCompletionSystemMessage(**message)
|
||||
if isinstance(system_message_block["content"], str):
|
||||
anthropic_system_message_content = AnthropicSystemMessageContent(
|
||||
type="text",
|
||||
text=system_message_block["content"],
|
||||
)
|
||||
if "cache_control" in system_message_block:
|
||||
anthropic_system_message_content["cache_control"] = (
|
||||
system_message_block["cache_control"]
|
||||
)
|
||||
anthropic_system_message_list.append(
|
||||
anthropic_system_message_content
|
||||
)
|
||||
valid_content = True
|
||||
elif isinstance(message["content"], list):
|
||||
for _content in message["content"]:
|
||||
anthropic_system_message_content = (
|
||||
AnthropicSystemMessageContent(
|
||||
type=_content.get("type"),
|
||||
text=_content.get("text"),
|
||||
)
|
||||
)
|
||||
if "cache_control" in _content:
|
||||
anthropic_system_message_content["cache_control"] = (
|
||||
_content["cache_control"]
|
||||
)
|
||||
|
||||
anthropic_system_message_list.append(
|
||||
anthropic_system_message_content
|
||||
)
|
||||
valid_content = True
|
||||
|
||||
if valid_content:
|
||||
system_prompt_indices.append(idx)
|
||||
if len(system_prompt_indices) > 0:
|
||||
for idx in reversed(system_prompt_indices):
|
||||
messages.pop(idx)
|
||||
|
||||
return anthropic_system_message_list
|
||||
|
||||
### FOR [BETA] `/v1/messages` endpoint support
|
||||
|
||||
def translatable_anthropic_params(self) -> List:
|
||||
"""
|
||||
Which anthropic params, we need to translate to the openai format.
|
||||
"""
|
||||
return ["messages", "metadata", "system", "tool_choice", "tools"]
|
||||
|
||||
def translate_anthropic_messages_to_openai(
|
||||
self,
|
||||
messages: List[
|
||||
Union[
|
||||
AnthropicMessagesUserMessageParam,
|
||||
AnthopicMessagesAssistantMessageParam,
|
||||
]
|
||||
],
|
||||
) -> List:
|
||||
new_messages: List[AllMessageValues] = []
|
||||
for m in messages:
|
||||
user_message: Optional[ChatCompletionUserMessage] = None
|
||||
tool_message_list: List[ChatCompletionToolMessage] = []
|
||||
new_user_content_list: List[
|
||||
Union[ChatCompletionTextObject, ChatCompletionImageObject]
|
||||
] = []
|
||||
## USER MESSAGE ##
|
||||
if m["role"] == "user":
|
||||
## translate user message
|
||||
if isinstance(m["content"], str):
|
||||
user_message = ChatCompletionUserMessage(
|
||||
role="user", content=m["content"]
|
||||
)
|
||||
elif isinstance(m["content"], list):
|
||||
for content in m["content"]:
|
||||
if content["type"] == "text":
|
||||
text_obj = ChatCompletionTextObject(
|
||||
type="text", text=content["text"]
|
||||
)
|
||||
new_user_content_list.append(text_obj)
|
||||
elif content["type"] == "image":
|
||||
image_url = ChatCompletionImageUrlObject(
|
||||
url=f"data:{content['type']};base64,{content['source']}"
|
||||
)
|
||||
image_obj = ChatCompletionImageObject(
|
||||
type="image_url", image_url=image_url
|
||||
)
|
||||
|
||||
new_user_content_list.append(image_obj)
|
||||
elif content["type"] == "tool_result":
|
||||
if "content" not in content:
|
||||
tool_result = ChatCompletionToolMessage(
|
||||
role="tool",
|
||||
tool_call_id=content["tool_use_id"],
|
||||
content="",
|
||||
)
|
||||
tool_message_list.append(tool_result)
|
||||
elif isinstance(content["content"], str):
|
||||
tool_result = ChatCompletionToolMessage(
|
||||
role="tool",
|
||||
tool_call_id=content["tool_use_id"],
|
||||
content=content["content"],
|
||||
)
|
||||
tool_message_list.append(tool_result)
|
||||
elif isinstance(content["content"], list):
|
||||
for c in content["content"]:
|
||||
if c["type"] == "text":
|
||||
tool_result = ChatCompletionToolMessage(
|
||||
role="tool",
|
||||
tool_call_id=content["tool_use_id"],
|
||||
content=c["text"],
|
||||
)
|
||||
tool_message_list.append(tool_result)
|
||||
elif c["type"] == "image":
|
||||
image_str = (
|
||||
f"data:{c['type']};base64,{c['source']}"
|
||||
)
|
||||
tool_result = ChatCompletionToolMessage(
|
||||
role="tool",
|
||||
tool_call_id=content["tool_use_id"],
|
||||
content=image_str,
|
||||
)
|
||||
tool_message_list.append(tool_result)
|
||||
|
||||
if user_message is not None:
|
||||
new_messages.append(user_message)
|
||||
|
||||
if len(new_user_content_list) > 0:
|
||||
new_messages.append({"role": "user", "content": new_user_content_list}) # type: ignore
|
||||
|
||||
if len(tool_message_list) > 0:
|
||||
new_messages.extend(tool_message_list)
|
||||
|
||||
## ASSISTANT MESSAGE ##
|
||||
assistant_message_str: Optional[str] = None
|
||||
tool_calls: List[ChatCompletionAssistantToolCall] = []
|
||||
if m["role"] == "assistant":
|
||||
if isinstance(m["content"], str):
|
||||
assistant_message_str = m["content"]
|
||||
elif isinstance(m["content"], list):
|
||||
for content in m["content"]:
|
||||
if content["type"] == "text":
|
||||
if assistant_message_str is None:
|
||||
assistant_message_str = content["text"]
|
||||
else:
|
||||
assistant_message_str += content["text"]
|
||||
elif content["type"] == "tool_use":
|
||||
function_chunk = ChatCompletionToolCallFunctionChunk(
|
||||
name=content["name"],
|
||||
arguments=json.dumps(content["input"]),
|
||||
)
|
||||
|
||||
tool_calls.append(
|
||||
ChatCompletionAssistantToolCall(
|
||||
id=content["id"],
|
||||
type="function",
|
||||
function=function_chunk,
|
||||
)
|
||||
)
|
||||
|
||||
if assistant_message_str is not None or len(tool_calls) > 0:
|
||||
assistant_message = ChatCompletionAssistantMessage(
|
||||
role="assistant",
|
||||
content=assistant_message_str,
|
||||
)
|
||||
if len(tool_calls) > 0:
|
||||
assistant_message["tool_calls"] = tool_calls
|
||||
new_messages.append(assistant_message)
|
||||
|
||||
return new_messages
|
||||
|
||||
def translate_anthropic_tool_choice_to_openai(
|
||||
self, tool_choice: AnthropicMessagesToolChoice
|
||||
) -> ChatCompletionToolChoiceValues:
|
||||
if tool_choice["type"] == "any":
|
||||
return "required"
|
||||
elif tool_choice["type"] == "auto":
|
||||
return "auto"
|
||||
elif tool_choice["type"] == "tool":
|
||||
tc_function_param = ChatCompletionToolChoiceFunctionParam(
|
||||
name=tool_choice.get("name", "")
|
||||
)
|
||||
return ChatCompletionToolChoiceObjectParam(
|
||||
type="function", function=tc_function_param
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Incompatible tool choice param submitted - {}".format(tool_choice)
|
||||
)
|
||||
|
||||
def translate_anthropic_tools_to_openai(
|
||||
self, tools: List[AnthropicMessagesTool]
|
||||
) -> List[ChatCompletionToolParam]:
|
||||
new_tools: List[ChatCompletionToolParam] = []
|
||||
for tool in tools:
|
||||
function_chunk = ChatCompletionToolParamFunctionChunk(
|
||||
name=tool["name"],
|
||||
parameters=tool["input_schema"],
|
||||
)
|
||||
if "description" in tool:
|
||||
function_chunk["description"] = tool["description"]
|
||||
new_tools.append(
|
||||
ChatCompletionToolParam(type="function", function=function_chunk)
|
||||
)
|
||||
|
||||
return new_tools
|
||||
|
||||
def translate_anthropic_to_openai(
|
||||
self, anthropic_message_request: AnthropicMessagesRequest
|
||||
) -> ChatCompletionRequest:
|
||||
"""
|
||||
This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format.
|
||||
"""
|
||||
new_messages: List[AllMessageValues] = []
|
||||
|
||||
## CONVERT ANTHROPIC MESSAGES TO OPENAI
|
||||
new_messages = self.translate_anthropic_messages_to_openai(
|
||||
messages=anthropic_message_request["messages"]
|
||||
)
|
||||
## ADD SYSTEM MESSAGE TO MESSAGES
|
||||
if "system" in anthropic_message_request:
|
||||
new_messages.insert(
|
||||
0,
|
||||
ChatCompletionSystemMessage(
|
||||
role="system", content=anthropic_message_request["system"]
|
||||
),
|
||||
)
|
||||
|
||||
new_kwargs: ChatCompletionRequest = {
|
||||
"model": anthropic_message_request["model"],
|
||||
"messages": new_messages,
|
||||
}
|
||||
## CONVERT METADATA (user_id)
|
||||
if "metadata" in anthropic_message_request:
|
||||
if "user_id" in anthropic_message_request["metadata"]:
|
||||
new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
|
||||
|
||||
# Pass litellm proxy specific metadata
|
||||
if "litellm_metadata" in anthropic_message_request:
|
||||
# metadata will be passed to litellm.acompletion(), it's a litellm_param
|
||||
new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata")
|
||||
|
||||
## CONVERT TOOL CHOICE
|
||||
if "tool_choice" in anthropic_message_request:
|
||||
new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
|
||||
tool_choice=anthropic_message_request["tool_choice"]
|
||||
)
|
||||
## CONVERT TOOLS
|
||||
if "tools" in anthropic_message_request:
|
||||
new_kwargs["tools"] = self.translate_anthropic_tools_to_openai(
|
||||
tools=anthropic_message_request["tools"]
|
||||
)
|
||||
|
||||
translatable_params = self.translatable_anthropic_params()
|
||||
for k, v in anthropic_message_request.items():
|
||||
if k not in translatable_params: # pass remaining params as is
|
||||
new_kwargs[k] = v # type: ignore
|
||||
|
||||
return new_kwargs
|
||||
|
||||
def _translate_openai_content_to_anthropic(
|
||||
self, choices: List[Choices]
|
||||
) -> List[
|
||||
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
|
||||
]:
|
||||
new_content: List[
|
||||
Union[
|
||||
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
|
||||
]
|
||||
] = []
|
||||
for choice in choices:
|
||||
if (
|
||||
choice.message.tool_calls is not None
|
||||
and len(choice.message.tool_calls) > 0
|
||||
):
|
||||
for tool_call in choice.message.tool_calls:
|
||||
new_content.append(
|
||||
AnthropicResponseContentBlockToolUse(
|
||||
type="tool_use",
|
||||
id=tool_call.id,
|
||||
name=tool_call.function.name or "",
|
||||
input=json.loads(tool_call.function.arguments),
|
||||
)
|
||||
)
|
||||
elif choice.message.content is not None:
|
||||
new_content.append(
|
||||
AnthropicResponseContentBlockText(
|
||||
type="text", text=choice.message.content
|
||||
)
|
||||
)
|
||||
|
||||
return new_content
|
||||
|
||||
def _translate_openai_finish_reason_to_anthropic(
|
||||
self, openai_finish_reason: str
|
||||
) -> AnthropicFinishReason:
|
||||
if openai_finish_reason == "stop":
|
||||
return "end_turn"
|
||||
elif openai_finish_reason == "length":
|
||||
return "max_tokens"
|
||||
elif openai_finish_reason == "tool_calls":
|
||||
return "tool_use"
|
||||
return "end_turn"
|
||||
|
||||
def translate_openai_response_to_anthropic(
|
||||
self, response: litellm.ModelResponse
|
||||
) -> AnthropicResponse:
|
||||
## translate content block
|
||||
anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore
|
||||
## extract finish reason
|
||||
anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic(
|
||||
openai_finish_reason=response.choices[0].finish_reason # type: ignore
|
||||
)
|
||||
# extract usage
|
||||
usage: litellm.Usage = getattr(response, "usage")
|
||||
anthropic_usage = AnthropicResponseUsageBlock(
|
||||
input_tokens=usage.prompt_tokens or 0,
|
||||
output_tokens=usage.completion_tokens or 0,
|
||||
)
|
||||
translated_obj = AnthropicResponse(
|
||||
id=response.id,
|
||||
type="message",
|
||||
role="assistant",
|
||||
model=response.model or "unknown-model",
|
||||
stop_sequence=None,
|
||||
usage=anthropic_usage,
|
||||
content=anthropic_content,
|
||||
stop_reason=anthropic_finish_reason,
|
||||
)
|
||||
|
||||
return translated_obj
|
||||
|
||||
def _translate_streaming_openai_chunk_to_anthropic(
|
||||
self, choices: List[OpenAIStreamingChoice]
|
||||
) -> Tuple[
|
||||
Literal["text_delta", "input_json_delta"],
|
||||
Union[ContentTextBlockDelta, ContentJsonBlockDelta],
|
||||
]:
|
||||
text: str = ""
|
||||
partial_json: Optional[str] = None
|
||||
for choice in choices:
|
||||
if choice.delta.content is not None:
|
||||
text += choice.delta.content
|
||||
elif choice.delta.tool_calls is not None:
|
||||
partial_json = ""
|
||||
for tool in choice.delta.tool_calls:
|
||||
if (
|
||||
tool.function is not None
|
||||
and tool.function.arguments is not None
|
||||
):
|
||||
partial_json += tool.function.arguments
|
||||
|
||||
if partial_json is not None:
|
||||
return "input_json_delta", ContentJsonBlockDelta(
|
||||
type="input_json_delta", partial_json=partial_json
|
||||
)
|
||||
else:
|
||||
return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)
|
||||
|
||||
def translate_streaming_openai_response_to_anthropic(
|
||||
self, response: litellm.ModelResponse
|
||||
) -> Union[ContentBlockDelta, MessageBlockDelta]:
|
||||
## base case - final chunk w/ finish reason
|
||||
if response.choices[0].finish_reason is not None:
|
||||
delta = MessageDelta(
|
||||
stop_reason=self._translate_openai_finish_reason_to_anthropic(
|
||||
response.choices[0].finish_reason
|
||||
),
|
||||
)
|
||||
if getattr(response, "usage", None) is not None:
|
||||
litellm_usage_chunk: Optional[litellm.Usage] = response.usage # type: ignore
|
||||
elif (
|
||||
hasattr(response, "_hidden_params")
|
||||
and "usage" in response._hidden_params
|
||||
):
|
||||
litellm_usage_chunk = response._hidden_params["usage"]
|
||||
else:
|
||||
litellm_usage_chunk = None
|
||||
if litellm_usage_chunk is not None:
|
||||
usage_delta = UsageDelta(
|
||||
input_tokens=litellm_usage_chunk.prompt_tokens or 0,
|
||||
output_tokens=litellm_usage_chunk.completion_tokens or 0,
|
||||
)
|
||||
else:
|
||||
usage_delta = UsageDelta(input_tokens=0, output_tokens=0)
|
||||
return MessageBlockDelta(
|
||||
type="message_delta", delta=delta, usage=usage_delta
|
||||
)
|
||||
(
|
||||
type_of_content,
|
||||
content_block_delta,
|
||||
) = self._translate_streaming_openai_chunk_to_anthropic(
|
||||
choices=response.choices # type: ignore
|
||||
)
|
||||
return ContentBlockDelta(
|
||||
type="content_block_delta",
|
||||
index=response.choices[0].index,
|
||||
delta=content_block_delta,
|
||||
)
|
||||
|
||||
|
||||
# makes headers for API call
|
||||
def validate_environment(
|
||||
api_key, user_headers, model, messages: List[AllMessageValues]
|
||||
|
@ -684,8 +139,14 @@ async def make_call(
|
|||
api_base, headers=headers, data=data, stream=True, timeout=timeout
|
||||
)
|
||||
except httpx.HTTPStatusError as e:
|
||||
error_headers = getattr(e, "headers", None)
|
||||
error_response = getattr(e, "response", None)
|
||||
if error_headers is None and error_response:
|
||||
error_headers = getattr(error_response, "headers", None)
|
||||
raise AnthropicError(
|
||||
status_code=e.response.status_code, message=await e.response.aread()
|
||||
status_code=e.response.status_code,
|
||||
message=await e.response.aread(),
|
||||
headers=error_headers,
|
||||
)
|
||||
except Exception as e:
|
||||
for exception in litellm.LITELLM_EXCEPTION_TYPES:
|
||||
|
@ -726,8 +187,14 @@ def make_sync_call(
|
|||
api_base, headers=headers, data=data, stream=True, timeout=timeout
|
||||
)
|
||||
except httpx.HTTPStatusError as e:
|
||||
error_headers = getattr(e, "headers", None)
|
||||
error_response = getattr(e, "response", None)
|
||||
if error_headers is None and error_response:
|
||||
error_headers = getattr(error_response, "headers", None)
|
||||
raise AnthropicError(
|
||||
status_code=e.response.status_code, message=e.response.read()
|
||||
status_code=e.response.status_code,
|
||||
message=e.response.read(),
|
||||
headers=error_headers,
|
||||
)
|
||||
except Exception as e:
|
||||
for exception in litellm.LITELLM_EXCEPTION_TYPES:
|
||||
|
@ -736,7 +203,12 @@ def make_sync_call(
|
|||
raise AnthropicError(status_code=500, message=str(e))
|
||||
|
||||
if response.status_code != 200:
|
||||
raise AnthropicError(status_code=response.status_code, message=response.read())
|
||||
response_headers = getattr(response, "headers", None)
|
||||
raise AnthropicError(
|
||||
status_code=response.status_code,
|
||||
message=response.read(),
|
||||
headers=response_headers,
|
||||
)
|
||||
|
||||
completion_stream = ModelResponseIterator(
|
||||
streaming_response=response.iter_lines(), sync_stream=True
|
||||
|
@ -763,7 +235,7 @@ class AnthropicChatCompletion(BaseLLM):
|
|||
response: Union[requests.Response, httpx.Response],
|
||||
model_response: ModelResponse,
|
||||
stream: bool,
|
||||
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
|
||||
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore
|
||||
optional_params: dict,
|
||||
api_key: str,
|
||||
data: Union[dict, str],
|
||||
|
@@ -772,6 +244,14 @@ class AnthropicChatCompletion(BaseLLM):
        encoding,
        json_mode: bool,
    ) -> ModelResponse:
        _hidden_params = {}
        _response_headers = dict(response.headers)
        if _response_headers is not None:
            llm_response_headers = {
                "{}-{}".format("llm_provider", k): v
                for k, v in _response_headers.items()
            }
            _hidden_params["additional_headers"] = llm_response_headers
        ## LOGGING
        logging_obj.post_call(
            input=messages,
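For context, a sketch of how a caller could read the provider headers this hunk copies into `_hidden_params["additional_headers"]` (each key prefixed with "llm_provider-"). The access pattern on the returned response object is an assumption, not shown in this diff.

import litellm

# Illustrative only: inspect provider response headers surfaced by the change above.
response = litellm.completion(
    model="anthropic/claude-3-haiku-20240307",
    messages=[{"role": "user", "content": "ping"}],
)
provider_headers = response._hidden_params.get("additional_headers", {})
# e.g. provider_headers.get("llm_provider-retry-after") when anthropic sends it
for name, value in provider_headers.items():
    print(name, value)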
@ -783,14 +263,21 @@ class AnthropicChatCompletion(BaseLLM):
|
|||
## RESPONSE OBJECT
|
||||
try:
|
||||
completion_response = response.json()
|
||||
except:
|
||||
except Exception as e:
|
||||
response_headers = getattr(response, "headers", None)
|
||||
raise AnthropicError(
|
||||
message=response.text, status_code=response.status_code
|
||||
message="Unable to get json response - {}, Original Response: {}".format(
|
||||
str(e), response.text
|
||||
),
|
||||
status_code=response.status_code,
|
||||
headers=response_headers,
|
||||
)
|
||||
if "error" in completion_response:
|
||||
response_headers = getattr(response, "headers", None)
|
||||
raise AnthropicError(
|
||||
message=str(completion_response["error"]),
|
||||
status_code=response.status_code,
|
||||
headers=response_headers,
|
||||
)
|
||||
else:
|
||||
text_content = ""
|
||||
|
@ -856,6 +343,8 @@ class AnthropicChatCompletion(BaseLLM):
|
|||
if "cache_read_input_tokens" in _usage:
|
||||
usage["cache_read_input_tokens"] = _usage["cache_read_input_tokens"]
|
||||
setattr(model_response, "usage", usage) # type: ignore
|
||||
|
||||
model_response._hidden_params = _hidden_params
|
||||
return model_response
|
||||
|
||||
async def acompletion_stream_function(
|
||||
|
@ -919,9 +408,9 @@ class AnthropicChatCompletion(BaseLLM):
|
|||
litellm_params=None,
|
||||
logger_fn=None,
|
||||
headers={},
|
||||
client=None,
|
||||
client: Optional[AsyncHTTPHandler] = None,
|
||||
) -> Union[ModelResponse, CustomStreamWrapper]:
|
||||
async_handler = get_async_httpx_client(
|
||||
async_handler = client or get_async_httpx_client(
|
||||
llm_provider=litellm.LlmProviders.ANTHROPIC
|
||||
)
|
||||
|
||||
|
@ -937,7 +426,17 @@ class AnthropicChatCompletion(BaseLLM):
|
|||
original_response=str(e),
|
||||
additional_args={"complete_input_dict": data},
|
||||
)
|
||||
raise e
|
||||
status_code = getattr(e, "status_code", 500)
|
||||
error_headers = getattr(e, "headers", None)
|
||||
error_text = getattr(e, "text", str(e))
|
||||
error_response = getattr(e, "response", None)
|
||||
if error_headers is None and error_response:
|
||||
error_headers = getattr(error_response, "headers", None)
|
||||
raise AnthropicError(
|
||||
message=error_text,
|
||||
status_code=status_code,
|
||||
headers=error_headers,
|
||||
)
|
||||
|
||||
return self._process_response(
|
||||
model=model,
|
||||
|
@ -977,73 +476,18 @@ class AnthropicChatCompletion(BaseLLM):
|
|||
_is_function_call = False
|
||||
messages = copy.deepcopy(messages)
|
||||
optional_params = copy.deepcopy(optional_params)
|
||||
if model in custom_prompt_dict:
|
||||
# check if the model has a registered custom prompt
|
||||
model_prompt_details = custom_prompt_dict[model]
|
||||
prompt = custom_prompt(
|
||||
role_dict=model_prompt_details["roles"],
|
||||
initial_prompt_value=model_prompt_details["initial_prompt_value"],
|
||||
final_prompt_value=model_prompt_details["final_prompt_value"],
|
||||
messages=messages,
|
||||
)
|
||||
else:
|
||||
# Separate system prompt from rest of message
|
||||
anthropic_system_message_list = AnthropicConfig().translate_system_message(
|
||||
messages=messages
|
||||
)
|
||||
# Handling anthropic API Prompt Caching
|
||||
if len(anthropic_system_message_list) > 0:
|
||||
optional_params["system"] = anthropic_system_message_list
|
||||
# Format rest of message according to anthropic guidelines
|
||||
try:
|
||||
messages = prompt_factory(
|
||||
model=model, messages=messages, custom_llm_provider="anthropic"
|
||||
)
|
||||
except Exception as e:
|
||||
raise AnthropicError(
|
||||
status_code=400,
|
||||
message="{}\nReceived Messages={}".format(str(e), messages),
|
||||
) # don't use verbose_logger.exception, if exception is raised
|
||||
|
||||
## Load Config
|
||||
config = litellm.AnthropicConfig.get_config()
|
||||
for k, v in config.items():
|
||||
if (
|
||||
k not in optional_params
|
||||
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||
optional_params[k] = v
|
||||
|
||||
## Handle Tool Calling
|
||||
if "tools" in optional_params:
|
||||
_is_function_call = True
|
||||
if "anthropic-beta" not in headers:
|
||||
# default to v1 of "anthropic-beta"
|
||||
headers["anthropic-beta"] = "tools-2024-05-16"
|
||||
|
||||
anthropic_tools = []
|
||||
for tool in optional_params["tools"]:
|
||||
if "input_schema" in tool: # assume in anthropic format
|
||||
anthropic_tools.append(tool)
|
||||
else: # assume openai tool call
|
||||
new_tool = tool["function"]
|
||||
new_tool["input_schema"] = new_tool.pop("parameters") # rename key
|
||||
if "cache_control" in tool:
|
||||
new_tool["cache_control"] = tool["cache_control"]
|
||||
anthropic_tools.append(new_tool)
|
||||
|
||||
optional_params["tools"] = anthropic_tools
|
||||
|
||||
stream = optional_params.pop("stream", None)
|
||||
is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
|
||||
json_mode: bool = optional_params.pop("json_mode", False)
|
||||
is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
|
||||
|
||||
data = {
|
||||
"messages": messages,
|
||||
**optional_params,
|
||||
}
|
||||
|
||||
if is_vertex_request is False:
|
||||
data["model"] = model
|
||||
data = AnthropicConfig()._transform_request(
|
||||
model=model,
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
headers=headers,
|
||||
_is_function_call=_is_function_call,
|
||||
is_vertex_request=is_vertex_request,
|
||||
)
|
||||
|
||||
## LOGGING
|
||||
logging_obj.pre_call(
|
||||
|
@ -1136,12 +580,25 @@ class AnthropicChatCompletion(BaseLLM):
|
|||
client = HTTPHandler(timeout=timeout) # type: ignore
|
||||
else:
|
||||
client = client
|
||||
|
||||
try:
|
||||
response = client.post(
|
||||
api_base, headers=headers, data=json.dumps(data), timeout=timeout
|
||||
api_base,
|
||||
headers=headers,
|
||||
data=json.dumps(data),
|
||||
timeout=timeout,
|
||||
)
|
||||
if response.status_code != 200:
|
||||
except Exception as e:
|
||||
status_code = getattr(e, "status_code", 500)
|
||||
error_headers = getattr(e, "headers", None)
|
||||
error_text = getattr(e, "text", str(e))
|
||||
error_response = getattr(e, "response", None)
|
||||
if error_headers is None and error_response:
|
||||
error_headers = getattr(error_response, "headers", None)
|
||||
raise AnthropicError(
|
||||
status_code=response.status_code, message=response.text
|
||||
message=error_text,
|
||||
status_code=status_code,
|
||||
headers=error_headers,
|
||||
)
|
||||
|
||||
return self._process_response(
|
||||
|
@ -1151,7 +608,7 @@ class AnthropicChatCompletion(BaseLLM):
|
|||
stream=stream,
|
||||
logging_obj=logging_obj,
|
||||
api_key=api_key,
|
||||
data=data,
|
||||
data=data, # type: ignore
|
||||
messages=messages,
|
||||
print_verbose=print_verbose,
|
||||
optional_params=optional_params,
|
||||
|
@ -1192,7 +649,7 @@ class ModelResponseIterator:
|
|||
return False
|
||||
|
||||
def _handle_usage(
|
||||
self, anthropic_usage_chunk: dict
|
||||
self, anthropic_usage_chunk: Union[dict, UsageDelta]
|
||||
) -> AnthropicChatCompletionUsageBlock:
|
||||
special_fields = ["input_tokens", "output_tokens"]
|
||||
|
||||
|
@ -1203,15 +660,19 @@ class ModelResponseIterator:
|
|||
+ anthropic_usage_chunk.get("output_tokens", 0),
|
||||
)
|
||||
|
||||
if "cache_creation_input_tokens" in anthropic_usage_chunk:
|
||||
usage_block["cache_creation_input_tokens"] = anthropic_usage_chunk[
|
||||
cache_creation_input_tokens = anthropic_usage_chunk.get(
|
||||
"cache_creation_input_tokens"
|
||||
]
|
||||
)
|
||||
if cache_creation_input_tokens is not None and isinstance(
|
||||
cache_creation_input_tokens, int
|
||||
):
|
||||
usage_block["cache_creation_input_tokens"] = cache_creation_input_tokens
|
||||
|
||||
if "cache_read_input_tokens" in anthropic_usage_chunk:
|
||||
usage_block["cache_read_input_tokens"] = anthropic_usage_chunk[
|
||||
"cache_read_input_tokens"
|
||||
]
|
||||
cache_read_input_tokens = anthropic_usage_chunk.get("cache_read_input_tokens")
|
||||
if cache_read_input_tokens is not None and isinstance(
|
||||
cache_read_input_tokens, int
|
||||
):
|
||||
usage_block["cache_read_input_tokens"] = cache_read_input_tokens
|
||||
|
||||
return usage_block
|
||||
|
||||
|
@ -1313,6 +774,7 @@ class ModelResponseIterator:
|
|||
}
|
||||
"""
|
||||
message_start_block = MessageStartBlock(**chunk) # type: ignore
|
||||
if "usage" in message_start_block["message"]:
|
||||
usage = self._handle_usage(
|
||||
anthropic_usage_chunk=message_start_block["message"]["usage"]
|
||||
)
|
litellm/llms/anthropic/chat/transformation.py (new file, 289 lines)
@@ -0,0 +1,289 @@
|
|||
import types
|
||||
from typing import List, Literal, Optional, Tuple, Union
|
||||
|
||||
import litellm
|
||||
from litellm.llms.prompt_templates.factory import anthropic_messages_pt
|
||||
from litellm.types.llms.anthropic import (
|
||||
AnthropicMessageRequestBase,
|
||||
AnthropicMessagesRequest,
|
||||
AnthropicMessagesToolChoice,
|
||||
AnthropicSystemMessageContent,
|
||||
)
|
||||
from litellm.types.llms.openai import AllMessageValues, ChatCompletionSystemMessage
|
||||
from litellm.utils import has_tool_call_blocks
|
||||
|
||||
from ..common_utils import AnthropicError
|
||||
|
||||
|
||||
class AnthropicConfig:
|
||||
"""
|
||||
Reference: https://docs.anthropic.com/claude/reference/messages_post
|
||||
|
||||
to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
|
||||
"""
|
||||
|
||||
max_tokens: Optional[int] = (
|
||||
4096 # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default)
|
||||
)
|
||||
stop_sequences: Optional[list] = None
|
||||
temperature: Optional[int] = None
|
||||
top_p: Optional[int] = None
|
||||
top_k: Optional[int] = None
|
||||
metadata: Optional[dict] = None
|
||||
system: Optional[str] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
max_tokens: Optional[
|
||||
int
|
||||
] = 4096, # You can pass in a value yourself or use the default value 4096
|
||||
stop_sequences: Optional[list] = None,
|
||||
temperature: Optional[int] = None,
|
||||
top_p: Optional[int] = None,
|
||||
top_k: Optional[int] = None,
|
||||
metadata: Optional[dict] = None,
|
||||
system: Optional[str] = None,
|
||||
) -> None:
|
||||
locals_ = locals()
|
||||
for key, value in locals_.items():
|
||||
if key != "self" and value is not None:
|
||||
setattr(self.__class__, key, value)
|
||||
|
||||
@classmethod
|
||||
def get_config(cls):
|
||||
return {
|
||||
k: v
|
||||
for k, v in cls.__dict__.items()
|
||||
if not k.startswith("__")
|
||||
and not isinstance(
|
||||
v,
|
||||
(
|
||||
types.FunctionType,
|
||||
types.BuiltinFunctionType,
|
||||
classmethod,
|
||||
staticmethod,
|
||||
),
|
||||
)
|
||||
and v is not None
|
||||
}
|
||||
|
||||
def get_supported_openai_params(self):
|
||||
return [
|
||||
"stream",
|
||||
"stop",
|
||||
"temperature",
|
||||
"top_p",
|
||||
"max_tokens",
|
||||
"max_completion_tokens",
|
||||
"tools",
|
||||
"tool_choice",
|
||||
"extra_headers",
|
||||
]
|
||||
|
||||
def get_cache_control_headers(self) -> dict:
|
||||
return {
|
||||
"anthropic-version": "2023-06-01",
|
||||
"anthropic-beta": "prompt-caching-2024-07-31",
|
||||
}
|
||||
|
||||
def map_openai_params(
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
messages: Optional[List[AllMessageValues]] = None,
|
||||
):
|
||||
for param, value in non_default_params.items():
|
||||
if param == "max_tokens":
|
||||
optional_params["max_tokens"] = value
|
||||
if param == "max_completion_tokens":
|
||||
optional_params["max_tokens"] = value
|
||||
if param == "tools":
|
||||
optional_params["tools"] = value
|
||||
if param == "tool_choice":
|
||||
_tool_choice: Optional[AnthropicMessagesToolChoice] = None
|
||||
if value == "auto":
|
||||
_tool_choice = {"type": "auto"}
|
||||
elif value == "required":
|
||||
_tool_choice = {"type": "any"}
|
||||
elif isinstance(value, dict):
|
||||
_tool_choice = {"type": "tool", "name": value["function"]["name"]}
|
||||
|
||||
if _tool_choice is not None:
|
||||
optional_params["tool_choice"] = _tool_choice
|
||||
if param == "stream" and value is True:
|
||||
optional_params["stream"] = value
|
||||
if param == "stop":
|
||||
if isinstance(value, str):
|
||||
if (
|
||||
value == "\n"
|
||||
) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
|
||||
continue
|
||||
value = [value]
|
||||
elif isinstance(value, list):
|
||||
new_v = []
|
||||
for v in value:
|
||||
if (
|
||||
v == "\n"
|
||||
) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
|
||||
continue
|
||||
new_v.append(v)
|
||||
if len(new_v) > 0:
|
||||
value = new_v
|
||||
else:
|
||||
continue
|
||||
optional_params["stop_sequences"] = value
|
||||
if param == "temperature":
|
||||
optional_params["temperature"] = value
|
||||
if param == "top_p":
|
||||
optional_params["top_p"] = value
|
||||
|
||||
## VALIDATE REQUEST
|
||||
"""
|
||||
Anthropic doesn't support tool calling without `tools=` param specified.
|
||||
"""
|
||||
if (
|
||||
"tools" not in non_default_params
|
||||
and messages is not None
|
||||
and has_tool_call_blocks(messages)
|
||||
):
|
||||
raise litellm.UnsupportedParamsError(
|
||||
message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
|
||||
model="",
|
||||
llm_provider="anthropic",
|
||||
)
|
||||
|
||||
return optional_params
|
||||
|
||||
def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
|
||||
"""
|
||||
Return if {"cache_control": ..} in message content block
|
||||
|
||||
Used to check if anthropic prompt caching headers need to be set.
|
||||
"""
|
||||
for message in messages:
|
||||
_message_content = message.get("content")
|
||||
if _message_content is not None and isinstance(_message_content, list):
|
||||
for content in _message_content:
|
||||
if "cache_control" in content:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def translate_system_message(
|
||||
self, messages: List[AllMessageValues]
|
||||
) -> List[AnthropicSystemMessageContent]:
|
||||
"""
|
||||
Translate system message to anthropic format.
|
||||
|
||||
Removes system message from the original list and returns a new list of anthropic system message content.
|
||||
"""
|
||||
system_prompt_indices = []
|
||||
anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
|
||||
for idx, message in enumerate(messages):
|
||||
if message["role"] == "system":
|
||||
valid_content: bool = False
|
||||
system_message_block = ChatCompletionSystemMessage(**message)
|
||||
if isinstance(system_message_block["content"], str):
|
||||
anthropic_system_message_content = AnthropicSystemMessageContent(
|
||||
type="text",
|
||||
text=system_message_block["content"],
|
||||
)
|
||||
if "cache_control" in system_message_block:
|
||||
anthropic_system_message_content["cache_control"] = (
|
||||
system_message_block["cache_control"]
|
||||
)
|
||||
anthropic_system_message_list.append(
|
||||
anthropic_system_message_content
|
||||
)
|
||||
valid_content = True
|
||||
elif isinstance(message["content"], list):
|
||||
for _content in message["content"]:
|
||||
anthropic_system_message_content = (
|
||||
AnthropicSystemMessageContent(
|
||||
type=_content.get("type"),
|
||||
text=_content.get("text"),
|
||||
)
|
||||
)
|
||||
if "cache_control" in _content:
|
||||
anthropic_system_message_content["cache_control"] = (
|
||||
_content["cache_control"]
|
||||
)
|
||||
|
||||
anthropic_system_message_list.append(
|
||||
anthropic_system_message_content
|
||||
)
|
||||
valid_content = True
|
||||
|
||||
if valid_content:
|
||||
system_prompt_indices.append(idx)
|
||||
if len(system_prompt_indices) > 0:
|
||||
for idx in reversed(system_prompt_indices):
|
||||
messages.pop(idx)
|
||||
|
||||
return anthropic_system_message_list
|
||||
|
||||
def _transform_request(
|
||||
self,
|
||||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
headers: dict,
|
||||
_is_function_call: bool,
|
||||
is_vertex_request: bool,
|
||||
) -> dict:
|
||||
"""
|
||||
Translate messages to anthropic format.
|
||||
"""
|
||||
# Separate system prompt from rest of message
|
||||
anthropic_system_message_list = self.translate_system_message(messages=messages)
|
||||
# Handling anthropic API Prompt Caching
|
||||
if len(anthropic_system_message_list) > 0:
|
||||
optional_params["system"] = anthropic_system_message_list
|
||||
# Format rest of message according to anthropic guidelines
|
||||
try:
|
||||
anthropic_messages = anthropic_messages_pt(
|
||||
model=model,
|
||||
messages=messages,
|
||||
llm_provider="anthropic",
|
||||
)
|
||||
except Exception as e:
|
||||
raise AnthropicError(
|
||||
status_code=400,
|
||||
message="{}\nReceived Messages={}".format(str(e), messages),
|
||||
) # don't use verbose_logger.exception, if exception is raised
|
||||
|
||||
## Load Config
|
||||
config = litellm.AnthropicConfig.get_config()
|
||||
for k, v in config.items():
|
||||
if (
|
||||
k not in optional_params
|
||||
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||
optional_params[k] = v
|
||||
|
||||
## Handle Tool Calling
|
||||
if "tools" in optional_params:
|
||||
_is_function_call = True
|
||||
if "anthropic-beta" not in headers:
|
||||
# default to v1 of "anthropic-beta"
|
||||
headers["anthropic-beta"] = "tools-2024-05-16"
|
||||
|
||||
anthropic_tools = []
|
||||
for tool in optional_params["tools"]:
|
||||
if "input_schema" in tool: # assume in anthropic format
|
||||
anthropic_tools.append(tool)
|
||||
else: # assume openai tool call
|
||||
new_tool = tool["function"]
|
||||
new_tool["input_schema"] = new_tool.pop("parameters") # rename key
|
||||
if "cache_control" in tool:
|
||||
new_tool["cache_control"] = tool["cache_control"]
|
||||
anthropic_tools.append(new_tool)
|
||||
|
||||
optional_params["tools"] = anthropic_tools
|
||||
|
||||
data = {
|
||||
"messages": anthropic_messages,
|
||||
**optional_params,
|
||||
}
|
||||
if not is_vertex_request:
|
||||
data["model"] = model
|
||||
return data
|
litellm/llms/anthropic/common_utils.py (new file, 26 lines)
@@ -0,0 +1,26 @@
"""
This file contains common utils for anthropic calls.
"""

from typing import Optional

import httpx


class AnthropicError(Exception):
    def __init__(
        self,
        status_code: int,
        message,
        headers: Optional[httpx.Headers] = None,
    ):
        self.status_code = status_code
        self.message: str = message
        self.headers = headers
        self.request = httpx.Request(
            method="POST", url="https://api.anthropic.com/v1/messages"
        )
        self.response = httpx.Response(status_code=status_code, request=self.request)
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs
|
@@ -0,0 +1,425 @@
import json
import types
from typing import Any, Dict, List, Literal, Optional, Tuple, Union

from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice

import litellm
from litellm.types.llms.anthropic import (
    AnthopicMessagesAssistantMessageParam,
    AnthropicChatCompletionUsageBlock,
    AnthropicFinishReason,
    AnthropicMessagesRequest,
    AnthropicMessagesTool,
    AnthropicMessagesToolChoice,
    AnthropicMessagesUserMessageParam,
    AnthropicResponse,
    AnthropicResponseContentBlockText,
    AnthropicResponseContentBlockToolUse,
    AnthropicResponseUsageBlock,
    AnthropicSystemMessageContent,
    ContentBlockDelta,
    ContentBlockStart,
    ContentBlockStop,
    ContentJsonBlockDelta,
    ContentTextBlockDelta,
    MessageBlockDelta,
    MessageDelta,
    MessageStartBlock,
    UsageDelta,
)
from litellm.types.llms.openai import (
    AllMessageValues,
    ChatCompletionAssistantMessage,
    ChatCompletionAssistantToolCall,
    ChatCompletionImageObject,
    ChatCompletionImageUrlObject,
    ChatCompletionRequest,
    ChatCompletionResponseMessage,
    ChatCompletionSystemMessage,
    ChatCompletionTextObject,
    ChatCompletionToolCallChunk,
    ChatCompletionToolCallFunctionChunk,
    ChatCompletionToolChoiceFunctionParam,
    ChatCompletionToolChoiceObjectParam,
    ChatCompletionToolChoiceValues,
    ChatCompletionToolMessage,
    ChatCompletionToolParam,
    ChatCompletionToolParamFunctionChunk,
    ChatCompletionUsageBlock,
    ChatCompletionUserMessage,
    OpenAIMessageContent,
)
from litellm.types.utils import Choices, GenericStreamingChunk
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage

from ...base import BaseLLM
from ...prompt_templates.factory import (
    anthropic_messages_pt,
    custom_prompt,
    prompt_factory,
)


class AnthropicExperimentalPassThroughConfig:
    def __init__(self):
        pass

    ### FOR [BETA] `/v1/messages` endpoint support

    def translatable_anthropic_params(self) -> List:
        """
        Which anthropic params, we need to translate to the openai format.
        """
        return ["messages", "metadata", "system", "tool_choice", "tools"]

    def translate_anthropic_messages_to_openai(
        self,
        messages: List[
            Union[
                AnthropicMessagesUserMessageParam,
                AnthopicMessagesAssistantMessageParam,
            ]
        ],
    ) -> List:
        new_messages: List[AllMessageValues] = []
        for m in messages:
            user_message: Optional[ChatCompletionUserMessage] = None
            tool_message_list: List[ChatCompletionToolMessage] = []
            new_user_content_list: List[
                Union[ChatCompletionTextObject, ChatCompletionImageObject]
            ] = []
            ## USER MESSAGE ##
            if m["role"] == "user":
                ## translate user message
                message_content = m.get("content")
                if message_content and isinstance(message_content, str):
                    user_message = ChatCompletionUserMessage(
                        role="user", content=message_content
                    )
                elif message_content and isinstance(message_content, list):
                    for content in message_content:
                        if content["type"] == "text":
                            text_obj = ChatCompletionTextObject(
                                type="text", text=content["text"]
                            )
                            new_user_content_list.append(text_obj)
                        elif content["type"] == "image":
                            image_url = ChatCompletionImageUrlObject(
                                url=f"data:{content['type']};base64,{content['source']}"
                            )
                            image_obj = ChatCompletionImageObject(
                                type="image_url", image_url=image_url
                            )

                            new_user_content_list.append(image_obj)
                        elif content["type"] == "tool_result":
                            if "content" not in content:
                                tool_result = ChatCompletionToolMessage(
                                    role="tool",
                                    tool_call_id=content["tool_use_id"],
                                    content="",
                                )
                                tool_message_list.append(tool_result)
                            elif isinstance(content["content"], str):
                                tool_result = ChatCompletionToolMessage(
                                    role="tool",
                                    tool_call_id=content["tool_use_id"],
                                    content=content["content"],
                                )
                                tool_message_list.append(tool_result)
                            elif isinstance(content["content"], list):
                                for c in content["content"]:
                                    if c["type"] == "text":
                                        tool_result = ChatCompletionToolMessage(
                                            role="tool",
                                            tool_call_id=content["tool_use_id"],
                                            content=c["text"],
                                        )
                                        tool_message_list.append(tool_result)
                                    elif c["type"] == "image":
                                        image_str = (
                                            f"data:{c['type']};base64,{c['source']}"
                                        )
                                        tool_result = ChatCompletionToolMessage(
                                            role="tool",
                                            tool_call_id=content["tool_use_id"],
                                            content=image_str,
                                        )
                                        tool_message_list.append(tool_result)

            if user_message is not None:
                new_messages.append(user_message)

            if len(new_user_content_list) > 0:
                new_messages.append({"role": "user", "content": new_user_content_list})  # type: ignore

            if len(tool_message_list) > 0:
                new_messages.extend(tool_message_list)

            ## ASSISTANT MESSAGE ##
            assistant_message_str: Optional[str] = None
            tool_calls: List[ChatCompletionAssistantToolCall] = []
            if m["role"] == "assistant":
                if isinstance(m["content"], str):
                    assistant_message_str = m["content"]
                elif isinstance(m["content"], list):
                    for content in m["content"]:
                        if content["type"] == "text":
                            if assistant_message_str is None:
                                assistant_message_str = content["text"]
                            else:
                                assistant_message_str += content["text"]
                        elif content["type"] == "tool_use":
                            function_chunk = ChatCompletionToolCallFunctionChunk(
                                name=content["name"],
                                arguments=json.dumps(content["input"]),
                            )

                            tool_calls.append(
                                ChatCompletionAssistantToolCall(
                                    id=content["id"],
                                    type="function",
                                    function=function_chunk,
                                )
                            )

            if assistant_message_str is not None or len(tool_calls) > 0:
                assistant_message = ChatCompletionAssistantMessage(
                    role="assistant",
                    content=assistant_message_str,
                )
                if len(tool_calls) > 0:
                    assistant_message["tool_calls"] = tool_calls
                new_messages.append(assistant_message)

        return new_messages

    def translate_anthropic_tool_choice_to_openai(
        self, tool_choice: AnthropicMessagesToolChoice
    ) -> ChatCompletionToolChoiceValues:
        if tool_choice["type"] == "any":
            return "required"
        elif tool_choice["type"] == "auto":
            return "auto"
        elif tool_choice["type"] == "tool":
            tc_function_param = ChatCompletionToolChoiceFunctionParam(
                name=tool_choice.get("name", "")
            )
            return ChatCompletionToolChoiceObjectParam(
                type="function", function=tc_function_param
            )
        else:
            raise ValueError(
                "Incompatible tool choice param submitted - {}".format(tool_choice)
            )

    def translate_anthropic_tools_to_openai(
        self, tools: List[AnthropicMessagesTool]
    ) -> List[ChatCompletionToolParam]:
        new_tools: List[ChatCompletionToolParam] = []
        for tool in tools:
            function_chunk = ChatCompletionToolParamFunctionChunk(
                name=tool["name"],
                parameters=tool["input_schema"],
            )
            if "description" in tool:
                function_chunk["description"] = tool["description"]
            new_tools.append(
                ChatCompletionToolParam(type="function", function=function_chunk)
            )

        return new_tools

    def translate_anthropic_to_openai(
        self, anthropic_message_request: AnthropicMessagesRequest
    ) -> ChatCompletionRequest:
        """
        This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format.
        """
        new_messages: List[AllMessageValues] = []

        ## CONVERT ANTHROPIC MESSAGES TO OPENAI
        new_messages = self.translate_anthropic_messages_to_openai(
            messages=anthropic_message_request["messages"]
        )
        ## ADD SYSTEM MESSAGE TO MESSAGES
        if "system" in anthropic_message_request:
            new_messages.insert(
                0,
                ChatCompletionSystemMessage(
                    role="system", content=anthropic_message_request["system"]
                ),
            )

        new_kwargs: ChatCompletionRequest = {
            "model": anthropic_message_request["model"],
            "messages": new_messages,
        }
        ## CONVERT METADATA (user_id)
        if "metadata" in anthropic_message_request:
            if "user_id" in anthropic_message_request["metadata"]:
                new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]

        # Pass litellm proxy specific metadata
        if "litellm_metadata" in anthropic_message_request:
            # metadata will be passed to litellm.acompletion(), it's a litellm_param
            new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata")

        ## CONVERT TOOL CHOICE
        if "tool_choice" in anthropic_message_request:
            new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
                tool_choice=anthropic_message_request["tool_choice"]
            )
        ## CONVERT TOOLS
        if "tools" in anthropic_message_request:
            new_kwargs["tools"] = self.translate_anthropic_tools_to_openai(
                tools=anthropic_message_request["tools"]
            )

        translatable_params = self.translatable_anthropic_params()
        for k, v in anthropic_message_request.items():
            if k not in translatable_params:  # pass remaining params as is
                new_kwargs[k] = v  # type: ignore

        return new_kwargs

    def _translate_openai_content_to_anthropic(
        self, choices: List[Choices]
    ) -> List[
        Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
    ]:
        new_content: List[
            Union[
                AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
            ]
        ] = []
        for choice in choices:
            if (
                choice.message.tool_calls is not None
                and len(choice.message.tool_calls) > 0
            ):
                for tool_call in choice.message.tool_calls:
                    new_content.append(
                        AnthropicResponseContentBlockToolUse(
                            type="tool_use",
                            id=tool_call.id,
                            name=tool_call.function.name or "",
                            input=json.loads(tool_call.function.arguments),
                        )
                    )
            elif choice.message.content is not None:
                new_content.append(
                    AnthropicResponseContentBlockText(
                        type="text", text=choice.message.content
                    )
                )

        return new_content

    def _translate_openai_finish_reason_to_anthropic(
        self, openai_finish_reason: str
    ) -> AnthropicFinishReason:
        if openai_finish_reason == "stop":
            return "end_turn"
        elif openai_finish_reason == "length":
            return "max_tokens"
        elif openai_finish_reason == "tool_calls":
            return "tool_use"
        return "end_turn"

    def translate_openai_response_to_anthropic(
        self, response: litellm.ModelResponse
    ) -> AnthropicResponse:
        ## translate content block
        anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices)  # type: ignore
        ## extract finish reason
        anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic(
            openai_finish_reason=response.choices[0].finish_reason  # type: ignore
        )
        # extract usage
        usage: litellm.Usage = getattr(response, "usage")
        anthropic_usage = AnthropicResponseUsageBlock(
            input_tokens=usage.prompt_tokens or 0,
            output_tokens=usage.completion_tokens or 0,
        )
        translated_obj = AnthropicResponse(
            id=response.id,
            type="message",
            role="assistant",
            model=response.model or "unknown-model",
            stop_sequence=None,
            usage=anthropic_usage,
            content=anthropic_content,
            stop_reason=anthropic_finish_reason,
        )

        return translated_obj

    def _translate_streaming_openai_chunk_to_anthropic(
        self, choices: List[OpenAIStreamingChoice]
    ) -> Tuple[
        Literal["text_delta", "input_json_delta"],
        Union[ContentTextBlockDelta, ContentJsonBlockDelta],
    ]:
        text: str = ""
        partial_json: Optional[str] = None
        for choice in choices:
            if choice.delta.content is not None:
                text += choice.delta.content
            elif choice.delta.tool_calls is not None:
                partial_json = ""
                for tool in choice.delta.tool_calls:
                    if (
                        tool.function is not None
                        and tool.function.arguments is not None
                    ):
                        partial_json += tool.function.arguments

        if partial_json is not None:
            return "input_json_delta", ContentJsonBlockDelta(
                type="input_json_delta", partial_json=partial_json
            )
        else:
            return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)

    def translate_streaming_openai_response_to_anthropic(
        self, response: litellm.ModelResponse
    ) -> Union[ContentBlockDelta, MessageBlockDelta]:
        ## base case - final chunk w/ finish reason
        if response.choices[0].finish_reason is not None:
            delta = MessageDelta(
                stop_reason=self._translate_openai_finish_reason_to_anthropic(
                    response.choices[0].finish_reason
                ),
            )
            if getattr(response, "usage", None) is not None:
                litellm_usage_chunk: Optional[litellm.Usage] = response.usage  # type: ignore
            elif (
                hasattr(response, "_hidden_params")
                and "usage" in response._hidden_params
            ):
                litellm_usage_chunk = response._hidden_params["usage"]
            else:
                litellm_usage_chunk = None
            if litellm_usage_chunk is not None:
                usage_delta = UsageDelta(
                    input_tokens=litellm_usage_chunk.prompt_tokens or 0,
                    output_tokens=litellm_usage_chunk.completion_tokens or 0,
                )
            else:
                usage_delta = UsageDelta(input_tokens=0, output_tokens=0)
            return MessageBlockDelta(
                type="message_delta", delta=delta, usage=usage_delta
            )
        (
            type_of_content,
            content_block_delta,
        ) = self._translate_streaming_openai_chunk_to_anthropic(
            choices=response.choices  # type: ignore
        )
        return ContentBlockDelta(
            type="content_block_delta",
            index=response.choices[0].index,
            delta=content_block_delta,
        )
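A minimal usage sketch for the new `AnthropicExperimentalPassThroughConfig` (assuming the top-level export added in this PR; the request values below are hypothetical):

```python
# Usage sketch (hypothetical values): translate an Anthropic /v1/messages request
# into the OpenAI chat format via the new pass-through config.
from litellm import AnthropicExperimentalPassThroughConfig

config = AnthropicExperimentalPassThroughConfig()

anthropic_request = {
    "model": "claude-3-haiku-20240307",
    "max_tokens": 256,
    "system": "You are a helpful assistant.",
    "messages": [{"role": "user", "content": "Hey, how's it going?"}],
}

openai_request = config.translate_anthropic_to_openai(
    anthropic_message_request=anthropic_request  # type: ignore[arg-type]
)
print(openai_request["messages"][0])  # system prompt becomes the first OpenAI message
print(openai_request["max_tokens"])   # non-translatable params pass through unchanged -> 256
```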
@@ -22,7 +22,7 @@ from litellm.types.llms.openai import (
    ChatCompletionToolParamFunctionChunk,
)
from litellm.types.utils import ModelResponse, Usage
from litellm.utils import CustomStreamWrapper
from litellm.utils import CustomStreamWrapper, has_tool_call_blocks

from ...prompt_templates.factory import _bedrock_converse_messages_pt, _bedrock_tools_pt
from ..common_utils import BedrockError, get_bedrock_tool_name
@@ -136,6 +136,7 @@ class AmazonConverseConfig:
        non_default_params: dict,
        optional_params: dict,
        drop_params: bool,
        messages: Optional[List[AllMessageValues]] = None,
    ) -> dict:
        for param, value in non_default_params.items():
            if param == "response_format":
@@ -202,6 +203,21 @@ class AmazonConverseConfig:
            )
            if _tool_choice_value is not None:
                optional_params["tool_choice"] = _tool_choice_value

        ## VALIDATE REQUEST
        """
        Bedrock doesn't support tool calling without `tools=` param specified.
        """
        if (
            "tools" not in non_default_params
            and messages is not None
            and has_tool_call_blocks(messages)
        ):
            raise litellm.UnsupportedParamsError(
                message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
                model="",
                llm_provider="anthropic",
            )
        return optional_params

    def _transform_request(
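A sketch of how this new Bedrock validation is expected to surface when the message history contains tool-call blocks but no `tools=` param is passed (the ids and message content below are made up):

```python
# Sketch: a tool-call history without a `tools=` param should now raise
# litellm.UnsupportedParamsError before any request is sent (ids are hypothetical).
import litellm

messages = [
    {"role": "user", "content": "What's the weather like in San Francisco?"},
    {
        "role": "assistant",
        "content": "",
        "tool_calls": [
            {
                "id": "toolu_123",
                "type": "function",
                "function": {"name": "get_current_weather", "arguments": '{"location": "SF"}'},
            }
        ],
    },
]

try:
    litellm.completion(
        model="anthropic.claude-3-sonnet-20240229-v1:0",
        messages=messages,
        drop_params=True,
    )
except litellm.UnsupportedParamsError as e:
    print(e)  # "... doesn't support tool calling without `tools=` param specified ..."
```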
60  litellm/llms/groq/chat/handler.py  Normal file
@@ -0,0 +1,60 @@
"""
Handles the chat completion request for groq
"""

from typing import Any, Callable, Optional, Union

from httpx._config import Timeout

from litellm.utils import ModelResponse

from ...groq.chat.transformation import GroqChatConfig
from ...OpenAI.openai import OpenAIChatCompletion


class GroqChatCompletion(OpenAIChatCompletion):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def completion(
        self,
        model_response: ModelResponse,
        timeout: Union[float, Timeout],
        optional_params: dict,
        logging_obj: Any,
        model: Optional[str] = None,
        messages: Optional[list] = None,
        print_verbose: Optional[Callable[..., Any]] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        acompletion: bool = False,
        litellm_params=None,
        logger_fn=None,
        headers: Optional[dict] = None,
        custom_prompt_dict: dict = {},
        client=None,
        organization: Optional[str] = None,
        custom_llm_provider: Optional[str] = None,
        drop_params: Optional[bool] = None,
    ):
        messages = GroqChatConfig()._transform_messages(messages)  # type: ignore
        return super().completion(
            model_response,
            timeout,
            optional_params,
            logging_obj,
            model,
            messages,
            print_verbose,
            api_key,
            api_base,
            acompletion,
            litellm_params,
            logger_fn,
            headers,
            custom_prompt_dict,
            client,
            organization,
            custom_llm_provider,
            drop_params,
        )
88  litellm/llms/groq/chat/transformation.py  Normal file
@@ -0,0 +1,88 @@
"""
Translate from OpenAI's `/v1/chat/completions` to Groq's `/v1/chat/completions`
"""

import types
from typing import List, Optional, Union

from pydantic import BaseModel

import litellm
from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage

from ...OpenAI.chat.gpt_transformation import OpenAIGPTConfig


class GroqChatConfig(OpenAIGPTConfig):

    frequency_penalty: Optional[int] = None
    function_call: Optional[Union[str, dict]] = None
    functions: Optional[list] = None
    logit_bias: Optional[dict] = None
    max_tokens: Optional[int] = None
    n: Optional[int] = None
    presence_penalty: Optional[int] = None
    stop: Optional[Union[str, list]] = None
    temperature: Optional[int] = None
    top_p: Optional[int] = None
    response_format: Optional[dict] = None
    tools: Optional[list] = None
    tool_choice: Optional[Union[str, dict]] = None

    def __init__(
        self,
        frequency_penalty: Optional[int] = None,
        function_call: Optional[Union[str, dict]] = None,
        functions: Optional[list] = None,
        logit_bias: Optional[dict] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
        response_format: Optional[dict] = None,
        tools: Optional[list] = None,
        tool_choice: Optional[Union[str, dict]] = None,
    ) -> None:
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def _transform_messages(self, messages: List[AllMessageValues]) -> List:
        for idx, message in enumerate(messages):
            """
            1. Don't pass 'null' function_call assistant message to groq - https://github.com/BerriAI/litellm/issues/5839
            """
            if isinstance(message, BaseModel):
                _message = message.model_dump()
            else:
                _message = message
            assistant_message = _message.get("role") == "assistant"
            if assistant_message:
                new_message = ChatCompletionAssistantMessage(role="assistant")
                for k, v in _message.items():
                    if v is not None:
                        new_message[k] = v  # type: ignore
                messages[idx] = new_message

        return messages
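A sketch of the behavior `_transform_messages` targets (issue #5839), assuming the module path added above:

```python
# Sketch: assistant messages with null fields (e.g. function_call=None) are rebuilt
# with only their non-null keys before being sent to Groq.
from litellm.llms.groq.chat.transformation import GroqChatConfig

messages = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!", "function_call": None, "tool_calls": None},
]

cleaned = GroqChatConfig()._transform_messages(messages)  # type: ignore[arg-type]
print(cleaned[1])  # {'role': 'assistant', 'content': 'Hello!'} - null fields dropped
```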
101  litellm/llms/groq/stt/transformation.py  Normal file
@@ -0,0 +1,101 @@
"""
Translate from OpenAI's `/v1/audio/transcriptions` to Groq's `/v1/audio/transcriptions`
"""

import types
from typing import List, Optional, Union

import litellm


class GroqSTTConfig:

    frequency_penalty: Optional[int] = None
    function_call: Optional[Union[str, dict]] = None
    functions: Optional[list] = None
    logit_bias: Optional[dict] = None
    max_tokens: Optional[int] = None
    n: Optional[int] = None
    presence_penalty: Optional[int] = None
    stop: Optional[Union[str, list]] = None
    temperature: Optional[int] = None
    top_p: Optional[int] = None
    response_format: Optional[dict] = None
    tools: Optional[list] = None
    tool_choice: Optional[Union[str, dict]] = None

    def __init__(
        self,
        frequency_penalty: Optional[int] = None,
        function_call: Optional[Union[str, dict]] = None,
        functions: Optional[list] = None,
        logit_bias: Optional[dict] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
        response_format: Optional[dict] = None,
        tools: Optional[list] = None,
        tool_choice: Optional[Union[str, dict]] = None,
    ) -> None:
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params_stt(self):
        return [
            "prompt",
            "response_format",
            "temperature",
            "language",
        ]

    def get_supported_openai_response_formats_stt(self) -> List[str]:
        return ["json", "verbose_json", "text"]

    def map_openai_params_stt(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        response_formats = self.get_supported_openai_response_formats_stt()
        for param, value in non_default_params.items():
            if param == "response_format":
                if value in response_formats:
                    optional_params[param] = value
                else:
                    if litellm.drop_params is True or drop_params is True:
                        pass
                    else:
                        raise litellm.utils.UnsupportedParamsError(
                            message="Groq doesn't support response_format={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
                                value
                            ),
                            status_code=400,
                        )
            else:
                optional_params[param] = value
        return optional_params
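A sketch of the STT param mapping above, assuming `drop_params` is enabled (the model name is illustrative):

```python
# Sketch: an unsupported response_format is silently dropped when drop_params=True,
# while supported params pass through untouched.
from litellm.llms.groq.stt.transformation import GroqSTTConfig

optional_params = GroqSTTConfig().map_openai_params_stt(
    non_default_params={"response_format": "srt", "temperature": 0.2},
    optional_params={},
    model="whisper-large-v3",  # illustrative model name
    drop_params=True,
)
print(optional_params)  # {'temperature': 0.2}
```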
@@ -276,7 +276,7 @@ def completion(

    from anthropic import AnthropicVertex

    from litellm.llms.anthropic.chat import AnthropicChatCompletion
    from litellm.llms.anthropic.chat.handler import AnthropicChatCompletion
    from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
        VertexLLM,
    )

@@ -367,7 +367,7 @@ async def async_completion(

    if client is None:
        vertex_ai_client = AsyncAnthropicVertex(
            project_id=vertex_project, region=vertex_location, access_token=access_token
            project_id=vertex_project, region=vertex_location, access_token=access_token  # type: ignore
        )
    else:
        vertex_ai_client = client

@@ -438,7 +438,7 @@ async def async_streaming(

    if client is None:
        vertex_ai_client = AsyncAnthropicVertex(
            project_id=vertex_project, region=vertex_location, access_token=access_token
            project_id=vertex_project, region=vertex_location, access_token=access_token  # type: ignore
        )
    else:
        vertex_ai_client = client
@@ -96,6 +96,7 @@ from .llms.cohere import completion as cohere_completion  # type: ignore
from .llms.cohere import embed as cohere_embed
from .llms.custom_llm import CustomLLM, custom_chat_llm_router
from .llms.databricks.chat import DatabricksChatCompletion
from .llms.groq.chat.handler import GroqChatCompletion
from .llms.huggingface_restapi import Huggingface
from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
from .llms.OpenAI.chat.o1_handler import OpenAIO1ChatCompletion

@@ -168,6 +169,7 @@ openai_text_completions = OpenAITextCompletion()
openai_o1_chat_completions = OpenAIO1ChatCompletion()
openai_audio_transcriptions = OpenAIAudioTranscription()
databricks_chat_completions = DatabricksChatCompletion()
groq_chat_completions = GroqChatCompletion()
azure_ai_chat_completions = AzureAIChatCompletion()
azure_ai_embedding = AzureAIEmbedding()
anthropic_chat_completions = AnthropicChatCompletion()
@@ -958,6 +960,7 @@ def completion(
            extra_headers=extra_headers,
            api_version=api_version,
            parallel_tool_calls=parallel_tool_calls,
            messages=messages,
            **non_default_params,
        )
@@ -1318,13 +1321,56 @@ def completion(
                additional_args={"headers": headers},
            )
            response = _response
        elif custom_llm_provider == "groq":
            api_base = (
                api_base  # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there
                or litellm.api_base
                or get_secret("GROQ_API_BASE")
                or "https://api.groq.com/openai/v1"
            )

            # set API KEY
            api_key = (
                api_key
                or litellm.api_key  # for deepinfra/perplexity/anyscale/friendliai we check in get_llm_provider and pass in the api key from there
                or litellm.groq_key
                or get_secret("GROQ_API_KEY")
            )

            headers = headers or litellm.headers

            ## LOAD CONFIG - if set
            config = litellm.GroqChatConfig.get_config()
            for k, v in config.items():
                if (
                    k not in optional_params
                ):  # completion(top_k=3) > openai_config(top_k=3) <- allows for dynamic variables to be passed in
                    optional_params[k] = v

            response = groq_chat_completions.completion(
                model=model,
                messages=messages,
                headers=headers,
                model_response=model_response,
                print_verbose=print_verbose,
                api_key=api_key,
                api_base=api_base,
                acompletion=acompletion,
                logging_obj=logging,
                optional_params=optional_params,
                litellm_params=litellm_params,
                logger_fn=logger_fn,
                timeout=timeout,  # type: ignore
                custom_prompt_dict=custom_prompt_dict,
                client=client,  # pass AsyncOpenAI, OpenAI client
                organization=organization,
                custom_llm_provider=custom_llm_provider,
            )
        elif (
            model in litellm.open_ai_chat_completion_models
            or custom_llm_provider == "custom_openai"
            or custom_llm_provider == "deepinfra"
            or custom_llm_provider == "perplexity"
            or custom_llm_provider == "groq"
            or custom_llm_provider == "nvidia_nim"
            or custom_llm_provider == "cerebras"
            or custom_llm_provider == "sambanova"
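A sketch of the call shape this new `groq` branch handles (the `mock_response` kwarg is assumed here just to keep the sketch offline):

```python
# Sketch: groq/ models are now routed through GroqChatCompletion, picking up
# GROQ_API_BASE / GROQ_API_KEY (or litellm.groq_key) from the environment.
import litellm

response = litellm.completion(
    model="groq/llama3-8b-8192",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    mock_response="Going well!",  # assumption: avoids a real Groq call in this sketch
)
print(response.choices[0].message.content)
```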
@@ -1431,6 +1477,7 @@ def completion(
                    original_response=response,
                    additional_args={"headers": headers},
                )

        elif (
            "replicate" in model
            or custom_llm_provider == "replicate"
@@ -2933,6 +2980,7 @@ def batch_completion(
    deployment_id=None,
    request_timeout: Optional[int] = None,
    timeout: Optional[int] = 600,
    max_workers: Optional[int] = 100,
    # Optional liteLLM function params
    **kwargs,
):
@@ -2956,6 +3004,7 @@ def batch_completion(
        user (str, optional): The user string for generating completions. Defaults to "".
        deployment_id (optional): The deployment ID for generating completions. Defaults to None.
        request_timeout (int, optional): The request timeout for generating completions. Defaults to None.
        max_workers (int, optional): The maximum number of threads to use for parallel processing.

    Returns:
        list: A list of completion results.
@@ -3001,7 +3050,7 @@ def batch_completion(
        for i in range(0, len(lst), n):
            yield lst[i : i + n]

    with ThreadPoolExecutor(max_workers=100) as executor:
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for sub_batch in chunks(batch_messages, 100):
            for message_list in sub_batch:
                kwargs_modified = args.copy()
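A sketch of the new `max_workers` knob on `batch_completion`, which caps the thread pool used for the parallel requests (the `mock_response` kwarg is assumed to pass through to the underlying completion calls, keeping this offline):

```python
# Sketch: run 10 prompts with at most 4 worker threads in flight.
import litellm

responses = litellm.batch_completion(
    model="gpt-3.5-turbo",
    messages=[[{"role": "user", "content": f"Say hello #{i}"}] for i in range(10)],
    max_workers=4,
    mock_response="hello",  # assumption: forwarded to each completion call
)
print(len(responses))  # 10
```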
@@ -1173,6 +1173,18 @@
        "supports_function_calling": true,
        "supports_assistant_prefill": true
    },
    "mistral/pixtral-12b-2409": {
        "max_tokens": 128000,
        "max_input_tokens": 128000,
        "max_output_tokens": 128000,
        "input_cost_per_token": 0.00000015,
        "output_cost_per_token": 0.00000015,
        "litellm_provider": "mistral",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_assistant_prefill": true,
        "supports_vision": true
    },
    "mistral/open-mistral-7b": {
        "max_tokens": 8191,
        "max_input_tokens": 32000,
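A quick arithmetic check of the new `mistral/pixtral-12b-2409` pricing entry (0.00000015 USD per input and output token):

```python
# Example cost for a call with 1,000 prompt tokens and 500 completion tokens.
prompt_tokens, completion_tokens = 1_000, 500
cost = prompt_tokens * 0.00000015 + completion_tokens * 0.00000015
print(f"${cost:.6f}")  # $0.000225
```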
@@ -760,7 +760,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):

                return _user_id_rate_limits.model_dump()
        except Exception as e:
            verbose_proxy_logger.exception(
            verbose_proxy_logger.debug(
                "Parallel Request Limiter: Error getting user object", str(e)
            )
            return None
@@ -389,6 +389,9 @@ async def add_litellm_data_to_request(
        user_api_key_dict=user_api_key_dict,
    )

    verbose_proxy_logger.debug(
        f"[PROXY]returned data from litellm_pre_call_utils: {data}"
    )
    return data

@@ -1466,9 +1466,6 @@ class PrismaClient:
    ):
        args_passed_in = locals()
        start_time = time.time()
        verbose_proxy_logger.debug(
            f"PrismaClient: get_data - args_passed_in: {args_passed_in}"
        )
        hashed_token: Optional[str] = None
        try:
            response: Any = None
@@ -1224,3 +1224,14 @@ def test_langfuse_prompt_type(prompt):
    _add_prompt_to_generation_params(
        generation_params=generation_params, clean_metadata=clean_metadata
    )


def test_langfuse_logging_metadata():
    from litellm.integrations.langfuse import log_requester_metadata

    metadata = {"key": "value", "requester_metadata": {"key": "value"}}

    got_metadata = log_requester_metadata(clean_metadata=metadata)
    expected_metadata = {"requester_metadata": {"key": "value"}}

    assert expected_metadata == got_metadata
@@ -61,6 +61,7 @@ async def test_litellm_anthropic_prompt_caching_tools():
    }

    mock_response.json = return_val
    mock_response.headers = {"key": "value"}

    litellm.set_verbose = True
    with patch(

@@ -466,6 +467,7 @@ async def test_litellm_anthropic_prompt_caching_system():
    }

    mock_response.json = return_val
    mock_response.headers = {"key": "value"}

    litellm.set_verbose = True
    with patch(
@@ -1173,7 +1173,12 @@ def test_turn_off_message_logging():
##### VALID JSON ######


@pytest.mark.parametrize("model", ["gpt-3.5-turbo", "azure/chatgpt-v-2"])
@pytest.mark.parametrize(
    "model",
    [
        "ft:gpt-3.5-turbo:my-org:custom_suffix:id"
    ],  # "gpt-3.5-turbo", "azure/chatgpt-v-2",
)
@pytest.mark.parametrize(
    "turn_off_message_logging",
    [

@@ -1200,7 +1205,7 @@ def test_standard_logging_payload(model, turn_off_message_logging):
    _ = litellm.completion(
        model=model,
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
        # mock_response="Going well!",
        mock_response="Going well!",
    )

    time.sleep(2)
@@ -7,6 +7,8 @@ from typing import Any

from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError

from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
@@ -884,6 +886,42 @@ def _pre_call_utils(
    return data, original_function, mapped_target


def _pre_call_utils_httpx(
    call_type: str,
    data: dict,
    client: Union[HTTPHandler, AsyncHTTPHandler],
    sync_mode: bool,
    streaming: Optional[bool],
):
    mapped_target: Any = client.client
    if call_type == "embedding":
        data["input"] = "Hello world!"

        if sync_mode:
            original_function = litellm.embedding
        else:
            original_function = litellm.aembedding
    elif call_type == "chat_completion":
        data["messages"] = [{"role": "user", "content": "Hello world"}]
        if streaming is True:
            data["stream"] = True

        if sync_mode:
            original_function = litellm.completion
        else:
            original_function = litellm.acompletion
    elif call_type == "completion":
        data["prompt"] = "Hello world"
        if streaming is True:
            data["stream"] = True
        if sync_mode:
            original_function = litellm.text_completion
        else:
            original_function = litellm.atext_completion

    return data, original_function, mapped_target


@pytest.mark.parametrize(
    "sync_mode",
    [True, False],
@@ -1006,3 +1044,111 @@ async def test_exception_with_headers(sync_mode, provider, model, call_type, str
    if exception_raised is False:
        print(resp)
    assert exception_raised


@pytest.mark.parametrize(
    "sync_mode",
    [True, False],
)
@pytest.mark.parametrize("streaming", [True, False])
@pytest.mark.parametrize(
    "provider, model, call_type",
    [
        ("anthropic", "claude-3-haiku-20240307", "chat_completion"),
    ],
)
@pytest.mark.asyncio
async def test_exception_with_headers_httpx(
    sync_mode, provider, model, call_type, streaming
):
    """
    User feedback: litellm says "No deployments available for selected model, Try again in 60 seconds"
    but Azure says to retry in at most 9s

    ```
    {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"}
    ```
    """
    print(f"Received args: {locals()}")
    import openai

    if sync_mode:
        client = HTTPHandler()
    else:
        client = AsyncHTTPHandler()

    data = {"model": model}
    data, original_function, mapped_target = _pre_call_utils_httpx(
        call_type=call_type,
        data=data,
        client=client,
        sync_mode=sync_mode,
        streaming=streaming,
    )

    cooldown_time = 30.0

    def _return_exception(*args, **kwargs):
        import datetime

        from httpx import Headers, HTTPStatusError, Request, Response

        # Create the Request object
        request = Request("POST", "http://0.0.0.0:9000/chat/completions")

        # Create the Response object with the necessary headers and status code
        response = Response(
            status_code=429,
            headers=Headers(
                {
                    "date": "Sat, 21 Sep 2024 22:56:53 GMT",
                    "server": "uvicorn",
                    "retry-after": "30",
                    "content-length": "30",
                    "content-type": "application/json",
                }
            ),
            request=request,
        )

        # Create and raise the HTTPStatusError exception
        raise HTTPStatusError(
            message="Error code: 429 - Rate Limit Error!",
            request=request,
            response=response,
        )

    with patch.object(
        mapped_target,
        "send",
        side_effect=_return_exception,
    ):
        new_retry_after_mock_client = MagicMock(return_value=-1)

        litellm.utils._get_retry_after_from_exception_header = (
            new_retry_after_mock_client
        )

        exception_raised = False
        try:
            if sync_mode:
                resp = original_function(**data, client=client)
                if streaming:
                    for chunk in resp:
                        continue
            else:
                resp = await original_function(**data, client=client)

                if streaming:
                    async for chunk in resp:
                        continue

        except litellm.RateLimitError as e:
            exception_raised = True
            assert e.litellm_response_headers is not None
            print("e.litellm_response_headers", e.litellm_response_headers)
            assert int(e.litellm_response_headers["retry-after"]) == cooldown_time

        if exception_raised is False:
            print(resp)
        assert exception_raised
@@ -45,11 +45,12 @@ def get_current_weather(location, unit="fahrenheit"):
@pytest.mark.parametrize(
    "model",
    [
        # "gpt-3.5-turbo-1106",
        "gpt-3.5-turbo-1106",
        # "mistral/mistral-large-latest",
        # "claude-3-haiku-20240307",
        # "gemini/gemini-1.5-pro",
        "anthropic.claude-3-sonnet-20240229-v1:0",
        "groq/llama3-8b-8192",
    ],
)
@pytest.mark.flaky(retries=3, delay=1)
@@ -154,6 +155,105 @@ def test_aaparallel_function_call(model):

# test_parallel_function_call()

from litellm.types.utils import ChatCompletionMessageToolCall, Function, Message


@pytest.mark.parametrize(
    "model, provider",
    [
        (
            "anthropic.claude-3-sonnet-20240229-v1:0",
            "bedrock",
        ),
        ("claude-3-haiku-20240307", "anthropic"),
    ],
)
@pytest.mark.parametrize(
    "messages, expected_error_msg",
    [
        (
            [
                {
                    "role": "user",
                    "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
                },
                Message(
                    content="Here are the current weather conditions for San Francisco, Tokyo, and Paris:",
                    role="assistant",
                    tool_calls=[
                        ChatCompletionMessageToolCall(
                            index=1,
                            function=Function(
                                arguments='{"location": "San Francisco, CA", "unit": "fahrenheit"}',
                                name="get_current_weather",
                            ),
                            id="tooluse_Jj98qn6xQlOP_PiQr-w9iA",
                            type="function",
                        )
                    ],
                    function_call=None,
                ),
                {
                    "tool_call_id": "tooluse_Jj98qn6xQlOP_PiQr-w9iA",
                    "role": "tool",
                    "name": "get_current_weather",
                    "content": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}',
                },
            ],
            True,
        ),
        (
            [
                {
                    "role": "user",
                    "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
                }
            ],
            False,
        ),
    ],
)
def test_parallel_function_call_anthropic_error_msg(
    model, provider, messages, expected_error_msg
):
    """
    Anthropic doesn't support tool calling without `tools=` param specified.

    Ensure this error is thrown when `tools=` param is not specified. But tool call requests are made.

    Reference Issue: https://github.com/BerriAI/litellm/issues/5747, https://github.com/BerriAI/litellm/issues/5388
    """
    try:
        litellm.set_verbose = True

        messages = messages

        if expected_error_msg:
            with pytest.raises(litellm.UnsupportedParamsError) as e:
                second_response = litellm.completion(
                    model=model,
                    messages=messages,
                    temperature=0.2,
                    seed=22,
                    drop_params=True,
                )  # get a new response from the model where it can see the function response
                print("second response\n", second_response)
        else:
            second_response = litellm.completion(
                model=model,
                messages=messages,
                temperature=0.2,
                seed=22,
                drop_params=True,
            )  # get a new response from the model where it can see the function response
            print("second response\n", second_response)
    except litellm.InternalServerError as e:
        print(e)
    except litellm.RateLimitError as e:
        print(e)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_parallel_function_call_stream():
    try:
@@ -62,3 +62,9 @@ def test_get_model_info_shows_supports_prompt_caching():
    info = litellm.get_model_info("deepseek/deepseek-chat")
    print("info", info)
    assert info.get("supports_prompt_caching") is True


def test_get_model_info_finetuned_models():
    info = litellm.get_model_info("ft:gpt-3.5-turbo:my-org:custom_suffix:id")
    print("info", info)
    assert info["input_cost_per_token"] == 0.000003
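A sketch of the lookup the test above covers: `ft:`-prefixed OpenAI models are expected to fall back to the base model's metadata and pricing.

```python
# Sketch: finetuned model names resolve to the base model's model_info entry.
import litellm

info = litellm.get_model_info("ft:gpt-3.5-turbo:my-org:custom_suffix:id")
print(info["input_cost_per_token"])  # 3e-06, per the assertion above
```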
@@ -18,13 +18,13 @@ class AnthropicMessagesTool(TypedDict, total=False):


class AnthropicMessagesTextParam(TypedDict, total=False):
    type: Literal["text"]
    text: str
    type: Required[Literal["text"]]
    text: Required[str]
    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


class AnthropicMessagesToolUseParam(TypedDict):
    type: Literal["tool_use"]
    type: Required[Literal["tool_use"]]
    id: str
    name: str
    input: dict

@@ -58,8 +58,8 @@ class AnthropicImageParamSource(TypedDict):


class AnthropicMessagesImageParam(TypedDict, total=False):
    type: Literal["image"]
    source: AnthropicImageParamSource
    type: Required[Literal["image"]]
    source: Required[AnthropicImageParamSource]
    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


@@ -102,16 +102,13 @@ class AnthropicSystemMessageContent(TypedDict, total=False):
    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


class AnthropicMessagesRequest(TypedDict, total=False):
    model: Required[str]
    messages: Required[
        List[
            Union[
                AnthropicMessagesUserMessageParam,
                AnthopicMessagesAssistantMessageParam,
            ]
        ]
AllAnthropicMessageValues = Union[
    AnthropicMessagesUserMessageParam, AnthopicMessagesAssistantMessageParam
]


class AnthropicMessageRequestBase(TypedDict, total=False):
    messages: Required[List[AllAnthropicMessageValues]]
    max_tokens: Required[int]
    metadata: AnthropicMetadata
    stop_sequences: List[str]

@@ -123,6 +120,9 @@ class AnthropicMessagesRequest(TypedDict, total=False):
    top_k: int
    top_p: float


class AnthropicMessagesRequest(AnthropicMessageRequestBase, total=False):
    model: Required[str]
    # litellm param - used for tracking litellm proxy metadata in the request
    litellm_metadata: dict

@@ -291,9 +291,9 @@ class AnthropicResponse(BaseModel):
    """Billing and rate-limit usage."""


class AnthropicChatCompletionUsageBlock(TypedDict, total=False):
    prompt_tokens: Required[int]
    completion_tokens: Required[int]
    total_tokens: Required[int]
from .openai import ChatCompletionUsageBlock


class AnthropicChatCompletionUsageBlock(ChatCompletionUsageBlock, total=False):
    cache_creation_input_tokens: int
    cache_read_input_tokens: int
@@ -343,13 +343,16 @@ class ChatCompletionImageObject(TypedDict):
    image_url: Union[str, ChatCompletionImageUrlObject]


class OpenAIChatCompletionUserMessage(TypedDict):
    role: Literal["user"]
    content: Union[
OpenAIMessageContent = Union[
    str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
]


class OpenAIChatCompletionUserMessage(TypedDict):
    role: Literal["user"]
    content: OpenAIMessageContent


class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):
    cache_control: ChatCompletionCachedContent

@@ -7,7 +7,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union
from openai._models import BaseModel as OpenAIObject
from openai.types.audio.transcription_create_params import FileTypes  # type: ignore
from openai.types.completion_usage import CompletionTokensDetails, CompletionUsage
from pydantic import ConfigDict, Field, PrivateAttr
from pydantic import ConfigDict, PrivateAttr
from typing_extensions import Callable, Dict, Required, TypedDict, override

from ..litellm_core_utils.core_helpers import map_finish_reason
2113  litellm/utils.py
File diff suppressed because it is too large
@@ -1173,6 +1173,18 @@
        "supports_function_calling": true,
        "supports_assistant_prefill": true
    },
    "mistral/pixtral-12b-2409": {
        "max_tokens": 128000,
        "max_input_tokens": 128000,
        "max_output_tokens": 128000,
        "input_cost_per_token": 0.00000015,
        "output_cost_per_token": 0.00000015,
        "litellm_provider": "mistral",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_assistant_prefill": true,
        "supports_vision": true
    },
    "mistral/open-mistral-7b": {
        "max_tokens": 8191,
        "max_input_tokens": 32000,
@@ -25,7 +25,12 @@ from unittest.mock import MagicMock, patch
import pytest

import litellm
from litellm import AnthropicConfig, Router, adapter_completion
from litellm import (
    AnthropicConfig,
    Router,
    adapter_completion,
    AnthropicExperimentalPassThroughConfig,
)
from litellm.adapters.anthropic_adapter import anthropic_adapter
from litellm.types.llms.anthropic import AnthropicResponse


@@ -33,7 +38,7 @@ from litellm.types.llms.anthropic import AnthropicResponse
def test_anthropic_completion_messages_translation():
    messages = [{"role": "user", "content": "Hey, how's it going?"}]

    translated_messages = AnthropicConfig().translate_anthropic_messages_to_openai(messages=messages)  # type: ignore
    translated_messages = AnthropicExperimentalPassThroughConfig().translate_anthropic_messages_to_openai(messages=messages)  # type: ignore

    assert translated_messages == [{"role": "user", "content": "Hey, how's it going?"}]
@@ -5,7 +5,11 @@ import pytest
import sys
from typing import Any, Dict, List
from unittest.mock import MagicMock, Mock, patch
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm.exceptions import BadRequestError
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler