LiteLLM Minor Fixes & Improvements (09/27/2024) (#5938)

* fix(langfuse.py): prevent double logging requester metadata

Fixes https://github.com/BerriAI/litellm/issues/5935

* build(model_prices_and_context_window.json): add mistral pixtral cost tracking

Closes https://github.com/BerriAI/litellm/issues/5837

* handle streaming for azure ai studio error

* [Perf Proxy] parallel request limiter - use one cache update call (#5932)

* fix parallel request limiter - use one cache update call

* ci/cd run again

* run ci/cd again

* use docker username password

* fix config.yml

* fix config

* fix config

* fix config.yml

* ci/cd run again

* use correct typing for batch set cache

* fix async_set_cache_pipeline

* fix only check user id tpm / rpm limits when limits set

* fix test_openai_azure_embedding_with_oidc_and_cf

* fix(groq/chat/transformation.py): Fixes https://github.com/BerriAI/litellm/issues/5839

* feat(anthropic/chat.py): return 'retry-after' headers from anthropic

Fixes https://github.com/BerriAI/litellm/issues/4387

* feat: raise validation error if message has tool calls without passing `tools` param for anthropic/bedrock

Closes https://github.com/BerriAI/litellm/issues/5747

* [Feature]#5940, add max_workers parameter for the batch_completion (#5947)

* handle streaming for azure ai studio error

* bump: version 1.48.2 → 1.48.3

* docs(data_security.md): add legal/compliance FAQs

Make it easier for companies to use litellm

* docs: resolve imports

* [Feature]#5940, add max_workers parameter for the batch_completion method

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com>
Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>

* fix(converse_transformation.py): fix default message value

* fix(utils.py): fix get_model_info to handle finetuned models

Fixes issue for standard logging payloads, where model_map_value was null for finetuned openai models

* fix(litellm_pre_call_utils.py): add debug statement for data sent after updating with team/key callbacks

* fix: fix linting errors

* fix(anthropic/chat/handler.py): fix cache creation input tokens

* fix(exception_mapping_utils.py): fix missing imports

* fix(anthropic/chat/handler.py): fix usage block translation

* test: fix test

* test: fix tests

* style(types/utils.py): trigger new build

* test: fix test

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Jose Alberto Arango Sanchez <jose.arangos@udea.edu.co>
Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>
Krish Dholakia, 2024-09-27 22:52:57 -07:00, committed by GitHub
parent 754981a78f
commit 0b30e212da
35 changed files with 3657 additions and 2820 deletions
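
Editor's note: the `max_workers` addition for `batch_completion` (#5940/#5947) is not visible in the diffs below. A minimal usage sketch, assuming the parameter is exposed as a keyword argument that caps the thread pool used for the parallel calls:

```python
import litellm

# Assumed usage of the new max_workers parameter on litellm.batch_completion;
# each inner list is one independent chat request.
responses = litellm.batch_completion(
    model="gpt-3.5-turbo",
    messages=[
        [{"role": "user", "content": "Hello, how are you?"}],
        [{"role": "user", "content": "Summarize the plot of Dune."}],
    ],
    max_workers=4,  # assumption: upper bound on concurrent worker threads
)
```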

litellm/__init__.py

@@ -89,6 +89,7 @@ retry = True
### AUTH ###
api_key: Optional[str] = None
openai_key: Optional[str] = None
+groq_key: Optional[str] = None
databricks_key: Optional[str] = None
azure_key: Optional[str] = None
anthropic_key: Optional[str] = None
@@ -892,7 +893,11 @@ ALL_LITELLM_RESPONSE_TYPES = [
from .types.utils import ImageObject
from .llms.custom_llm import CustomLLM
from .llms.huggingface_restapi import HuggingfaceConfig
-from .llms.anthropic.chat import AnthropicConfig
+from .llms.anthropic.chat.handler import AnthropicConfig
+from .llms.anthropic.experimental_pass_through.transformation import (
+    AnthropicExperimentalPassThroughConfig,
+)
+from .llms.groq.stt.transformation import GroqSTTConfig
from .llms.anthropic.completion import AnthropicTextConfig
from .llms.databricks.chat import DatabricksConfig, DatabricksEmbeddingConfig
from .llms.predibase import PredibaseConfig
@@ -962,8 +967,8 @@ from .llms.OpenAI.openai import (
OpenAITextCompletionConfig,
MistralEmbeddingConfig,
DeepInfraConfig,
-GroqConfig,
)
+from .llms.groq.chat.transformation import GroqChatConfig
from .llms.azure_ai.chat.transformation import AzureAIStudioConfig
from .llms.mistral.mistral_chat_transformation import MistralConfig
from .llms.OpenAI.chat.o1_transformation import (
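
The new `groq_key` module attribute follows the same pattern as the other provider keys above; a minimal sketch (the model name is illustrative):

```python
import litellm

litellm.groq_key = "gsk-..."  # picked up as the default Groq API key, like openai_key etc.
response = litellm.completion(
    model="groq/llama3-8b-8192",  # illustrative Groq model name
    messages=[{"role": "user", "content": "ping"}],
)
```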

litellm/adapters/anthropic_adapter.py

@@ -34,7 +34,7 @@ class AnthropicAdapter(CustomLogger):
"""
request_body = AnthropicMessagesRequest(**kwargs)  # type: ignore
-translated_body = litellm.AnthropicConfig().translate_anthropic_to_openai(
+translated_body = litellm.AnthropicExperimentalPassThroughConfig().translate_anthropic_to_openai(
anthropic_message_request=request_body
)
@@ -44,7 +44,7 @@ class AnthropicAdapter(CustomLogger):
self, response: litellm.ModelResponse
) -> Optional[AnthropicResponse]:
-return litellm.AnthropicConfig().translate_openai_response_to_anthropic(
+return litellm.AnthropicExperimentalPassThroughConfig().translate_openai_response_to_anthropic(
response=response
)
@@ -99,7 +99,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
if chunk == "None" or chunk is None:
raise Exception
-processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic(
+processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic(
response=chunk
)
if (
@@ -163,7 +163,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
async for chunk in self.completion_stream:
if chunk == "None" or chunk is None:
raise Exception
-processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic(
+processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic(
response=chunk
)
if (

litellm/integrations/langfuse.py

@@ -601,7 +601,7 @@ class LangFuseLogger:
"input": input if not mask_input else "redacted-by-litellm",
"output": output if not mask_output else "redacted-by-litellm",
"usage": usage,
-"metadata": clean_metadata,
+"metadata": log_requester_metadata(clean_metadata),
"level": level,
"version": clean_metadata.pop("version", None),
}
@@ -768,3 +768,15 @@ def log_provider_specific_information_as_span(
name="vertex_ai_grounding_metadata",
input=vertex_ai_grounding_metadata,
)
+
+
+def log_requester_metadata(clean_metadata: dict):
+    returned_metadata = {}
+    requester_metadata = clean_metadata.get("requester_metadata") or {}
+    for k, v in clean_metadata.items():
+        if k not in requester_metadata:
+            returned_metadata[k] = v
+    returned_metadata.update({"requester_metadata": requester_metadata})
+    return returned_metadata
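
The helper above fixes the double logging of requester metadata (#5935): any top-level key that also appears under `requester_metadata` is dropped, so each value is logged exactly once. A behavior sketch:

```python
clean_metadata = {
    "user_api_key": "sk-...",
    "foo": "bar",  # duplicated inside requester_metadata -> dropped at top level
    "requester_metadata": {"foo": "bar"},
}
assert log_requester_metadata(clean_metadata) == {
    "user_api_key": "sk-...",
    "requester_metadata": {"foo": "bar"},
}
```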

File diff suppressed because it is too large.
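
This suppressed diff is presumably `model_prices_and_context_window.json`, which the commit extends with Mistral Pixtral cost tracking (#5837). A sketch of how such entries are consumed; the exact model key is an assumption:

```python
import litellm

# litellm.model_cost maps model keys to pricing / context-window metadata.
pixtral = litellm.model_cost.get("mistral/pixtral-12b-2409", {})  # assumed key
print(pixtral.get("input_cost_per_token"), pixtral.get("output_cost_per_token"))
```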

litellm/litellm_core_utils/litellm_logging.py

@@ -1015,9 +1015,8 @@ class Logging:
!= langFuseLogger.public_key
)
or (
-self.langfuse_public_key is not None
-and self.langfuse_public_key
-!= langFuseLogger.public_key
+self.langfuse_secret is not None
+and self.langfuse_secret != langFuseLogger.secret_key
)
or (
self.langfuse_host is not None
@@ -1045,7 +1044,6 @@
service_name="langfuse",
logging_obj=temp_langfuse_logger,
)
-
if temp_langfuse_logger is not None:
_response = temp_langfuse_logger.log_event(
kwargs=kwargs,

litellm/llms/OpenAI/openai.py

@@ -220,104 +220,6 @@ class DeepInfraConfig:
return optional_params
class GroqConfig:
"""
Reference: https://deepinfra.com/docs/advanced/openai_api
The class `DeepInfra` provides configuration for the DeepInfra's Chat Completions API interface. Below are the parameters:
"""
frequency_penalty: Optional[int] = None
function_call: Optional[Union[str, dict]] = None
functions: Optional[list] = None
logit_bias: Optional[dict] = None
max_tokens: Optional[int] = None
n: Optional[int] = None
presence_penalty: Optional[int] = None
stop: Optional[Union[str, list]] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
response_format: Optional[dict] = None
tools: Optional[list] = None
tool_choice: Optional[Union[str, dict]] = None
def __init__(
self,
frequency_penalty: Optional[int] = None,
function_call: Optional[Union[str, dict]] = None,
functions: Optional[list] = None,
logit_bias: Optional[dict] = None,
max_tokens: Optional[int] = None,
n: Optional[int] = None,
presence_penalty: Optional[int] = None,
stop: Optional[Union[str, list]] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
response_format: Optional[dict] = None,
tools: Optional[list] = None,
tool_choice: Optional[Union[str, dict]] = None,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params_stt(self):
return [
"prompt",
"response_format",
"temperature",
"language",
]
def get_supported_openai_response_formats_stt(self) -> List[str]:
return ["json", "verbose_json", "text"]
def map_openai_params_stt(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool,
) -> dict:
response_formats = self.get_supported_openai_response_formats_stt()
for param, value in non_default_params.items():
if param == "response_format":
if value in response_formats:
optional_params[param] = value
else:
if litellm.drop_params is True or drop_params is True:
pass
else:
raise litellm.utils.UnsupportedParamsError(
message="Groq doesn't support response_format={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
value
),
status_code=400,
)
else:
optional_params[param] = value
return optional_params
class OpenAIConfig:
"""
Reference: https://platform.openai.com/docs/api-reference/chat/create

litellm/llms/anthropic/chat/__init__.py

@@ -0,0 +1 @@
from .handler import AnthropicChatCompletion, ModelResponseIterator

litellm/llms/anthropic/chat/handler.py

@@ -71,12 +71,19 @@ from litellm.types.llms.openai import (
ChatCompletionToolParamFunctionChunk,
ChatCompletionUsageBlock,
ChatCompletionUserMessage,
+OpenAIMessageContent,
)
from litellm.types.utils import Choices, GenericStreamingChunk
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
-from ..base import BaseLLM
-from ..prompt_templates.factory import custom_prompt, prompt_factory
+from ...base import BaseLLM
+from ...prompt_templates.factory import (
+    anthropic_messages_pt,
+    custom_prompt,
+    prompt_factory,
+)
+from ..common_utils import AnthropicError
+from .transformation import AnthropicConfig

class AnthropicConstants(Enum):
@@ -86,558 +93,6 @@
# constants from https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/_constants.py
class AnthropicError(Exception):
def __init__(self, status_code: int, message):
self.status_code = status_code
self.message: str = message
self.request = httpx.Request(
method="POST", url="https://api.anthropic.com/v1/messages"
)
self.response = httpx.Response(status_code=status_code, request=self.request)
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
class AnthropicConfig:
"""
Reference: https://docs.anthropic.com/claude/reference/messages_post
to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
"""
max_tokens: Optional[int] = (
4096 # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default)
)
stop_sequences: Optional[list] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
top_k: Optional[int] = None
metadata: Optional[dict] = None
system: Optional[str] = None
def __init__(
self,
max_tokens: Optional[
int
] = 4096, # You can pass in a value yourself or use the default value 4096
stop_sequences: Optional[list] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
top_k: Optional[int] = None,
metadata: Optional[dict] = None,
system: Optional[str] = None,
) -> None:
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"stream",
"stop",
"temperature",
"top_p",
"max_tokens",
"max_completion_tokens",
"tools",
"tool_choice",
"extra_headers",
]
def get_cache_control_headers(self) -> dict:
return {
"anthropic-version": "2023-06-01",
"anthropic-beta": "prompt-caching-2024-07-31",
}
def map_openai_params(self, non_default_params: dict, optional_params: dict):
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value
if param == "max_completion_tokens":
optional_params["max_tokens"] = value
if param == "tools":
optional_params["tools"] = value
if param == "tool_choice":
_tool_choice: Optional[AnthropicMessagesToolChoice] = None
if value == "auto":
_tool_choice = {"type": "auto"}
elif value == "required":
_tool_choice = {"type": "any"}
elif isinstance(value, dict):
_tool_choice = {"type": "tool", "name": value["function"]["name"]}
if _tool_choice is not None:
optional_params["tool_choice"] = _tool_choice
if param == "stream" and value == True:
optional_params["stream"] = value
if param == "stop":
if isinstance(value, str):
if (
value == "\n"
) and litellm.drop_params == True: # anthropic doesn't allow whitespace characters as stop-sequences
continue
value = [value]
elif isinstance(value, list):
new_v = []
for v in value:
if (
v == "\n"
) and litellm.drop_params == True: # anthropic doesn't allow whitespace characters as stop-sequences
continue
new_v.append(v)
if len(new_v) > 0:
value = new_v
else:
continue
optional_params["stop_sequences"] = value
if param == "temperature":
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
return optional_params
def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
"""
Return if {"cache_control": ..} in message content block
Used to check if anthropic prompt caching headers need to be set.
"""
for message in messages:
if message["content"] is not None and isinstance(message["content"], list):
for content in message["content"]:
if "cache_control" in content:
return True
return False
def translate_system_message(
self, messages: List[AllMessageValues]
) -> List[AnthropicSystemMessageContent]:
system_prompt_indices = []
anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
for idx, message in enumerate(messages):
if message["role"] == "system":
valid_content: bool = False
system_message_block = ChatCompletionSystemMessage(**message)
if isinstance(system_message_block["content"], str):
anthropic_system_message_content = AnthropicSystemMessageContent(
type="text",
text=system_message_block["content"],
)
if "cache_control" in system_message_block:
anthropic_system_message_content["cache_control"] = (
system_message_block["cache_control"]
)
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
elif isinstance(message["content"], list):
for _content in message["content"]:
anthropic_system_message_content = (
AnthropicSystemMessageContent(
type=_content.get("type"),
text=_content.get("text"),
)
)
if "cache_control" in _content:
anthropic_system_message_content["cache_control"] = (
_content["cache_control"]
)
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
if valid_content:
system_prompt_indices.append(idx)
if len(system_prompt_indices) > 0:
for idx in reversed(system_prompt_indices):
messages.pop(idx)
return anthropic_system_message_list
### FOR [BETA] `/v1/messages` endpoint support
def translatable_anthropic_params(self) -> List:
"""
Which anthropic params, we need to translate to the openai format.
"""
return ["messages", "metadata", "system", "tool_choice", "tools"]
def translate_anthropic_messages_to_openai(
self,
messages: List[
Union[
AnthropicMessagesUserMessageParam,
AnthopicMessagesAssistantMessageParam,
]
],
) -> List:
new_messages: List[AllMessageValues] = []
for m in messages:
user_message: Optional[ChatCompletionUserMessage] = None
tool_message_list: List[ChatCompletionToolMessage] = []
new_user_content_list: List[
Union[ChatCompletionTextObject, ChatCompletionImageObject]
] = []
## USER MESSAGE ##
if m["role"] == "user":
## translate user message
if isinstance(m["content"], str):
user_message = ChatCompletionUserMessage(
role="user", content=m["content"]
)
elif isinstance(m["content"], list):
for content in m["content"]:
if content["type"] == "text":
text_obj = ChatCompletionTextObject(
type="text", text=content["text"]
)
new_user_content_list.append(text_obj)
elif content["type"] == "image":
image_url = ChatCompletionImageUrlObject(
url=f"data:{content['type']};base64,{content['source']}"
)
image_obj = ChatCompletionImageObject(
type="image_url", image_url=image_url
)
new_user_content_list.append(image_obj)
elif content["type"] == "tool_result":
if "content" not in content:
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content="",
)
tool_message_list.append(tool_result)
elif isinstance(content["content"], str):
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=content["content"],
)
tool_message_list.append(tool_result)
elif isinstance(content["content"], list):
for c in content["content"]:
if c["type"] == "text":
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=c["text"],
)
tool_message_list.append(tool_result)
elif c["type"] == "image":
image_str = (
f"data:{c['type']};base64,{c['source']}"
)
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=image_str,
)
tool_message_list.append(tool_result)
if user_message is not None:
new_messages.append(user_message)
if len(new_user_content_list) > 0:
new_messages.append({"role": "user", "content": new_user_content_list}) # type: ignore
if len(tool_message_list) > 0:
new_messages.extend(tool_message_list)
## ASSISTANT MESSAGE ##
assistant_message_str: Optional[str] = None
tool_calls: List[ChatCompletionAssistantToolCall] = []
if m["role"] == "assistant":
if isinstance(m["content"], str):
assistant_message_str = m["content"]
elif isinstance(m["content"], list):
for content in m["content"]:
if content["type"] == "text":
if assistant_message_str is None:
assistant_message_str = content["text"]
else:
assistant_message_str += content["text"]
elif content["type"] == "tool_use":
function_chunk = ChatCompletionToolCallFunctionChunk(
name=content["name"],
arguments=json.dumps(content["input"]),
)
tool_calls.append(
ChatCompletionAssistantToolCall(
id=content["id"],
type="function",
function=function_chunk,
)
)
if assistant_message_str is not None or len(tool_calls) > 0:
assistant_message = ChatCompletionAssistantMessage(
role="assistant",
content=assistant_message_str,
)
if len(tool_calls) > 0:
assistant_message["tool_calls"] = tool_calls
new_messages.append(assistant_message)
return new_messages
def translate_anthropic_tool_choice_to_openai(
self, tool_choice: AnthropicMessagesToolChoice
) -> ChatCompletionToolChoiceValues:
if tool_choice["type"] == "any":
return "required"
elif tool_choice["type"] == "auto":
return "auto"
elif tool_choice["type"] == "tool":
tc_function_param = ChatCompletionToolChoiceFunctionParam(
name=tool_choice.get("name", "")
)
return ChatCompletionToolChoiceObjectParam(
type="function", function=tc_function_param
)
else:
raise ValueError(
"Incompatible tool choice param submitted - {}".format(tool_choice)
)
def translate_anthropic_tools_to_openai(
self, tools: List[AnthropicMessagesTool]
) -> List[ChatCompletionToolParam]:
new_tools: List[ChatCompletionToolParam] = []
for tool in tools:
function_chunk = ChatCompletionToolParamFunctionChunk(
name=tool["name"],
parameters=tool["input_schema"],
)
if "description" in tool:
function_chunk["description"] = tool["description"]
new_tools.append(
ChatCompletionToolParam(type="function", function=function_chunk)
)
return new_tools
def translate_anthropic_to_openai(
self, anthropic_message_request: AnthropicMessagesRequest
) -> ChatCompletionRequest:
"""
This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format.
"""
new_messages: List[AllMessageValues] = []
## CONVERT ANTHROPIC MESSAGES TO OPENAI
new_messages = self.translate_anthropic_messages_to_openai(
messages=anthropic_message_request["messages"]
)
## ADD SYSTEM MESSAGE TO MESSAGES
if "system" in anthropic_message_request:
new_messages.insert(
0,
ChatCompletionSystemMessage(
role="system", content=anthropic_message_request["system"]
),
)
new_kwargs: ChatCompletionRequest = {
"model": anthropic_message_request["model"],
"messages": new_messages,
}
## CONVERT METADATA (user_id)
if "metadata" in anthropic_message_request:
if "user_id" in anthropic_message_request["metadata"]:
new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
# Pass litellm proxy specific metadata
if "litellm_metadata" in anthropic_message_request:
# metadata will be passed to litellm.acompletion(), it's a litellm_param
new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata")
## CONVERT TOOL CHOICE
if "tool_choice" in anthropic_message_request:
new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
tool_choice=anthropic_message_request["tool_choice"]
)
## CONVERT TOOLS
if "tools" in anthropic_message_request:
new_kwargs["tools"] = self.translate_anthropic_tools_to_openai(
tools=anthropic_message_request["tools"]
)
translatable_params = self.translatable_anthropic_params()
for k, v in anthropic_message_request.items():
if k not in translatable_params: # pass remaining params as is
new_kwargs[k] = v # type: ignore
return new_kwargs
def _translate_openai_content_to_anthropic(
self, choices: List[Choices]
) -> List[
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
]:
new_content: List[
Union[
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
]
] = []
for choice in choices:
if (
choice.message.tool_calls is not None
and len(choice.message.tool_calls) > 0
):
for tool_call in choice.message.tool_calls:
new_content.append(
AnthropicResponseContentBlockToolUse(
type="tool_use",
id=tool_call.id,
name=tool_call.function.name or "",
input=json.loads(tool_call.function.arguments),
)
)
elif choice.message.content is not None:
new_content.append(
AnthropicResponseContentBlockText(
type="text", text=choice.message.content
)
)
return new_content
def _translate_openai_finish_reason_to_anthropic(
self, openai_finish_reason: str
) -> AnthropicFinishReason:
if openai_finish_reason == "stop":
return "end_turn"
elif openai_finish_reason == "length":
return "max_tokens"
elif openai_finish_reason == "tool_calls":
return "tool_use"
return "end_turn"
def translate_openai_response_to_anthropic(
self, response: litellm.ModelResponse
) -> AnthropicResponse:
## translate content block
anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore
## extract finish reason
anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic(
openai_finish_reason=response.choices[0].finish_reason # type: ignore
)
# extract usage
usage: litellm.Usage = getattr(response, "usage")
anthropic_usage = AnthropicResponseUsageBlock(
input_tokens=usage.prompt_tokens or 0,
output_tokens=usage.completion_tokens or 0,
)
translated_obj = AnthropicResponse(
id=response.id,
type="message",
role="assistant",
model=response.model or "unknown-model",
stop_sequence=None,
usage=anthropic_usage,
content=anthropic_content,
stop_reason=anthropic_finish_reason,
)
return translated_obj
def _translate_streaming_openai_chunk_to_anthropic(
self, choices: List[OpenAIStreamingChoice]
) -> Tuple[
Literal["text_delta", "input_json_delta"],
Union[ContentTextBlockDelta, ContentJsonBlockDelta],
]:
text: str = ""
partial_json: Optional[str] = None
for choice in choices:
if choice.delta.content is not None:
text += choice.delta.content
elif choice.delta.tool_calls is not None:
partial_json = ""
for tool in choice.delta.tool_calls:
if (
tool.function is not None
and tool.function.arguments is not None
):
partial_json += tool.function.arguments
if partial_json is not None:
return "input_json_delta", ContentJsonBlockDelta(
type="input_json_delta", partial_json=partial_json
)
else:
return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)
def translate_streaming_openai_response_to_anthropic(
self, response: litellm.ModelResponse
) -> Union[ContentBlockDelta, MessageBlockDelta]:
## base case - final chunk w/ finish reason
if response.choices[0].finish_reason is not None:
delta = MessageDelta(
stop_reason=self._translate_openai_finish_reason_to_anthropic(
response.choices[0].finish_reason
),
)
if getattr(response, "usage", None) is not None:
litellm_usage_chunk: Optional[litellm.Usage] = response.usage # type: ignore
elif (
hasattr(response, "_hidden_params")
and "usage" in response._hidden_params
):
litellm_usage_chunk = response._hidden_params["usage"]
else:
litellm_usage_chunk = None
if litellm_usage_chunk is not None:
usage_delta = UsageDelta(
input_tokens=litellm_usage_chunk.prompt_tokens or 0,
output_tokens=litellm_usage_chunk.completion_tokens or 0,
)
else:
usage_delta = UsageDelta(input_tokens=0, output_tokens=0)
return MessageBlockDelta(
type="message_delta", delta=delta, usage=usage_delta
)
(
type_of_content,
content_block_delta,
) = self._translate_streaming_openai_chunk_to_anthropic(
choices=response.choices # type: ignore
)
return ContentBlockDelta(
type="content_block_delta",
index=response.choices[0].index,
delta=content_block_delta,
)
# makes headers for API call
def validate_environment(
api_key, user_headers, model, messages: List[AllMessageValues]
@@ -684,8 +139,14 @@ async def make_call(
api_base, headers=headers, data=data, stream=True, timeout=timeout
)
except httpx.HTTPStatusError as e:
+error_headers = getattr(e, "headers", None)
+error_response = getattr(e, "response", None)
+if error_headers is None and error_response:
+    error_headers = getattr(error_response, "headers", None)
raise AnthropicError(
-    status_code=e.response.status_code, message=await e.response.aread()
+    status_code=e.response.status_code,
+    message=await e.response.aread(),
+    headers=error_headers,
)
except Exception as e:
for exception in litellm.LITELLM_EXCEPTION_TYPES:
@@ -726,8 +187,14 @@ def make_sync_call(
api_base, headers=headers, data=data, stream=True, timeout=timeout
)
except httpx.HTTPStatusError as e:
+error_headers = getattr(e, "headers", None)
+error_response = getattr(e, "response", None)
+if error_headers is None and error_response:
+    error_headers = getattr(error_response, "headers", None)
raise AnthropicError(
-    status_code=e.response.status_code, message=e.response.read()
+    status_code=e.response.status_code,
+    message=e.response.read(),
+    headers=error_headers,
)
except Exception as e:
for exception in litellm.LITELLM_EXCEPTION_TYPES:
@@ -736,7 +203,12 @@
raise AnthropicError(status_code=500, message=str(e))
if response.status_code != 200:
-raise AnthropicError(status_code=response.status_code, message=response.read())
+response_headers = getattr(response, "headers", None)
+raise AnthropicError(
+    status_code=response.status_code,
+    message=response.read(),
+    headers=response_headers,
+)

completion_stream = ModelResponseIterator(
streaming_response=response.iter_lines(), sync_stream=True
@@ -763,7 +235,7 @@ class AnthropicChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response],
model_response: ModelResponse,
stream: bool,
-logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
+logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,  # type: ignore
optional_params: dict,
api_key: str,
data: Union[dict, str],
@@ -772,6 +244,14 @@ class AnthropicChatCompletion(BaseLLM):
encoding,
json_mode: bool,
) -> ModelResponse:
+_hidden_params = {}
+_response_headers = dict(response.headers)
+if _response_headers is not None:
+    llm_response_headers = {
+        "{}-{}".format("llm_provider", k): v
+        for k, v in _response_headers.items()
+    }
+    _hidden_params["additional_headers"] = llm_response_headers
## LOGGING
logging_obj.post_call(
input=messages,
@@ -783,14 +263,21 @@ class AnthropicChatCompletion(BaseLLM):
## RESPONSE OBJECT
try:
completion_response = response.json()
-except:
+except Exception as e:
+    response_headers = getattr(response, "headers", None)
raise AnthropicError(
-    message=response.text, status_code=response.status_code
+    message="Unable to get json response - {}, Original Response: {}".format(
+        str(e), response.text
+    ),
+    status_code=response.status_code,
+    headers=response_headers,
)
if "error" in completion_response:
+    response_headers = getattr(response, "headers", None)
raise AnthropicError(
    message=str(completion_response["error"]),
    status_code=response.status_code,
+    headers=response_headers,
)
else:
text_content = ""
@@ -856,6 +343,8 @@ class AnthropicChatCompletion(BaseLLM):
if "cache_read_input_tokens" in _usage:
usage["cache_read_input_tokens"] = _usage["cache_read_input_tokens"]
setattr(model_response, "usage", usage)  # type: ignore
+model_response._hidden_params = _hidden_params
return model_response

async def acompletion_stream_function(
@@ -919,9 +408,9 @@
litellm_params=None,
logger_fn=None,
headers={},
-client=None,
+client: Optional[AsyncHTTPHandler] = None,
) -> Union[ModelResponse, CustomStreamWrapper]:
-async_handler = get_async_httpx_client(
+async_handler = client or get_async_httpx_client(
llm_provider=litellm.LlmProviders.ANTHROPIC
)
@@ -937,7 +426,17 @@
original_response=str(e),
additional_args={"complete_input_dict": data},
)
-raise e
+status_code = getattr(e, "status_code", 500)
+error_headers = getattr(e, "headers", None)
+error_text = getattr(e, "text", str(e))
+error_response = getattr(e, "response", None)
+if error_headers is None and error_response:
+    error_headers = getattr(error_response, "headers", None)
+raise AnthropicError(
+    message=error_text,
+    status_code=status_code,
+    headers=error_headers,
+)

return self._process_response(
model=model,
@@ -977,73 +476,18 @@
_is_function_call = False
messages = copy.deepcopy(messages)
optional_params = copy.deepcopy(optional_params)
if model in custom_prompt_dict:
# check if the model has a registered custom prompt
model_prompt_details = custom_prompt_dict[model]
prompt = custom_prompt(
role_dict=model_prompt_details["roles"],
initial_prompt_value=model_prompt_details["initial_prompt_value"],
final_prompt_value=model_prompt_details["final_prompt_value"],
messages=messages,
)
else:
# Separate system prompt from rest of message
anthropic_system_message_list = AnthropicConfig().translate_system_message(
messages=messages
)
# Handling anthropic API Prompt Caching
if len(anthropic_system_message_list) > 0:
optional_params["system"] = anthropic_system_message_list
# Format rest of message according to anthropic guidelines
try:
messages = prompt_factory(
model=model, messages=messages, custom_llm_provider="anthropic"
)
except Exception as e:
raise AnthropicError(
status_code=400,
message="{}\nReceived Messages={}".format(str(e), messages),
) # don't use verbose_logger.exception, if exception is raised
## Load Config
config = litellm.AnthropicConfig.get_config()
for k, v in config.items():
if (
k not in optional_params
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v
## Handle Tool Calling
if "tools" in optional_params:
_is_function_call = True
if "anthropic-beta" not in headers:
# default to v1 of "anthropic-beta"
headers["anthropic-beta"] = "tools-2024-05-16"
anthropic_tools = []
for tool in optional_params["tools"]:
if "input_schema" in tool: # assume in anthropic format
anthropic_tools.append(tool)
else: # assume openai tool call
new_tool = tool["function"]
new_tool["input_schema"] = new_tool.pop("parameters") # rename key
if "cache_control" in tool:
new_tool["cache_control"] = tool["cache_control"]
anthropic_tools.append(new_tool)
optional_params["tools"] = anthropic_tools
stream = optional_params.pop("stream", None)
-is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
json_mode: bool = optional_params.pop("json_mode", False)
+is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
-data = {
-    "messages": messages,
-    **optional_params,
-}
-if is_vertex_request is False:
-    data["model"] = model
+data = AnthropicConfig()._transform_request(
+    model=model,
+    messages=messages,
+    optional_params=optional_params,
+    headers=headers,
+    _is_function_call=_is_function_call,
+    is_vertex_request=is_vertex_request,
+)

## LOGGING
logging_obj.pre_call(
@@ -1136,12 +580,25 @@
client = HTTPHandler(timeout=timeout)  # type: ignore
else:
client = client
+try:
response = client.post(
-    api_base, headers=headers, data=json.dumps(data), timeout=timeout
+    api_base,
+    headers=headers,
+    data=json.dumps(data),
+    timeout=timeout,
)
-if response.status_code != 200:
+except Exception as e:
+    status_code = getattr(e, "status_code", 500)
+    error_headers = getattr(e, "headers", None)
+    error_text = getattr(e, "text", str(e))
+    error_response = getattr(e, "response", None)
+    if error_headers is None and error_response:
+        error_headers = getattr(error_response, "headers", None)
raise AnthropicError(
-    status_code=response.status_code, message=response.text
+    message=error_text,
+    status_code=status_code,
+    headers=error_headers,
)

return self._process_response(
@@ -1151,7 +608,7 @@
stream=stream,
logging_obj=logging_obj,
api_key=api_key,
-data=data,
+data=data,  # type: ignore
messages=messages,
print_verbose=print_verbose,
optional_params=optional_params,
@@ -1192,7 +649,7 @@ class ModelResponseIterator:
return False

def _handle_usage(
-    self, anthropic_usage_chunk: dict
+    self, anthropic_usage_chunk: Union[dict, UsageDelta]
) -> AnthropicChatCompletionUsageBlock:
special_fields = ["input_tokens", "output_tokens"]
@@ -1203,15 +660,19 @@
+ anthropic_usage_chunk.get("output_tokens", 0),
)
-if "cache_creation_input_tokens" in anthropic_usage_chunk:
-    usage_block["cache_creation_input_tokens"] = anthropic_usage_chunk[
-        "cache_creation_input_tokens"
-    ]
+cache_creation_input_tokens = anthropic_usage_chunk.get(
+    "cache_creation_input_tokens"
+)
+if cache_creation_input_tokens is not None and isinstance(
+    cache_creation_input_tokens, int
+):
+    usage_block["cache_creation_input_tokens"] = cache_creation_input_tokens
-if "cache_read_input_tokens" in anthropic_usage_chunk:
-    usage_block["cache_read_input_tokens"] = anthropic_usage_chunk[
-        "cache_read_input_tokens"
-    ]
+cache_read_input_tokens = anthropic_usage_chunk.get("cache_read_input_tokens")
+if cache_read_input_tokens is not None and isinstance(
+    cache_read_input_tokens, int
+):
+    usage_block["cache_read_input_tokens"] = cache_read_input_tokens

return usage_block
@@ -1313,6 +774,7 @@
}
"""
message_start_block = MessageStartBlock(**chunk)  # type: ignore
+if "usage" in message_start_block["message"]:
usage = self._handle_usage(
    anthropic_usage_chunk=message_start_block["message"]["usage"]
)
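
With the `_hidden_params` change above, provider response headers are re-keyed with an `llm_provider-` prefix and attached to the response object. A sketch of how a caller might read them (the header name is illustrative):

```python
import litellm

resp = litellm.completion(
    model="claude-3-haiku-20240307",
    messages=[{"role": "user", "content": "ping"}],
)
# additional_headers holds the provider's raw response headers, prefixed.
headers = resp._hidden_params.get("additional_headers", {})
print(headers.get("llm_provider-anthropic-ratelimit-requests-remaining"))
```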

litellm/llms/anthropic/chat/transformation.py

@@ -0,0 +1,289 @@
import types
from typing import List, Literal, Optional, Tuple, Union
import litellm
from litellm.llms.prompt_templates.factory import anthropic_messages_pt
from litellm.types.llms.anthropic import (
AnthropicMessageRequestBase,
AnthropicMessagesRequest,
AnthropicMessagesToolChoice,
AnthropicSystemMessageContent,
)
from litellm.types.llms.openai import AllMessageValues, ChatCompletionSystemMessage
from litellm.utils import has_tool_call_blocks
from ..common_utils import AnthropicError
class AnthropicConfig:
"""
Reference: https://docs.anthropic.com/claude/reference/messages_post
to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
"""
max_tokens: Optional[int] = (
4096 # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default)
)
stop_sequences: Optional[list] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
top_k: Optional[int] = None
metadata: Optional[dict] = None
system: Optional[str] = None
def __init__(
self,
max_tokens: Optional[
int
] = 4096, # You can pass in a value yourself or use the default value 4096
stop_sequences: Optional[list] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
top_k: Optional[int] = None,
metadata: Optional[dict] = None,
system: Optional[str] = None,
) -> None:
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"stream",
"stop",
"temperature",
"top_p",
"max_tokens",
"max_completion_tokens",
"tools",
"tool_choice",
"extra_headers",
]
def get_cache_control_headers(self) -> dict:
return {
"anthropic-version": "2023-06-01",
"anthropic-beta": "prompt-caching-2024-07-31",
}
def map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
messages: Optional[List[AllMessageValues]] = None,
):
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value
if param == "max_completion_tokens":
optional_params["max_tokens"] = value
if param == "tools":
optional_params["tools"] = value
if param == "tool_choice":
_tool_choice: Optional[AnthropicMessagesToolChoice] = None
if value == "auto":
_tool_choice = {"type": "auto"}
elif value == "required":
_tool_choice = {"type": "any"}
elif isinstance(value, dict):
_tool_choice = {"type": "tool", "name": value["function"]["name"]}
if _tool_choice is not None:
optional_params["tool_choice"] = _tool_choice
if param == "stream" and value is True:
optional_params["stream"] = value
if param == "stop":
if isinstance(value, str):
if (
value == "\n"
) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
continue
value = [value]
elif isinstance(value, list):
new_v = []
for v in value:
if (
v == "\n"
) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
continue
new_v.append(v)
if len(new_v) > 0:
value = new_v
else:
continue
optional_params["stop_sequences"] = value
if param == "temperature":
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
## VALIDATE REQUEST
"""
Anthropic doesn't support tool calling without `tools=` param specified.
"""
if (
"tools" not in non_default_params
and messages is not None
and has_tool_call_blocks(messages)
):
raise litellm.UnsupportedParamsError(
message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
model="",
llm_provider="anthropic",
)
return optional_params
def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
"""
Return if {"cache_control": ..} in message content block
Used to check if anthropic prompt caching headers need to be set.
"""
for message in messages:
_message_content = message.get("content")
if _message_content is not None and isinstance(_message_content, list):
for content in _message_content:
if "cache_control" in content:
return True
return False
def translate_system_message(
self, messages: List[AllMessageValues]
) -> List[AnthropicSystemMessageContent]:
"""
Translate system message to anthropic format.
Removes system message from the original list and returns a new list of anthropic system message content.
"""
system_prompt_indices = []
anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
for idx, message in enumerate(messages):
if message["role"] == "system":
valid_content: bool = False
system_message_block = ChatCompletionSystemMessage(**message)
if isinstance(system_message_block["content"], str):
anthropic_system_message_content = AnthropicSystemMessageContent(
type="text",
text=system_message_block["content"],
)
if "cache_control" in system_message_block:
anthropic_system_message_content["cache_control"] = (
system_message_block["cache_control"]
)
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
elif isinstance(message["content"], list):
for _content in message["content"]:
anthropic_system_message_content = (
AnthropicSystemMessageContent(
type=_content.get("type"),
text=_content.get("text"),
)
)
if "cache_control" in _content:
anthropic_system_message_content["cache_control"] = (
_content["cache_control"]
)
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
if valid_content:
system_prompt_indices.append(idx)
if len(system_prompt_indices) > 0:
for idx in reversed(system_prompt_indices):
messages.pop(idx)
return anthropic_system_message_list
def _transform_request(
self,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
headers: dict,
_is_function_call: bool,
is_vertex_request: bool,
) -> dict:
"""
Translate messages to anthropic format.
"""
# Separate system prompt from rest of message
anthropic_system_message_list = self.translate_system_message(messages=messages)
# Handling anthropic API Prompt Caching
if len(anthropic_system_message_list) > 0:
optional_params["system"] = anthropic_system_message_list
# Format rest of message according to anthropic guidelines
try:
anthropic_messages = anthropic_messages_pt(
model=model,
messages=messages,
llm_provider="anthropic",
)
except Exception as e:
raise AnthropicError(
status_code=400,
message="{}\nReceived Messages={}".format(str(e), messages),
) # don't use verbose_logger.exception, if exception is raised
## Load Config
config = litellm.AnthropicConfig.get_config()
for k, v in config.items():
if (
k not in optional_params
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v
## Handle Tool Calling
if "tools" in optional_params:
_is_function_call = True
if "anthropic-beta" not in headers:
# default to v1 of "anthropic-beta"
headers["anthropic-beta"] = "tools-2024-05-16"
anthropic_tools = []
for tool in optional_params["tools"]:
if "input_schema" in tool: # assume in anthropic format
anthropic_tools.append(tool)
else: # assume openai tool call
new_tool = tool["function"]
new_tool["input_schema"] = new_tool.pop("parameters") # rename key
if "cache_control" in tool:
new_tool["cache_control"] = tool["cache_control"]
anthropic_tools.append(new_tool)
optional_params["tools"] = anthropic_tools
data = {
"messages": anthropic_messages,
**optional_params,
}
if not is_vertex_request:
data["model"] = model
return data
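
The validation added in `map_openai_params` above backs the commit bullet about tool calls without a `tools=` param (#5747). A sketch exercising it directly, without a network call:

```python
import litellm

messages = [
    {"role": "user", "content": "What's the weather in SF?"},
    {
        "role": "assistant",
        "tool_calls": [
            {
                "id": "toolu_01",
                "type": "function",
                "function": {"name": "get_weather", "arguments": '{"city": "SF"}'},
            }
        ],
    },
]

try:
    # No "tools" entry in non_default_params, but messages carry tool_calls.
    litellm.AnthropicConfig().map_openai_params(
        non_default_params={"temperature": 0.2},
        optional_params={},
        messages=messages,
    )
except litellm.UnsupportedParamsError as e:
    print(e)  # Anthropic doesn't support tool calling without `tools=` param specified. ...
```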

litellm/llms/anthropic/common_utils.py

@@ -0,0 +1,26 @@
"""
This file contains common utils for anthropic calls.
"""
from typing import Optional
import httpx
class AnthropicError(Exception):
def __init__(
self,
status_code: int,
message,
headers: Optional[httpx.Headers] = None,
):
self.status_code = status_code
self.message: str = message
self.headers = headers
self.request = httpx.Request(
method="POST", url="https://api.anthropic.com/v1/messages"
)
self.response = httpx.Response(status_code=status_code, request=self.request)
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs
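
`AnthropicError` now carries the provider's response headers, which is what lets litellm surface Anthropic's `retry-after` on rate limits (#4387). A small sketch of the new field:

```python
import httpx

from litellm.llms.anthropic.common_utils import AnthropicError

# Simulate a rate-limited response to show how the headers field is consumed.
err = AnthropicError(
    status_code=429,
    message="rate limited",
    headers=httpx.Headers({"retry-after": "7"}),
)
if err.status_code == 429 and err.headers is not None:
    print(err.headers.get("retry-after"))  # -> "7"
```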

litellm/llms/anthropic/experimental_pass_through/transformation.py

@@ -0,0 +1,425 @@
import json
import types
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice
import litellm
from litellm.types.llms.anthropic import (
AnthopicMessagesAssistantMessageParam,
AnthropicChatCompletionUsageBlock,
AnthropicFinishReason,
AnthropicMessagesRequest,
AnthropicMessagesTool,
AnthropicMessagesToolChoice,
AnthropicMessagesUserMessageParam,
AnthropicResponse,
AnthropicResponseContentBlockText,
AnthropicResponseContentBlockToolUse,
AnthropicResponseUsageBlock,
AnthropicSystemMessageContent,
ContentBlockDelta,
ContentBlockStart,
ContentBlockStop,
ContentJsonBlockDelta,
ContentTextBlockDelta,
MessageBlockDelta,
MessageDelta,
MessageStartBlock,
UsageDelta,
)
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAssistantMessage,
ChatCompletionAssistantToolCall,
ChatCompletionImageObject,
ChatCompletionImageUrlObject,
ChatCompletionRequest,
ChatCompletionResponseMessage,
ChatCompletionSystemMessage,
ChatCompletionTextObject,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolChoiceFunctionParam,
ChatCompletionToolChoiceObjectParam,
ChatCompletionToolChoiceValues,
ChatCompletionToolMessage,
ChatCompletionToolParam,
ChatCompletionToolParamFunctionChunk,
ChatCompletionUsageBlock,
ChatCompletionUserMessage,
OpenAIMessageContent,
)
from litellm.types.utils import Choices, GenericStreamingChunk
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
from ...base import BaseLLM
from ...prompt_templates.factory import (
anthropic_messages_pt,
custom_prompt,
prompt_factory,
)
class AnthropicExperimentalPassThroughConfig:
def __init__(self):
pass
### FOR [BETA] `/v1/messages` endpoint support
def translatable_anthropic_params(self) -> List:
"""
Which anthropic params, we need to translate to the openai format.
"""
return ["messages", "metadata", "system", "tool_choice", "tools"]
def translate_anthropic_messages_to_openai(
self,
messages: List[
Union[
AnthropicMessagesUserMessageParam,
AnthopicMessagesAssistantMessageParam,
]
],
) -> List:
new_messages: List[AllMessageValues] = []
for m in messages:
user_message: Optional[ChatCompletionUserMessage] = None
tool_message_list: List[ChatCompletionToolMessage] = []
new_user_content_list: List[
Union[ChatCompletionTextObject, ChatCompletionImageObject]
] = []
## USER MESSAGE ##
if m["role"] == "user":
## translate user message
message_content = m.get("content")
if message_content and isinstance(message_content, str):
user_message = ChatCompletionUserMessage(
role="user", content=message_content
)
elif message_content and isinstance(message_content, list):
for content in message_content:
if content["type"] == "text":
text_obj = ChatCompletionTextObject(
type="text", text=content["text"]
)
new_user_content_list.append(text_obj)
elif content["type"] == "image":
image_url = ChatCompletionImageUrlObject(
url=f"data:{content['type']};base64,{content['source']}"
)
image_obj = ChatCompletionImageObject(
type="image_url", image_url=image_url
)
new_user_content_list.append(image_obj)
elif content["type"] == "tool_result":
if "content" not in content:
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content="",
)
tool_message_list.append(tool_result)
elif isinstance(content["content"], str):
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=content["content"],
)
tool_message_list.append(tool_result)
elif isinstance(content["content"], list):
for c in content["content"]:
if c["type"] == "text":
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=c["text"],
)
tool_message_list.append(tool_result)
elif c["type"] == "image":
image_str = (
f"data:{c['type']};base64,{c['source']}"
)
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content["tool_use_id"],
content=image_str,
)
tool_message_list.append(tool_result)
if user_message is not None:
new_messages.append(user_message)
if len(new_user_content_list) > 0:
new_messages.append({"role": "user", "content": new_user_content_list}) # type: ignore
if len(tool_message_list) > 0:
new_messages.extend(tool_message_list)
## ASSISTANT MESSAGE ##
assistant_message_str: Optional[str] = None
tool_calls: List[ChatCompletionAssistantToolCall] = []
if m["role"] == "assistant":
if isinstance(m["content"], str):
assistant_message_str = m["content"]
elif isinstance(m["content"], list):
for content in m["content"]:
if content["type"] == "text":
if assistant_message_str is None:
assistant_message_str = content["text"]
else:
assistant_message_str += content["text"]
elif content["type"] == "tool_use":
function_chunk = ChatCompletionToolCallFunctionChunk(
name=content["name"],
arguments=json.dumps(content["input"]),
)
tool_calls.append(
ChatCompletionAssistantToolCall(
id=content["id"],
type="function",
function=function_chunk,
)
)
if assistant_message_str is not None or len(tool_calls) > 0:
assistant_message = ChatCompletionAssistantMessage(
role="assistant",
content=assistant_message_str,
)
if len(tool_calls) > 0:
assistant_message["tool_calls"] = tool_calls
new_messages.append(assistant_message)
return new_messages
def translate_anthropic_tool_choice_to_openai(
self, tool_choice: AnthropicMessagesToolChoice
) -> ChatCompletionToolChoiceValues:
if tool_choice["type"] == "any":
return "required"
elif tool_choice["type"] == "auto":
return "auto"
elif tool_choice["type"] == "tool":
tc_function_param = ChatCompletionToolChoiceFunctionParam(
name=tool_choice.get("name", "")
)
return ChatCompletionToolChoiceObjectParam(
type="function", function=tc_function_param
)
else:
raise ValueError(
"Incompatible tool choice param submitted - {}".format(tool_choice)
)
def translate_anthropic_tools_to_openai(
self, tools: List[AnthropicMessagesTool]
) -> List[ChatCompletionToolParam]:
new_tools: List[ChatCompletionToolParam] = []
for tool in tools:
function_chunk = ChatCompletionToolParamFunctionChunk(
name=tool["name"],
parameters=tool["input_schema"],
)
if "description" in tool:
function_chunk["description"] = tool["description"]
new_tools.append(
ChatCompletionToolParam(type="function", function=function_chunk)
)
return new_tools
def translate_anthropic_to_openai(
self, anthropic_message_request: AnthropicMessagesRequest
) -> ChatCompletionRequest:
"""
This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format.
"""
new_messages: List[AllMessageValues] = []
## CONVERT ANTHROPIC MESSAGES TO OPENAI
new_messages = self.translate_anthropic_messages_to_openai(
messages=anthropic_message_request["messages"]
)
## ADD SYSTEM MESSAGE TO MESSAGES
if "system" in anthropic_message_request:
new_messages.insert(
0,
ChatCompletionSystemMessage(
role="system", content=anthropic_message_request["system"]
),
)
new_kwargs: ChatCompletionRequest = {
"model": anthropic_message_request["model"],
"messages": new_messages,
}
## CONVERT METADATA (user_id)
if "metadata" in anthropic_message_request:
if "user_id" in anthropic_message_request["metadata"]:
new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
# Pass litellm proxy specific metadata
if "litellm_metadata" in anthropic_message_request:
# metadata will be passed to litellm.acompletion(), it's a litellm_param
new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata")
## CONVERT TOOL CHOICE
if "tool_choice" in anthropic_message_request:
new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
tool_choice=anthropic_message_request["tool_choice"]
)
## CONVERT TOOLS
if "tools" in anthropic_message_request:
new_kwargs["tools"] = self.translate_anthropic_tools_to_openai(
tools=anthropic_message_request["tools"]
)
translatable_params = self.translatable_anthropic_params()
for k, v in anthropic_message_request.items():
if k not in translatable_params: # pass remaining params as is
new_kwargs[k] = v # type: ignore
return new_kwargs
def _translate_openai_content_to_anthropic(
self, choices: List[Choices]
) -> List[
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
]:
new_content: List[
Union[
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
]
] = []
for choice in choices:
if (
choice.message.tool_calls is not None
and len(choice.message.tool_calls) > 0
):
for tool_call in choice.message.tool_calls:
new_content.append(
AnthropicResponseContentBlockToolUse(
type="tool_use",
id=tool_call.id,
name=tool_call.function.name or "",
input=json.loads(tool_call.function.arguments),
)
)
elif choice.message.content is not None:
new_content.append(
AnthropicResponseContentBlockText(
type="text", text=choice.message.content
)
)
return new_content
def _translate_openai_finish_reason_to_anthropic(
self, openai_finish_reason: str
) -> AnthropicFinishReason:
if openai_finish_reason == "stop":
return "end_turn"
elif openai_finish_reason == "length":
return "max_tokens"
elif openai_finish_reason == "tool_calls":
return "tool_use"
return "end_turn"
def translate_openai_response_to_anthropic(
self, response: litellm.ModelResponse
) -> AnthropicResponse:
## translate content block
anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore
## extract finish reason
anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic(
openai_finish_reason=response.choices[0].finish_reason # type: ignore
)
# extract usage
usage: litellm.Usage = getattr(response, "usage")
anthropic_usage = AnthropicResponseUsageBlock(
input_tokens=usage.prompt_tokens or 0,
output_tokens=usage.completion_tokens or 0,
)
translated_obj = AnthropicResponse(
id=response.id,
type="message",
role="assistant",
model=response.model or "unknown-model",
stop_sequence=None,
usage=anthropic_usage,
content=anthropic_content,
stop_reason=anthropic_finish_reason,
)
return translated_obj
def _translate_streaming_openai_chunk_to_anthropic(
self, choices: List[OpenAIStreamingChoice]
) -> Tuple[
Literal["text_delta", "input_json_delta"],
Union[ContentTextBlockDelta, ContentJsonBlockDelta],
]:
text: str = ""
partial_json: Optional[str] = None
for choice in choices:
if choice.delta.content is not None:
text += choice.delta.content
elif choice.delta.tool_calls is not None:
partial_json = ""
for tool in choice.delta.tool_calls:
if (
tool.function is not None
and tool.function.arguments is not None
):
partial_json += tool.function.arguments
if partial_json is not None:
return "input_json_delta", ContentJsonBlockDelta(
type="input_json_delta", partial_json=partial_json
)
else:
return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)
def translate_streaming_openai_response_to_anthropic(
self, response: litellm.ModelResponse
) -> Union[ContentBlockDelta, MessageBlockDelta]:
## base case - final chunk w/ finish reason
if response.choices[0].finish_reason is not None:
delta = MessageDelta(
stop_reason=self._translate_openai_finish_reason_to_anthropic(
response.choices[0].finish_reason
),
)
if getattr(response, "usage", None) is not None:
litellm_usage_chunk: Optional[litellm.Usage] = response.usage # type: ignore
elif (
hasattr(response, "_hidden_params")
and "usage" in response._hidden_params
):
litellm_usage_chunk = response._hidden_params["usage"]
else:
litellm_usage_chunk = None
if litellm_usage_chunk is not None:
usage_delta = UsageDelta(
input_tokens=litellm_usage_chunk.prompt_tokens or 0,
output_tokens=litellm_usage_chunk.completion_tokens or 0,
)
else:
usage_delta = UsageDelta(input_tokens=0, output_tokens=0)
return MessageBlockDelta(
type="message_delta", delta=delta, usage=usage_delta
)
(
type_of_content,
content_block_delta,
) = self._translate_streaming_openai_chunk_to_anthropic(
choices=response.choices # type: ignore
)
return ContentBlockDelta(
type="content_block_delta",
index=response.choices[0].index,
delta=content_block_delta,
)
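
To make the adapter flow above concrete, here is a minimal usage sketch of the request-side translation. It assumes the class is exported as `AnthropicExperimentalPassThroughConfig` (as in the test changes later in this diff) and that the dict matches `AnthropicMessagesRequest`:

from litellm import AnthropicExperimentalPassThroughConfig

config = AnthropicExperimentalPassThroughConfig()
anthropic_request = {
    "model": "claude-3-haiku-20240307",
    "max_tokens": 256,
    "system": "You are a helpful assistant.",
    "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    "tool_choice": {"type": "auto"},
}
openai_request = config.translate_anthropic_to_openai(
    anthropic_message_request=anthropic_request  # type: ignore
)
# the `system` block is inserted as messages[0]; tool_choice "auto" maps to "auto"
assert openai_request["messages"][0]["role"] == "system"
assert openai_request["tool_choice"] == "auto"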

View file

@@ -22,7 +22,7 @@ from litellm.types.llms.openai import (
     ChatCompletionToolParamFunctionChunk,
 )
 from litellm.types.utils import ModelResponse, Usage
-from litellm.utils import CustomStreamWrapper
+from litellm.utils import CustomStreamWrapper, has_tool_call_blocks
 from ...prompt_templates.factory import _bedrock_converse_messages_pt, _bedrock_tools_pt
 from ..common_utils import BedrockError, get_bedrock_tool_name
@@ -136,6 +136,7 @@ class AmazonConverseConfig:
         non_default_params: dict,
         optional_params: dict,
         drop_params: bool,
+        messages: Optional[List[AllMessageValues]] = None,
     ) -> dict:
         for param, value in non_default_params.items():
             if param == "response_format":
@@ -202,6 +203,21 @@ class AmazonConverseConfig:
                 )
             if _tool_choice_value is not None:
                 optional_params["tool_choice"] = _tool_choice_value
+        ## VALIDATE REQUEST
+        """
+        Bedrock doesn't support tool calling without `tools=` param specified.
+        """
+        if (
+            "tools" not in non_default_params
+            and messages is not None
+            and has_tool_call_blocks(messages)
+        ):
+            raise litellm.UnsupportedParamsError(
+                message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.",
+                model="",
+                llm_provider="anthropic",
+            )
         return optional_params

     def _transform_request(
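
As a quick illustration of the validation added above (a sketch mirroring the test added later in this diff): replaying an assistant tool-call turn without re-sending `tools=` now fails fast instead of erroring on the provider side.

import litellm

messages = [
    {"role": "user", "content": "What's the weather in SF?"},
    {
        "role": "assistant",
        "tool_calls": [
            {
                "id": "tooluse_123",  # hypothetical id, for illustration only
                "type": "function",
                "function": {
                    "name": "get_current_weather",
                    "arguments": '{"location": "San Francisco, CA"}',
                },
            }
        ],
    },
]

try:
    litellm.completion(
        model="anthropic.claude-3-sonnet-20240229-v1:0",
        messages=messages,  # tool-call blocks present, but no `tools=` param
    )
except litellm.UnsupportedParamsError as e:
    print(e)  # asks the caller to pass `tools=` to enable tool calling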

View file

@@ -0,0 +1,60 @@
"""
Handles the chat completion request for groq
"""

from typing import Any, Callable, Optional, Union

from httpx._config import Timeout

from litellm.utils import ModelResponse

from ...groq.chat.transformation import GroqChatConfig
from ...OpenAI.openai import OpenAIChatCompletion


class GroqChatCompletion(OpenAIChatCompletion):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def completion(
        self,
        model_response: ModelResponse,
        timeout: Union[float, Timeout],
        optional_params: dict,
        logging_obj: Any,
        model: Optional[str] = None,
        messages: Optional[list] = None,
        print_verbose: Optional[Callable[..., Any]] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        acompletion: bool = False,
        litellm_params=None,
        logger_fn=None,
        headers: Optional[dict] = None,
        custom_prompt_dict: dict = {},
        client=None,
        organization: Optional[str] = None,
        custom_llm_provider: Optional[str] = None,
        drop_params: Optional[bool] = None,
    ):
        messages = GroqChatConfig()._transform_messages(messages)  # type: ignore
        return super().completion(
            model_response,
            timeout,
            optional_params,
            logging_obj,
            model,
            messages,
            print_verbose,
            api_key,
            api_base,
            acompletion,
            litellm_params,
            logger_fn,
            headers,
            custom_prompt_dict,
            client,
            organization,
            custom_llm_provider,
            drop_params,
        )

View file

@@ -0,0 +1,88 @@
"""
Translate from OpenAI's `/v1/chat/completions` to Groq's `/v1/chat/completions`
"""

import types
from typing import List, Optional, Union

from pydantic import BaseModel

import litellm
from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage

from ...OpenAI.chat.gpt_transformation import OpenAIGPTConfig


class GroqChatConfig(OpenAIGPTConfig):
    frequency_penalty: Optional[int] = None
    function_call: Optional[Union[str, dict]] = None
    functions: Optional[list] = None
    logit_bias: Optional[dict] = None
    max_tokens: Optional[int] = None
    n: Optional[int] = None
    presence_penalty: Optional[int] = None
    stop: Optional[Union[str, list]] = None
    temperature: Optional[int] = None
    top_p: Optional[int] = None
    response_format: Optional[dict] = None
    tools: Optional[list] = None
    tool_choice: Optional[Union[str, dict]] = None

    def __init__(
        self,
        frequency_penalty: Optional[int] = None,
        function_call: Optional[Union[str, dict]] = None,
        functions: Optional[list] = None,
        logit_bias: Optional[dict] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
        response_format: Optional[dict] = None,
        tools: Optional[list] = None,
        tool_choice: Optional[Union[str, dict]] = None,
    ) -> None:
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def _transform_messages(self, messages: List[AllMessageValues]) -> List:
        for idx, message in enumerate(messages):
            """
            1. Don't pass 'null' function_call assistant message to groq - https://github.com/BerriAI/litellm/issues/5839
            """
            if isinstance(message, BaseModel):
                _message = message.model_dump()
            else:
                _message = message
            assistant_message = _message.get("role") == "assistant"
            if assistant_message:
                new_message = ChatCompletionAssistantMessage(role="assistant")
                for k, v in _message.items():
                    if v is not None:
                        new_message[k] = v  # type: ignore
                messages[idx] = new_message
        return messages
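
A quick sketch of what `_transform_messages` does to an assistant turn that carries a `None` function_call (the shape reported in issue #5839):

from litellm.llms.groq.chat.transformation import GroqChatConfig

messages = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!", "function_call": None, "tool_calls": None},
]
cleaned = GroqChatConfig()._transform_messages(messages)  # type: ignore
print(cleaned[1])  # {'role': 'assistant', 'content': 'Hello!'} - null fields dropped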

View file

@@ -0,0 +1,101 @@
"""
Translate from OpenAI's `/v1/audio/transcriptions` to Groq's `/v1/audio/transcriptions`
"""

import types
from typing import List, Optional, Union

import litellm


class GroqSTTConfig:
    frequency_penalty: Optional[int] = None
    function_call: Optional[Union[str, dict]] = None
    functions: Optional[list] = None
    logit_bias: Optional[dict] = None
    max_tokens: Optional[int] = None
    n: Optional[int] = None
    presence_penalty: Optional[int] = None
    stop: Optional[Union[str, list]] = None
    temperature: Optional[int] = None
    top_p: Optional[int] = None
    response_format: Optional[dict] = None
    tools: Optional[list] = None
    tool_choice: Optional[Union[str, dict]] = None

    def __init__(
        self,
        frequency_penalty: Optional[int] = None,
        function_call: Optional[Union[str, dict]] = None,
        functions: Optional[list] = None,
        logit_bias: Optional[dict] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
        response_format: Optional[dict] = None,
        tools: Optional[list] = None,
        tool_choice: Optional[Union[str, dict]] = None,
    ) -> None:
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params_stt(self):
        return [
            "prompt",
            "response_format",
            "temperature",
            "language",
        ]

    def get_supported_openai_response_formats_stt(self) -> List[str]:
        return ["json", "verbose_json", "text"]

    def map_openai_params_stt(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        response_formats = self.get_supported_openai_response_formats_stt()
        for param, value in non_default_params.items():
            if param == "response_format":
                if value in response_formats:
                    optional_params[param] = value
                else:
                    if litellm.drop_params is True or drop_params is True:
                        pass
                    else:
                        raise litellm.utils.UnsupportedParamsError(
                            message="Groq doesn't support response_format={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
                                value
                            ),
                            status_code=400,
                        )
            else:
                optional_params[param] = value
        return optional_params
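
A short sketch of the param-mapping behavior above: an unsupported `response_format` either raises or is silently dropped, depending on `drop_params` (the model name here is hypothetical, chosen only for illustration):

config = GroqSTTConfig()

# a supported format passes through
params = config.map_openai_params_stt(
    non_default_params={"response_format": "verbose_json", "temperature": 0},
    optional_params={},
    model="whisper-large-v3",  # hypothetical model name
    drop_params=False,
)
print(params)  # {'response_format': 'verbose_json', 'temperature': 0}

# an unsupported format with drop_params=True is dropped instead of raising
params = config.map_openai_params_stt(
    non_default_params={"response_format": "srt"},
    optional_params={},
    model="whisper-large-v3",
    drop_params=True,
)
print(params)  # {}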

View file

@@ -276,7 +276,7 @@ def completion(
     from anthropic import AnthropicVertex

-    from litellm.llms.anthropic.chat import AnthropicChatCompletion
+    from litellm.llms.anthropic.chat.handler import AnthropicChatCompletion
     from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
         VertexLLM,
     )
@@ -367,7 +367,7 @@ async def async_completion(
     if client is None:
         vertex_ai_client = AsyncAnthropicVertex(
-            project_id=vertex_project, region=vertex_location, access_token=access_token
+            project_id=vertex_project, region=vertex_location, access_token=access_token  # type: ignore
         )
     else:
         vertex_ai_client = client
@@ -438,7 +438,7 @@ async def async_streaming(
     if client is None:
         vertex_ai_client = AsyncAnthropicVertex(
-            project_id=vertex_project, region=vertex_location, access_token=access_token
+            project_id=vertex_project, region=vertex_location, access_token=access_token  # type: ignore
         )
     else:
         vertex_ai_client = client

View file

@@ -96,6 +96,7 @@ from .llms.cohere import completion as cohere_completion  # type: ignore
 from .llms.cohere import embed as cohere_embed
 from .llms.custom_llm import CustomLLM, custom_chat_llm_router
 from .llms.databricks.chat import DatabricksChatCompletion
+from .llms.groq.chat.handler import GroqChatCompletion
 from .llms.huggingface_restapi import Huggingface
 from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
 from .llms.OpenAI.chat.o1_handler import OpenAIO1ChatCompletion
@@ -168,6 +169,7 @@ openai_text_completions = OpenAITextCompletion()
 openai_o1_chat_completions = OpenAIO1ChatCompletion()
 openai_audio_transcriptions = OpenAIAudioTranscription()
 databricks_chat_completions = DatabricksChatCompletion()
+groq_chat_completions = GroqChatCompletion()
 azure_ai_chat_completions = AzureAIChatCompletion()
 azure_ai_embedding = AzureAIEmbedding()
 anthropic_chat_completions = AnthropicChatCompletion()
@@ -958,6 +960,7 @@ def completion(
             extra_headers=extra_headers,
             api_version=api_version,
             parallel_tool_calls=parallel_tool_calls,
+            messages=messages,
             **non_default_params,
         )
@@ -1318,13 +1321,56 @@ def completion(
                 additional_args={"headers": headers},
             )
             response = _response
+        elif custom_llm_provider == "groq":
+            api_base = (
+                api_base  # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there
+                or litellm.api_base
+                or get_secret("GROQ_API_BASE")
+                or "https://api.groq.com/openai/v1"
+            )
+
+            # set API KEY
+            api_key = (
+                api_key
+                or litellm.api_key  # for deepinfra/perplexity/anyscale/friendliai we check in get_llm_provider and pass in the api key from there
+                or litellm.groq_key
+                or get_secret("GROQ_API_KEY")
+            )
+
+            headers = headers or litellm.headers
+
+            ## LOAD CONFIG - if set
+            config = litellm.GroqChatConfig.get_config()
+            for k, v in config.items():
+                if (
+                    k not in optional_params
+                ):  # completion(top_k=3) > openai_config(top_k=3) <- allows for dynamic variables to be passed in
+                    optional_params[k] = v
+
+            response = groq_chat_completions.completion(
+                model=model,
+                messages=messages,
+                headers=headers,
+                model_response=model_response,
+                print_verbose=print_verbose,
+                api_key=api_key,
+                api_base=api_base,
+                acompletion=acompletion,
+                logging_obj=logging,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                logger_fn=logger_fn,
+                timeout=timeout,  # type: ignore
+                custom_prompt_dict=custom_prompt_dict,
+                client=client,  # pass AsyncOpenAI, OpenAI client
+                organization=organization,
+                custom_llm_provider=custom_llm_provider,
+            )
         elif (
             model in litellm.open_ai_chat_completion_models
             or custom_llm_provider == "custom_openai"
             or custom_llm_provider == "deepinfra"
             or custom_llm_provider == "perplexity"
-            or custom_llm_provider == "groq"
             or custom_llm_provider == "nvidia_nim"
             or custom_llm_provider == "cerebras"
             or custom_llm_provider == "sambanova"
@@ -1431,6 +1477,7 @@ def completion(
                 original_response=response,
                 additional_args={"headers": headers},
             )
+
         elif (
             "replicate" in model
             or custom_llm_provider == "replicate"
@@ -2933,6 +2980,7 @@ def batch_completion(
     deployment_id=None,
     request_timeout: Optional[int] = None,
     timeout: Optional[int] = 600,
+    max_workers: Optional[int] = 100,
     # Optional liteLLM function params
     **kwargs,
 ):
@@ -2956,6 +3004,7 @@ def batch_completion(
         user (str, optional): The user string for generating completions. Defaults to "".
         deployment_id (optional): The deployment ID for generating completions. Defaults to None.
         request_timeout (int, optional): The request timeout for generating completions. Defaults to None.
+        max_workers (int, optional): The maximum number of threads to use for parallel processing.

     Returns:
         list: A list of completion results.
@@ -3001,7 +3050,7 @@ def batch_completion(
         for i in range(0, len(lst), n):
             yield lst[i : i + n]

-    with ThreadPoolExecutor(max_workers=100) as executor:
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
         for sub_batch in chunks(batch_messages, 100):
             for message_list in sub_batch:
                 kwargs_modified = args.copy()
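
Usage sketch for the new knob, capping the thread pool instead of the old hard-coded 100 workers:

import litellm

responses = litellm.batch_completion(
    model="gpt-3.5-turbo",
    messages=[
        [{"role": "user", "content": f"Say hello #{i}"}] for i in range(20)
    ],
    max_workers=10,  # at most 10 threads in flight
)
print(len(responses))  # 20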

View file

@@ -1173,6 +1173,18 @@
         "supports_function_calling": true,
         "supports_assistant_prefill": true
     },
+    "mistral/pixtral-12b-2409": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true,
+        "supports_vision": true
+    },
     "mistral/open-mistral-7b": {
         "max_tokens": 8191,
         "max_input_tokens": 32000,

View file

@@ -760,7 +760,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
             return _user_id_rate_limits.model_dump()
         except Exception as e:
-            verbose_proxy_logger.exception(
+            verbose_proxy_logger.debug(
                 "Parallel Request Limiter: Error getting user object", str(e)
             )
             return None

View file

@@ -389,6 +389,9 @@ async def add_litellm_data_to_request(
         user_api_key_dict=user_api_key_dict,
     )

+    verbose_proxy_logger.debug(
+        f"[PROXY]returned data from litellm_pre_call_utils: {data}"
+    )
     return data

View file

@@ -1466,9 +1466,6 @@ class PrismaClient:
     ):
         args_passed_in = locals()
         start_time = time.time()
-        verbose_proxy_logger.debug(
-            f"PrismaClient: get_data - args_passed_in: {args_passed_in}"
-        )
         hashed_token: Optional[str] = None
         try:
             response: Any = None

View file

@@ -1224,3 +1224,14 @@ def test_langfuse_prompt_type(prompt):
     _add_prompt_to_generation_params(
         generation_params=generation_params, clean_metadata=clean_metadata
     )
+
+
+def test_langfuse_logging_metadata():
+    from litellm.integrations.langfuse import log_requester_metadata
+
+    metadata = {"key": "value", "requester_metadata": {"key": "value"}}
+    got_metadata = log_requester_metadata(clean_metadata=metadata)
+    expected_metadata = {"requester_metadata": {"key": "value"}}
+
+    assert expected_metadata == got_metadata

View file

@@ -61,6 +61,7 @@ async def test_litellm_anthropic_prompt_caching_tools():
         }
     }
     mock_response.json = return_val
+    mock_response.headers = {"key": "value"}

     litellm.set_verbose = True
     with patch(
@@ -466,6 +467,7 @@ async def test_litellm_anthropic_prompt_caching_system():
         }
     }
     mock_response.json = return_val
+    mock_response.headers = {"key": "value"}

     litellm.set_verbose = True
     with patch(

View file

@@ -24,7 +24,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.prompt_templates.factory import anthropic_messages_pt

-# litellm.num_retries = 3
+# litellm.num_retries=3
 litellm.cache = None
 litellm.success_callback = []

View file

@@ -1173,7 +1173,12 @@ def test_turn_off_message_logging():
 ##### VALID JSON ######

-@pytest.mark.parametrize("model", ["gpt-3.5-turbo", "azure/chatgpt-v-2"])
+@pytest.mark.parametrize(
+    "model",
+    [
+        "ft:gpt-3.5-turbo:my-org:custom_suffix:id"
+    ],  # "gpt-3.5-turbo", "azure/chatgpt-v-2",
+)
 @pytest.mark.parametrize(
     "turn_off_message_logging",
     [
@@ -1200,7 +1205,7 @@ def test_standard_logging_payload(model, turn_off_message_logging):
     _ = litellm.completion(
         model=model,
         messages=[{"role": "user", "content": "Hey, how's it going?"}],
-        # mock_response="Going well!",
+        mock_response="Going well!",
     )

     time.sleep(2)

View file

@@ -7,6 +7,8 @@ from typing import Any

 from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError

+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
@@ -884,6 +886,42 @@ def _pre_call_utils(
     return data, original_function, mapped_target


+def _pre_call_utils_httpx(
+    call_type: str,
+    data: dict,
+    client: Union[HTTPHandler, AsyncHTTPHandler],
+    sync_mode: bool,
+    streaming: Optional[bool],
+):
+    mapped_target: Any = client.client
+    if call_type == "embedding":
+        data["input"] = "Hello world!"
+        if sync_mode:
+            original_function = litellm.embedding
+        else:
+            original_function = litellm.aembedding
+    elif call_type == "chat_completion":
+        data["messages"] = [{"role": "user", "content": "Hello world"}]
+        if streaming is True:
+            data["stream"] = True
+        if sync_mode:
+            original_function = litellm.completion
+        else:
+            original_function = litellm.acompletion
+    elif call_type == "completion":
+        data["prompt"] = "Hello world"
+        if streaming is True:
+            data["stream"] = True
+        if sync_mode:
+            original_function = litellm.text_completion
+        else:
+            original_function = litellm.atext_completion
+
+    return data, original_function, mapped_target
+
+
 @pytest.mark.parametrize(
     "sync_mode",
     [True, False],
@@ -1006,3 +1044,111 @@ async def test_exception_with_headers(sync_mode, provider, model, call_type, str
     if exception_raised is False:
         print(resp)
     assert exception_raised
+
+
+@pytest.mark.parametrize(
+    "sync_mode",
+    [True, False],
+)
+@pytest.mark.parametrize("streaming", [True, False])
+@pytest.mark.parametrize(
+    "provider, model, call_type",
+    [
+        ("anthropic", "claude-3-haiku-20240307", "chat_completion"),
+    ],
+)
+@pytest.mark.asyncio
+async def test_exception_with_headers_httpx(
+    sync_mode, provider, model, call_type, streaming
+):
+    """
+    User feedback: litellm says "No deployments available for selected model, Try again in 60 seconds"
+    but Azure says to retry in at most 9s
+
+    ```
+    {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"}
+    ```
+    """
+    print(f"Received args: {locals()}")
+    import openai
+
+    if sync_mode:
+        client = HTTPHandler()
+    else:
+        client = AsyncHTTPHandler()
+
+    data = {"model": model}
+    data, original_function, mapped_target = _pre_call_utils_httpx(
+        call_type=call_type,
+        data=data,
+        client=client,
+        sync_mode=sync_mode,
+        streaming=streaming,
+    )
+
+    cooldown_time = 30.0
+
+    def _return_exception(*args, **kwargs):
+        import datetime
+
+        from httpx import Headers, HTTPStatusError, Request, Response
+
+        # Create the Request object
+        request = Request("POST", "http://0.0.0.0:9000/chat/completions")
+
+        # Create the Response object with the necessary headers and status code
+        response = Response(
+            status_code=429,
+            headers=Headers(
+                {
+                    "date": "Sat, 21 Sep 2024 22:56:53 GMT",
+                    "server": "uvicorn",
+                    "retry-after": "30",
+                    "content-length": "30",
+                    "content-type": "application/json",
+                }
+            ),
+            request=request,
+        )
+
+        # Create and raise the HTTPStatusError exception
+        raise HTTPStatusError(
+            message="Error code: 429 - Rate Limit Error!",
+            request=request,
+            response=response,
+        )
+
+    with patch.object(
+        mapped_target,
+        "send",
+        side_effect=_return_exception,
+    ):
+        new_retry_after_mock_client = MagicMock(return_value=-1)
+
+        litellm.utils._get_retry_after_from_exception_header = (
+            new_retry_after_mock_client
+        )
+
+        exception_raised = False
+        try:
+            if sync_mode:
+                resp = original_function(**data, client=client)
+                if streaming:
+                    for chunk in resp:
+                        continue
+            else:
+                resp = await original_function(**data, client=client)
+                if streaming:
+                    async for chunk in resp:
+                        continue
+
+        except litellm.RateLimitError as e:
+            exception_raised = True
+            assert e.litellm_response_headers is not None
+            print("e.litellm_response_headers", e.litellm_response_headers)
+            assert int(e.litellm_response_headers["retry-after"]) == cooldown_time
+
+        if exception_raised is False:
+            print(resp)
+        assert exception_raised

View file

@@ -45,11 +45,12 @@ def get_current_weather(location, unit="fahrenheit"):
 @pytest.mark.parametrize(
     "model",
     [
-        # "gpt-3.5-turbo-1106",
+        "gpt-3.5-turbo-1106",
         # "mistral/mistral-large-latest",
         # "claude-3-haiku-20240307",
         # "gemini/gemini-1.5-pro",
         "anthropic.claude-3-sonnet-20240229-v1:0",
+        "groq/llama3-8b-8192",
     ],
 )
 @pytest.mark.flaky(retries=3, delay=1)
@@ -154,6 +155,105 @@ def test_aaparallel_function_call(model):
 # test_parallel_function_call()

+from litellm.types.utils import ChatCompletionMessageToolCall, Function, Message
+
+
+@pytest.mark.parametrize(
+    "model, provider",
+    [
+        (
+            "anthropic.claude-3-sonnet-20240229-v1:0",
+            "bedrock",
+        ),
+        ("claude-3-haiku-20240307", "anthropic"),
+    ],
+)
+@pytest.mark.parametrize(
+    "messages, expected_error_msg",
+    [
+        (
+            [
+                {
+                    "role": "user",
+                    "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
+                },
+                Message(
+                    content="Here are the current weather conditions for San Francisco, Tokyo, and Paris:",
+                    role="assistant",
+                    tool_calls=[
+                        ChatCompletionMessageToolCall(
+                            index=1,
+                            function=Function(
+                                arguments='{"location": "San Francisco, CA", "unit": "fahrenheit"}',
+                                name="get_current_weather",
+                            ),
+                            id="tooluse_Jj98qn6xQlOP_PiQr-w9iA",
+                            type="function",
+                        )
+                    ],
+                    function_call=None,
+                ),
+                {
+                    "tool_call_id": "tooluse_Jj98qn6xQlOP_PiQr-w9iA",
+                    "role": "tool",
+                    "name": "get_current_weather",
+                    "content": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}',
+                },
+            ],
+            True,
+        ),
+        (
+            [
+                {
+                    "role": "user",
+                    "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
+                }
+            ],
+            False,
+        ),
+    ],
+)
+def test_parallel_function_call_anthropic_error_msg(
+    model, provider, messages, expected_error_msg
+):
+    """
+    Anthropic doesn't support tool calling without `tools=` param specified.
+
+    Ensure this error is thrown when `tools=` param is not specified. But tool call requests are made.
+
+    Reference Issue: https://github.com/BerriAI/litellm/issues/5747, https://github.com/BerriAI/litellm/issues/5388
+    """
+    try:
+        litellm.set_verbose = True
+
+        messages = messages
+
+        if expected_error_msg:
+            with pytest.raises(litellm.UnsupportedParamsError) as e:
+                second_response = litellm.completion(
+                    model=model,
+                    messages=messages,
+                    temperature=0.2,
+                    seed=22,
+                    drop_params=True,
+                )  # get a new response from the model where it can see the function response
+                print("second response\n", second_response)
+        else:
+            second_response = litellm.completion(
+                model=model,
+                messages=messages,
+                temperature=0.2,
+                seed=22,
+                drop_params=True,
+            )  # get a new response from the model where it can see the function response
+            print("second response\n", second_response)
+    except litellm.InternalServerError as e:
+        print(e)
+    except litellm.RateLimitError as e:
+        print(e)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 def test_parallel_function_call_stream():
     try:
View file

@@ -62,3 +62,9 @@ def test_get_model_info_shows_supports_prompt_caching():
     info = litellm.get_model_info("deepseek/deepseek-chat")
     print("info", info)
     assert info.get("supports_prompt_caching") is True
+
+
+def test_get_model_info_finetuned_models():
+    info = litellm.get_model_info("ft:gpt-3.5-turbo:my-org:custom_suffix:id")
+    print("info", info)
+    assert info["input_cost_per_token"] == 0.000003

View file

@@ -18,13 +18,13 @@ class AnthropicMessagesTool(TypedDict, total=False):

 class AnthropicMessagesTextParam(TypedDict, total=False):
-    type: Literal["text"]
-    text: str
+    type: Required[Literal["text"]]
+    text: Required[str]
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


 class AnthropicMessagesToolUseParam(TypedDict):
-    type: Literal["tool_use"]
+    type: Required[Literal["tool_use"]]
     id: str
     name: str
     input: dict
@@ -58,8 +58,8 @@ class AnthropicImageParamSource(TypedDict):

 class AnthropicMessagesImageParam(TypedDict, total=False):
-    type: Literal["image"]
-    source: AnthropicImageParamSource
+    type: Required[Literal["image"]]
+    source: Required[AnthropicImageParamSource]
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
@@ -102,16 +102,13 @@ class AnthropicSystemMessageContent(TypedDict, total=False):
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


-class AnthropicMessagesRequest(TypedDict, total=False):
-    model: Required[str]
-    messages: Required[
-        List[
-            Union[
-                AnthropicMessagesUserMessageParam,
-                AnthopicMessagesAssistantMessageParam,
-            ]
-        ]
-    ]
+AllAnthropicMessageValues = Union[
+    AnthropicMessagesUserMessageParam, AnthopicMessagesAssistantMessageParam
+]
+
+
+class AnthropicMessageRequestBase(TypedDict, total=False):
+    messages: Required[List[AllAnthropicMessageValues]]
     max_tokens: Required[int]
     metadata: AnthropicMetadata
     stop_sequences: List[str]
@@ -123,6 +120,9 @@ class AnthropicMessagesRequest(TypedDict, total=False):
     top_k: int
     top_p: float

+
+class AnthropicMessagesRequest(AnthropicMessageRequestBase, total=False):
+    model: Required[str]
     # litellm param - used for tracking litellm proxy metadata in the request
     litellm_metadata: dict
@@ -291,9 +291,9 @@ class AnthropicResponse(BaseModel):
     """Billing and rate-limit usage."""


-class AnthropicChatCompletionUsageBlock(TypedDict, total=False):
-    prompt_tokens: Required[int]
-    completion_tokens: Required[int]
-    total_tokens: Required[int]
+from .openai import ChatCompletionUsageBlock
+
+
+class AnthropicChatCompletionUsageBlock(ChatCompletionUsageBlock, total=False):
     cache_creation_input_tokens: int
     cache_read_input_tokens: int
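
A small sketch of what the TypedDict split above buys: shared request fields live on `AnthropicMessageRequestBase`, and `AnthropicMessagesRequest` only adds `model` (plus the proxy-only `litellm_metadata`):

from litellm.types.llms.anthropic import AnthropicMessagesRequest

request: AnthropicMessagesRequest = {
    "model": "claude-3-haiku-20240307",
    "max_tokens": 256,
    "messages": [{"role": "user", "content": "Hello"}],
}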

View file

@@ -343,11 +343,14 @@ class ChatCompletionImageObject(TypedDict):
     image_url: Union[str, ChatCompletionImageUrlObject]


+OpenAIMessageContent = Union[
+    str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
+]
+
+
 class OpenAIChatCompletionUserMessage(TypedDict):
     role: Literal["user"]
-    content: Union[
-        str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
-    ]
+    content: OpenAIMessageContent


 class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):

View file

@@ -7,7 +7,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union
 from openai._models import BaseModel as OpenAIObject
 from openai.types.audio.transcription_create_params import FileTypes  # type: ignore
 from openai.types.completion_usage import CompletionTokensDetails, CompletionUsage
-from pydantic import ConfigDict, Field, PrivateAttr
+from pydantic import ConfigDict, PrivateAttr
 from typing_extensions import Callable, Dict, Required, TypedDict, override

 from ..litellm_core_utils.core_helpers import map_finish_reason

File diff suppressed because it is too large

View file

@@ -1173,6 +1173,18 @@
         "supports_function_calling": true,
         "supports_assistant_prefill": true
     },
+    "mistral/pixtral-12b-2409": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true,
+        "supports_vision": true
+    },
     "mistral/open-mistral-7b": {
         "max_tokens": 8191,
         "max_input_tokens": 32000,

View file

@@ -25,7 +25,12 @@ from unittest.mock import MagicMock, patch
 import pytest

 import litellm
-from litellm import AnthropicConfig, Router, adapter_completion
+from litellm import (
+    AnthropicConfig,
+    Router,
+    adapter_completion,
+    AnthropicExperimentalPassThroughConfig,
+)
 from litellm.adapters.anthropic_adapter import anthropic_adapter
 from litellm.types.llms.anthropic import AnthropicResponse
@@ -33,7 +38,7 @@ from litellm.types.llms.anthropic import AnthropicResponse

 def test_anthropic_completion_messages_translation():
     messages = [{"role": "user", "content": "Hey, how's it going?"}]

-    translated_messages = AnthropicConfig().translate_anthropic_messages_to_openai(messages=messages)  # type: ignore
+    translated_messages = AnthropicExperimentalPassThroughConfig().translate_anthropic_messages_to_openai(messages=messages)  # type: ignore

     assert translated_messages == [{"role": "user", "content": "Hey, how's it going?"}]

View file

@@ -5,7 +5,11 @@ import pytest
 import sys
 from typing import Any, Dict, List
 from unittest.mock import MagicMock, Mock, patch
+import os

+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
 import litellm
 from litellm.exceptions import BadRequestError
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler