forked from phoenix/litellm-mirror

Litellm dev 11 11 2024 (#6693)

* fix(__init__.py): add 'watsonx_text' as mapped llm api route

  Fixes https://github.com/BerriAI/litellm/issues/6663

* fix(opentelemetry.py): fix passing parallel tool calls to otel

  Fixes https://github.com/BerriAI/litellm/issues/6677

* refactor(test_opentelemetry_unit_tests.py): create a base set of unit tests for all logging integrations - test for parallel tool call handling reduces bugs in repo

* fix(__init__.py): update provider-model mapping to include all known provider-model mappings

  Fixes https://github.com/BerriAI/litellm/issues/6669

* feat(anthropic): support passing document in llm api call

* docs(anthropic.md): add pdf anthropic call to docs + expose new 'supports_pdf_input' function

* fix(factory.py): fix linting error

This commit is contained in: parent b8ae08b8eb · commit f59cb46e71
21 changed files with 533 additions and 2264 deletions
@@ -864,3 +864,96 @@ Human: How do I boil water?
 Assistant:
 ```
 
+## Usage - PDF
+
+Pass base64 encoded PDF files to Anthropic models using the `image_url` field.
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+### **using base64**
+```python
+from litellm import completion, supports_pdf_input
+import base64
+import requests
+
+# URL of the file
+url = "https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf"
+
+# Download the file
+response = requests.get(url)
+file_data = response.content
+
+encoded_file = base64.b64encode(file_data).decode("utf-8")
+
+## check if model supports pdf input - (2024/11/11) only claude-3-5-haiku-20241022 supports it
+supports_pdf_input("anthropic/claude-3-5-haiku-20241022") # True
+
+response = completion(
+    model="anthropic/claude-3-5-haiku-20241022",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
+                {
+                    "type": "image_url",
+                    "image_url": f"data:application/pdf;base64,{encoded_file}",  # 👈 PDF
+                },
+            ],
+        }
+    ],
+    max_tokens=300,
+)
+
+print(response.choices[0])
+```
+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+1. Add model to config
+
+```yaml
+- model_name: claude-3-5-haiku-20241022
+  litellm_params:
+    model: anthropic/claude-3-5-haiku-20241022
+    api_key: os.environ/ANTHROPIC_API_KEY
+```
+
+2. Start Proxy
+
+```
+litellm --config /path/to/config.yaml
+```
+
+3. Test it!
+
+```bash
+curl http://0.0.0.0:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
+  -d '{
+    "model": "claude-3-5-haiku-20241022",
+    "messages": [
+      {
+        "role": "user",
+        "content": [
+          {
+            "type": "text",
+            "text": "You are a very professional document summarization specialist. Please summarize the given document"
+          },
+          {
+            "type": "image_url",
+            "image_url": "data:application/pdf;base64,{encoded_file}"
+          }
+        ]
+      }
+    ],
+    "max_tokens": 300
+  }'
+```
+</TabItem>
+</Tabs>
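The same request works through any OpenAI-compatible client. A minimal sketch (editor's illustration, not part of this commit), assuming the proxy from step 2 is running on http://0.0.0.0:4000 and `encoded_file` holds the base64 string from the SDK example:

```python
import openai

client = openai.OpenAI(api_key="<YOUR-LITELLM-KEY>", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="claude-3-5-haiku-20241022",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Please summarize the given document."},
                # litellm accepts the raw data-url string here, mirroring the curl example above
                {"type": "image_url", "image_url": f"data:application/pdf;base64,{encoded_file}"},
            ],
        }
    ],
    max_tokens=300,
)
print(response.choices[0])
```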
@@ -375,6 +375,7 @@ open_ai_text_completion_models: List = []
 cohere_models: List = []
 cohere_chat_models: List = []
 mistral_chat_models: List = []
+text_completion_codestral_models: List = []
 anthropic_models: List = []
 empower_models: List = []
 openrouter_models: List = []
@@ -401,6 +402,19 @@ deepinfra_models: List = []
 perplexity_models: List = []
 watsonx_models: List = []
 gemini_models: List = []
+xai_models: List = []
+deepseek_models: List = []
+azure_ai_models: List = []
+voyage_models: List = []
+databricks_models: List = []
+cloudflare_models: List = []
+codestral_models: List = []
+friendliai_models: List = []
+palm_models: List = []
+groq_models: List = []
+azure_models: List = []
+anyscale_models: List = []
+cerebras_models: List = []
 
 
 def add_known_models():
@@ -477,6 +491,34 @@ def add_known_models():
             # ignore the 'up-to', '-to-' model names -> not real models. just for cost tracking based on model params.
             if "-to-" not in key:
                 fireworks_ai_embedding_models.append(key)
+        elif value.get("litellm_provider") == "text-completion-codestral":
+            text_completion_codestral_models.append(key)
+        elif value.get("litellm_provider") == "xai":
+            xai_models.append(key)
+        elif value.get("litellm_provider") == "deepseek":
+            deepseek_models.append(key)
+        elif value.get("litellm_provider") == "azure_ai":
+            azure_ai_models.append(key)
+        elif value.get("litellm_provider") == "voyage":
+            voyage_models.append(key)
+        elif value.get("litellm_provider") == "databricks":
+            databricks_models.append(key)
+        elif value.get("litellm_provider") == "cloudflare":
+            cloudflare_models.append(key)
+        elif value.get("litellm_provider") == "codestral":
+            codestral_models.append(key)
+        elif value.get("litellm_provider") == "friendliai":
+            friendliai_models.append(key)
+        elif value.get("litellm_provider") == "palm":
+            palm_models.append(key)
+        elif value.get("litellm_provider") == "groq":
+            groq_models.append(key)
+        elif value.get("litellm_provider") == "azure":
+            azure_models.append(key)
+        elif value.get("litellm_provider") == "anyscale":
+            anyscale_models.append(key)
+        elif value.get("litellm_provider") == "cerebras":
+            cerebras_models.append(key)
 
 
 add_known_models()
@@ -722,6 +764,20 @@ model_list = (
     + vertex_language_models
     + watsonx_models
     + gemini_models
+    + text_completion_codestral_models
+    + xai_models
+    + deepseek_models
+    + azure_ai_models
+    + voyage_models
+    + databricks_models
+    + cloudflare_models
+    + codestral_models
+    + friendliai_models
+    + palm_models
+    + groq_models
+    + azure_models
+    + anyscale_models
+    + cerebras_models
 )
@@ -778,6 +834,7 @@ class LlmProviders(str, Enum):
     FIREWORKS_AI = "fireworks_ai"
     FRIENDLIAI = "friendliai"
     WATSONX = "watsonx"
+    WATSONX_TEXT = "watsonx_text"
     TRITON = "triton"
     PREDIBASE = "predibase"
     DATABRICKS = "databricks"
@@ -794,6 +851,7 @@ provider_list: List[Union[LlmProviders, str]] = list(LlmProviders)
 
 models_by_provider: dict = {
     "openai": open_ai_chat_completion_models + open_ai_text_completion_models,
+    "text-completion-openai": open_ai_text_completion_models,
     "cohere": cohere_models + cohere_chat_models,
     "cohere_chat": cohere_chat_models,
     "anthropic": anthropic_models,
@@ -817,6 +875,23 @@ models_by_provider: dict = {
     "watsonx": watsonx_models,
     "gemini": gemini_models,
     "fireworks_ai": fireworks_ai_models + fireworks_ai_embedding_models,
+    "aleph_alpha": aleph_alpha_models,
+    "text-completion-codestral": text_completion_codestral_models,
+    "xai": xai_models,
+    "deepseek": deepseek_models,
+    "mistral": mistral_chat_models,
+    "azure_ai": azure_ai_models,
+    "voyage": voyage_models,
+    "databricks": databricks_models,
+    "cloudflare": cloudflare_models,
+    "codestral": codestral_models,
+    "nlp_cloud": nlp_cloud_models,
+    "friendliai": friendliai_models,
+    "palm": palm_models,
+    "groq": groq_models,
+    "azure": azure_models,
+    "anyscale": anyscale_models,
+    "cerebras": cerebras_models,
 }
 
 # mapping for those models which have larger equivalents
@@ -889,7 +964,6 @@ from .utils import (
     supports_system_messages,
     get_litellm_params,
     acreate,
-    get_model_list,
     get_max_tokens,
     get_model_info,
     register_prompt_template,
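A quick sketch (editor's illustration, not part of the diff) of how the expanded mapping is visible at runtime:

```python
import litellm

# every provider bucket added above is now reachable through models_by_provider
print(litellm.models_by_provider["deepseek"])      # e.g. ['deepseek-chat', ...] - contents depend on the model map
# and the new enum member shows up in the provider list
print("watsonx_text" in litellm.provider_list)     # True
```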
@@ -2,14 +2,16 @@ import os
 from dataclasses import dataclass
 from datetime import datetime
 from functools import wraps
-from typing import TYPE_CHECKING, Any, Dict, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 
 import litellm
 from litellm._logging import verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.types.services import ServiceLoggerPayload
 from litellm.types.utils import (
+    ChatCompletionMessageToolCall,
     EmbeddingResponse,
+    Function,
     ImageResponse,
     ModelResponse,
     StandardLoggingPayload,
@@ -403,6 +405,28 @@ class OpenTelemetry(CustomLogger):
         except Exception:
             return ""
 
+    @staticmethod
+    def _tool_calls_kv_pair(
+        tool_calls: List[ChatCompletionMessageToolCall],
+    ) -> Dict[str, Any]:
+        from litellm.proxy._types import SpanAttributes
+
+        kv_pairs: Dict[str, Any] = {}
+        for idx, tool_call in enumerate(tool_calls):
+            _function = tool_call.get("function")
+            if not _function:
+                continue
+
+            keys = Function.__annotations__.keys()
+            for key in keys:
+                _value = _function.get(key)
+                if _value:
+                    kv_pairs[
+                        f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.function_call.{key}"
+                    ] = _value
+
+        return kv_pairs
+
     def set_attributes(  # noqa: PLR0915
         self, span: Span, kwargs, response_obj: Optional[Any]
     ):
@@ -597,18 +621,13 @@ class OpenTelemetry(CustomLogger):
                 message = choice.get("message")
                 tool_calls = message.get("tool_calls")
                 if tool_calls:
-                    self.safe_set_attribute(
-                        span=span,
-                        key=f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.function_call.name",
-                        value=tool_calls[0].get("function").get("name"),
-                    )
-                    self.safe_set_attribute(
-                        span=span,
-                        key=f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.function_call.arguments",
-                        value=tool_calls[0]
-                        .get("function")
-                        .get("arguments"),
-                    )
+                    kv_pairs = OpenTelemetry._tool_calls_kv_pair(tool_calls)  # type: ignore
+                    for key, value in kv_pairs.items():
+                        self.safe_set_attribute(
+                            span=span,
+                            key=key,
+                            value=value,
+                        )
 
         except Exception as e:
             verbose_logger.exception(
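The old code only logged the first tool call of each choice; the helper flattens all of them into indexed span attributes. A standalone sketch of that flattening (editor's illustration: plain dicts stand in for ChatCompletionMessageToolCall, and the attribute prefix is a hard-coded stand-in for SpanAttributes.LLM_COMPLETIONS):

```python
from typing import Any, Dict, List

LLM_COMPLETIONS = "gen_ai.completion"  # stand-in prefix, for illustration only

def tool_calls_kv_pair(tool_calls: List[Dict[str, Any]]) -> Dict[str, Any]:
    kv_pairs: Dict[str, Any] = {}
    for idx, tool_call in enumerate(tool_calls):
        function = tool_call.get("function") or {}
        # the real helper iterates Function.__annotations__ instead of a fixed tuple
        for key in ("name", "arguments"):
            value = function.get(key)
            if value:
                kv_pairs[f"{LLM_COMPLETIONS}.{idx}.function_call.{key}"] = value
    return kv_pairs

calls = [
    {"function": {"name": "get_weather", "arguments": '{"city": "New York"}'}},
    {"function": {"name": "get_news", "arguments": '{"city": "New York"}'}},
]
print(tool_calls_kv_pair(calls))
# both parallel calls appear, keyed by index 0 and 1
```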
@@ -71,11 +71,12 @@ def validate_environment(
 
     prompt_caching_set = AnthropicConfig().is_cache_control_set(messages=messages)
     computer_tool_used = AnthropicConfig().is_computer_tool_used(tools=tools)
+    pdf_used = AnthropicConfig().is_pdf_used(messages=messages)
     headers = AnthropicConfig().get_anthropic_headers(
         anthropic_version=anthropic_version,
         computer_tool_used=computer_tool_used,
         prompt_caching_set=prompt_caching_set,
+        pdf_used=pdf_used,
         api_key=api_key,
     )
@@ -104,6 +104,7 @@ class AnthropicConfig:
         anthropic_version: Optional[str] = None,
         computer_tool_used: bool = False,
         prompt_caching_set: bool = False,
+        pdf_used: bool = False,
     ) -> dict:
         import json
 
@@ -112,6 +113,8 @@ class AnthropicConfig:
             betas.append("prompt-caching-2024-07-31")
         if computer_tool_used:
             betas.append("computer-use-2024-10-22")
+        if pdf_used:
+            betas.append("pdfs-2024-09-25")
         headers = {
             "anthropic-version": anthropic_version or "2023-06-01",
             "x-api-key": api_key,
@@ -365,6 +368,21 @@ class AnthropicConfig:
                 return True
         return False
 
+    def is_pdf_used(self, messages: List[AllMessageValues]) -> bool:
+        """
+        Set to true if media passed into messages.
+        """
+        for message in messages:
+            if (
+                "content" in message
+                and message["content"] is not None
+                and isinstance(message["content"], list)
+            ):
+                for content in message["content"]:
+                    if "type" in content:
+                        return True
+        return False
+
     def translate_system_message(
         self, messages: List[AllMessageValues]
     ) -> List[AnthropicSystemMessageContent]:
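A quick sketch (editor's illustration; the module path for AnthropicConfig is an assumption) of the beta header this produces:

```python
from litellm.llms.anthropic.chat.transformation import AnthropicConfig  # module path is an assumption

headers = AnthropicConfig().get_anthropic_headers(
    api_key="sk-ant-...",  # placeholder key
    pdf_used=True,
)
# per the new test further down, a PDF request carries exactly this beta flag
assert headers["anthropic-beta"] == "pdfs-2024-09-25"
```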
@@ -1330,7 +1330,10 @@ def convert_to_anthropic_tool_invoke(
 
 def add_cache_control_to_content(
     anthropic_content_element: Union[
-        dict, AnthropicMessagesImageParam, AnthropicMessagesTextParam
+        dict,
+        AnthropicMessagesImageParam,
+        AnthropicMessagesTextParam,
+        AnthropicMessagesDocumentParam,
     ],
     orignal_content_element: Union[dict, AllMessageValues],
 ):
@@ -1343,6 +1346,32 @@ def add_cache_control_to_content(
     return anthropic_content_element
 
 
+def _anthropic_content_element_factory(
+    image_chunk: GenericImageParsingChunk,
+) -> Union[AnthropicMessagesImageParam, AnthropicMessagesDocumentParam]:
+    if image_chunk["media_type"] == "application/pdf":
+        _anthropic_content_element: Union[
+            AnthropicMessagesDocumentParam, AnthropicMessagesImageParam
+        ] = AnthropicMessagesDocumentParam(
+            type="document",
+            source=AnthropicContentParamSource(
+                type="base64",
+                media_type=image_chunk["media_type"],
+                data=image_chunk["data"],
+            ),
+        )
+    else:
+        _anthropic_content_element = AnthropicMessagesImageParam(
+            type="image",
+            source=AnthropicContentParamSource(
+                type="base64",
+                media_type=image_chunk["media_type"],
+                data=image_chunk["data"],
+            ),
+        )
+    return _anthropic_content_element
+
+
 def anthropic_messages_pt(  # noqa: PLR0915
     messages: List[AllMessageValues],
     model: str,
@@ -1400,15 +1429,9 @@ def anthropic_messages_pt(  # noqa: PLR0915
                         openai_image_url=m["image_url"]["url"]
                     )
 
-                    _anthropic_content_element = AnthropicMessagesImageParam(
-                        type="image",
-                        source=AnthropicImageParamSource(
-                            type="base64",
-                            media_type=image_chunk["media_type"],
-                            data=image_chunk["data"],
-                        ),
-                    )
+                    _anthropic_content_element = (
+                        _anthropic_content_element_factory(image_chunk)
+                    )
 
                     _content_element = add_cache_control_to_content(
                         anthropic_content_element=_anthropic_content_element,
                         orignal_content_element=dict(m),
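An editor's sketch of the factory's dispatch. Since GenericImageParsingChunk is a TypedDict, plain dicts work as inputs; the factory's module path is an assumption:

```python
from litellm.llms.prompt_templates.factory import (  # module path is an assumption
    _anthropic_content_element_factory,
)

pdf_chunk = {"type": "base64", "media_type": "application/pdf", "data": "JVBERi0x..."}  # truncated base64
png_chunk = {"type": "base64", "media_type": "image/png", "data": "iVBORw0K..."}       # truncated base64

assert _anthropic_content_element_factory(pdf_chunk)["type"] == "document"  # PDFs become document blocks
assert _anthropic_content_element_factory(png_chunk)["type"] == "image"     # everything else stays an image block
```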
@@ -1898,7 +1898,8 @@
         "supports_function_calling": true,
         "tool_use_system_prompt_tokens": 264,
         "supports_assistant_prefill": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_pdf_input": true
     },
     "claude-3-opus-20240229": {
         "max_tokens": 4096,
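The new flag feeds the helpers exposed by this commit. A minimal sketch (the model name below is an assumption — the page does not show which entry this hunk sits in):

```python
import litellm

# dedicated helper added in this commit
print(litellm.supports_pdf_input("anthropic/claude-3-5-haiku-20241022"))  # True

# the raw flag is also visible via the model-info lookup
info = litellm.get_model_info("anthropic/claude-3-5-haiku-20241022")
print(info.get("supports_pdf_input"))  # True
```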
@@ -1,63 +1,7 @@
 model_list:
-  - model_name: claude-3-5-sonnet-20240620
+  - model_name: "*"
     litellm_params:
-      model: claude-3-5-sonnet-20240620
+      model: "*"
-      api_key: os.environ/ANTHROPIC_API_KEY
-  - model_name: claude-3-5-sonnet-aihubmix
-    litellm_params:
-      model: openai/claude-3-5-sonnet-20240620
-      input_cost_per_token: 0.000003 # 3$/M
-      output_cost_per_token: 0.000015 # 15$/M
-      api_base: "https://exampleopenaiendpoint-production.up.railway.app"
-      api_key: my-fake-key
-  - model_name: fake-openai-endpoint-2
-    litellm_params:
-      model: openai/my-fake-model
-      api_key: my-fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-      stream_timeout: 0.001
-      timeout: 1
-      rpm: 1
-  - model_name: fake-openai-endpoint
-    litellm_params:
-      model: openai/my-fake-model
-      api_key: my-fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-  ## bedrock chat completions
-  - model_name: "*anthropic.claude*"
-    litellm_params:
-      model: bedrock/*anthropic.claude*
-      aws_access_key_id: os.environ/BEDROCK_AWS_ACCESS_KEY_ID
-      aws_secret_access_key: os.environ/BEDROCK_AWS_SECRET_ACCESS_KEY
-      aws_region_name: os.environ/AWS_REGION_NAME
-      guardrailConfig:
-        "guardrailIdentifier": "h4dsqwhp6j66"
-        "guardrailVersion": "2"
-        "trace": "enabled"
-
-  ## bedrock embeddings
-  - model_name: "*amazon.titan-embed-*"
-    litellm_params:
-      model: bedrock/amazon.titan-embed-*
-      aws_access_key_id: os.environ/BEDROCK_AWS_ACCESS_KEY_ID
-      aws_secret_access_key: os.environ/BEDROCK_AWS_SECRET_ACCESS_KEY
-      aws_region_name: os.environ/AWS_REGION_NAME
-  - model_name: "*cohere.embed-*"
-    litellm_params:
-      model: bedrock/cohere.embed-*
-      aws_access_key_id: os.environ/BEDROCK_AWS_ACCESS_KEY_ID
-      aws_secret_access_key: os.environ/BEDROCK_AWS_SECRET_ACCESS_KEY
-      aws_region_name: os.environ/AWS_REGION_NAME
-
-  - model_name: gpt-4
-    litellm_params:
-      model: azure/chatgpt-v-2
-      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
-      api_version: "2023-05-15"
-      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
-      rpm: 480
-      timeout: 300
-      stream_timeout: 60
 
 litellm_settings:
   fallbacks: [{ "claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"] }]
@@ -1236,7 +1236,6 @@ def _return_user_api_key_auth_obj(
     start_time: datetime,
     user_role: Optional[LitellmUserRoles] = None,
 ) -> UserAPIKeyAuth:
-    traceback.print_stack()
     end_time = datetime.now()
     user_api_key_service_logger_obj.service_success_hook(
         service=ServiceTypes.AUTH,
@@ -74,7 +74,7 @@ class AnthopicMessagesAssistantMessageParam(TypedDict, total=False):
     """
 
 
-class AnthropicImageParamSource(TypedDict):
+class AnthropicContentParamSource(TypedDict):
     type: Literal["base64"]
     media_type: str
     data: str
@@ -82,7 +82,13 @@ class AnthropicImageParamSource(TypedDict):
 
 class AnthropicMessagesImageParam(TypedDict, total=False):
     type: Required[Literal["image"]]
-    source: Required[AnthropicImageParamSource]
+    source: Required[AnthropicContentParamSource]
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+class AnthropicMessagesDocumentParam(TypedDict, total=False):
+    type: Required[Literal["document"]]
+    source: Required[AnthropicContentParamSource]
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
 
 
@@ -108,6 +114,7 @@ AnthropicMessagesUserMessageValues = Union[
     AnthropicMessagesTextParam,
     AnthropicMessagesImageParam,
     AnthropicMessagesToolResultParam,
+    AnthropicMessagesDocumentParam,
]
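Since these are TypedDicts, a plain dict of the right shape satisfies the new type; an editor's sketch:

```python
from litellm.types.llms.anthropic import AnthropicMessagesDocumentParam

doc: AnthropicMessagesDocumentParam = {
    "type": "document",
    "source": {
        "type": "base64",
        "media_type": "application/pdf",
        "data": "JVBERi0xLjQK...",  # truncated base64, illustrative only
    },
}
```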
@@ -1322,11 +1322,6 @@ class TranscriptionResponse(OpenAIObject):
 
 
 class GenericImageParsingChunk(TypedDict):
-    # {
-    #     "type": "base64",
-    #     "media_type": f"image/{image_format}",
-    #     "data": base64_data,
-    # }
     type: str
     media_type: str
     data: str
litellm/utils.py — 2134 changes (file diff suppressed because it is too large)
@@ -1898,7 +1898,8 @@
         "supports_function_calling": true,
         "tool_use_system_prompt_tokens": 264,
         "supports_assistant_prefill": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_pdf_input": true
     },
     "claude-3-opus-20240229": {
         "max_tokens": 4096,
@@ -44,3 +44,30 @@ class BaseLLMChatTest(ABC):
             messages=messages,
         )
         assert response is not None
+
+    @pytest.fixture
+    def pdf_messages(self):
+        import base64
+
+        import requests
+
+        # URL of the file
+        url = "https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf"
+
+        response = requests.get(url)
+        file_data = response.content
+
+        encoded_file = base64.b64encode(file_data).decode("utf-8")
+        url = f"data:application/pdf;base64,{encoded_file}"
+
+        image_content = [
+            {"type": "text", "text": "What's this file about?"},
+            {
+                "type": "image_url",
+                "image_url": {"url": url},
+            },
+        ]
+
+        image_messages = [{"role": "user", "content": image_content}]
+
+        return image_messages
@@ -36,6 +36,7 @@ from litellm.types.llms.anthropic import AnthropicResponse
 
 from litellm.llms.anthropic.common_utils import process_anthropic_headers
 from httpx import Headers
+from base_llm_unit_tests import BaseLLMChatTest
 
 
 def test_anthropic_completion_messages_translation():
@@ -624,3 +625,40 @@ def test_anthropic_tool_helper(cache_control_location):
     tool = AnthropicConfig()._map_tool_helper(tool=tool)
 
     assert tool["cache_control"] == {"type": "ephemeral"}
+
+
+from litellm import completion
+
+
+class TestAnthropicCompletion(BaseLLMChatTest):
+    def get_base_completion_call_args(self) -> dict:
+        return {"model": "claude-3-haiku-20240307"}
+
+    def test_pdf_handling(self, pdf_messages):
+        from litellm.llms.custom_httpx.http_handler import HTTPHandler
+        from litellm.types.llms.anthropic import AnthropicMessagesDocumentParam
+        import json
+
+        client = HTTPHandler()
+
+        with patch.object(client, "post", new=MagicMock()) as mock_client:
+            response = completion(
+                model="claude-3-5-sonnet-20241022",
+                messages=pdf_messages,
+                client=client,
+            )
+
+            mock_client.assert_called_once()
+
+            json_data = json.loads(mock_client.call_args.kwargs["data"])
+            headers = mock_client.call_args.kwargs["headers"]
+
+            assert headers["anthropic-beta"] == "pdfs-2024-09-25"
+
+            assert json_data["messages"][0]["role"] == "user"
+            _document_validation = AnthropicMessagesDocumentParam(
+                **json_data["messages"][0]["content"][1]
+            )
+            assert _document_validation["type"] == "document"
+            assert _document_validation["source"]["media_type"] == "application/pdf"
+            assert _document_validation["source"]["type"] == "base64"
@@ -169,3 +169,11 @@ def test_get_llm_provider_hosted_vllm():
     assert custom_llm_provider == "hosted_vllm"
     assert model == "llama-3.1-70b-instruct"
     assert dynamic_api_key == ""
+
+
+def test_get_llm_provider_watson_text():
+    model, custom_llm_provider, dynamic_api_key, api_base = litellm.get_llm_provider(
+        model="watsonx_text/watson-text-to-speech",
+    )
+    assert custom_llm_provider == "watsonx_text"
+    assert model == "watson-text-to-speech"
@@ -1,11 +0,0 @@
-import os, sys, traceback
-
-sys.path.insert(
-    0, os.path.abspath("../..")
-)  # Adds the parent directory to the system path
-import litellm
-from litellm import get_model_list
-
-print(get_model_list())
-print(get_model_list())
-# print(litellm.model_list)
@@ -1,41 +0,0 @@
-# What is this?
-## Unit tests for opentelemetry integration
-
-# What is this?
-## Unit test for presidio pii masking
-import sys, os, asyncio, time, random
-from datetime import datetime
-import traceback
-from dotenv import load_dotenv
-
-load_dotenv()
-import os
-import asyncio
-
-sys.path.insert(
-    0, os.path.abspath("../..")
-)  # Adds the parent directory to the system path
-import pytest
-import litellm
-from unittest.mock import patch, MagicMock, AsyncMock
-
-
-@pytest.mark.asyncio
-async def test_opentelemetry_integration():
-    """
-    Unit test to confirm the parent otel span is ended
-    """
-
-    parent_otel_span = MagicMock()
-    litellm.callbacks = ["otel"]
-
-    await litellm.acompletion(
-        model="gpt-3.5-turbo",
-        messages=[{"role": "user", "content": "Hello, world!"}],
-        mock_response="Hey!",
-        metadata={"litellm_parent_otel_span": parent_otel_span},
-    )
-
-    await asyncio.sleep(1)
-
-    parent_otel_span.end.assert_called_once()
@@ -943,3 +943,24 @@ def test_validate_chat_completion_user_messages(messages, expected_bool):
     ## Invalid message
     with pytest.raises(Exception):
         validate_chat_completion_user_messages(messages=messages)
+
+
+def test_models_by_provider():
+    """
+    Make sure all providers from model map are in the valid providers list
+    """
+    from litellm import models_by_provider
+
+    providers = set()
+    for k, v in litellm.model_cost.items():
+        if "_" in v["litellm_provider"] and "-" in v["litellm_provider"]:
+            continue
+        elif k == "sample_spec":
+            continue
+        elif v["litellm_provider"] == "sagemaker":
+            continue
+        else:
+            providers.add(v["litellm_provider"])
+
+    for provider in providers:
+        assert provider in models_by_provider.keys()
tests/logging_callback_tests/base_test.py — new file, 100 lines
@@ -0,0 +1,100 @@
+import asyncio
+import httpx
+import json
+import pytest
+import sys
+from typing import Any, Dict, List
+from unittest.mock import MagicMock, Mock, patch
+import os
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import litellm
+from litellm.exceptions import BadRequestError
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.utils import CustomStreamWrapper
+from litellm.types.utils import ModelResponse
+
+# test_example.py
+from abc import ABC, abstractmethod
+
+
+class BaseLoggingCallbackTest(ABC):
+    """
+    Abstract base test class that enforces a common test across all test classes.
+    """
+
+    @pytest.fixture
+    def mock_response_obj(self):
+        from litellm.types.utils import (
+            ModelResponse,
+            Choices,
+            Message,
+            ChatCompletionMessageToolCall,
+            Function,
+            Usage,
+            CompletionTokensDetailsWrapper,
+            PromptTokensDetailsWrapper,
+        )
+
+        # Create a mock response object with the structure you need
+        return ModelResponse(
+            id="chatcmpl-ASId3YJWagBpBskWfoNEMPFSkmrEw",
+            created=1731308157,
+            model="gpt-4o-mini-2024-07-18",
+            object="chat.completion",
+            system_fingerprint="fp_0ba0d124f1",
+            choices=[
+                Choices(
+                    finish_reason="tool_calls",
+                    index=0,
+                    message=Message(
+                        content=None,
+                        role="assistant",
+                        tool_calls=[
+                            ChatCompletionMessageToolCall(
+                                function=Function(
+                                    arguments='{"city": "New York"}', name="get_weather"
+                                ),
+                                id="call_PngsQS5YGmIZKnswhnUOnOVb",
+                                type="function",
+                            ),
+                            ChatCompletionMessageToolCall(
+                                function=Function(
+                                    arguments='{"city": "New York"}', name="get_news"
+                                ),
+                                id="call_1zsDThBu0VSK7KuY7eCcJBnq",
+                                type="function",
+                            ),
+                        ],
+                        function_call=None,
+                    ),
+                )
+            ],
+            usage=Usage(
+                completion_tokens=46,
+                prompt_tokens=86,
+                total_tokens=132,
+                completion_tokens_details=CompletionTokensDetailsWrapper(
+                    accepted_prediction_tokens=0,
+                    audio_tokens=0,
+                    reasoning_tokens=0,
+                    rejected_prediction_tokens=0,
+                    text_tokens=None,
+                ),
+                prompt_tokens_details=PromptTokensDetailsWrapper(
+                    audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None
+                ),
+            ),
+            service_tier=None,
+        )
+
+    @abstractmethod
+    def test_parallel_tool_calls(self, mock_response_obj: ModelResponse):
+        """
+        Check if parallel tool calls are correctly logged by Logging callback
+
+        Relevant issue - https://github.com/BerriAI/litellm/issues/6677
+        """
+        pass
@@ -0,0 +1,58 @@
+# What is this?
+## Unit tests for opentelemetry integration
+
+# What is this?
+## Unit test for presidio pii masking
+import sys, os, asyncio, time, random
+from datetime import datetime
+import traceback
+from dotenv import load_dotenv
+
+load_dotenv()
+import os
+import asyncio
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import pytest
+import litellm
+from unittest.mock import patch, MagicMock, AsyncMock
+from base_test import BaseLoggingCallbackTest
+from litellm.types.utils import ModelResponse
+
+
+class TestOpentelemetryUnitTests(BaseLoggingCallbackTest):
+    def test_parallel_tool_calls(self, mock_response_obj: ModelResponse):
+        tool_calls = mock_response_obj.choices[0].message.tool_calls
+        from litellm.integrations.opentelemetry import OpenTelemetry
+        from litellm.proxy._types import SpanAttributes
+
+        kv_pair_dict = OpenTelemetry._tool_calls_kv_pair(tool_calls)
+
+        assert kv_pair_dict == {
+            f"{SpanAttributes.LLM_COMPLETIONS}.0.function_call.arguments": '{"city": "New York"}',
+            f"{SpanAttributes.LLM_COMPLETIONS}.0.function_call.name": "get_weather",
+            f"{SpanAttributes.LLM_COMPLETIONS}.1.function_call.arguments": '{"city": "New York"}',
+            f"{SpanAttributes.LLM_COMPLETIONS}.1.function_call.name": "get_news",
+        }
+
+    @pytest.mark.asyncio
+    async def test_opentelemetry_integration(self):
+        """
+        Unit test to confirm the parent otel span is ended
+        """
+
+        parent_otel_span = MagicMock()
+        litellm.callbacks = ["otel"]
+
+        await litellm.acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "Hello, world!"}],
+            mock_response="Hey!",
+            metadata={"litellm_parent_otel_span": parent_otel_span},
+        )
+
+        await asyncio.sleep(1)
+
+        parent_otel_span.end.assert_called_once()