mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
* fix(cost_calculator.py): move to using `.get_model_info()` for cost per token calculations ensures cost tracking is reliable - handles edge cases of parsing model cost map * build(model_prices_and_context_window.json): add 'supports_response_schema' for select tgai models Fixes https://github.com/BerriAI/litellm/pull/7037#discussion_r1872157329 * build(model_prices_and_context_window.json): remove 'pdf input' and 'vision' support from nova micro in model map Bedrock docs indicate no support for micro - https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference-supported-models-features.html * fix(converse_transformation.py): support amazon nova tool use * fix(opentelemetry): Add missing LLM request type attribute to spans (#7041) * feat(opentelemetry): add LLM request type attribute to spans * lint * fix: curl usage (#7038) curl -d, --data <data> is lowercase d curl -D, --dump-header <filename> is uppercase D references: https://curl.se/docs/manpage.html#-d https://curl.se/docs/manpage.html#-D * fix(spend_tracking.py): handle empty 'id' in model response - when creating spend log Fixes https://github.com/BerriAI/litellm/issues/7023 * fix(streaming_chunk_builder.py): handle initial id being empty string Fixes https://github.com/BerriAI/litellm/issues/7023 * fix(anthropic_passthrough_logging_handler.py): add end user cost tracking for anthropic pass through endpoint * docs(pass_through/): refactor docs location + add table on supported features for pass through endpoints * feat(anthropic_passthrough_logging_handler.py): support end user cost tracking via anthropic sdk * docs(anthropic_completion.md): add docs on passing end user param for cost tracking on anthropic sdk * fix(litellm_logging.py): use standard logging payload if present in kwargs prevent datadog logging error for pass through endpoints * docs(bedrock.md): add rerank api usage example to docs * bugfix/change dummy tool name format (#7053) * fix viewing keys (#7042) * ui new build * 
build(model_prices_and_context_window.json): add bedrock region models to model cost map (#7044) * bye (#6982) * (fix) litellm router.aspeech (#6962) * doc Migrating Databases * fix aspeech on router * test_audio_speech_router * test_audio_speech_router * docs show supported providers on batches api doc * change dummy tool name format --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com> Co-authored-by: yujonglee <yujonglee.dev@gmail.com> * fix: fix linting errors * test: update test * fix(litellm_logging.py): fix pass through check * fix(test_otel_logging.py): fix test * fix(cost_calculator.py): update handling for cost per second * fix(cost_calculator.py): fix cost check * test: fix test * (fix) adding public routes when using custom header (#7045) * get_api_key_from_custom_header * add test_get_api_key_from_custom_header * fix testing use 1 file for test user api key auth * fix test user api key auth * test_custom_api_key_header_name * build: update ui build --------- Co-authored-by: Doron Kopit <83537683+doronkopit5@users.noreply.github.com> Co-authored-by: lloydchang <lloydchang@gmail.com> Co-authored-by: hgulersen <haymigulersen@gmail.com> Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: yujonglee <yujonglee.dev@gmail.com>
406 lines
14 KiB
Python
406 lines
14 KiB
Python
import asyncio
|
|
import httpx
|
|
import json
|
|
import pytest
|
|
import sys
|
|
from typing import Any, Dict, List
|
|
from unittest.mock import MagicMock, Mock, patch
|
|
import os
|
|
|
|
sys.path.insert(
|
|
0, os.path.abspath("../..")
|
|
) # Adds the directory two levels up (relative to the current working directory) to the system path
|
|
import litellm
|
|
from litellm.exceptions import BadRequestError
|
|
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
|
from litellm.utils import (
|
|
CustomStreamWrapper,
|
|
get_supported_openai_params,
|
|
get_optional_params,
|
|
)
|
|
|
|
# test_example.py
|
|
from abc import ABC, abstractmethod
|
|
|
|
|
|
def _usage_format_tests(usage: "litellm.Usage"):
    """
    Assert that ``usage`` follows the OpenAI prompt caching format.

    OpenAI prompt caching
    - prompt_tokens = sum of non-cache hit tokens + cache-hit tokens
    - total_tokens = prompt_tokens + completion_tokens

    Example
    ```
    "usage": {
        "prompt_tokens": 2006,
        "completion_tokens": 300,
        "total_tokens": 2306,
        "prompt_tokens_details": {
            "cached_tokens": 1920
        },
        "completion_tokens_details": {
            "reasoning_tokens": 0
        }
        # ANTHROPIC_ONLY #
        "cache_creation_input_tokens": 0
    }
    ```

    Args:
        usage: Object exposing ``total_tokens``, ``prompt_tokens``,
            ``completion_tokens`` and ``prompt_tokens_details.cached_tokens``.

    Raises:
        AssertionError: If the totals do not add up, if the cached-token
            details are missing, or if ``prompt_tokens`` is not strictly
            greater than the cached token count.
    """
    assert usage.total_tokens == usage.prompt_tokens + usage.completion_tokens, (
        f"total_tokens={usage.total_tokens} != "
        f"prompt_tokens={usage.prompt_tokens} + "
        f"completion_tokens={usage.completion_tokens}"
    )

    # Check presence explicitly so a missing field is reported as a readable
    # assertion failure rather than an AttributeError/TypeError.
    prompt_details = getattr(usage, "prompt_tokens_details", None)
    assert prompt_details is not None, "usage.prompt_tokens_details is missing"

    cached_tokens = getattr(prompt_details, "cached_tokens", None)
    assert (
        cached_tokens is not None
    ), "usage.prompt_tokens_details.cached_tokens is missing"

    assert usage.prompt_tokens > cached_tokens, (
        f"prompt_tokens={usage.prompt_tokens} is not greater than "
        f"cached_tokens={cached_tokens}"
    )
|
|
|
|
|
|
class BaseLLMChatTest(ABC):
    """
    Abstract base test class that enforces a common test across all test classes.

    Concrete subclasses provide the provider-specific call arguments through
    ``get_base_completion_call_args`` and implement
    ``test_tool_call_no_arguments``. The remaining ``test_*`` methods unpack
    those arguments into ``litellm.completion``.
    """

    @abstractmethod
    def get_base_completion_call_args(self) -> dict:
        """
        Must return the base completion call args.

        The returned dict is unpacked as keyword arguments into
        ``litellm.completion``; its ``"model"`` entry is also read by the
        capability checks (``supports_*``) in this class.
        """
        pass

    @staticmethod
    def _use_local_model_cost_map() -> None:
        """Set ``LITELLM_LOCAL_MODEL_COST_MAP`` to "True" and reload ``litellm.model_cost``."""
        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
        litellm.model_cost = litellm.get_model_cost_map(url="")

    def test_content_list_handling(self):
        """Check if content list is supported by LLM API"""
        base_completion_call_args = self.get_base_completion_call_args()
        messages = [
            {
                "role": "user",
                "content": [{"type": "text", "text": "Hello, how are you?"}],
            }
        ]
        try:
            response = litellm.completion(
                **base_completion_call_args,
                messages=messages,
            )
            assert response is not None
        except litellm.InternalServerError:
            pytest.skip("Model is overloaded")

        # the reply to a list-style user message must carry content
        assert response.choices[0].message.content is not None

    @pytest.mark.parametrize("image_url", ["str", "dict"])
    def test_pdf_handling(self, pdf_messages, image_url):
        """
        Send a PDF data URL in an ``image_url`` content part.

        ``image_url`` selects how the PDF is passed: ``"str"`` sends the data
        URL directly, ``"dict"`` wraps it as ``{"url": ...}``. The test is
        skipped when ``supports_pdf_input`` is false for the model.
        """
        from litellm.utils import supports_pdf_input

        base_completion_call_args = self.get_base_completion_call_args()

        if not supports_pdf_input(base_completion_call_args["model"], None):
            pytest.skip("Model does not support pdf input")

        if image_url == "str":
            image_url = pdf_messages
        elif image_url == "dict":
            image_url = {"url": pdf_messages}

        image_content = [
            {"type": "text", "text": "What's this file about?"},
            {
                "type": "image_url",
                "image_url": image_url,
            },
        ]

        image_messages = [{"role": "user", "content": image_content}]

        response = litellm.completion(
            **base_completion_call_args,
            messages=image_messages,
        )
        assert response is not None

    def test_message_with_name(self):
        """Check that a user message carrying a ``name`` field gets a response."""
        litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_call_args()
        messages = [
            {"role": "user", "content": "Hello", "name": "test_name"},
        ]
        response = litellm.completion(**base_completion_call_args, messages=messages)
        assert response is not None

    def test_multilingual_requests(self):
        """
        Tests that the provider can handle multilingual requests and invalid utf-8 sequences

        Context: https://github.com/openai/openai-python/issues/1921
        """
        base_completion_call_args = self.get_base_completion_call_args()
        response = litellm.completion(
            **base_completion_call_args,
            # the lone surrogate "\ud83e" is deliberate: it is the invalid sequence under test
            messages=[{"role": "user", "content": "你好世界!\ud83e, ö"}],
        )
        print("multilingual response: ", response)
        assert response is not None

    @pytest.mark.parametrize(
        "response_format",
        [
            {"type": "json_object"},
            {"type": "text"},
        ],
    )
    @pytest.mark.flaky(retries=6, delay=1)
    def test_json_response_format(self, response_format):
        """
        Test that the JSON response format is supported by the LLM API
        """
        base_completion_call_args = self.get_base_completion_call_args()
        litellm.set_verbose = True

        messages = [
            {
                "role": "system",
                "content": "Your output should be a JSON object with no additional properties.  ",
            },
            {
                "role": "user",
                "content": "Respond with this in json. city=San Francisco, state=CA, weather=sunny, temp=60",
            },
        ]

        response = litellm.completion(
            **base_completion_call_args,
            messages=messages,
            response_format=response_format,
        )

        print(response)

        # OpenAI guarantees that the JSON schema is returned in the content
        # relevant issue: https://github.com/BerriAI/litellm/issues/6741
        assert response.choices[0].message.content is not None

    @pytest.mark.flaky(retries=6, delay=1)
    def test_json_response_pydantic_obj(self):
        """
        Pass a pydantic model as ``response_format``.

        Skipped when ``supports_response_schema`` is false for the model or
        when the provider raises ``litellm.InternalServerError``. The answer
        must arrive in the message content, not as a tool call.
        """
        litellm.set_verbose = True
        from pydantic import BaseModel
        from litellm.utils import supports_response_schema

        self._use_local_model_cost_map()

        class TestModel(BaseModel):
            first_response: str

        base_completion_call_args = self.get_base_completion_call_args()
        if not supports_response_schema(base_completion_call_args["model"], None):
            pytest.skip("Model does not support response schema")

        try:
            res = litellm.completion(
                **base_completion_call_args,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {
                        "role": "user",
                        "content": "What is the capital of France?",
                    },
                ],
                response_format=TestModel,
            )
            assert res is not None

            print(res.choices[0].message)

            assert res.choices[0].message.content is not None
            assert res.choices[0].message.tool_calls is None
        except litellm.InternalServerError:
            pytest.skip("Model is overloaded")

    @pytest.mark.flaky(retries=6, delay=1)
    def test_json_response_format_stream(self):
        """
        Test that the JSON response format with streaming is supported by the LLM API
        """
        base_completion_call_args = self.get_base_completion_call_args()
        litellm.set_verbose = True

        messages = [
            {
                "role": "system",
                "content": "Your output should be a JSON object with no additional properties.  ",
            },
            {
                "role": "user",
                "content": "Respond with this in json. city=San Francisco, state=CA, weather=sunny, temp=60",
            },
        ]

        try:
            response = litellm.completion(
                **base_completion_call_args,
                messages=messages,
                response_format={"type": "json_object"},
                stream=True,
            )
        except litellm.InternalServerError:
            pytest.skip("Model is overloaded")

        print(response)

        # concatenate the text deltas; chunks without content contribute ""
        content = "".join(chunk.choices[0].delta.content or "" for chunk in response)

        print(f"content={content}<END>")

        # OpenAI guarantees that the JSON schema is returned in the content
        # relevant issue: https://github.com/BerriAI/litellm/issues/6741
        # we need to assert that the JSON schema was returned in the content, (for Anthropic we were returning it as part of the tool call)
        assert len(content) > 0

    @pytest.fixture
    def tool_call_no_arguments(self):
        """Return an assistant message whose single tool call has empty (``"{}"``) arguments."""
        return {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "id": "call_2c384bc6-de46-4f29-8adc-60dd5805d305",
                    "function": {"name": "Get-FAQ", "arguments": "{}"},
                    "type": "function",
                }
            ],
        }

    @abstractmethod
    def test_tool_call_no_arguments(self, tool_call_no_arguments):
        """Test that tool calls with no arguments is translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
        pass

    def test_image_url(self):
        """
        Send an image URL content part and check that a response comes back.

        Skipped when ``supports_vision`` is false for the model.
        """
        litellm.set_verbose = True
        from litellm.utils import supports_vision

        self._use_local_model_cost_map()

        base_completion_call_args = self.get_base_completion_call_args()
        if not supports_vision(base_completion_call_args["model"], None):
            pytest.skip("Model does not support image input")

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What's in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://i.pinimg.com/736x/b4/b1/be/b4b1becad04d03a9071db2817fc9fe77.jpg"
                        },
                    },
                ],
            }
        ]

        response = litellm.completion(**base_completion_call_args, messages=messages)
        assert response is not None

    def test_prompt_caching(self):
        """
        Issue the same ``cache_control``-annotated request twice and check usage.

        Every response must satisfy ``_usage_format_tests``; the final response
        must additionally report ``cached_tokens > 0``. Skipped when
        ``supports_prompt_caching`` is false for the model or when the provider
        raises ``litellm.InternalServerError``.
        """
        litellm.set_verbose = True
        from litellm.utils import supports_prompt_caching

        self._use_local_model_cost_map()

        base_completion_call_args = self.get_base_completion_call_args()
        if not supports_prompt_caching(base_completion_call_args["model"], None):
            print("Model does not support prompt caching")
            pytest.skip("Model does not support prompt caching")

        try:
            for _ in range(2):
                response = litellm.completion(
                    **base_completion_call_args,
                    messages=[
                        # System Message
                        {
                            "role": "system",
                            "content": [
                                {
                                    "type": "text",
                                    "text": "Here is the full text of a complex legal agreement"
                                    * 400,
                                    "cache_control": {"type": "ephemeral"},
                                }
                            ],
                        },
                        # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": "What are the key terms and conditions in this agreement?",
                                    "cache_control": {"type": "ephemeral"},
                                }
                            ],
                        },
                        {
                            "role": "assistant",
                            "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
                        },
                        # The final turn is marked with cache-control, for continuing in followups.
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": "What are the key terms and conditions in this agreement?",
                                    "cache_control": {"type": "ephemeral"},
                                }
                            ],
                        },
                    ],
                    temperature=0.2,
                    max_tokens=10,
                )

                _usage_format_tests(response.usage)

            print("response=", response)
            print("response.usage=", response.usage)

            assert "prompt_tokens_details" in response.usage
            assert response.usage.prompt_tokens_details.cached_tokens > 0
        except litellm.InternalServerError:
            # Report the run as skipped rather than passed: nothing was verified.
            pytest.skip("Model is overloaded")

    @pytest.fixture
    def pdf_messages(self):
        """
        Download a sample PDF and return it as a base64 data URL.

        Returns:
            A ``data:application/pdf;base64,...`` string.

        Raises:
            requests.HTTPError: If the download returns an error status, so an
                error page is never encoded and sent as if it were a PDF.
        """
        import base64

        import requests

        # URL of the file
        url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"

        # requests has no default timeout; bound the download explicitly
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        file_data = response.content

        encoded_file = base64.b64encode(file_data).decode("utf-8")
        return f"data:application/pdf;base64,{encoded_file}"

    def test_completion_cost(self):
        """Check that ``completion_cost`` reports a positive cost for a simple completion."""
        from litellm import completion_cost

        self._use_local_model_cost_map()

        litellm.set_verbose = True
        response = litellm.completion(
            **self.get_base_completion_call_args(),
            messages=[{"role": "user", "content": "Hello, how are you?"}],
        )
        cost = completion_cost(response)

        assert cost > 0
|