LiteLLM Minor Fixes & Improvements (12/05/2024) (#7037)

* fix(together_ai/chat): only return response_format + tools for supported models

Fixes https://github.com/BerriAI/litellm/issues/6972
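A rough sketch of the call path this affects (model id and schema are illustrative, not taken from this commit): with the fix, response_format / tools are only forwarded to Together AI models that support them.

import litellm

# Illustrative only: response_format is passed through for supported Together AI
# models and dropped for the rest, instead of triggering a provider-side error.
response = litellm.completion(
    model="together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",  # example model id
    messages=[{"role": "user", "content": "Reply with a JSON object containing a 'city' key."}],
    response_format={"type": "json_object"},
)
print(response.choices[0].message.content)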

* feat(bedrock/rerank): initial working commit for bedrock rerank api support

Closes https://github.com/BerriAI/litellm/issues/7021

* feat(bedrock/rerank): async bedrock rerank api support

Addresses https://github.com/BerriAI/litellm/issues/7021
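Rough usage sketch for the new rerank route, sync and async (the Bedrock model id below is a placeholder; check the Bedrock docs for the real rerank model ids):

import asyncio

import litellm

docs = ["Paris is the capital of France.", "Berlin is the capital of Germany."]

# Synchronous rerank against a Bedrock rerank model (placeholder model id).
result = litellm.rerank(
    model="bedrock/cohere.rerank-v3-5:0",  # hypothetical model id
    query="What is the capital of France?",
    documents=docs,
    top_n=1,
)
print(result.results)

# Async variant added in this commit.
async def main():
    result = await litellm.arerank(
        model="bedrock/cohere.rerank-v3-5:0",  # hypothetical model id
        query="What is the capital of France?",
        documents=docs,
        top_n=1,
    )
    print(result.results)

asyncio.run(main())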

* build(model_prices_and_context_window.json): add 'supports_prompt_caching' for bedrock models + clean up cross-region entries from the model list (duplicate information that led to inconsistencies)
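The new flag is queryable through the existing helper in litellm.utils (the Bedrock model id below is an example, not a statement of which entries were updated):

from litellm.utils import supports_prompt_caching

# Reads the 'supports_prompt_caching' field from model_prices_and_context_window.json.
print(supports_prompt_caching(model="anthropic/claude-3-5-sonnet-20240620"))
print(supports_prompt_caching(model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"))  # example Bedrock model id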

* docs(json_mode.md): clarify model support for json schema

Closes https://github.com/BerriAI/litellm/issues/6998
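A minimal sketch of checking and using JSON schema support (model names are examples only and do not restate the docs' support matrix; supports_response_schema is the existing helper in litellm.utils):

import litellm
from litellm.utils import supports_response_schema

# Check whether the model advertises response_schema / json_schema support.
if supports_response_schema(model="gemini-1.5-pro", custom_llm_provider="vertex_ai"):
    resp = litellm.completion(
        model="vertex_ai/gemini-1.5-pro",
        messages=[{"role": "user", "content": "List three cookie recipes."}],
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "recipes",
                "schema": {
                    "type": "object",
                    "properties": {
                        "recipes": {"type": "array", "items": {"type": "string"}}
                    },
                    "required": ["recipes"],
                },
            },
        },
    )
    print(resp.choices[0].message.content)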

* fix(_service_logger.py): handle dd callback in list

ensure failed spend tracking is logged to datadog
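A sketch of the configuration shape involved (the actual fix lives in _service_logger.py; this only illustrates the callback-in-a-list setup):

import litellm

# Callbacks supplied as a list rather than a single string; the fix makes sure the
# "datadog" entry in this shape is still picked up so failed spend tracking is emitted.
litellm.success_callback = ["datadog"]
litellm.failure_callback = ["datadog"]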

* feat(converse_transformation.py): translate from anthropic format to bedrock format

Closes https://github.com/BerriAI/litellm/issues/7030
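The exact repro in #7030 isn't restated here; as a general, hedged sketch, Anthropic-style content blocks sent through litellm to a Bedrock model are rewritten into Bedrock's Converse request format by this transformation (model id is an example):

import litellm

response = litellm.completion(
    model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",  # example model id
    messages=[
        {
            "role": "user",
            # Anthropic-style content blocks; translated to Bedrock Converse format internally.
            "content": [{"type": "text", "text": "Summarize this contract in one sentence."}],
        }
    ],
)
print(response.choices[0].message.content)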

* fix: fix linting errors

* test: fix test
Krish Dholakia 2024-12-05 00:02:31 -08:00 committed by GitHub
parent 12dfd14b52
commit 61b35c12bb
24 changed files with 858 additions and 400 deletions

@@ -38,76 +38,6 @@ def _usage_format_tests(usage: litellm.Usage):
    assert usage.prompt_tokens > usage.prompt_tokens_details.cached_tokens


@pytest.mark.parametrize(
    "model",
    [
        "anthropic/claude-3-5-sonnet-20240620",
        # "openai/gpt-4o",
        # "deepseek/deepseek-chat",
    ],
)
def test_prompt_caching_model(model):
    try:
        for _ in range(2):
            response = litellm.completion(
                model=model,
                messages=[
                    # System Message
                    {
                        "role": "system",
                        "content": [
                            {
                                "type": "text",
                                "text": "Here is the full text of a complex legal agreement"
                                * 400,
                                "cache_control": {"type": "ephemeral"},
                            }
                        ],
                    },
                    # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": "What are the key terms and conditions in this agreement?",
                                "cache_control": {"type": "ephemeral"},
                            }
                        ],
                    },
                    {
                        "role": "assistant",
                        "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
                    },
                    # The final turn is marked with cache-control, for continuing in followups.
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": "What are the key terms and conditions in this agreement?",
                                "cache_control": {"type": "ephemeral"},
                            }
                        ],
                    },
                ],
                temperature=0.2,
                max_tokens=10,
            )

            _usage_format_tests(response.usage)

            print("response=", response)
            print("response.usage=", response.usage)

            _usage_format_tests(response.usage)

            assert "prompt_tokens_details" in response.usage
            assert response.usage.prompt_tokens_details.cached_tokens > 0
    except litellm.InternalServerError:
        pass


def test_supports_prompt_caching():
    from litellm.utils import supports_prompt_caching