mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 03:34:10 +00:00
* fix(cost_calculator.py): move to using `.get_model_info()` for cost per token calculations
  ensures cost tracking is reliable - handles edge cases of parsing model cost map
* build(model_prices_and_context_window.json): add 'supports_response_schema' for select tgai models
  Fixes https://github.com/BerriAI/litellm/pull/7037#discussion_r1872157329
* build(model_prices_and_context_window.json): remove 'pdf input' and 'vision' support from nova micro in model map
  Bedrock docs indicate no support for micro - https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference-supported-models-features.html
* fix(converse_transformation.py): support amazon nova tool use
* fix(opentelemetry): Add missing LLM request type attribute to spans (#7041)
* feat(opentelemetry): add LLM request type attribute to spans
* lint
* fix: curl usage (#7038)
  curl -d, --data <data> is lowercase d
  curl -D, --dump-header <filename> is uppercase D
  references:
  https://curl.se/docs/manpage.html#-d
  https://curl.se/docs/manpage.html#-D
* fix(spend_tracking.py): handle empty 'id' in model response - when creating spend log
  Fixes https://github.com/BerriAI/litellm/issues/7023
* fix(streaming_chunk_builder.py): handle initial id being empty string
  Fixes https://github.com/BerriAI/litellm/issues/7023
* fix(anthropic_passthrough_logging_handler.py): add end user cost tracking for anthropic pass through endpoint
* docs(pass_through/): refactor docs location + add table on supported features for pass through endpoints
* feat(anthropic_passthrough_logging_handler.py): support end user cost tracking via anthropic sdk
* docs(anthropic_completion.md): add docs on passing end user param for cost tracking on anthropic sdk
* fix(litellm_logging.py): use standard logging payload if present in kwargs
  prevent datadog logging error for pass through endpoints
* docs(bedrock.md): add rerank api usage example to docs
* bugfix/change dummy tool name format (#7053)
* fix viewing keys (#7042)
* ui new build
* build(model_prices_and_context_window.json): add bedrock region models to model cost map (#7044)
* bye (#6982)
* (fix) litellm router.aspeech (#6962)
* doc Migrating Databases
* fix aspeech on router
* test_audio_speech_router
* test_audio_speech_router
* docs show supported providers on batches api doc
* change dummy tool name format
---------
Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com>
Co-authored-by: yujonglee <yujonglee.dev@gmail.com>
* fix: fix linting errors
* test: update test
* fix(litellm_logging.py): fix pass through check
* fix(test_otel_logging.py): fix test
* fix(cost_calculator.py): update handling for cost per second
* fix(cost_calculator.py): fix cost check
* test: fix test
* (fix) adding public routes when using custom header (#7045)
* get_api_key_from_custom_header
* add test_get_api_key_from_custom_header
* fix testing use 1 file for test user api key auth
* fix test user api key auth
* test_custom_api_key_header_name
* build: update ui build
---------
Co-authored-by: Doron Kopit <83537683+doronkopit5@users.noreply.github.com>
Co-authored-by: lloydchang <lloydchang@gmail.com>
Co-authored-by: hgulersen <haymigulersen@gmail.com>
Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: yujonglee <yujonglee.dev@gmail.com>
290 lines
8.6 KiB
Python
import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock

from pydantic.main import Model

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system-path

import pytest
import litellm
from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig, Span
import asyncio
import logging
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from litellm._logging import verbose_logger
from litellm.proxy._types import SpanAttributes

verbose_logger.setLevel(logging.DEBUG)

EXPECTED_SPAN_NAMES = ["litellm_request", "raw_gen_ai_request"]
exporter = InMemorySpanExporter()
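
# The tests below register an OpenTelemetry callback wired to the in-memory
# exporter defined above; after each request they read the recorded spans via
# exporter.get_finished_spans(), assert on span names and attributes, and call
# exporter.clear() so spans do not leak between parametrized runs.
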
@pytest.mark.asyncio
@pytest.mark.parametrize("streaming", [True, False])
async def test_async_otel_callback(streaming):
    litellm.set_verbose = True

    litellm.callbacks = [OpenTelemetry(config=OpenTelemetryConfig(exporter=exporter))]

    response = await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        temperature=0.1,
        user="OTEL_USER",
        stream=streaming,
    )

    if streaming is True:
        async for chunk in response:
            print("chunk", chunk)

    # give the async callback time to export spans before asserting
    await asyncio.sleep(4)
    spans = exporter.get_finished_spans()
    print("spans", spans)
    assert len(spans) == 2

    _span_names = [span.name for span in spans]
    print("recorded span names", _span_names)
    assert set(_span_names) == set(EXPECTED_SPAN_NAMES)

    # print the value of a span
    for span in spans:
        print("span name", span.name)
        print("span attributes", span.attributes)

        if span.name == "litellm_request":
            validate_litellm_request(span)
            # Additional specific checks
            assert span._attributes["gen_ai.request.model"] == "gpt-3.5-turbo"
            assert span._attributes["gen_ai.system"] == "openai"
            assert span._attributes["gen_ai.request.temperature"] == 0.1
            assert span._attributes["llm.is_streaming"] == str(streaming)
            assert span._attributes["llm.user"] == "OTEL_USER"
        elif span.name == "raw_gen_ai_request":
            if streaming is True:
                validate_raw_gen_ai_request_openai_streaming(span)
            else:
                validate_raw_gen_ai_request_openai_non_streaming(span)

    # clear in memory exporter
    exporter.clear()

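# Helper validators: each checks that a span carries the expected set of
# attribute names. They read the SDK span's internal `_attributes` mapping
# directly, i.e. the same data exposed by the public `span.attributes` view.
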
def validate_litellm_request(span):
    expected_attributes = [
        "gen_ai.request.model",
        "gen_ai.system",
        "gen_ai.request.temperature",
        "llm.is_streaming",
        "llm.user",
        "gen_ai.response.id",
        "gen_ai.response.model",
        "llm.usage.total_tokens",
        "gen_ai.usage.completion_tokens",
        "gen_ai.usage.prompt_tokens",
    ]

    # get the str of all the span attributes
    print("span attributes", span._attributes)

    for attr in expected_attributes:
        value = span._attributes[attr]
        print("value", value)
        assert value is not None, f"Attribute {attr} has None value"

def validate_raw_gen_ai_request_openai_non_streaming(span):
    expected_attributes = [
        "llm.openai.messages",
        "llm.openai.temperature",
        "llm.openai.user",
        "llm.openai.extra_body",
        "llm.openai.id",
        "llm.openai.choices",
        "llm.openai.created",
        "llm.openai.model",
        "llm.openai.object",
        "llm.openai.service_tier",
        "llm.openai.system_fingerprint",
        "llm.openai.usage",
    ]

    print("span attributes", span._attributes)
    for attr in span._attributes:
        print(attr)

    for attr in expected_attributes:
        assert span._attributes[attr] is not None, f"Attribute {attr} has None"

def validate_raw_gen_ai_request_openai_streaming(span):
    expected_attributes = [
        "llm.openai.messages",
        "llm.openai.temperature",
        "llm.openai.user",
        "llm.openai.extra_body",
        "llm.openai.model",
    ]

    print("span attributes", span._attributes)
    for attr in span._attributes:
        print(attr)

    for attr in expected_attributes:
        assert span._attributes[attr] is not None, f"Attribute {attr} has None"

@pytest.mark.parametrize(
    "model",
    ["anthropic/claude-3-opus-20240229"],
)
@pytest.mark.flaky(retries=6, delay=2)
def test_completion_claude_3_function_call_with_otel(model):
    litellm.set_verbose = True

    litellm.callbacks = [OpenTelemetry(config=OpenTelemetryConfig(exporter=exporter))]
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    messages = [
        {
            "role": "user",
            "content": "What's the weather like in Boston today in Fahrenheit?",
        }
    ]
    try:
        # test without max tokens
        response = litellm.completion(
            model=model,
            messages=messages,
            tools=tools,
            tool_choice={
                "type": "function",
                "function": {"name": "get_current_weather"},
            },
            drop_params=True,
        )

        print("response from LiteLLM", response)
    except litellm.InternalServerError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
    finally:
        # clear in memory exporter
        exporter.clear()

@pytest.mark.asyncio
@pytest.mark.parametrize("streaming", [True, False])
@pytest.mark.parametrize("global_redact", [True, False])
async def test_awesome_otel_with_message_logging_off(streaming, global_redact):
    """
    No content should be logged when message logging is off

    tests when litellm.turn_off_message_logging is set to True
    tests when OpenTelemetry(message_logging=False) is set
    """
    litellm.set_verbose = True
    litellm.callbacks = [OpenTelemetry(config=OpenTelemetryConfig(exporter=exporter))]
    if global_redact is False:
        otel_logger = OpenTelemetry(
            message_logging=False, config=OpenTelemetryConfig(exporter="console")
        )
    else:
        # use global redaction
        litellm.turn_off_message_logging = True
        otel_logger = OpenTelemetry(config=OpenTelemetryConfig(exporter="console"))

    litellm.callbacks = [otel_logger]
    litellm.success_callback = []
    litellm.failure_callback = []

    response = await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        mock_response="hi",
        stream=streaming,
    )
    print("response", response)

    if streaming is True:
        async for chunk in response:
            print("chunk", chunk)

    await asyncio.sleep(1)
    spans = exporter.get_finished_spans()
    print("spans", spans)
    assert len(spans) == 1

    _span = spans[0]
    print("span attributes", _span.attributes)

    validate_redacted_message_span_attributes(_span)

    # clear in memory exporter
    exporter.clear()

    if global_redact is True:
        litellm.turn_off_message_logging = False

def validate_redacted_message_span_attributes(span):
    expected_attributes = [
        "gen_ai.request.model",
        "gen_ai.system",
        "llm.is_streaming",
        "llm.request.type",
        "gen_ai.response.id",
        "gen_ai.response.model",
        "llm.usage.total_tokens",
        "gen_ai.usage.completion_tokens",
        "gen_ai.usage.prompt_tokens",
        "metadata.user_api_key_hash",
        "metadata.requester_ip_address",
        "metadata.user_api_key_team_alias",
        "metadata.requester_metadata",
        "metadata.user_api_key_team_id",
        "metadata.spend_logs_metadata",
        "metadata.user_api_key_alias",
        "metadata.user_api_key_user_id",
        "metadata.user_api_key_org_id",
    ]

    _all_attributes = set(
        [
            name.value if isinstance(name, SpanAttributes) else name
            for name in span.attributes.keys()
        ]
    )
    print("all_attributes", _all_attributes)

    for attr in _all_attributes:
        print(f"attr: {attr}, type: {type(attr)}")

    assert _all_attributes == set(expected_attributes)

    pass
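
# To run just this suite locally (the commit log above refers to this file as
# test_otel_logging.py; adjust the path if the repo layout differs):
#   pytest -s -v test_otel_logging.py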