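# Unit tests for LiteLLM's OpenTelemetry logging integration.
#
# Spans are captured with an in-memory exporter (InMemorySpanExporter) so each
# test can assert on span names and attributes without a running collector.
# Run the file with pytest, e.g. `pytest -x -s <this file>`; the async tests
# assume an asyncio plugin such as pytest-asyncio is installed.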
import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock

from pydantic.main import Model

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system-path

import pytest
import litellm
from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig, Span
import asyncio
import logging
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from litellm._logging import verbose_logger

verbose_logger.setLevel(logging.DEBUG)

EXPECTED_SPAN_NAMES = ["litellm_request", "raw_gen_ai_request"]
exporter = InMemorySpanExporter()

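# A single completion call is expected to emit exactly two spans: the
# LiteLLM-level "litellm_request" span and the provider-level
# "raw_gen_ai_request" span (see EXPECTED_SPAN_NAMES above).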
@pytest.mark.asyncio
@pytest.mark.parametrize("streaming", [True, False])
async def test_async_otel_callback(streaming):
    litellm.set_verbose = True

    litellm.callbacks = [OpenTelemetry(config=OpenTelemetryConfig(exporter=exporter))]

    response = await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        temperature=0.1,
        user="OTEL_USER",
        stream=streaming,
    )

    if streaming is True:
        async for chunk in response:
            print("chunk", chunk)

    await asyncio.sleep(4)
    spans = exporter.get_finished_spans()
    print("spans", spans)
    assert len(spans) == 2

    _span_names = [span.name for span in spans]
    print("recorded span names", _span_names)
    assert set(_span_names) == set(EXPECTED_SPAN_NAMES)

    # print the value of a span
    for span in spans:
        print("span name", span.name)
        print("span attributes", span.attributes)

        if span.name == "litellm_request":
            validate_litellm_request(span)
            # Additional specific checks
            assert span._attributes["gen_ai.request.model"] == "gpt-3.5-turbo"
            assert span._attributes["gen_ai.system"] == "openai"
            assert span._attributes["gen_ai.request.temperature"] == 0.1
            assert span._attributes["llm.is_streaming"] == str(streaming)
            assert span._attributes["llm.user"] == "OTEL_USER"
        elif span.name == "raw_gen_ai_request":
            if streaming is True:
                validate_raw_gen_ai_request_openai_streaming(span)
            else:
                validate_raw_gen_ai_request_openai_non_streaming(span)

    # clear in memory exporter
    exporter.clear()

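# The "litellm_request" span should carry request, response, and token-usage
# attributes; every attribute listed below must be present and non-None.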
def validate_litellm_request(span):
    expected_attributes = [
        "gen_ai.request.model",
        "gen_ai.system",
        "gen_ai.request.temperature",
        "llm.is_streaming",
        "llm.user",
        "gen_ai.response.id",
        "gen_ai.response.model",
        "llm.usage.total_tokens",
        "gen_ai.usage.completion_tokens",
        "gen_ai.usage.prompt_tokens",
    ]

    # get the str of all the span attributes
    print("span attributes", span._attributes)

    for attr in expected_attributes:
        value = span._attributes[attr]
        print("value", value)
        assert value is not None, f"Attribute {attr} has None value"

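# For non-streaming OpenAI calls, the raw request and response fields are
# logged under the "llm.openai.*" prefix (messages, choices, usage, etc.).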
def validate_raw_gen_ai_request_openai_non_streaming(span):
    expected_attributes = [
        "llm.openai.messages",
        "llm.openai.temperature",
        "llm.openai.user",
        "llm.openai.extra_body",
        "llm.openai.id",
        "llm.openai.choices",
        "llm.openai.created",
        "llm.openai.model",
        "llm.openai.object",
        "llm.openai.service_tier",
        "llm.openai.system_fingerprint",
        "llm.openai.usage",
    ]

    print("span attributes", span._attributes)
    for attr in span._attributes:
        print(attr)

    for attr in expected_attributes:
        assert span._attributes[attr] is not None, f"Attribute {attr} has None"

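# For streaming OpenAI calls, only the raw request fields are checked here
# (messages, temperature, user, extra_body, model).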
def validate_raw_gen_ai_request_openai_streaming(span):
    expected_attributes = [
        "llm.openai.messages",
        "llm.openai.temperature",
        "llm.openai.user",
        "llm.openai.extra_body",
        "llm.openai.model",
    ]

    print("span attributes", span._attributes)
    for attr in span._attributes:
        print(attr)

    for attr in expected_attributes:
        assert span._attributes[attr] is not None, f"Attribute {attr} has None"

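# Smoke test: a tool/function-call request to Claude with the OTEL callback
# attached should complete without unexpected exceptions; provider
# InternalServerError responses are tolerated and the test is marked flaky.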
@pytest.mark.parametrize(
    "model",
    ["anthropic/claude-3-opus-20240229"],
)
@pytest.mark.flaky(retries=6, delay=2)
def test_completion_claude_3_function_call_with_otel(model):
    litellm.set_verbose = True

    litellm.callbacks = [OpenTelemetry(config=OpenTelemetryConfig(exporter=exporter))]
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    messages = [
        {
            "role": "user",
            "content": "What's the weather like in Boston today in Fahrenheit?",
        }
    ]
    try:
        # test without max tokens
        response = litellm.completion(
            model=model,
            messages=messages,
            tools=tools,
            tool_choice={
                "type": "function",
                "function": {"name": "get_current_weather"},
            },
            drop_params=True,
        )

        print("response from LiteLLM", response)
    except litellm.InternalServerError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
    finally:
        # clear in memory exporter
        exporter.clear()

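# Redaction can be enabled either globally (litellm.turn_off_message_logging)
# or per-logger (OpenTelemetry(message_logging=False)); both paths are covered
# by the global_redact parametrization below.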
@pytest.mark.asyncio
@pytest.mark.parametrize("streaming", [True, False])
@pytest.mark.parametrize("global_redact", [True, False])
async def test_awesome_otel_with_message_logging_off(streaming, global_redact):
    """
    No content should be logged when message logging is off

    tests when litellm.turn_off_message_logging is set to True
    tests when OpenTelemetry(message_logging=False) is set
    """
    litellm.set_verbose = True
    litellm.callbacks = [OpenTelemetry(config=OpenTelemetryConfig(exporter=exporter))]
    if global_redact is False:
        otel_logger = OpenTelemetry(
            message_logging=False, config=OpenTelemetryConfig(exporter="console")
        )
    else:
        # use global redaction
        litellm.turn_off_message_logging = True
        otel_logger = OpenTelemetry(config=OpenTelemetryConfig(exporter="console"))

    litellm.callbacks = [otel_logger]
    litellm.success_callback = []
    litellm.failure_callback = []

    response = await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        mock_response="hi",
        stream=streaming,
    )
    print("response", response)

    if streaming is True:
        async for chunk in response:
            print("chunk", chunk)

    await asyncio.sleep(1)
    spans = exporter.get_finished_spans()
    print("spans", spans)
    assert len(spans) == 1

    _span = spans[0]
    print("span attributes", _span.attributes)

    validate_redacted_message_span_attributes(_span)

    # clear in memory exporter
    exporter.clear()

    if global_redact is True:
        litellm.turn_off_message_logging = False

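# With redaction enabled, the span should contain only model, usage, and
# request-metadata attributes; the attribute set must match this list exactly,
# so no message-content attributes may leak through.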
def validate_redacted_message_span_attributes(span):
    expected_attributes = [
        "gen_ai.request.model",
        "gen_ai.system",
        "llm.is_streaming",
        "gen_ai.response.id",
        "gen_ai.response.model",
        "llm.usage.total_tokens",
        "gen_ai.usage.completion_tokens",
        "gen_ai.usage.prompt_tokens",
        "metadata.user_api_key_hash",
        "metadata.requester_ip_address",
        "metadata.user_api_key_team_alias",
        "metadata.requester_metadata",
        "metadata.user_api_key_team_id",
        "metadata.spend_logs_metadata",
        "metadata.user_api_key_alias",
        "metadata.user_api_key_user_id",
        "metadata.user_api_key_org_id",
    ]

    _all_attributes = set(span.attributes.keys())
    print("all_attributes", _all_attributes)

    assert _all_attributes == set(expected_attributes)