forked from phoenix/litellm-mirror
* LiteLLM Minor Fixes & Improvements (09/26/2024) (#5925)
* fix(litellm_logging.py): don't initialize prometheus_logger if non premium user
Prevents bad error messages in logs
Fixes https://github.com/BerriAI/litellm/issues/5897
* Add Support for Custom Providers in Vision and Function Call Utils (#5688)
* Add Support for Custom Providers in Vision and Function Call Utils Lookup
* Remove parallel function call due to missing model info param
* Add Unit Tests for Vision and Function Call Changes
* fix-#5920: set header value to string to fix "'int' object has no att… (#5922)
* LiteLLM Minor Fixes & Improvements (09/24/2024) (#5880)
* LiteLLM Minor Fixes & Improvements (09/23/2024) (#5842)
* feat(auth_utils.py): enable admin to allow client-side credentials to be passed
Makes it easier for devs to experiment with finetuned fireworks ai models
* feat(router.py): allow setting configurable_clientside_auth_params for a model
Closes https://github.com/BerriAI/litellm/issues/5843
* build(model_prices_and_context_window.json): fix anthropic claude-3-5-sonnet max output token limit
Fixes https://github.com/BerriAI/litellm/issues/5850
* fix(azure_ai/): support content list for azure ai
Fixes https://github.com/BerriAI/litellm/issues/4237
* fix(litellm_logging.py): always set saved_cache_cost
Set to 0 by default
* fix(fireworks_ai/cost_calculator.py): add fireworks ai default pricing
handles calling 405b+ size models
* fix(slack_alerting.py): fix error alerting for failed spend tracking
Fixes regression with slack alerting error monitoring
* fix(vertex_and_google_ai_studio_gemini.py): handle gemini no candidates in streaming chunk error
* docs(bedrock.md): add llama3-1 models
* test: fix tests
* fix(azure_ai/chat): fix transformation for azure ai calls
* feat(azure_ai/embed): Add azure ai embeddings support
Closes https://github.com/BerriAI/litellm/issues/5861
* fix(azure_ai/embed): enable async embedding
* feat(azure_ai/embed): support azure ai multimodal embeddings
* fix(azure_ai/embed): support async multi modal embeddings
* feat(together_ai/embed): support together ai embedding calls
* feat(rerank/main.py): log source documents for rerank endpoints to langfuse
improves rerank endpoint logging
* fix(langfuse.py): support logging `/audio/speech` input to langfuse
* test(test_embedding.py): fix test
* test(test_completion_cost.py): fix helper util
* fix-#5920: set header value to string to fix "'int' object has no attribute 'encode'"
---------
Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com>
* Revert "fix-#5920: set header value to string to fix "'int' object has no att…" (#5926)
This reverts commit a554ae2695.
* build(model_prices_and_context_window.json): add azure ai cohere rerank model pricing
Enables cost tracking for azure ai cohere rerank models
* fix(litellm_logging.py): fix debug log to be clearer
Closes https://github.com/BerriAI/litellm/issues/5909
* test(test_utils.py): fix test name
* fix(azure_ai/cost_calculator.py): support cost tracking for azure ai rerank models
* fix(azure_ai): fix azure ai base model cost tracking for rerank endpoints
* fix(converse_handler.py): support new llama 3-2 models
Fixes https://github.com/BerriAI/litellm/issues/5901
* fix(litellm_logging.py): ensure response is redacted for standard message logging
Fixes https://github.com/BerriAI/litellm/issues/5890#issuecomment-2378242360
* fix(cost_calculator.py): use 'get_model_info' for cohere rerank cost calculation
allows user to set custom cost for model
* fix(config.yml): fix docker hub auth
* build(config.yml): add docker auth to all tests
* fix(db/create_views.py): fix linting error
* fix(main.py): fix circular import
* fix(azure_ai/__init__.py): fix circular import
* fix(main.py): fix import
* fix: fix linting errors
* test: fix test
* fix(proxy_server.py): pass premium user value on startup
used for prometheus init
---------
Co-authored-by: Cole Murray <colemurray.cs@gmail.com>
Co-authored-by: bravomark <62681807+bravomark@users.noreply.github.com>
* handle streaming for azure ai studio error
* [Perf Proxy] parallel request limiter - use one cache update call (#5932)
* fix parallel request limiter - use one cache update call
* ci/cd run again
* run ci/cd again
* use docker username password
* fix config.yml
* fix config
* fix config
* fix config.yml
* ci/cd run again
* use correct typing for batch set cache
* fix async_set_cache_pipeline
* fix only check user id tpm / rpm limits when limits set
* fix test_openai_azure_embedding_with_oidc_and_cf
* test: fix test
* test(test_rerank.py): fix test
---------
Co-authored-by: Cole Murray <colemurray.cs@gmail.com>
Co-authored-by: bravomark <62681807+bravomark@users.noreply.github.com>
Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
167 lines
5.1 KiB
Python
167 lines
5.1 KiB
Python
import io
import os
import sys

# Put the directory two levels above the current working directory at the
# front of the import path before importing litellm.
# NOTE(review): presumably so the in-repo litellm package is picked up - confirm.
sys.path.insert(0, os.path.abspath("../.."))

import asyncio
import logging
import time
import uuid

import pytest
from prometheus_client import REGISTRY, CollectorRegistry

import litellm
from litellm import completion
from litellm._logging import verbose_logger
from litellm.integrations.prometheus import PrometheusLogger
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler

# Turn on debug-level logging and litellm's verbose flag for every test in this module.
verbose_logger.setLevel(logging.DEBUG)
litellm.set_verbose = True
|
|
|
|
|
|
@pytest.mark.skip(reason="duplicate test of logging with callbacks")
@pytest.mark.asyncio()
async def test_async_prometheus_success_logging():
    """Check Prometheus samples after one mocked ``litellm.acompletion`` call.

    A ``PrometheusLogger`` is installed as the only entry in
    ``litellm.callbacks``, one mocked completion is awaited, and the samples
    exposed by the default ``REGISTRY`` are compared against fixed values.

    The assertions compare absolute sample values read from the
    process-global ``REGISTRY``, not the change caused by this call.
    """
    from litellm.integrations.prometheus import PrometheusLogger

    prometheus_logger = PrometheusLogger()
    request_id = str(uuid.uuid4())

    litellm.set_verbose = True
    litellm.callbacks = [prometheus_logger]

    request_metadata = {
        "id": request_id,
        "tags": ["tag1", "tag2"],
        "user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
        "user_api_key_alias": "ishaans-prometheus-key",
        "user_api_end_user_max_budget": None,
        "litellm_api_version": "1.40.19",
        "global_max_parallel_requests": None,
        "user_api_key_user_id": "admin",
        "user_api_key_org_id": None,
        "user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
        "user_api_key_team_alias": "testing-team",
    }

    response = await litellm.acompletion(
        model="claude-instant-1.2",
        messages=[{"role": "user", "content": "what llm are u"}],
        max_tokens=10,
        mock_response="hi",
        temperature=0.2,
        metadata=request_metadata,
    )
    print(response)

    # NOTE(review): presumably gives the logging callbacks time to run
    # before the registry is read - confirm.
    await asyncio.sleep(3)

    print("done with success request")
    print(
        "vars of test_prometheus_logger",
        vars(prometheus_logger.litellm_requests_metric),
    )

    # Flatten every sample in the registry into name -> value; when several
    # samples share a name, the last one collected wins.
    metrics = {
        sample.name: sample.value
        for family in REGISTRY.collect()
        for sample in family.samples
    }
    print("metrics from prometheus", metrics)

    expected_values = (
        ("litellm_requests_metric_total", 1.0),
        ("litellm_total_tokens_total", 30.0),
        ("litellm_deployment_success_responses_total", 1.0),
        ("litellm_deployment_total_requests_total", 1.0),
        ("litellm_deployment_latency_per_output_token_bucket", 1.0),
    )
    for sample_name, expected in expected_values:
        assert metrics[sample_name] == expected
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_async_prometheus_success_logging_with_callbacks():
    """Check how much each Prometheus sample grows after one mocked call.

    The default ``REGISTRY`` is snapshotted before and after a single mocked
    ``litellm.acompletion`` call made with a ``PrometheusLogger`` as the only
    entry in ``litellm.callbacks``. The assertions compare the difference
    between the two snapshots against fixed values; a sample missing from
    the first snapshot counts as 0.
    """
    prometheus_logger = PrometheusLogger()

    request_id = str(uuid.uuid4())
    litellm.set_verbose = True

    # Clear the success/failure callback lists so that the Prometheus logger
    # is the only entry across the three callback lists set here.
    litellm.success_callback = []
    litellm.failure_callback = []
    litellm.callbacks = [prometheus_logger]

    # Snapshot before the request; when several samples share a name, the
    # last one collected wins.
    initial_metrics = {
        sample.name: sample.value
        for family in REGISTRY.collect()
        for sample in family.samples
    }

    request_metadata = {
        "id": request_id,
        "tags": ["tag1", "tag2"],
        "user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
        "user_api_key_alias": "ishaans-prometheus-key",
        "user_api_end_user_max_budget": None,
        "litellm_api_version": "1.40.19",
        "global_max_parallel_requests": None,
        "user_api_key_user_id": "admin",
        "user_api_key_org_id": None,
        "user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
        "user_api_key_team_alias": "testing-team",
    }

    response = await litellm.acompletion(
        model="claude-instant-1.2",
        messages=[{"role": "user", "content": "what llm are u"}],
        max_tokens=10,
        mock_response="hi",
        temperature=0.2,
        metadata=request_metadata,
    )
    print(response)

    # NOTE(review): presumably gives the logging callbacks time to run
    # before the registry is read again - confirm.
    await asyncio.sleep(3)

    print("done with success request")
    print(
        "vars of test_prometheus_logger",
        vars(prometheus_logger.litellm_requests_metric),
    )

    # Snapshot after the request, built the same way as the first one.
    updated_metrics = {
        sample.name: sample.value
        for family in REGISTRY.collect()
        for sample in family.samples
    }
    print("metrics from prometheus", updated_metrics)

    # Expected growth of each sample between the two snapshots.
    expected_deltas = (
        ("litellm_requests_metric_total", 1.0),
        ("litellm_total_tokens_total", 30.0),
        ("litellm_deployment_success_responses_total", 1.0),
        ("litellm_deployment_total_requests_total", 1.0),
        ("litellm_deployment_latency_per_output_token_bucket", 1.0),
    )
    for sample_name, expected_delta in expected_deltas:
        assert (
            updated_metrics[sample_name] - initial_metrics.get(sample_name, 0)
            == expected_delta
        )
|