forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_tag_routing_fixes
Commit a6d3bd0ab7
9 changed files with 146 additions and 13 deletions
@@ -104,7 +104,7 @@ spec:
           imagePullPolicy: {{ .Values.image.pullPolicy }}
           env:
             - name: HOST
-              value: "::"
+              value: "{{ .Values.listen | default "0.0.0.0" }}"
             - name: PORT
               value: {{ .Values.service.port | quote}}
           {{- if .Values.db.deployStandalone }}

@@ -138,8 +138,13 @@ spec:
             - name: DATABASE_NAME
               value: {{ .Values.db.database }}
           {{- end }}
+          {{- if .Values.database.url }}
+            - name: DATABASE_URL
+              value: {{ .Values.database.url | quote }}
+          {{- else }}
             - name: DATABASE_URL
               value: "postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME)"
+          {{- end }}
             - name: PROXY_MASTER_KEY
               valueFrom:
                 secretKeyRef:

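For reference, the fallback branch relies on Kubernetes dependent environment variables: each $(VAR) is substituted from the DATABASE_* entries defined earlier in the same env list. A minimal Python sketch of the equivalent composition (illustrative only; the variable names are the ones the template defines):

    import os

    # Illustrative only: mirrors the fallback DATABASE_URL built when .Values.database.url is unset.
    database_url = "postgresql://{user}:{password}@{host}/{name}".format(
        user=os.environ["DATABASE_USERNAME"],
        password=os.environ["DATABASE_PASSWORD"],
        host=os.environ["DATABASE_HOST"],
        name=os.environ["DATABASE_NAME"],
    )
    print(database_url)
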
@@ -231,4 +236,4 @@ spec:
       {{- with .Values.tolerations }}
       tolerations:
         {{- toYaml . | nindent 8 }}
       {{- end }}
 {{- end }}

@@ -208,8 +208,8 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
 -d '{
     "metadata": {
         "logging": [{
-            "callback_name": "langfuse", # 'otel', 'langfuse', 'lunary'
-            "callback_type": "success" # set, if required by integration - future improvement, have logging tools work for success + failure by default
+            "callback_name": "langfuse", # "otel", "langfuse", "lunary"
+            "callback_type": "success", # "success", "failure", "success_and_failure"
             "callback_vars": {
                 "langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY", # [RECOMMENDED] reference key in proxy environment
                 "langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY", # [RECOMMENDED] reference key in proxy environment

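The same key-generate request can be issued programmatically. A minimal sketch with Python's requests library; the proxy URL and the "sk-1234" master key are placeholders for your deployment, and the body simply mirrors the curl example above:

    import requests

    # Placeholder proxy address and master key; adjust for your deployment.
    resp = requests.post(
        "http://0.0.0.0:4000/key/generate",
        headers={"Authorization": "Bearer sk-1234", "Content-Type": "application/json"},
        json={
            "metadata": {
                "logging": [
                    {
                        "callback_name": "langfuse",    # "otel", "langfuse", "lunary"
                        "callback_type": "success",     # "success", "failure", "success_and_failure"
                        "callback_vars": {
                            # [RECOMMENDED] reference keys from the proxy environment
                            "langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY",
                            "langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY",
                        },
                    }
                ]
            }
        },
    )
    print(resp.json())
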
@@ -641,7 +641,9 @@ class OpenTelemetry(CustomLogger):
             return BatchSpanProcessor(
                 OTLPSpanExporterHTTP(
                     endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
-                )
+                ),
+                max_queue_size=100,
+                max_export_batch_size=100,
             )
         elif self.OTEL_EXPORTER == "otlp_grpc":
             verbose_logger.debug(

@@ -651,7 +653,9 @@ class OpenTelemetry(CustomLogger):
             return BatchSpanProcessor(
                 OTLPSpanExporterGRPC(
                     endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
-                )
+                ),
+                max_queue_size=100,
+                max_export_batch_size=100,
             )
         else:
             verbose_logger.debug(

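Both branches wire the exporter into a BatchSpanProcessor with a reduced queue size and export batch size. A standalone sketch of the same configuration against the OpenTelemetry SDK; the endpoint and header values here are placeholders, where the proxy code uses self.OTEL_ENDPOINT and the parsed _split_otel_headers:

    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor
    from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter

    # Placeholder endpoint/headers for a local OTLP/HTTP collector.
    exporter = OTLPSpanExporter(
        endpoint="http://localhost:4318/v1/traces",
        headers={"x-example-header": "value"},
    )
    processor = BatchSpanProcessor(
        exporter,
        max_queue_size=100,         # cap on buffered spans before new ones are dropped
        max_export_batch_size=100,  # spans sent per export call
    )
    provider = TracerProvider()
    provider.add_span_processor(processor)
    trace.set_tracer_provider(provider)
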
@@ -228,6 +228,16 @@ def initialize_callbacks_on_proxy(
             litellm.callbacks.extend(imported_list)
         else:
             litellm.callbacks = imported_list # type: ignore
+
+        if "prometheus" in value:
+            from litellm.proxy.proxy_server import app
+
+            verbose_proxy_logger.debug("Starting Prometheus Metrics on /metrics")
+            from prometheus_client import make_asgi_app
+
+            # Add prometheus asgi middleware to route /metrics requests
+            metrics_app = make_asgi_app()
+            app.mount("/metrics", metrics_app)
     else:
         litellm.callbacks = [
             get_instance_fn(

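The mount call exposes the default prometheus_client registry on the proxy's FastAPI app at /metrics. A self-contained sketch of the same pattern; the FastAPI app and counter below are stand-ins, not the proxy's actual objects:

    from fastapi import FastAPI
    from prometheus_client import Counter, make_asgi_app

    app = FastAPI()  # stand-in for litellm.proxy.proxy_server.app

    # Any metric registered in the default registry becomes visible on /metrics.
    requests_total = Counter("example_requests_total", "Example request counter")

    # Mount the Prometheus ASGI app so GET /metrics serves the scrape endpoint.
    metrics_app = make_asgi_app()
    app.mount("/metrics", metrics_app)

    @app.get("/ping")
    def ping():
        requests_total.inc()
        return {"ok": True}
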
@@ -86,10 +86,11 @@ def convert_key_logging_metadata_to_callback(
             team_callback_settings_obj.success_callback = []
         if team_callback_settings_obj.failure_callback is None:
             team_callback_settings_obj.failure_callback = []
 
         if data.callback_name not in team_callback_settings_obj.success_callback:
             team_callback_settings_obj.success_callback.append(data.callback_name)
 
-        if data.callback_name in team_callback_settings_obj.failure_callback:
+        if data.callback_name not in team_callback_settings_obj.failure_callback:
+            team_callback_settings_obj.failure_callback.append(data.callback_name)
 
     for var, value in data.callback_vars.items():

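The one-word change matters: with the original "in" check, a callback that was not yet registered never satisfied the condition, so it was never appended to failure_callback and failure events were not routed. A minimal sketch of the guarded-append pattern the fix restores; the function and list names are illustrative, not litellm's:

    def add_callback(name: str, callbacks: list[str]) -> list[str]:
        # Append only when the callback is not already registered, avoiding duplicates.
        if name not in callbacks:
            callbacks.append(name)
        return callbacks

    failure_callbacks: list[str] = []
    add_callback("langfuse", failure_callbacks)  # -> ["langfuse"]
    add_callback("langfuse", failure_callbacks)  # unchanged, no duplicate
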
@@ -14,9 +14,17 @@ model_list:
       id: "test-openai"
 
 
+router_settings:
+  enable_tag_filtering: True # 👈 Key Change
+
 general_settings:
   master_key: sk-1234
   alerting: ["slack"]
   spend_report_frequency: "1d"
 
+
+litellm_settings:
+  success_callback: ["prometheus"]
+  failure_callback: ["prometheus"]

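With enable_tag_filtering turned on, the router only considers deployments whose tags match the tags carried by the request. As a rough illustration, a request can carry tags in its metadata, the same way the new Prometheus test below does; the client call shape and tag names here are assumptions for the sketch, not part of this diff:

    import litellm

    # Hypothetical call: the tags in metadata are what tag-based routing filters on.
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
        mock_response="hi",                  # avoids a real provider call
        metadata={"tags": ["tag1", "tag2"]},
    )
    print(response.choices[0].message.content)
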
@@ -3690,7 +3690,7 @@ class Router:
                     exception=original_exception,
                 )
 
-            allowed_fails = _allowed_fails or self.allowed_fails
+            allowed_fails = _allowed_fails if _allowed_fails is not None else self.allowed_fails
 
             dt = get_utc_datetime()
             current_minute = dt.strftime("%H-%M")

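The change guards against a falsy-but-valid value: with "or", an explicit _allowed_fails of 0 silently falls back to the router default, while the "is not None" form preserves it. A tiny sketch of the difference; the default value here is illustrative, standing in for self.allowed_fails:

    _allowed_fails = 0          # caller explicitly allows zero failures
    default_allowed_fails = 3   # stand-in for self.allowed_fails

    buggy = _allowed_fails or default_allowed_fails                                   # -> 3, because 0 is falsy
    fixed = _allowed_fails if _allowed_fails is not None else default_allowed_fails   # -> 0, as intended

    assert buggy == 3
    assert fixed == 0
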
@@ -9,7 +9,7 @@ import logging
 import uuid
 
 import pytest
-from prometheus_client import REGISTRY
+from prometheus_client import REGISTRY, CollectorRegistry
 
 import litellm
 from litellm import completion

@@ -79,3 +79,91 @@ async def test_async_prometheus_success_logging():
     assert metrics["litellm_deployment_success_responses_total"] == 1.0
     assert metrics["litellm_deployment_total_requests_total"] == 1.0
     assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
+
+
+@pytest.mark.asyncio()
+async def test_async_prometheus_success_logging_with_callbacks():
+    run_id = str(uuid.uuid4())
+    litellm.set_verbose = True
+
+    litellm.success_callback = []
+    litellm.failure_callback = []
+    litellm.callbacks = ["prometheus"]
+
+    # Get initial metric values
+    initial_metrics = {}
+    for metric in REGISTRY.collect():
+        for sample in metric.samples:
+            initial_metrics[sample.name] = sample.value
+
+    response = await litellm.acompletion(
+        model="claude-instant-1.2",
+        messages=[{"role": "user", "content": "what llm are u"}],
+        max_tokens=10,
+        mock_response="hi",
+        temperature=0.2,
+        metadata={
+            "id": run_id,
+            "tags": ["tag1", "tag2"],
+            "user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
+            "user_api_key_alias": "ishaans-prometheus-key",
+            "user_api_end_user_max_budget": None,
+            "litellm_api_version": "1.40.19",
+            "global_max_parallel_requests": None,
+            "user_api_key_user_id": "admin",
+            "user_api_key_org_id": None,
+            "user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
+            "user_api_key_team_alias": "testing-team",
+        },
+    )
+    print(response)
+    await asyncio.sleep(3)
+
+    # get prometheus logger
+    from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers
+
+    for callback in _in_memory_loggers:
+        if isinstance(callback, PrometheusLogger):
+            test_prometheus_logger = callback
+
+    print("done with success request")
+
+    print(
+        "vars of test_prometheus_logger",
+        vars(test_prometheus_logger.litellm_requests_metric),
+    )
+
+    # Get the updated metrics
+    updated_metrics = {}
+    for metric in REGISTRY.collect():
+        for sample in metric.samples:
+            updated_metrics[sample.name] = sample.value
+
+    print("metrics from prometheus", updated_metrics)
+
+    # Assert the delta for each metric
+    assert (
+        updated_metrics["litellm_requests_metric_total"]
+        - initial_metrics.get("litellm_requests_metric_total", 0)
+        == 1.0
+    )
+    assert (
+        updated_metrics["litellm_total_tokens_total"]
+        - initial_metrics.get("litellm_total_tokens_total", 0)
+        == 30.0
+    )
+    assert (
+        updated_metrics["litellm_deployment_success_responses_total"]
+        - initial_metrics.get("litellm_deployment_success_responses_total", 0)
+        == 1.0
+    )
+    assert (
+        updated_metrics["litellm_deployment_total_requests_total"]
+        - initial_metrics.get("litellm_deployment_total_requests_total", 0)
+        == 1.0
+    )
+    assert (
+        updated_metrics["litellm_deployment_latency_per_output_token_bucket"]
+        - initial_metrics.get("litellm_deployment_latency_per_output_token_bucket", 0)
+        == 1.0
+    )

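The new test snapshots the registry before and after the call and asserts on deltas, so it passes regardless of what earlier tests have already recorded. The repeated snapshot loop could be factored into a small helper; this is only a sketch, the test file above inlines the loops instead:

    from prometheus_client import REGISTRY

    def snapshot_metrics() -> dict[str, float]:
        # Flatten every sample in the default registry into {sample_name: value}.
        samples: dict[str, float] = {}
        for metric in REGISTRY.collect():
            for sample in metric.samples:
                samples[sample.name] = sample.value
        return samples

    before = snapshot_metrics()
    # ... perform the instrumented operation ...
    after = snapshot_metrics()
    delta = {name: after[name] - before.get(name, 0) for name in after}
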
@@ -1255,7 +1255,17 @@ async def test_add_callback_via_key(prisma_client):
 
 
 @pytest.mark.asyncio
-async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
+@pytest.mark.parametrize(
+    "callback_type, expected_success_callbacks, expected_failure_callbacks",
+    [
+        ("success", ["langfuse"], []),
+        ("failure", [], ["langfuse"]),
+        ("success_and_failure", ["langfuse"], ["langfuse"]),
+    ],
+)
+async def test_add_callback_via_key_litellm_pre_call_utils(
+    prisma_client, callback_type, expected_success_callbacks, expected_failure_callbacks
+):
     import json
 
     from fastapi import HTTPException, Request, Response

@@ -1312,7 +1322,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
                 "logging": [
                     {
                         "callback_name": "langfuse",
-                        "callback_type": "success",
+                        "callback_type": callback_type,
                         "callback_vars": {
                             "langfuse_public_key": "my-mock-public-key",
                             "langfuse_secret_key": "my-mock-secret-key",

@@ -1359,14 +1369,21 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
     }
 
     new_data = await add_litellm_data_to_request(**data)
     print("NEW DATA: {}".format(new_data))
 
-    assert "success_callback" in new_data
-    assert new_data["success_callback"] == ["langfuse"]
     assert "langfuse_public_key" in new_data
     assert new_data["langfuse_public_key"] == "my-mock-public-key"
     assert "langfuse_secret_key" in new_data
     assert new_data["langfuse_secret_key"] == "my-mock-secret-key"
 
+    if expected_success_callbacks:
+        assert "success_callback" in new_data
+        assert new_data["success_callback"] == expected_success_callbacks
+
+    if expected_failure_callbacks:
+        assert "failure_callback" in new_data
+        assert new_data["failure_callback"] == expected_failure_callbacks
+
 
 @pytest.mark.asyncio
 async def test_gemini_pass_through_endpoint():