Merge branch 'main' into litellm_tag_routing_fixes

2024-09-09 17:45:18 -07:00 · 2024-09-09 17:45:18 -07:00 · a6d3bd0ab7
commit a6d3bd0ab7
parent 05210fee6a 00f1d7b1ff
9 changed files with 146 additions and 13 deletions
--- a/deploy/charts/litellm-helm/templates/deployment.yaml
+++ b/deploy/charts/litellm-helm/templates/deployment.yaml
@ -104,7 +104,7 @@ spec:
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          env:
            - name: HOST
-              value: "::"
+              value: "{{ .Values.listen | default "0.0.0.0" }}"
            - name: PORT
              value: {{ .Values.service.port | quote}}
            {{- if .Values.db.deployStandalone }}
@ -138,8 +138,13 @@ spec:
            - name: DATABASE_NAME
              value: {{ .Values.db.database }}
            {{- end }}
            {{- if .Values.database.url }}
            - name: DATABASE_URL
              value: {{ .Values.database.url | quote }}
            {{- else }}
            - name: DATABASE_URL
              value: "postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME)"
            {{- end }}
            - name: PROXY_MASTER_KEY
              valueFrom:
                secretKeyRef:
--- a/docs/my-website/docs/proxy/team_logging.md
+++ b/docs/my-website/docs/proxy/team_logging.md
@ -208,8 +208,8 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
 -d '{
    "metadata": {
        "logging": [{
-            "callback_name": "langfuse", # 'otel', 'langfuse', 'lunary'
+            "callback_name": "langfuse", # "otel", "langfuse", "lunary"
-            "callback_type": "success" # set, if required by integration - future improvement, have logging tools work for success + failure by default 
+            "callback_type": "success", # "success", "failure", "success_and_failure"
            "callback_vars": {
                "langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY", # [RECOMMENDED] reference key in proxy environment
                "langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY", # [RECOMMENDED] reference key in proxy environment
--- a/litellm/integrations/opentelemetry.py
+++ b/litellm/integrations/opentelemetry.py
@ -641,7 +641,9 @@ class OpenTelemetry(CustomLogger):
            return BatchSpanProcessor(
                OTLPSpanExporterHTTP(
                    endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
-                )
+                ),
                max_queue_size=100,
                max_export_batch_size=100,
            )
        elif self.OTEL_EXPORTER == "otlp_grpc":
            verbose_logger.debug(
@ -651,7 +653,9 @@ class OpenTelemetry(CustomLogger):
            return BatchSpanProcessor(
                OTLPSpanExporterGRPC(
                    endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
-                )
+                ),
                max_queue_size=100,
                max_export_batch_size=100,
            )
        else:
            verbose_logger.debug(
--- a/litellm/proxy/common_utils/callback_utils.py
+++ b/litellm/proxy/common_utils/callback_utils.py
@ -228,6 +228,16 @@ def initialize_callbacks_on_proxy(
            litellm.callbacks.extend(imported_list)
        else:
            litellm.callbacks = imported_list  # type: ignore
        if "prometheus" in value:
            from litellm.proxy.proxy_server import app
            verbose_proxy_logger.debug("Starting Prometheus Metrics on /metrics")
            from prometheus_client import make_asgi_app
            # Add prometheus asgi middleware to route /metrics requests
            metrics_app = make_asgi_app()
            app.mount("/metrics", metrics_app)
    else:
        litellm.callbacks = [
            get_instance_fn(
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@ -86,10 +86,11 @@ def convert_key_logging_metadata_to_callback(
            team_callback_settings_obj.success_callback = []
        if team_callback_settings_obj.failure_callback is None:
            team_callback_settings_obj.failure_callback = []
        if data.callback_name not in team_callback_settings_obj.success_callback:
            team_callback_settings_obj.success_callback.append(data.callback_name)
-        if data.callback_name in team_callback_settings_obj.failure_callback:
+        if data.callback_name not in team_callback_settings_obj.failure_callback:
            team_callback_settings_obj.failure_callback.append(data.callback_name)
    for var, value in data.callback_vars.items():
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@ -14,9 +14,17 @@ model_list:
      id: "test-openai"
 router_settings:
 enable_tag_filtering: True # 👈 Key Change
 general_settings: 
 master_key: sk-1234 
 alerting: ["slack"]
 spend_report_frequency: "1d"
 litellm_settings:
  success_callback: ["prometheus"]
  failure_callback: ["prometheus"]
--- a/litellm/router.py
+++ b/litellm/router.py
@ -3690,7 +3690,7 @@ class Router:
            exception=original_exception,
        )
-        allowed_fails = _allowed_fails or self.allowed_fails
+        allowed_fails = _allowed_fails if _allowed_fails is not None else self.allowed_fails
        dt = get_utc_datetime()
        current_minute = dt.strftime("%H-%M")
--- a/litellm/tests/test_prometheus.py
+++ b/litellm/tests/test_prometheus.py
@ -9,7 +9,7 @@ import logging
 import uuid
 import pytest
-from prometheus_client import REGISTRY
+from prometheus_client import REGISTRY, CollectorRegistry
 import litellm
 from litellm import completion
@ -79,3 +79,91 @@ async def test_async_prometheus_success_logging():
    assert metrics["litellm_deployment_success_responses_total"] == 1.0
    assert metrics["litellm_deployment_total_requests_total"] == 1.0
    assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
@pytest.mark.asyncio()
 async def test_async_prometheus_success_logging_with_callbacks():
    """
    Check that Prometheus metrics advance when "prometheus" is registered
    through ``litellm.callbacks`` rather than ``litellm.success_callback``.

    The test snapshots every sample in the global ``REGISTRY`` before and
    after a single mocked ``litellm.acompletion`` call and asserts on the
    difference, instead of asserting on absolute metric values.
    """
    run_id = str(uuid.uuid4())
    litellm.set_verbose = True
    # Empty the success/failure lists so "prometheus" is configured only via
    # litellm.callbacks for this test.
    litellm.success_callback = []
    litellm.failure_callback = []
    litellm.callbacks = ["prometheus"]
    # Snapshot the metric values before the request, keyed by sample name.
    # NOTE(review): keying on sample.name alone means samples that share a name
    # but differ in labels overwrite each other; the last one collected wins.
    initial_metrics = {}
    for metric in REGISTRY.collect():
        for sample in metric.samples:
            initial_metrics[sample.name] = sample.value
    # mock_response is supplied, presumably so no real provider call is made
    # (confirm against litellm's mock_response handling).
    response = await litellm.acompletion(
        model="claude-instant-1.2",
        messages=[{"role": "user", "content": "what llm are u"}],
        max_tokens=10,
        mock_response="hi",
        temperature=0.2,
        metadata={
            "id": run_id,
            "tags": ["tag1", "tag2"],
            "user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
            "user_api_key_alias": "ishaans-prometheus-key",
            "user_api_end_user_max_budget": None,
            "litellm_api_version": "1.40.19",
            "global_max_parallel_requests": None,
            "user_api_key_user_id": "admin",
            "user_api_key_org_id": None,
            "user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
            "user_api_key_team_alias": "testing-team",
        },
    )
    print(response)
    # Fixed wait before reading metrics; presumably gives the asynchronous
    # logging callbacks time to run (TODO confirm).
    await asyncio.sleep(3)
    # Look up the PrometheusLogger instance among the in-memory loggers.
    # NOTE(review): test_prometheus_logger is only assigned when a
    # PrometheusLogger is found; if none is present, the print below raises
    # UnboundLocalError rather than a clear assertion failure.
    from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers
    for callback in _in_memory_loggers:
        if isinstance(callback, PrometheusLogger):
            test_prometheus_logger = callback
    print("done with success request")
    print(
        "vars of test_prometheus_logger",
        vars(test_prometheus_logger.litellm_requests_metric),
    )
    # Snapshot the metric values again after the request, same keying as above.
    updated_metrics = {}
    for metric in REGISTRY.collect():
        for sample in metric.samples:
            updated_metrics[sample.name] = sample.value
    print("metrics from prometheus", updated_metrics)
    # Assert the before/after delta for each metric. A metric missing from the
    # initial snapshot is treated as starting at 0.
    assert (
        updated_metrics["litellm_requests_metric_total"]
        - initial_metrics.get("litellm_requests_metric_total", 0)
        == 1.0
    )
    assert (
        updated_metrics["litellm_total_tokens_total"]
        - initial_metrics.get("litellm_total_tokens_total", 0)
        == 30.0
    )
    assert (
        updated_metrics["litellm_deployment_success_responses_total"]
        - initial_metrics.get("litellm_deployment_success_responses_total", 0)
        == 1.0
    )
    assert (
        updated_metrics["litellm_deployment_total_requests_total"]
        - initial_metrics.get("litellm_deployment_total_requests_total", 0)
        == 1.0
    )
    assert (
        updated_metrics["litellm_deployment_latency_per_output_token_bucket"]
        - initial_metrics.get("litellm_deployment_latency_per_output_token_bucket", 0)
        == 1.0
    )
--- a/litellm/tests/test_proxy_server.py
+++ b/litellm/tests/test_proxy_server.py
@ -1255,7 +1255,17 @@ async def test_add_callback_via_key(prisma_client):
@pytest.mark.asyncio
-async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
+@pytest.mark.parametrize(
    "callback_type, expected_success_callbacks, expected_failure_callbacks",
    [
        ("success", ["langfuse"], []),
        ("failure", [], ["langfuse"]),
        ("success_and_failure", ["langfuse"], ["langfuse"]),
    ],
 )
 async def test_add_callback_via_key_litellm_pre_call_utils(
    prisma_client, callback_type, expected_success_callbacks, expected_failure_callbacks
 ):
    import json
    from fastapi import HTTPException, Request, Response
@ -1312,7 +1322,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
                "logging": [
                    {
                        "callback_name": "langfuse",
-                        "callback_type": "success",
+                        "callback_type": callback_type,
                        "callback_vars": {
                            "langfuse_public_key": "my-mock-public-key",
                            "langfuse_secret_key": "my-mock-secret-key",
@ -1359,14 +1369,21 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
    }
    new_data = await add_litellm_data_to_request(**data)
    print("NEW DATA: {}".format(new_data))
    assert "success_callback" in new_data
    assert new_data["success_callback"] == ["langfuse"]
    assert "langfuse_public_key" in new_data
    assert new_data["langfuse_public_key"] == "my-mock-public-key"
    assert "langfuse_secret_key" in new_data
    assert new_data["langfuse_secret_key"] == "my-mock-secret-key"
    if expected_success_callbacks:
        assert "success_callback" in new_data
        assert new_data["success_callback"] == expected_success_callbacks
    if expected_failure_callbacks:
        assert "failure_callback" in new_data
        assert new_data["failure_callback"] == expected_failure_callbacks
@pytest.mark.asyncio
 async def test_gemini_pass_through_endpoint():