From dcb50243e7e94e51f0b03ff9e86be4e18c08e32c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Fri, 6 Sep 2024 17:12:01 -0700
Subject: [PATCH 01/10] fix otel max batch size

---
 litellm/integrations/opentelemetry.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py
index d35c7f304..c116f9adf 100644
--- a/litellm/integrations/opentelemetry.py
+++ b/litellm/integrations/opentelemetry.py
@@ -641,7 +641,8 @@ class OpenTelemetry(CustomLogger):
             return BatchSpanProcessor(
                 OTLPSpanExporterHTTP(
                     endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
-                )
+                ),
+                max_queue_size=100,
             )
         elif self.OTEL_EXPORTER == "otlp_grpc":
             verbose_logger.debug(
@@ -651,7 +652,8 @@ class OpenTelemetry(CustomLogger):
             return BatchSpanProcessor(
                 OTLPSpanExporterGRPC(
                     endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
-                )
+                ),
+                max_export_batch_size=100,
             )
         else:
             verbose_logger.debug(

From 356ad9b22b663a4cd236ea6404d2b97b87065d6a Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Fri, 6 Sep 2024 17:31:43 -0700
Subject: [PATCH 02/10] fix otel set max_queue_size, max_export_batch_size

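---
Note: the otlp_http hunk below adds a second `max_queue_size=100` keyword to
the same BatchSpanProcessor(...) call. Python rejects a repeated keyword
argument at compile time (SyntaxError), so opentelemetry.py cannot be imported
at this commit. PATCH 06/10 replaces the duplicate with
`max_export_batch_size=100`; apply the series at least through 06.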
 litellm/integrations/opentelemetry.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py
index c116f9adf..5539a3bfe 100644
--- a/litellm/integrations/opentelemetry.py
+++ b/litellm/integrations/opentelemetry.py
@@ -643,6 +643,7 @@ class OpenTelemetry(CustomLogger):
                     endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
                 ),
                 max_queue_size=100,
+                max_queue_size=100,
             )
         elif self.OTEL_EXPORTER == "otlp_grpc":
             verbose_logger.debug(
@@ -653,6 +654,7 @@ class OpenTelemetry(CustomLogger):
                 OTLPSpanExporterGRPC(
                     endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
                 ),
+                max_queue_size=100,
                 max_export_batch_size=100,
             )
         else:

From e253c100f418eb49d3faafdec5408ed68b6a9ed7 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 9 Sep 2024 08:26:03 -0700
Subject: [PATCH 03/10] support using "callbacks" for prometheus

---
 litellm/proxy/common_utils/callback_utils.py | 10 ++++
 litellm/proxy/proxy_config.yaml              |  3 ++
 litellm/tests/test_prometheus.py             | 57 ++++++++++++++++++++
 3 files changed, 70 insertions(+)

diff --git a/litellm/proxy/common_utils/callback_utils.py b/litellm/proxy/common_utils/callback_utils.py
index fa976690e..4ccf61e23 100644
--- a/litellm/proxy/common_utils/callback_utils.py
+++ b/litellm/proxy/common_utils/callback_utils.py
@@ -228,6 +228,16 @@ def initialize_callbacks_on_proxy(
             litellm.callbacks.extend(imported_list)
         else:
             litellm.callbacks = imported_list  # type: ignore
+
+        if "prometheus" in value:
+            from litellm.proxy.proxy_server import app
+
+            verbose_proxy_logger.debug("Starting Prometheus Metrics on /metrics")
+            from prometheus_client import make_asgi_app
+
+            # Add prometheus asgi middleware to route /metrics requests
+            metrics_app = make_asgi_app()
+            app.mount("/metrics", metrics_app)
     else:
         litellm.callbacks = [
             get_instance_fn(
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 71a356b80..b407b0d7a 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -15,6 +15,9 @@ guardrails:
       mode: "pre_call"  # pre_call, during_call, post_call
       output_parse_pii: True
 
+litellm_settings:
+  callbacks: ["prometheus"]
+
 general_settings: 
  master_key: sk-1234 
  alerting: ["slack"]
diff --git a/litellm/tests/test_prometheus.py b/litellm/tests/test_prometheus.py
index 7574beb9d..1232130cb 100644
--- a/litellm/tests/test_prometheus.py
+++ b/litellm/tests/test_prometheus.py
@@ -79,3 +79,60 @@ async def test_async_prometheus_success_logging():
     assert metrics["litellm_deployment_success_responses_total"] == 1.0
     assert metrics["litellm_deployment_total_requests_total"] == 1.0
     assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
+
+
+@pytest.mark.asyncio()
+async def test_async_prometheus_success_logging_with_callbacks():
+    run_id = str(uuid.uuid4())
+    litellm.set_verbose = True
+    litellm.callbacks = ["prometheus"]
+
+    response = await litellm.acompletion(
+        model="claude-instant-1.2",
+        messages=[{"role": "user", "content": "what llm are u"}],
+        max_tokens=10,
+        mock_response="hi",
+        temperature=0.2,
+        metadata={
+            "id": run_id,
+            "tags": ["tag1", "tag2"],
+            "user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
+            "user_api_key_alias": "ishaans-prometheus-key",
+            "user_api_end_user_max_budget": None,
+            "litellm_api_version": "1.40.19",
+            "global_max_parallel_requests": None,
+            "user_api_key_user_id": "admin",
+            "user_api_key_org_id": None,
+            "user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
+            "user_api_key_team_alias": "testing-team",
+        },
+    )
+    print(response)
+    await asyncio.sleep(3)
+
+    # get prometheus logger
+    from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers
+
+    for callback in _in_memory_loggers:
+        if isinstance(callback, PrometheusLogger):
+            test_prometheus_logger = callback
+
+    print("done with success request")
+
+    print(
+        "vars of test_prometheus_logger",
+        vars(test_prometheus_logger.litellm_requests_metric),
+    )
+
+    # Get the metrics
+    metrics = {}
+    for metric in REGISTRY.collect():
+        for sample in metric.samples:
+            metrics[sample.name] = sample.value
+
+    print("metrics from prometheus", metrics)
+    assert metrics["litellm_requests_metric_total"] == 1.0
+    assert metrics["litellm_total_tokens_total"] == 30.0
+    assert metrics["litellm_deployment_success_responses_total"] == 1.0
+    assert metrics["litellm_deployment_total_requests_total"] == 1.0
+    assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0

From 8a3ac6018746fc9ea39407d02d09d6f846b1d204 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 9 Sep 2024 11:54:11 -0700
Subject: [PATCH 04/10] fix
 test_async_prometheus_success_logging_with_callbacks

---
 litellm/tests/test_prometheus.py | 51 +++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 10 deletions(-)

diff --git a/litellm/tests/test_prometheus.py b/litellm/tests/test_prometheus.py
index 1232130cb..a7f9ef388 100644
--- a/litellm/tests/test_prometheus.py
+++ b/litellm/tests/test_prometheus.py
@@ -9,7 +9,7 @@ import logging
 import uuid
 
 import pytest
-from prometheus_client import REGISTRY
+from prometheus_client import REGISTRY, CollectorRegistry
 
 import litellm
 from litellm import completion
@@ -85,8 +85,17 @@ async def test_async_prometheus_success_logging():
 async def test_async_prometheus_success_logging_with_callbacks():
     run_id = str(uuid.uuid4())
     litellm.set_verbose = True
+
+    litellm.success_callback = []
+    litellm.failure_callback = []
     litellm.callbacks = ["prometheus"]
 
+    # Get initial metric values
+    initial_metrics = {}
+    for metric in REGISTRY.collect():
+        for sample in metric.samples:
+            initial_metrics[sample.name] = sample.value
+
     response = await litellm.acompletion(
         model="claude-instant-1.2",
         messages=[{"role": "user", "content": "what llm are u"}],
@@ -124,15 +133,37 @@ async def test_async_prometheus_success_logging_with_callbacks():
         vars(test_prometheus_logger.litellm_requests_metric),
     )
 
-    # Get the metrics
-    metrics = {}
+    # Get the updated metrics
+    updated_metrics = {}
     for metric in REGISTRY.collect():
         for sample in metric.samples:
-            metrics[sample.name] = sample.value
+            updated_metrics[sample.name] = sample.value
 
-    print("metrics from prometheus", metrics)
-    assert metrics["litellm_requests_metric_total"] == 1.0
-    assert metrics["litellm_total_tokens_total"] == 30.0
-    assert metrics["litellm_deployment_success_responses_total"] == 1.0
-    assert metrics["litellm_deployment_total_requests_total"] == 1.0
-    assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
+    print("metrics from prometheus", updated_metrics)
+
+    # Assert the delta for each metric
+    assert (
+        updated_metrics["litellm_requests_metric_total"]
+        - initial_metrics.get("litellm_requests_metric_total", 0)
+        == 1.0
+    )
+    assert (
+        updated_metrics["litellm_total_tokens_total"]
+        - initial_metrics.get("litellm_total_tokens_total", 0)
+        == 30.0
+    )
+    assert (
+        updated_metrics["litellm_deployment_success_responses_total"]
+        - initial_metrics.get("litellm_deployment_success_responses_total", 0)
+        == 1.0
+    )
+    assert (
+        updated_metrics["litellm_deployment_total_requests_total"]
+        - initial_metrics.get("litellm_deployment_total_requests_total", 0)
+        == 1.0
+    )
+    assert (
+        updated_metrics["litellm_deployment_latency_per_output_token_bucket"]
+        - initial_metrics.get("litellm_deployment_latency_per_output_token_bucket", 0)
+        == 1.0
+    )

From b374990c794cb62fc510a15efd66e31f45f92b4d Mon Sep 17 00:00:00 2001
From: Krish Dholakia <krrishdholakia@gmail.com>
Date: Mon, 9 Sep 2024 14:17:44 -0700
Subject: [PATCH 05/10] build(deployment.yaml): Fix port + allow setting
 database url in helm chart (#5587)

---
 deploy/charts/litellm-helm/templates/deployment.yaml | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/deploy/charts/litellm-helm/templates/deployment.yaml b/deploy/charts/litellm-helm/templates/deployment.yaml
index 162cb8286..6d63a5110 100644
--- a/deploy/charts/litellm-helm/templates/deployment.yaml
+++ b/deploy/charts/litellm-helm/templates/deployment.yaml
@@ -104,7 +104,7 @@ spec:
           imagePullPolicy: {{ .Values.image.pullPolicy }}
           env:
             - name: HOST
-              value: "::"
+              value: "{{ .Values.listen | default "0.0.0.0" }}"
             - name: PORT
               value: {{ .Values.service.port | quote}}
             {{- if .Values.db.deployStandalone }}
@@ -138,8 +138,13 @@ spec:
             - name: DATABASE_NAME
               value: {{ .Values.db.database }}
             {{- end }}
+            {{- if .Values.database.url }}
+            - name: DATABASE_URL
+              value: {{ .Values.database.url | quote }}
+            {{- else }}
             - name: DATABASE_URL
               value: "postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME)"
+            {{- end }}
             - name: PROXY_MASTER_KEY
               valueFrom:
                 secretKeyRef:
@@ -231,4 +236,4 @@ spec:
       {{- with .Values.tolerations }}
       tolerations:
         {{- toYaml . | nindent 8 }}
-      {{- end }}
+      {{- end }}
\ No newline at end of file

From f742d6162f2a082b921a615a477b9f6161255493 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 9 Sep 2024 16:18:55 -0700
Subject: [PATCH 06/10] fix otel defaults

---
 litellm/integrations/opentelemetry.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py
index 5539a3bfe..b76fc7490 100644
--- a/litellm/integrations/opentelemetry.py
+++ b/litellm/integrations/opentelemetry.py
@@ -643,7 +643,7 @@ class OpenTelemetry(CustomLogger):
                     endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
                 ),
                 max_queue_size=100,
-                max_queue_size=100,
+                max_export_batch_size=100,
             )
         elif self.OTEL_EXPORTER == "otlp_grpc":
             verbose_logger.debug(

From bbdcc75c601b4a15f78ce39d0b3933754e52d803 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 9 Sep 2024 16:33:06 -0700
Subject: [PATCH 07/10] fix log failures for key based logging

---
 litellm/proxy/litellm_pre_call_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index d41aae50f..890c576c9 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -86,10 +86,11 @@ def convert_key_logging_metadata_to_callback(
             team_callback_settings_obj.success_callback = []
         if team_callback_settings_obj.failure_callback is None:
             team_callback_settings_obj.failure_callback = []
+
         if data.callback_name not in team_callback_settings_obj.success_callback:
             team_callback_settings_obj.success_callback.append(data.callback_name)
 
-        if data.callback_name in team_callback_settings_obj.failure_callback:
+        if data.callback_name not in team_callback_settings_obj.failure_callback:
             team_callback_settings_obj.failure_callback.append(data.callback_name)
 
     for var, value in data.callback_vars.items():

From da30da9a971c2e1f3bb5415e227272624e69624b Mon Sep 17 00:00:00 2001
From: Elad Segal <eladsegal@users.noreply.github.com>
Date: Tue, 10 Sep 2024 02:35:12 +0300
Subject: [PATCH 08/10] Properly use `allowed_fails_policy` when it has fields
 with a value of 0 (#5604)

---
 litellm/router.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/router.py b/litellm/router.py
index bcd0b6221..e1cb108cc 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -3690,7 +3690,7 @@ class Router:
             exception=original_exception,
         )
 
-        allowed_fails = _allowed_fails or self.allowed_fails
+        allowed_fails = _allowed_fails if _allowed_fails is not None else self.allowed_fails
 
         dt = get_utc_datetime()
         current_minute = dt.strftime("%H-%M")

From 57ebe4649eeec5ce8348dc383f8ba31094415546 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 9 Sep 2024 16:44:37 -0700
Subject: [PATCH 09/10] add test for using success and failure

---
 litellm/tests/test_proxy_server.py | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py
index fb1025ab2..102c126d1 100644
--- a/litellm/tests/test_proxy_server.py
+++ b/litellm/tests/test_proxy_server.py
@@ -1255,7 +1255,17 @@ async def test_add_callback_via_key(prisma_client):
 
 
 @pytest.mark.asyncio
-async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
+@pytest.mark.parametrize(
+    "callback_type, expected_success_callbacks, expected_failure_callbacks",
+    [
+        ("success", ["langfuse"], []),
+        ("failure", [], ["langfuse"]),
+        ("success_and_failure", ["langfuse"], ["langfuse"]),
+    ],
+)
+async def test_add_callback_via_key_litellm_pre_call_utils(
+    prisma_client, callback_type, expected_success_callbacks, expected_failure_callbacks
+):
     import json
 
     from fastapi import HTTPException, Request, Response
@@ -1312,7 +1322,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
                 "logging": [
                     {
                         "callback_name": "langfuse",
-                        "callback_type": "success",
+                        "callback_type": callback_type,
                         "callback_vars": {
                             "langfuse_public_key": "my-mock-public-key",
                             "langfuse_secret_key": "my-mock-secret-key",
@@ -1359,14 +1369,21 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
     }
 
     new_data = await add_litellm_data_to_request(**data)
+    print("NEW DATA: {}".format(new_data))
 
-    assert "success_callback" in new_data
-    assert new_data["success_callback"] == ["langfuse"]
     assert "langfuse_public_key" in new_data
     assert new_data["langfuse_public_key"] == "my-mock-public-key"
     assert "langfuse_secret_key" in new_data
     assert new_data["langfuse_secret_key"] == "my-mock-secret-key"
 
+    if expected_success_callbacks:
+        assert "success_callback" in new_data
+        assert new_data["success_callback"] == expected_success_callbacks
+
+    if expected_failure_callbacks:
+        assert "failure_callback" in new_data
+        assert new_data["failure_callback"] == expected_failure_callbacks
+
 
 @pytest.mark.asyncio
 async def test_gemini_pass_through_endpoint():

From 949af7be2ec114b9d9bae792f504ffc68ceb8417 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 9 Sep 2024 16:49:26 -0700
Subject: [PATCH 10/10] fix team based logging doc

---
 docs/my-website/docs/proxy/team_logging.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/my-website/docs/proxy/team_logging.md b/docs/my-website/docs/proxy/team_logging.md
index c593f23bf..fb177da76 100644
--- a/docs/my-website/docs/proxy/team_logging.md
+++ b/docs/my-website/docs/proxy/team_logging.md
@@ -208,8 +208,8 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
 -d '{
     "metadata": {
         "logging": [{
-            "callback_name": "langfuse", # 'otel', 'langfuse', 'lunary'
-            "callback_type": "success" # set, if required by integration - future improvement, have logging tools work for success + failure by default 
+            "callback_name": "langfuse", # "otel", "langfuse", "lunary"
+            "callback_type": "success", # "success", "failure", "success_and_failure"
             "callback_vars": {
                 "langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY", # [RECOMMENDED] reference key in proxy environment
                 "langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY", # [RECOMMENDED] reference key in proxy environment