From dcb50243e7e94e51f0b03ff9e86be4e18c08e32c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 6 Sep 2024 17:12:01 -0700 Subject: [PATCH 01/10] fix otel max batch size --- litellm/integrations/opentelemetry.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py index d35c7f304..c116f9adf 100644 --- a/litellm/integrations/opentelemetry.py +++ b/litellm/integrations/opentelemetry.py @@ -641,7 +641,8 @@ class OpenTelemetry(CustomLogger): return BatchSpanProcessor( OTLPSpanExporterHTTP( endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers - ) + ), + max_queue_size=100, ) elif self.OTEL_EXPORTER == "otlp_grpc": verbose_logger.debug( @@ -651,7 +652,8 @@ class OpenTelemetry(CustomLogger): return BatchSpanProcessor( OTLPSpanExporterGRPC( endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers - ) + ), + max_export_batch_size=100, ) else: verbose_logger.debug( From 356ad9b22b663a4cd236ea6404d2b97b87065d6a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 6 Sep 2024 17:31:43 -0700 Subject: [PATCH 02/10] fix otel set max_queue_size, max_queue_size --- litellm/integrations/opentelemetry.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py index c116f9adf..5539a3bfe 100644 --- a/litellm/integrations/opentelemetry.py +++ b/litellm/integrations/opentelemetry.py @@ -643,6 +643,7 @@ class OpenTelemetry(CustomLogger): endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers ), max_queue_size=100, + max_queue_size=100, ) elif self.OTEL_EXPORTER == "otlp_grpc": verbose_logger.debug( @@ -653,6 +654,7 @@ class OpenTelemetry(CustomLogger): OTLPSpanExporterGRPC( endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers ), + max_queue_size=100, max_export_batch_size=100, ) else: From e253c100f418eb49d3faafdec5408ed68b6a9ed7 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 9 Sep 2024 08:26:03 -0700 Subject: [PATCH 03/10] support using "callbacks" for prometheus --- litellm/proxy/common_utils/callback_utils.py | 10 ++++ litellm/proxy/proxy_config.yaml | 3 ++ litellm/tests/test_prometheus.py | 57 ++++++++++++++++++++ 3 files changed, 70 insertions(+) diff --git a/litellm/proxy/common_utils/callback_utils.py b/litellm/proxy/common_utils/callback_utils.py index fa976690e..4ccf61e23 100644 --- a/litellm/proxy/common_utils/callback_utils.py +++ b/litellm/proxy/common_utils/callback_utils.py @@ -228,6 +228,16 @@ def initialize_callbacks_on_proxy( litellm.callbacks.extend(imported_list) else: litellm.callbacks = imported_list # type: ignore + + if "prometheus" in value: + from litellm.proxy.proxy_server import app + + verbose_proxy_logger.debug("Starting Prometheus Metrics on /metrics") + from prometheus_client import make_asgi_app + + # Add prometheus asgi middleware to route /metrics requests + metrics_app = make_asgi_app() + app.mount("/metrics", metrics_app) else: litellm.callbacks = [ get_instance_fn( diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 71a356b80..b407b0d7a 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -15,6 +15,9 @@ guardrails: mode: "pre_call" # pre_call, during_call, post_call output_parse_pii: True +litellm_settings: + callbacks: ["prometheus"] + general_settings: master_key: sk-1234 alerting: ["slack"] diff --git a/litellm/tests/test_prometheus.py b/litellm/tests/test_prometheus.py index 7574beb9d..1232130cb 100644 --- a/litellm/tests/test_prometheus.py +++ b/litellm/tests/test_prometheus.py @@ -79,3 +79,60 @@ async def test_async_prometheus_success_logging(): assert metrics["litellm_deployment_success_responses_total"] == 1.0 assert metrics["litellm_deployment_total_requests_total"] == 1.0 assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0 + + +@pytest.mark.asyncio() +async def test_async_prometheus_success_logging_with_callbacks(): + run_id = str(uuid.uuid4()) + litellm.set_verbose = True + litellm.callbacks = ["prometheus"] + + response = await litellm.acompletion( + model="claude-instant-1.2", + messages=[{"role": "user", "content": "what llm are u"}], + max_tokens=10, + mock_response="hi", + temperature=0.2, + metadata={ + "id": run_id, + "tags": ["tag1", "tag2"], + "user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c", + "user_api_key_alias": "ishaans-prometheus-key", + "user_api_end_user_max_budget": None, + "litellm_api_version": "1.40.19", + "global_max_parallel_requests": None, + "user_api_key_user_id": "admin", + "user_api_key_org_id": None, + "user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709", + "user_api_key_team_alias": "testing-team", + }, + ) + print(response) + await asyncio.sleep(3) + + # get prometheus logger + from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers + + for callback in _in_memory_loggers: + if isinstance(callback, PrometheusLogger): + test_prometheus_logger = callback + + print("done with success request") + + print( + "vars of test_prometheus_logger", + vars(test_prometheus_logger.litellm_requests_metric), + ) + + # Get the metrics + metrics = {} + for metric in REGISTRY.collect(): + for sample in metric.samples: + metrics[sample.name] = sample.value + + print("metrics from prometheus", metrics) + assert metrics["litellm_requests_metric_total"] == 1.0 + assert metrics["litellm_total_tokens_total"] == 30.0 + assert metrics["litellm_deployment_success_responses_total"] == 1.0 + assert metrics["litellm_deployment_total_requests_total"] == 1.0 + assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0 From 8a3ac6018746fc9ea39407d02d09d6f846b1d204 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 9 Sep 2024 11:54:11 -0700 Subject: [PATCH 04/10] fix test_async_prometheus_success_logging_with_callbacks --- litellm/tests/test_prometheus.py | 51 +++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/litellm/tests/test_prometheus.py b/litellm/tests/test_prometheus.py index 1232130cb..a7f9ef388 100644 --- a/litellm/tests/test_prometheus.py +++ b/litellm/tests/test_prometheus.py @@ -9,7 +9,7 @@ import logging import uuid import pytest -from prometheus_client import REGISTRY +from prometheus_client import REGISTRY, CollectorRegistry import litellm from litellm import completion @@ -85,8 +85,17 @@ async def test_async_prometheus_success_logging(): async def test_async_prometheus_success_logging_with_callbacks(): run_id = str(uuid.uuid4()) litellm.set_verbose = True + + litellm.success_callback = [] + litellm.failure_callback = [] litellm.callbacks = ["prometheus"] + # Get initial metric values + initial_metrics = {} + for metric in REGISTRY.collect(): + for sample in metric.samples: + initial_metrics[sample.name] = sample.value + response = await litellm.acompletion( model="claude-instant-1.2", messages=[{"role": "user", "content": "what llm are u"}], @@ -124,15 +133,37 @@ async def test_async_prometheus_success_logging_with_callbacks(): vars(test_prometheus_logger.litellm_requests_metric), ) - # Get the metrics - metrics = {} + # Get the updated metrics + updated_metrics = {} for metric in REGISTRY.collect(): for sample in metric.samples: - metrics[sample.name] = sample.value + updated_metrics[sample.name] = sample.value - print("metrics from prometheus", metrics) - assert metrics["litellm_requests_metric_total"] == 1.0 - assert metrics["litellm_total_tokens_total"] == 30.0 - assert metrics["litellm_deployment_success_responses_total"] == 1.0 - assert metrics["litellm_deployment_total_requests_total"] == 1.0 - assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0 + print("metrics from prometheus", updated_metrics) + + # Assert the delta for each metric + assert ( + updated_metrics["litellm_requests_metric_total"] + - initial_metrics.get("litellm_requests_metric_total", 0) + == 1.0 + ) + assert ( + updated_metrics["litellm_total_tokens_total"] + - initial_metrics.get("litellm_total_tokens_total", 0) + == 30.0 + ) + assert ( + updated_metrics["litellm_deployment_success_responses_total"] + - initial_metrics.get("litellm_deployment_success_responses_total", 0) + == 1.0 + ) + assert ( + updated_metrics["litellm_deployment_total_requests_total"] + - initial_metrics.get("litellm_deployment_total_requests_total", 0) + == 1.0 + ) + assert ( + updated_metrics["litellm_deployment_latency_per_output_token_bucket"] + - initial_metrics.get("litellm_deployment_latency_per_output_token_bucket", 0) + == 1.0 + ) From b374990c794cb62fc510a15efd66e31f45f92b4d Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Mon, 9 Sep 2024 14:17:44 -0700 Subject: [PATCH 05/10] build(deployment.yaml): Fix port + allow setting database url in helm chart (#5587) --- deploy/charts/litellm-helm/templates/deployment.yaml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/deploy/charts/litellm-helm/templates/deployment.yaml b/deploy/charts/litellm-helm/templates/deployment.yaml index 162cb8286..6d63a5110 100644 --- a/deploy/charts/litellm-helm/templates/deployment.yaml +++ b/deploy/charts/litellm-helm/templates/deployment.yaml @@ -104,7 +104,7 @@ spec: imagePullPolicy: {{ .Values.image.pullPolicy }} env: - name: HOST - value: "::" + value: "{{ .Values.listen | default "0.0.0.0" }}" - name: PORT value: {{ .Values.service.port | quote}} {{- if .Values.db.deployStandalone }} @@ -138,8 +138,13 @@ spec: - name: DATABASE_NAME value: {{ .Values.db.database }} {{- end }} + {{- if .Values.database.url }} + - name: DATABASE_URL + value: {{ .Values.database.url | quote }} + {{- else }} - name: DATABASE_URL value: "postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME)" + {{- end }} - name: PROXY_MASTER_KEY valueFrom: secretKeyRef: @@ -231,4 +236,4 @@ spec: {{- with .Values.tolerations }} tolerations: {{- toYaml . | nindent 8 }} - {{- end }} + {{- end }} \ No newline at end of file From f742d6162f2a082b921a615a477b9f6161255493 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 9 Sep 2024 16:18:55 -0700 Subject: [PATCH 06/10] fix otel defaults --- litellm/integrations/opentelemetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py index 5539a3bfe..b76fc7490 100644 --- a/litellm/integrations/opentelemetry.py +++ b/litellm/integrations/opentelemetry.py @@ -643,7 +643,7 @@ class OpenTelemetry(CustomLogger): endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers ), max_queue_size=100, - max_queue_size=100, + max_export_batch_size=100, ) elif self.OTEL_EXPORTER == "otlp_grpc": verbose_logger.debug( From bbdcc75c601b4a15f78ce39d0b3933754e52d803 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 9 Sep 2024 16:33:06 -0700 Subject: [PATCH 07/10] fix log failures for key based logging --- litellm/proxy/litellm_pre_call_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index d41aae50f..890c576c9 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -86,10 +86,11 @@ def convert_key_logging_metadata_to_callback( team_callback_settings_obj.success_callback = [] if team_callback_settings_obj.failure_callback is None: team_callback_settings_obj.failure_callback = [] + if data.callback_name not in team_callback_settings_obj.success_callback: team_callback_settings_obj.success_callback.append(data.callback_name) - if data.callback_name in team_callback_settings_obj.failure_callback: + if data.callback_name not in team_callback_settings_obj.failure_callback: team_callback_settings_obj.failure_callback.append(data.callback_name) for var, value in data.callback_vars.items(): From da30da9a971c2e1f3bb5415e227272624e69624b Mon Sep 17 00:00:00 2001 From: Elad Segal Date: Tue, 10 Sep 2024 02:35:12 +0300 Subject: [PATCH 08/10] Properly use `allowed_fails_policy` when it has fields with a value of 0 (#5604) --- litellm/router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/router.py b/litellm/router.py index bcd0b6221..e1cb108cc 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -3690,7 +3690,7 @@ class Router: exception=original_exception, ) - allowed_fails = _allowed_fails or self.allowed_fails + allowed_fails = _allowed_fails if _allowed_fails is not None else self.allowed_fails dt = get_utc_datetime() current_minute = dt.strftime("%H-%M") From 57ebe4649eeec5ce8348dc383f8ba31094415546 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 9 Sep 2024 16:44:37 -0700 Subject: [PATCH 09/10] add test for using success and failure --- litellm/tests/test_proxy_server.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py index fb1025ab2..102c126d1 100644 --- a/litellm/tests/test_proxy_server.py +++ b/litellm/tests/test_proxy_server.py @@ -1255,7 +1255,17 @@ async def test_add_callback_via_key(prisma_client): @pytest.mark.asyncio -async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client): +@pytest.mark.parametrize( + "callback_type, expected_success_callbacks, expected_failure_callbacks", + [ + ("success", ["langfuse"], []), + ("failure", [], ["langfuse"]), + ("success_and_failure", ["langfuse"], ["langfuse"]), + ], +) +async def test_add_callback_via_key_litellm_pre_call_utils( + prisma_client, callback_type, expected_success_callbacks, expected_failure_callbacks +): import json from fastapi import HTTPException, Request, Response @@ -1312,7 +1322,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client): "logging": [ { "callback_name": "langfuse", - "callback_type": "success", + "callback_type": callback_type, "callback_vars": { "langfuse_public_key": "my-mock-public-key", "langfuse_secret_key": "my-mock-secret-key", @@ -1359,14 +1369,21 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client): } new_data = await add_litellm_data_to_request(**data) + print("NEW DATA: {}".format(new_data)) - assert "success_callback" in new_data - assert new_data["success_callback"] == ["langfuse"] assert "langfuse_public_key" in new_data assert new_data["langfuse_public_key"] == "my-mock-public-key" assert "langfuse_secret_key" in new_data assert new_data["langfuse_secret_key"] == "my-mock-secret-key" + if expected_success_callbacks: + assert "success_callback" in new_data + assert new_data["success_callback"] == expected_success_callbacks + + if expected_failure_callbacks: + assert "failure_callback" in new_data + assert new_data["failure_callback"] == expected_failure_callbacks + @pytest.mark.asyncio async def test_gemini_pass_through_endpoint(): From 949af7be2ec114b9d9bae792f504ffc68ceb8417 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Mon, 9 Sep 2024 16:49:26 -0700 Subject: [PATCH 10/10] fix team based logging doc --- docs/my-website/docs/proxy/team_logging.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/my-website/docs/proxy/team_logging.md b/docs/my-website/docs/proxy/team_logging.md index c593f23bf..fb177da76 100644 --- a/docs/my-website/docs/proxy/team_logging.md +++ b/docs/my-website/docs/proxy/team_logging.md @@ -208,8 +208,8 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \ -d '{ "metadata": { "logging": [{ - "callback_name": "langfuse", # 'otel', 'langfuse', 'lunary' - "callback_type": "success" # set, if required by integration - future improvement, have logging tools work for success + failure by default + "callback_name": "langfuse", # "otel", "langfuse", "lunary" + "callback_type": "success", # "success", "failure", "success_and_failure" "callback_vars": { "langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY", # [RECOMMENDED] reference key in proxy environment "langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY", # [RECOMMENDED] reference key in proxy environment