Merge branch 'main' into litellm_tag_routing_fixes

Ishaan Jaff 2024-09-09 17:45:18 -07:00 committed by GitHub
commit a6d3bd0ab7
9 changed files with 146 additions and 13 deletions


@@ -104,7 +104,7 @@ spec:
imagePullPolicy: {{ .Values.image.pullPolicy }}
env:
- name: HOST
value: "::"
value: "{{ .Values.listen | default "0.0.0.0" }}"
- name: PORT
value: {{ .Values.service.port | quote}}
{{- if .Values.db.deployStandalone }}
@@ -138,8 +138,13 @@ spec:
- name: DATABASE_NAME
value: {{ .Values.db.database }}
{{- end }}
{{- if .Values.database.url }}
- name: DATABASE_URL
value: {{ .Values.database.url | quote }}
{{- else }}
- name: DATABASE_URL
value: "postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME)"
{{- end }}
- name: PROXY_MASTER_KEY
valueFrom:
secretKeyRef:
@@ -231,4 +236,4 @@ spec:
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
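For reference, a minimal values sketch that exercises both new template hooks; the `listen` and `database.url` keys come straight from the template above, while the surrounding values layout is assumed:

```yaml
# Hypothetical values.yaml overrides (structure assumed, key names from the template above)
listen: "0.0.0.0"            # rendered into the HOST env var; falls back to 0.0.0.0 when unset
database:
  url: "postgresql://llmproxy:changeme@db.example.internal:5432/litellm"  # takes precedence over the assembled DATABASE_URL
```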


@@ -208,8 +208,8 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
-d '{
"metadata": {
"logging": [{
"callback_name": "langfuse", # 'otel', 'langfuse', 'lunary'
"callback_type": "success" # set, if required by integration - future improvement, have logging tools work for success + failure by default
"callback_name": "langfuse", # "otel", "langfuse", "lunary"
"callback_type": "success", # "success", "failure", "success_and_failure"
"callback_vars": {
"langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY", # [RECOMMENDED] reference key in proxy environment
"langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY", # [RECOMMENDED] reference key in proxy environment


@@ -641,7 +641,9 @@ class OpenTelemetry(CustomLogger):
return BatchSpanProcessor(
OTLPSpanExporterHTTP(
endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
)
),
max_queue_size=100,
max_export_batch_size=100,
)
elif self.OTEL_EXPORTER == "otlp_grpc":
verbose_logger.debug(
@@ -651,7 +653,9 @@ class OpenTelemetry(CustomLogger):
return BatchSpanProcessor(
OTLPSpanExporterGRPC(
endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
)
),
max_queue_size=100,
max_export_batch_size=100,
)
else:
verbose_logger.debug(
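The change above caps the span backlog by passing explicit limits to the OpenTelemetry SDK's BatchSpanProcessor. A self-contained sketch of the equivalent setup; the endpoint and headers are placeholders, not values from this repo:

```python
# Sketch: BatchSpanProcessor with bounded queue/batch sizes, using only the public OTel SDK.
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter

provider = TracerProvider()
provider.add_span_processor(
    BatchSpanProcessor(
        OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces", headers={}),  # placeholder endpoint
        max_queue_size=100,          # bound the in-memory span backlog
        max_export_batch_size=100,   # export at most 100 spans per batch
    )
)
```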


@@ -228,6 +228,16 @@ def initialize_callbacks_on_proxy(
litellm.callbacks.extend(imported_list)
else:
litellm.callbacks = imported_list # type: ignore
if "prometheus" in value:
from litellm.proxy.proxy_server import app
verbose_proxy_logger.debug("Starting Prometheus Metrics on /metrics")
from prometheus_client import make_asgi_app
# Add prometheus asgi middleware to route /metrics requests
metrics_app = make_asgi_app()
app.mount("/metrics", metrics_app)
else:
litellm.callbacks = [
get_instance_fn(


@@ -86,10 +86,11 @@ def convert_key_logging_metadata_to_callback(
team_callback_settings_obj.success_callback = []
if team_callback_settings_obj.failure_callback is None:
team_callback_settings_obj.failure_callback = []
if data.callback_name not in team_callback_settings_obj.success_callback:
team_callback_settings_obj.success_callback.append(data.callback_name)
if data.callback_name in team_callback_settings_obj.failure_callback:
if data.callback_name not in team_callback_settings_obj.failure_callback:
team_callback_settings_obj.failure_callback.append(data.callback_name)
for var, value in data.callback_vars.items():
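The one-character fix matters: the old `in` check only appended the failure callback when it was already present, so it was never registered. A hedged sketch of the corrected append-if-missing pattern, ignoring the callback_type dispatch that surrounds it in the real function:

```python
# Sketch: append a callback once per list; the old code used `in` on the failure
# branch, so that append never ran. Names are illustrative, not from the repo.
def register_callback(name: str, success: list, failure: list) -> None:
    if name not in success:
        success.append(name)
    if name not in failure:   # previously `in`, which made this branch a no-op
        failure.append(name)

success_cbs: list = []
failure_cbs: list = []
register_callback("langfuse", success_cbs, failure_cbs)
register_callback("langfuse", success_cbs, failure_cbs)  # idempotent on repeat calls
assert success_cbs == ["langfuse"] and failure_cbs == ["langfuse"]
```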


@@ -14,9 +14,17 @@ model_list:
id: "test-openai"
router_settings:
enable_tag_filtering: True # 👈 Key Change
general_settings:
master_key: sk-1234
alerting: ["slack"]
spend_report_frequency: "1d"
litellm_settings:
success_callback: ["prometheus"]
failure_callback: ["prometheus"]
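With `enable_tag_filtering` on, requests can carry tags for the router to match on. A hedged sketch of one way to send them, reusing the `metadata.tags` shape from the Prometheus test later in this diff; the model name, base URL, and key are placeholders:

```python
# Sketch: attach routing tags to a request through the proxy via the OpenAI SDK.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
response = client.chat.completions.create(
    model="gpt-3.5-turbo",  # placeholder; must match a deployment in model_list
    messages=[{"role": "user", "content": "hello"}],
    extra_body={"metadata": {"tags": ["tag1", "tag2"]}},  # tags the router filters on
)
print(response)
```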


@@ -3690,7 +3690,7 @@ class Router:
exception=original_exception,
)
allowed_fails = _allowed_fails or self.allowed_fails
allowed_fails = _allowed_fails if _allowed_fails is not None else self.allowed_fails
dt = get_utc_datetime()
current_minute = dt.strftime("%H-%M")
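The rewritten assignment fixes a falsy-value bug: an explicit `allowed_fails=0` was discarded by `or` and replaced with the router default. A toy illustration (values are made up):

```python
# Sketch: why `or` is the wrong fallback when 0 is a legitimate setting.
_allowed_fails = 0       # caller explicitly allows zero failures
default_allowed = 3      # router-level default

buggy = _allowed_fails or default_allowed                                   # -> 3, silently overrides the 0
fixed = _allowed_fails if _allowed_fails is not None else default_allowed  # -> 0, keeps the explicit value

assert buggy == 3 and fixed == 0
```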


@@ -9,7 +9,7 @@ import logging
import uuid
import pytest
from prometheus_client import REGISTRY
from prometheus_client import REGISTRY, CollectorRegistry
import litellm
from litellm import completion
@@ -79,3 +79,91 @@ async def test_async_prometheus_success_logging():
assert metrics["litellm_deployment_success_responses_total"] == 1.0
assert metrics["litellm_deployment_total_requests_total"] == 1.0
assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
@pytest.mark.asyncio()
async def test_async_prometheus_success_logging_with_callbacks():
run_id = str(uuid.uuid4())
litellm.set_verbose = True
litellm.success_callback = []
litellm.failure_callback = []
litellm.callbacks = ["prometheus"]
# Get initial metric values
initial_metrics = {}
for metric in REGISTRY.collect():
for sample in metric.samples:
initial_metrics[sample.name] = sample.value
response = await litellm.acompletion(
model="claude-instant-1.2",
messages=[{"role": "user", "content": "what llm are u"}],
max_tokens=10,
mock_response="hi",
temperature=0.2,
metadata={
"id": run_id,
"tags": ["tag1", "tag2"],
"user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
"user_api_key_alias": "ishaans-prometheus-key",
"user_api_end_user_max_budget": None,
"litellm_api_version": "1.40.19",
"global_max_parallel_requests": None,
"user_api_key_user_id": "admin",
"user_api_key_org_id": None,
"user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
"user_api_key_team_alias": "testing-team",
},
)
print(response)
await asyncio.sleep(3)
# get prometheus logger
from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers
for callback in _in_memory_loggers:
if isinstance(callback, PrometheusLogger):
test_prometheus_logger = callback
print("done with success request")
print(
"vars of test_prometheus_logger",
vars(test_prometheus_logger.litellm_requests_metric),
)
# Get the updated metrics
updated_metrics = {}
for metric in REGISTRY.collect():
for sample in metric.samples:
updated_metrics[sample.name] = sample.value
print("metrics from prometheus", updated_metrics)
# Assert the delta for each metric
assert (
updated_metrics["litellm_requests_metric_total"]
- initial_metrics.get("litellm_requests_metric_total", 0)
== 1.0
)
assert (
updated_metrics["litellm_total_tokens_total"]
- initial_metrics.get("litellm_total_tokens_total", 0)
== 30.0
)
assert (
updated_metrics["litellm_deployment_success_responses_total"]
- initial_metrics.get("litellm_deployment_success_responses_total", 0)
== 1.0
)
assert (
updated_metrics["litellm_deployment_total_requests_total"]
- initial_metrics.get("litellm_deployment_total_requests_total", 0)
== 1.0
)
assert (
updated_metrics["litellm_deployment_latency_per_output_token_bucket"]
- initial_metrics.get("litellm_deployment_latency_per_output_token_bucket", 0)
== 1.0
)
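The test snapshots every sample in the default REGISTRY before and after the call and asserts on the deltas. A hedged alternative for reading a single series is `CollectorRegistry.get_sample_value`; the metric name below is taken from the test above, but real label sets depend on the PrometheusLogger implementation:

```python
# Sketch: read one counter directly instead of snapshotting the whole registry.
from prometheus_client import REGISTRY

def counter_value(name, labels=None):
    value = REGISTRY.get_sample_value(name, labels or {})
    return value if value is not None else 0.0  # missing series read as 0

before = counter_value("litellm_requests_metric_total")
# ... issue the completion under test ...
after = counter_value("litellm_requests_metric_total")
assert after - before >= 0.0
```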


@@ -1255,7 +1255,17 @@ async def test_add_callback_via_key(prisma_client):
@pytest.mark.asyncio
async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
@pytest.mark.parametrize(
"callback_type, expected_success_callbacks, expected_failure_callbacks",
[
("success", ["langfuse"], []),
("failure", [], ["langfuse"]),
("success_and_failure", ["langfuse"], ["langfuse"]),
],
)
async def test_add_callback_via_key_litellm_pre_call_utils(
prisma_client, callback_type, expected_success_callbacks, expected_failure_callbacks
):
import json
from fastapi import HTTPException, Request, Response
@@ -1312,7 +1322,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
"logging": [
{
"callback_name": "langfuse",
"callback_type": "success",
"callback_type": callback_type,
"callback_vars": {
"langfuse_public_key": "my-mock-public-key",
"langfuse_secret_key": "my-mock-secret-key",
@@ -1359,14 +1369,21 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
}
new_data = await add_litellm_data_to_request(**data)
print("NEW DATA: {}".format(new_data))
assert "success_callback" in new_data
assert new_data["success_callback"] == ["langfuse"]
assert "langfuse_public_key" in new_data
assert new_data["langfuse_public_key"] == "my-mock-public-key"
assert "langfuse_secret_key" in new_data
assert new_data["langfuse_secret_key"] == "my-mock-secret-key"
if expected_success_callbacks:
assert "success_callback" in new_data
assert new_data["success_callback"] == expected_success_callbacks
if expected_failure_callbacks:
assert "failure_callback" in new_data
assert new_data["failure_callback"] == expected_failure_callbacks
@pytest.mark.asyncio
async def test_gemini_pass_through_endpoint():