forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_tag_routing_fixes
This commit is contained in:
commit
a6d3bd0ab7
9 changed files with 146 additions and 13 deletions
|
@ -104,7 +104,7 @@ spec:
|
||||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||||
env:
|
env:
|
||||||
- name: HOST
|
- name: HOST
|
||||||
value: "::"
|
value: "{{ .Values.listen | default "0.0.0.0" }}"
|
||||||
- name: PORT
|
- name: PORT
|
||||||
value: {{ .Values.service.port | quote}}
|
value: {{ .Values.service.port | quote}}
|
||||||
{{- if .Values.db.deployStandalone }}
|
{{- if .Values.db.deployStandalone }}
|
||||||
|
@ -138,8 +138,13 @@ spec:
|
||||||
- name: DATABASE_NAME
|
- name: DATABASE_NAME
|
||||||
value: {{ .Values.db.database }}
|
value: {{ .Values.db.database }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
{{- if .Values.database.url }}
|
||||||
|
- name: DATABASE_URL
|
||||||
|
value: {{ .Values.database.url | quote }}
|
||||||
|
{{- else }}
|
||||||
- name: DATABASE_URL
|
- name: DATABASE_URL
|
||||||
value: "postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME)"
|
value: "postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME)"
|
||||||
|
{{- end }}
|
||||||
- name: PROXY_MASTER_KEY
|
- name: PROXY_MASTER_KEY
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
|
|
|
@ -208,8 +208,8 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
|
||||||
-d '{
|
-d '{
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"logging": [{
|
"logging": [{
|
||||||
"callback_name": "langfuse", # 'otel', 'langfuse', 'lunary'
|
"callback_name": "langfuse", # "otel", "langfuse", "lunary"
|
||||||
"callback_type": "success" # set, if required by integration - future improvement, have logging tools work for success + failure by default
|
"callback_type": "success", # "success", "failure", "success_and_failure"
|
||||||
"callback_vars": {
|
"callback_vars": {
|
||||||
"langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY", # [RECOMMENDED] reference key in proxy environment
|
"langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY", # [RECOMMENDED] reference key in proxy environment
|
||||||
"langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY", # [RECOMMENDED] reference key in proxy environment
|
"langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY", # [RECOMMENDED] reference key in proxy environment
|
||||||
|
|
|
@ -641,7 +641,9 @@ class OpenTelemetry(CustomLogger):
|
||||||
return BatchSpanProcessor(
|
return BatchSpanProcessor(
|
||||||
OTLPSpanExporterHTTP(
|
OTLPSpanExporterHTTP(
|
||||||
endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
|
endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
|
||||||
)
|
),
|
||||||
|
max_queue_size=100,
|
||||||
|
max_export_batch_size=100,
|
||||||
)
|
)
|
||||||
elif self.OTEL_EXPORTER == "otlp_grpc":
|
elif self.OTEL_EXPORTER == "otlp_grpc":
|
||||||
verbose_logger.debug(
|
verbose_logger.debug(
|
||||||
|
@ -651,7 +653,9 @@ class OpenTelemetry(CustomLogger):
|
||||||
return BatchSpanProcessor(
|
return BatchSpanProcessor(
|
||||||
OTLPSpanExporterGRPC(
|
OTLPSpanExporterGRPC(
|
||||||
endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
|
endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
|
||||||
)
|
),
|
||||||
|
max_queue_size=100,
|
||||||
|
max_export_batch_size=100,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
verbose_logger.debug(
|
verbose_logger.debug(
|
||||||
|
|
|
@ -228,6 +228,16 @@ def initialize_callbacks_on_proxy(
|
||||||
litellm.callbacks.extend(imported_list)
|
litellm.callbacks.extend(imported_list)
|
||||||
else:
|
else:
|
||||||
litellm.callbacks = imported_list # type: ignore
|
litellm.callbacks = imported_list # type: ignore
|
||||||
|
|
||||||
|
if "prometheus" in value:
|
||||||
|
from litellm.proxy.proxy_server import app
|
||||||
|
|
||||||
|
verbose_proxy_logger.debug("Starting Prometheus Metrics on /metrics")
|
||||||
|
from prometheus_client import make_asgi_app
|
||||||
|
|
||||||
|
# Add prometheus asgi middleware to route /metrics requests
|
||||||
|
metrics_app = make_asgi_app()
|
||||||
|
app.mount("/metrics", metrics_app)
|
||||||
else:
|
else:
|
||||||
litellm.callbacks = [
|
litellm.callbacks = [
|
||||||
get_instance_fn(
|
get_instance_fn(
|
||||||
|
|
|
@ -86,10 +86,11 @@ def convert_key_logging_metadata_to_callback(
|
||||||
team_callback_settings_obj.success_callback = []
|
team_callback_settings_obj.success_callback = []
|
||||||
if team_callback_settings_obj.failure_callback is None:
|
if team_callback_settings_obj.failure_callback is None:
|
||||||
team_callback_settings_obj.failure_callback = []
|
team_callback_settings_obj.failure_callback = []
|
||||||
|
|
||||||
if data.callback_name not in team_callback_settings_obj.success_callback:
|
if data.callback_name not in team_callback_settings_obj.success_callback:
|
||||||
team_callback_settings_obj.success_callback.append(data.callback_name)
|
team_callback_settings_obj.success_callback.append(data.callback_name)
|
||||||
|
|
||||||
if data.callback_name in team_callback_settings_obj.failure_callback:
|
if data.callback_name not in team_callback_settings_obj.failure_callback:
|
||||||
team_callback_settings_obj.failure_callback.append(data.callback_name)
|
team_callback_settings_obj.failure_callback.append(data.callback_name)
|
||||||
|
|
||||||
for var, value in data.callback_vars.items():
|
for var, value in data.callback_vars.items():
|
||||||
|
|
|
@ -14,9 +14,17 @@ model_list:
|
||||||
id: "test-openai"
|
id: "test-openai"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
router_settings:
|
router_settings:
|
||||||
enable_tag_filtering: True # 👈 Key Change
|
enable_tag_filtering: True # 👈 Key Change
|
||||||
|
|
||||||
|
|
||||||
|
general_settings:
|
||||||
|
master_key: sk-1234
|
||||||
|
alerting: ["slack"]
|
||||||
|
spend_report_frequency: "1d"
|
||||||
|
|
||||||
|
|
||||||
litellm_settings:
|
litellm_settings:
|
||||||
success_callback: ["prometheus"]
|
success_callback: ["prometheus"]
|
||||||
failure_callback: ["prometheus"]
|
failure_callback: ["prometheus"]
|
|
@ -3690,7 +3690,7 @@ class Router:
|
||||||
exception=original_exception,
|
exception=original_exception,
|
||||||
)
|
)
|
||||||
|
|
||||||
allowed_fails = _allowed_fails or self.allowed_fails
|
allowed_fails = _allowed_fails if _allowed_fails is not None else self.allowed_fails
|
||||||
|
|
||||||
dt = get_utc_datetime()
|
dt = get_utc_datetime()
|
||||||
current_minute = dt.strftime("%H-%M")
|
current_minute = dt.strftime("%H-%M")
|
||||||
|
|
|
@ -9,7 +9,7 @@ import logging
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from prometheus_client import REGISTRY
|
from prometheus_client import REGISTRY, CollectorRegistry
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from litellm import completion
|
from litellm import completion
|
||||||
|
@ -79,3 +79,91 @@ async def test_async_prometheus_success_logging():
|
||||||
assert metrics["litellm_deployment_success_responses_total"] == 1.0
|
assert metrics["litellm_deployment_success_responses_total"] == 1.0
|
||||||
assert metrics["litellm_deployment_total_requests_total"] == 1.0
|
assert metrics["litellm_deployment_total_requests_total"] == 1.0
|
||||||
assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
|
assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio()
|
||||||
|
async def test_async_prometheus_success_logging_with_callbacks():
|
||||||
|
run_id = str(uuid.uuid4())
|
||||||
|
litellm.set_verbose = True
|
||||||
|
|
||||||
|
litellm.success_callback = []
|
||||||
|
litellm.failure_callback = []
|
||||||
|
litellm.callbacks = ["prometheus"]
|
||||||
|
|
||||||
|
# Get initial metric values
|
||||||
|
initial_metrics = {}
|
||||||
|
for metric in REGISTRY.collect():
|
||||||
|
for sample in metric.samples:
|
||||||
|
initial_metrics[sample.name] = sample.value
|
||||||
|
|
||||||
|
response = await litellm.acompletion(
|
||||||
|
model="claude-instant-1.2",
|
||||||
|
messages=[{"role": "user", "content": "what llm are u"}],
|
||||||
|
max_tokens=10,
|
||||||
|
mock_response="hi",
|
||||||
|
temperature=0.2,
|
||||||
|
metadata={
|
||||||
|
"id": run_id,
|
||||||
|
"tags": ["tag1", "tag2"],
|
||||||
|
"user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
|
||||||
|
"user_api_key_alias": "ishaans-prometheus-key",
|
||||||
|
"user_api_end_user_max_budget": None,
|
||||||
|
"litellm_api_version": "1.40.19",
|
||||||
|
"global_max_parallel_requests": None,
|
||||||
|
"user_api_key_user_id": "admin",
|
||||||
|
"user_api_key_org_id": None,
|
||||||
|
"user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
|
||||||
|
"user_api_key_team_alias": "testing-team",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
await asyncio.sleep(3)
|
||||||
|
|
||||||
|
# get prometheus logger
|
||||||
|
from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers
|
||||||
|
|
||||||
|
for callback in _in_memory_loggers:
|
||||||
|
if isinstance(callback, PrometheusLogger):
|
||||||
|
test_prometheus_logger = callback
|
||||||
|
|
||||||
|
print("done with success request")
|
||||||
|
|
||||||
|
print(
|
||||||
|
"vars of test_prometheus_logger",
|
||||||
|
vars(test_prometheus_logger.litellm_requests_metric),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get the updated metrics
|
||||||
|
updated_metrics = {}
|
||||||
|
for metric in REGISTRY.collect():
|
||||||
|
for sample in metric.samples:
|
||||||
|
updated_metrics[sample.name] = sample.value
|
||||||
|
|
||||||
|
print("metrics from prometheus", updated_metrics)
|
||||||
|
|
||||||
|
# Assert the delta for each metric
|
||||||
|
assert (
|
||||||
|
updated_metrics["litellm_requests_metric_total"]
|
||||||
|
- initial_metrics.get("litellm_requests_metric_total", 0)
|
||||||
|
== 1.0
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
updated_metrics["litellm_total_tokens_total"]
|
||||||
|
- initial_metrics.get("litellm_total_tokens_total", 0)
|
||||||
|
== 30.0
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
updated_metrics["litellm_deployment_success_responses_total"]
|
||||||
|
- initial_metrics.get("litellm_deployment_success_responses_total", 0)
|
||||||
|
== 1.0
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
updated_metrics["litellm_deployment_total_requests_total"]
|
||||||
|
- initial_metrics.get("litellm_deployment_total_requests_total", 0)
|
||||||
|
== 1.0
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
updated_metrics["litellm_deployment_latency_per_output_token_bucket"]
|
||||||
|
- initial_metrics.get("litellm_deployment_latency_per_output_token_bucket", 0)
|
||||||
|
== 1.0
|
||||||
|
)
|
||||||
|
|
|
@ -1255,7 +1255,17 @@ async def test_add_callback_via_key(prisma_client):
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
|
@pytest.mark.parametrize(
|
||||||
|
"callback_type, expected_success_callbacks, expected_failure_callbacks",
|
||||||
|
[
|
||||||
|
("success", ["langfuse"], []),
|
||||||
|
("failure", [], ["langfuse"]),
|
||||||
|
("success_and_failure", ["langfuse"], ["langfuse"]),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
async def test_add_callback_via_key_litellm_pre_call_utils(
|
||||||
|
prisma_client, callback_type, expected_success_callbacks, expected_failure_callbacks
|
||||||
|
):
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from fastapi import HTTPException, Request, Response
|
from fastapi import HTTPException, Request, Response
|
||||||
|
@ -1312,7 +1322,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
|
||||||
"logging": [
|
"logging": [
|
||||||
{
|
{
|
||||||
"callback_name": "langfuse",
|
"callback_name": "langfuse",
|
||||||
"callback_type": "success",
|
"callback_type": callback_type,
|
||||||
"callback_vars": {
|
"callback_vars": {
|
||||||
"langfuse_public_key": "my-mock-public-key",
|
"langfuse_public_key": "my-mock-public-key",
|
||||||
"langfuse_secret_key": "my-mock-secret-key",
|
"langfuse_secret_key": "my-mock-secret-key",
|
||||||
|
@ -1359,14 +1369,21 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
|
||||||
}
|
}
|
||||||
|
|
||||||
new_data = await add_litellm_data_to_request(**data)
|
new_data = await add_litellm_data_to_request(**data)
|
||||||
|
print("NEW DATA: {}".format(new_data))
|
||||||
|
|
||||||
assert "success_callback" in new_data
|
|
||||||
assert new_data["success_callback"] == ["langfuse"]
|
|
||||||
assert "langfuse_public_key" in new_data
|
assert "langfuse_public_key" in new_data
|
||||||
assert new_data["langfuse_public_key"] == "my-mock-public-key"
|
assert new_data["langfuse_public_key"] == "my-mock-public-key"
|
||||||
assert "langfuse_secret_key" in new_data
|
assert "langfuse_secret_key" in new_data
|
||||||
assert new_data["langfuse_secret_key"] == "my-mock-secret-key"
|
assert new_data["langfuse_secret_key"] == "my-mock-secret-key"
|
||||||
|
|
||||||
|
if expected_success_callbacks:
|
||||||
|
assert "success_callback" in new_data
|
||||||
|
assert new_data["success_callback"] == expected_success_callbacks
|
||||||
|
|
||||||
|
if expected_failure_callbacks:
|
||||||
|
assert "failure_callback" in new_data
|
||||||
|
assert new_data["failure_callback"] == expected_failure_callbacks
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_gemini_pass_through_endpoint():
|
async def test_gemini_pass_through_endpoint():
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue