Merge branch 'main' into litellm_fix_using_wildcard_openai_models_proxy

2024-04-15 14:35:06 -07:00 · 2024-04-15 14:35:06 -07:00 · adae555fb1
commit adae555fb1
parent 6df5337e65 33cf173405
6 changed files with 236 additions and 2 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -199,6 +199,10 @@ jobs:
              -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
              -e AWS_REGION_NAME=$AWS_REGION_NAME \
              -e OPENAI_API_KEY=$OPENAI_API_KEY \
              -e LANGFUSE_PROJECT1_PUBLIC=$LANGFUSE_PROJECT1_PUBLIC \
              -e LANGFUSE_PROJECT2_PUBLIC=$LANGFUSE_PROJECT2_PUBLIC \
              -e LANGFUSE_PROJECT1_SECRET=$LANGFUSE_PROJECT1_SECRET \
              -e LANGFUSE_PROJECT2_SECRET=$LANGFUSE_PROJECT2_SECRET \
              --name my-app \
              -v $(pwd)/proxy_server_config.yaml:/app/config.yaml \
              my-app:latest \
--- a/docs/my-website/docs/proxy/logging.md
+++ b/docs/my-website/docs/proxy/logging.md
@ -9,9 +9,9 @@ Log Proxy Input, Output, Exceptions using Custom Callbacks, Langfuse, OpenTeleme
 - [Async Custom Callbacks](#custom-callback-class-async)
 - [Async Custom Callback APIs](#custom-callback-apis-async)
 - [Logging to DataDog](#logging-proxy-inputoutput---datadog)
 - [Logging to Langfuse](#logging-proxy-inputoutput---langfuse)
 - [Logging to s3 Buckets](#logging-proxy-inputoutput---s3-buckets)
 - [Logging to DataDog](#logging-proxy-inputoutput---datadog)
 - [Logging to DynamoDB](#logging-proxy-inputoutput---dynamodb)
 - [Logging to Sentry](#logging-proxy-inputoutput---sentry)
 - [Logging to Traceloop (OpenTelemetry)](#logging-proxy-inputoutput-traceloop-opentelemetry)
@ -539,6 +539,36 @@ print(response)
 </Tabs>
 ### Team based Logging to Langfuse
 **Example:**
 This config would send langfuse logs to 2 different langfuse projects, based on the team id 
 ```yaml
 litellm_settings:
  default_team_settings: 
    - team_id: my-secret-project
      success_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PUB_KEY_1 # Project 1
      langfuse_secret: os.environ/LANGFUSE_PRIVATE_KEY_1 # Project 1
    - team_id: ishaans-secret-project
      success_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PUB_KEY_2 # Project 2
      langfuse_secret: os.environ/LANGFUSE_SECRET_2 # Project 2
 ```
 Now, when you [generate keys](./virtual_keys.md) for this team-id 
 ```bash
 curl -X POST 'http://0.0.0.0:4000/key/generate' \
 -H 'Authorization: Bearer sk-1234' \
 -H 'Content-Type: application/json' \
 -d '{"team_id": "ishaans-secret-project"}'
 ```
 All requests made with these keys will be logged to that team's Langfuse project.
 ## Logging Proxy Input/Output - DataDog
 We will use the `--config` to set `litellm.success_callback = ["datadog"]`; this will log all successful LLM calls to DataDog.
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@ -8,6 +8,19 @@ model_list:
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
 litellm_settings:
  default_team_settings: 
    - team_id: team-1
      success_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PROJECT1_PUBLIC # Project 1
      langfuse_secret: os.environ/LANGFUSE_PROJECT1_SECRET # Project 1
    - team_id: team-2
      success_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PROJECT2_PUBLIC # Project 2
      langfuse_secret: os.environ/LANGFUSE_PROJECT2_SECRET # Project 2
 general_settings:
  store_model_in_db: true
  master_key: sk-1234
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@ -1900,6 +1900,11 @@ class ProxyConfig:
                    param_name = getattr(response, "param_name", None)
                    param_value = getattr(response, "param_value", None)
                    if param_name is not None and param_value is not None:
                        # check if param_name is already in the config
                        if param_name in config:
                            if isinstance(config[param_name], dict):
                                config[param_name].update(param_value)
                            else:
                                config[param_name] = param_value
        return config
--- a/proxy_server_config.yaml
+++ b/proxy_server_config.yaml
@ -70,6 +70,15 @@ litellm_settings:
  request_timeout: 600
  telemetry: False
  context_window_fallbacks: [{"gpt-3.5-turbo": ["gpt-3.5-turbo-large"]}]
  default_team_settings: 
    - team_id: team-1
      success_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PROJECT1_PUBLIC # Project 1
      langfuse_secret: os.environ/LANGFUSE_PROJECT1_SECRET # Project 1
    - team_id: team-2
      success_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PROJECT2_PUBLIC # Project 2
      langfuse_secret: os.environ/LANGFUSE_PROJECT2_SECRET # Project 2
 router_settings:
  routing_strategy: usage-based-routing-v2 
--- a/tests/test_team_logging.py
+++ b/tests/test_team_logging.py
@ -0,0 +1,173 @@
 # What this tests ?
 ## Tests team-based logging - requests made with a team's key are logged to that team's own Langfuse project
 import pytest
 import asyncio
 import aiohttp
 import os
 import dotenv
 from dotenv import load_dotenv
 import pytest
 load_dotenv()
 async def generate_key(session, models=None, team_id=None):
    """
    Create a key through POST /key/generate on the local proxy.

    - session: client session whose `post(...)` is used as an async context manager
    - models: list of model names sent in the request; None sends an empty list
    - team_id: team to attach the key to (sent as-is, may be None)

    Returns the parsed JSON body of the response.
    Raises Exception when the response status is not 200.
    """
    # A `[]` default would be one list object shared by every call; build a
    # fresh list per call instead.
    if models is None:
        models = []

    url = "http://0.0.0.0:4000/key/generate"
    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
    data = {
        "models": models,
        "duration": None,
        "team_id": team_id,
    }

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

        # Echo the raw body so a failing run shows what the proxy answered.
        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")

        return await response.json()
 async def chat_completion(session, key, model="azure-gpt-3.5", request_metadata=None):
    """
    Send a fixed system + user chat to POST /chat/completions on the local proxy.

    - session: client session whose `post(...)` is used as an async context manager
    - key: API key placed in the Bearer Authorization header
    - model: model name sent in the request body
    - request_metadata: value sent under the "metadata" field (may be None)

    Prints the payload and the raw response text. Returns nothing.
    Raises Exception when the response status is not 200.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    payload = {
        "model": model,
        "messages": conversation,
        "metadata": request_metadata,
    }
    print("data sent in test=", payload)

    auth_headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    async with session.post(
        "http://0.0.0.0:4000/chat/completions", headers=auth_headers, json=payload
    ) as resp:
        status = resp.status
        print(await resp.text())
        print()

        if status == 200:
            return
        raise Exception(f"Request did not return a 200 status code: {status}")
@pytest.mark.asyncio
async def test_team_logging():
    """
    Keys created for team-1 must log to Langfuse project 1.

    Steps:
    -> Create a key for team-1
    -> Make one chat/completions call tagged with a unique trace id
    -> Look that trace id up with the project 1 Langfuse credentials
    """
    try:
        async with aiohttp.ClientSession() as session:
            # team-1 logs to project 1
            key_response = await generate_key(
                session, team_id="team-1", models=["fake-openai-endpoint"]
            )

            import uuid

            # Unique per run, so the lookup below matches only this request.
            _trace_id = f"trace-{uuid.uuid4()}"
            await chat_completion(
                session,
                key_response["key"],
                model="fake-openai-endpoint",
                request_metadata={"trace_id": _trace_id},
            )

            # Check the log reached the correct team's project on langfuse.
            import langfuse

            project_1 = langfuse.Langfuse(
                public_key=os.getenv("LANGFUSE_PROJECT1_PUBLIC"),
                secret_key=os.getenv("LANGFUSE_PROJECT1_SECRET"),
            )

            # Pause before querying Langfuse for the generation.
            await asyncio.sleep(10)

            print(f"searching for trace_id={_trace_id} on langfuse")
            generations = project_1.get_generations(trace_id=_trace_id).data
            print(generations)
            assert len(generations) == 1
    except Exception as e:
        pytest.fail(f"Unexpected error: {str(e)}")
@pytest.mark.asyncio
async def test_team_2logging():
    """
    -> Team 2 logs to project 2 (and NOT to project 1)
    -> Create Key for team-2
    -> Make chat/completions call with a unique trace id
    -> Fetch logs from langfuse: project 2 has the trace, project 1 does not
    """
    try:
        async with aiohttp.ClientSession() as session:
            key = await generate_key(
                session, models=["fake-openai-endpoint"], team_id="team-2"
            )  # team-2 logs to project 2

            import uuid

            # Unique per run, so the lookups below match only this request.
            _trace_id = f"trace-{uuid.uuid4()}"
            _request_metadata = {
                "trace_id": _trace_id,
            }

            await chat_completion(
                session,
                key["key"],
                model="fake-openai-endpoint",
                request_metadata=_request_metadata,
            )

            # Test - if the logs were sent to the correct team on langfuse
            import langfuse

            langfuse_client = langfuse.Langfuse(
                public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
                secret_key=os.getenv("LANGFUSE_PROJECT2_SECRET"),
            )

            # Pause before querying Langfuse for the generation.
            await asyncio.sleep(10)

            print(f"searching for trace_id={_trace_id} on langfuse")
            generations = langfuse_client.get_generations(trace_id=_trace_id).data
            print("Team 2 generations", generations)

            # team-2 should have 1 generation with this trace id
            assert len(generations) == 1

            # team-1 should have 0 generations with this trace id
            langfuse_client_1 = langfuse.Langfuse(
                public_key=os.getenv("LANGFUSE_PROJECT1_PUBLIC"),
                secret_key=os.getenv("LANGFUSE_PROJECT1_SECRET"),
            )
            generations_team_1 = langfuse_client_1.get_generations(
                trace_id=_trace_id
            ).data
            print("Team 1 generations", generations_team_1)

            assert len(generations_team_1) == 0
    except Exception as e:
        pytest.fail("Team 2 logging failed: " + str(e))