LiteLLM Minor Fixes & Improvements (10/10/2024) (#6158)

* refactor(vertex_ai_partner_models/anthropic): refactor anthropic to use partner model logic

* fix(vertex_ai/): support passing custom api base to partner models

Fixes https://github.com/BerriAI/litellm/issues/4317
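A minimal sketch of what this enables, assuming Vertex credentials are already configured; the model name and base URL below are illustrative placeholders, not values from this PR:

```python
import litellm

# Route a Vertex AI partner model (here: Anthropic on Vertex) through a custom endpoint.
# "my-custom-api-base" stands in for whatever proxy / private endpoint sits in front of Vertex.
response = litellm.completion(
    model="vertex_ai/claude-3-sonnet@20240229",
    messages=[{"role": "user", "content": "Hello"}],
    api_base="https://my-custom-api-base",
)
print(response.choices[0].message.content)
```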

* fix(proxy_server.py): Fix prometheus premium user check logic

* docs(prometheus.md): update quick start docs

* fix(custom_llm.py): support passing dynamic api key + api base
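A rough sketch of the new behaviour, based on the test added in this commit; it assumes the handler is invoked with keyword arguments, and parameters beyond those visible in the diff are collapsed into `**kwargs`:

```python
import litellm
from litellm import CustomLLM


class MyCustomLLM(CustomLLM):
    def image_generation(self, model, prompt, api_key, api_base, model_response, **kwargs):
        # api_key / api_base now arrive per request instead of being fixed at registration time
        print(f"calling {api_base} for {model} with key {api_key!r}")
        return model_response


litellm.custom_provider_map = [
    {"provider": "custom_llm", "custom_handler": MyCustomLLM()}
]

resp = litellm.image_generation(
    model="custom_llm/my-fake-model",
    prompt="Hello world",
    api_key="my-api-key",            # passed through to the handler
    api_base="https://my-api-base",  # illustrative value
)
```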

* fix(realtime_api/main.py): Add request/response logging for realtime api endpoints

Closes https://github.com/BerriAI/litellm/issues/6081

* feat(openai/realtime): add openai realtime api logging

Closes https://github.com/BerriAI/litellm/issues/6081

* fix(realtime_streaming.py): fix linting errors

* fix(realtime_streaming.py): fix linting errors

* fix: fix linting errors

* fix pattern match router

* Add literalai in the sidebar observability category (#6163)

* fix: add literalai in the sidebar

* fix: typo

* update (#6160)

* Feat: Add Langtrace integration (#5341)

* Feat: Add Langtrace integration

* add langtrace service name

* fix timestamps for traces

* add tests

* Discard Callback + use existing otel logger

* cleanup

* remove print statements

* remove callback

* add docs

* docs

* add logging docs

* format logging

* remove emoji and add litellm proxy example

* format logging

* format `logging.md`

* add langtrace docs to logging.md

* sync conflict

* docs fix
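For the Langtrace change above: it reuses the existing OTEL logger rather than adding a dedicated callback class. A configuration sketch, assuming it is enabled by callback name plus an API-key environment variable like litellm's other observability integrations; the exact names below are assumptions, so check the logging docs added in this PR:

```python
import os
import litellm

# Assumed setup: callback registered by name, API key read from the environment (placeholder value).
os.environ["LANGTRACE_API_KEY"] = "<your-langtrace-api-key>"
litellm.callbacks = ["langtrace"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi"}],
)
```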

* (perf) move s3 logging to Batch logging + async [94% faster perf under 100 RPS on 1 litellm instance] (#6165)

* fix move s3 to use customLogger

* add basic s3 logging test

* add s3 to custom logger compatible

* use batch logger for s3

* s3 set flush interval and batch size

* fix s3 logging

* add notes on s3 logging

* fix s3 logging

* add basic s3 logging test

* fix s3 type errors

* add test for sync logging on s3

* fix: fix to debug log
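The test added further down shows the setup end to end; trimmed down, enabling the batched S3 logger looks roughly like this (bucket name is illustrative, AWS keys are resolved from the environment at runtime):

```python
import litellm

# Successful requests are buffered and flushed to S3 in batches by a background task,
# instead of being written synchronously on the request path.
litellm.success_callback = ["s3"]
litellm.s3_callback_params = {
    "s3_bucket_name": "my-litellm-logs",
    "s3_region_name": "us-west-2",
    "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
    "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
}

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "This is a test"}],
)
# Expect the log object to land in the bucket shortly after the call returns,
# not inline with it (the new test sleeps ~12s before checking the bucket).
```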

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Willy Douhard <willy.douhard@gmail.com>
Co-authored-by: yujonglee <yujonglee.dev@gmail.com>
Co-authored-by: Ali Waleed <ali@scale3labs.com>
Krish Dholakia 2024-10-11 23:04:36 -07:00 committed by GitHub
parent 9db4ccca9f
commit 11f9df923a
28 changed files with 966 additions and 760 deletions


@@ -296,7 +296,7 @@ def test_all_model_configs():
         optional_params={},
     ) == {"max_tokens": 10}
-    from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_anthropic import (
+    from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.anthropic.transformation import (
         VertexAIAnthropicConfig,
     )


@@ -12,7 +12,70 @@ import litellm
 litellm.num_retries = 3
 import time, random
 from litellm._logging import verbose_logger
+import logging
+import pytest
+import boto3
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("sync_mode", [True, False])
+async def test_basic_s3_logging(sync_mode):
+    verbose_logger.setLevel(level=logging.DEBUG)
+    litellm.success_callback = ["s3"]
+    litellm.s3_callback_params = {
+        "s3_bucket_name": "load-testing-oct",
+        "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
+        "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
+        "s3_region_name": "us-west-2",
+    }
+    litellm.set_verbose = True
+
+    if sync_mode is True:
+        response = litellm.completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "This is a test"}],
+            mock_response="It's simple to use and easy to get started",
+        )
+    else:
+        response = await litellm.acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "This is a test"}],
+            mock_response="It's simple to use and easy to get started",
+        )
+    print(f"response: {response}")
+
+    await asyncio.sleep(12)
+
+    total_objects, all_s3_keys = list_all_s3_objects("load-testing-oct")
+
+    # assert that at least one key has response.id in it
+    assert any(response.id in key for key in all_s3_keys)
+
+    s3 = boto3.client("s3")
+    # delete all objects
+    for key in all_s3_keys:
+        s3.delete_object(Bucket="load-testing-oct", Key=key)
+
+
+def list_all_s3_objects(bucket_name):
+    s3 = boto3.client("s3")
+
+    all_s3_keys = []
+
+    paginator = s3.get_paginator("list_objects_v2")
+    total_objects = 0
+
+    for page in paginator.paginate(Bucket=bucket_name):
+        if "Contents" in page:
+            total_objects += len(page["Contents"])
+            all_s3_keys.extend([obj["Key"] for obj in page["Contents"]])
+
+    print(f"Total number of objects in {bucket_name}: {total_objects}")
+    print(all_s3_keys)
+
+    return total_objects, all_s3_keys
+
+
+list_all_s3_objects("load-testing-oct")
+
+
 @pytest.mark.skip(reason="AWS Suspended Account")


@@ -1616,9 +1616,11 @@ async def test_gemini_pro_json_schema_args_sent_httpx_openai_schema(
     )


-@pytest.mark.parametrize("provider", ["vertex_ai_beta"])  # "vertex_ai",
+@pytest.mark.parametrize(
+    "model", ["gemini-1.5-flash", "claude-3-sonnet@20240229"]
+)  # "vertex_ai",
 @pytest.mark.asyncio
-async def test_gemini_pro_httpx_custom_api_base(provider):
+async def test_gemini_pro_httpx_custom_api_base(model):
     load_vertex_ai_credentials()
     litellm.set_verbose = True
     messages = [
@@ -1634,7 +1636,7 @@ async def test_gemini_pro_httpx_custom_api_base(provider):
     with patch.object(client, "post", new=MagicMock()) as mock_call:
         try:
             response = completion(
-                model="vertex_ai_beta/gemini-1.5-flash",
+                model="vertex_ai/{}".format(model),
                 messages=messages,
                 response_format={"type": "json_object"},
                 client=client,
@@ -1647,8 +1649,17 @@ async def test_gemini_pro_httpx_custom_api_base(provider):
         mock_call.assert_called_once()
-        assert "my-custom-api-base:generateContent" == mock_call.call_args.kwargs["url"]
-        assert "hello" in mock_call.call_args.kwargs["headers"]
+        print(f"mock_call.call_args: {mock_call.call_args}")
+        print(f"mock_call.call_args.kwargs: {mock_call.call_args.kwargs}")
+        if "url" in mock_call.call_args.kwargs:
+            assert (
+                "my-custom-api-base:generateContent"
+                == mock_call.call_args.kwargs["url"]
+            )
+        else:
+            assert "my-custom-api-base:rawPredict" == mock_call.call_args[0][0]
+
+        if "headers" in mock_call.call_args.kwargs:
+            assert "hello" in mock_call.call_args.kwargs["headers"]


 # @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")


@@ -28,7 +28,6 @@ from typing import (
     Union,
 )
 from unittest.mock import AsyncMock, MagicMock, patch
-import httpx
 from dotenv import load_dotenv
@@ -226,6 +225,8 @@ class MyCustomLLM(CustomLLM):
         self,
         model: str,
         prompt: str,
+        api_key: Optional[str],
+        api_base: Optional[str],
         model_response: ImageResponse,
         optional_params: dict,
         logging_obj: Any,
@@ -242,6 +243,8 @@ class MyCustomLLM(CustomLLM):
         self,
         model: str,
         prompt: str,
+        api_key: Optional[str],
+        api_base: Optional[str],
         model_response: ImageResponse,
         optional_params: dict,
         logging_obj: Any,
@@ -362,3 +365,31 @@ async def test_simple_image_generation_async():
     )
     print(resp)
+
+
+@pytest.mark.asyncio
+async def test_image_generation_async_with_api_key_and_api_base():
+    my_custom_llm = MyCustomLLM()
+    litellm.custom_provider_map = [
+        {"provider": "custom_llm", "custom_handler": my_custom_llm}
+    ]
+
+    with patch.object(
+        my_custom_llm, "aimage_generation", new=AsyncMock()
+    ) as mock_client:
+        try:
+            resp = await litellm.aimage_generation(
+                model="custom_llm/my-fake-model",
+                prompt="Hello world",
+                api_key="my-api-key",
+                api_base="my-api-base",
+            )
+
+            print(resp)
+        except Exception as e:
+            print(e)
+
+        mock_client.assert_awaited_once()
+        assert mock_client.call_args.kwargs["api_key"] == "my-api-key"
+        assert mock_client.call_args.kwargs["api_base"] == "my-api-base"