LiteLLM Minor Fixes & Improvements (10/10/2024) (#6158)

* refactor(vertex_ai_partner_models/anthropic): refactor anthropic to use partner model logic

* fix(vertex_ai/): support passing custom api base to partner models

Fixes https://github.com/BerriAI/litellm/issues/4317
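A minimal sketch of what this enables, assuming Vertex credentials are already configured; the model name and base URL below are illustrative placeholders, not values from this PR:

```python
import litellm

# Route a Vertex AI partner model (here: Anthropic on Vertex) through a custom endpoint.
# "my-custom-api-base" stands in for whatever proxy / private endpoint sits in front of Vertex.
response = litellm.completion(
    model="vertex_ai/claude-3-sonnet@20240229",
    messages=[{"role": "user", "content": "Hello"}],
    api_base="https://my-custom-api-base",
)
print(response.choices[0].message.content)
```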

* fix(proxy_server.py): Fix prometheus premium user check logic

* docs(prometheus.md): update quick start docs

* fix(custom_llm.py): support passing dynamic api key + api base
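A rough sketch of the new behaviour, based on the test added in this commit; it assumes the handler is invoked with keyword arguments, and parameters beyond those visible in the diff are collapsed into `**kwargs`:

```python
import litellm
from litellm import CustomLLM


class MyCustomLLM(CustomLLM):
    def image_generation(self, model, prompt, api_key, api_base, model_response, **kwargs):
        # api_key / api_base now arrive per request instead of being fixed at registration time
        print(f"calling {api_base} for {model} with key {api_key!r}")
        return model_response


litellm.custom_provider_map = [
    {"provider": "custom_llm", "custom_handler": MyCustomLLM()}
]

resp = litellm.image_generation(
    model="custom_llm/my-fake-model",
    prompt="Hello world",
    api_key="my-api-key",            # passed through to the handler
    api_base="https://my-api-base",  # illustrative value
)
```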

* fix(realtime_api/main.py): Add request/response logging for realtime api endpoints

Closes https://github.com/BerriAI/litellm/issues/6081

* feat(openai/realtime): add openai realtime api logging

Closes https://github.com/BerriAI/litellm/issues/6081

* fix(realtime_streaming.py): fix linting errors

* fix(realtime_streaming.py): fix linting errors

* fix: fix linting errors

* fix pattern match router

* Add literalai in the sidebar observability category (#6163)

* fix: add literalai in the sidebar

* fix: typo

* update (#6160)

* Feat: Add Langtrace integration (#5341)

* Feat: Add Langtrace integration

* add langtrace service name

* fix timestamps for traces

* add tests

* Discard Callback + use existing otel logger

* cleanup

* remove print statements

* remove callback

* add docs

* docs

* add logging docs

* format logging

* remove emoji and add litellm proxy example

* format logging

* format `logging.md`

* add langtrace docs to logging.md

* sync conflict

* docs fix
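For the Langtrace change above: it reuses the existing OTEL logger rather than adding a dedicated callback class. A configuration sketch, assuming it is enabled by callback name plus an API-key environment variable like litellm's other observability integrations; the exact names below are assumptions, so check the logging docs added in this PR:

```python
import os
import litellm

# Assumed setup: callback registered by name, API key read from the environment (placeholder value).
os.environ["LANGTRACE_API_KEY"] = "<your-langtrace-api-key>"
litellm.callbacks = ["langtrace"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi"}],
)
```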

* (perf) move s3 logging to Batch logging + async [94% faster perf under 100 RPS on 1 litellm instance] (#6165)

* fix move s3 to use customLogger

* add basic s3 logging test

* add s3 to custom logger compatible

* use batch logger for s3

* s3 set flush interval and batch size

* fix s3 logging

* add notes on s3 logging

* fix s3 logging

* add basic s3 logging test

* fix s3 type errors

* add test for sync logging on s3

* fix: fix to debug log
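The test added further down shows the setup end to end; trimmed down, enabling the batched S3 logger looks roughly like this (bucket name is illustrative, AWS keys are resolved from the environment at runtime):

```python
import litellm

# Successful requests are buffered and flushed to S3 in batches by a background task,
# instead of being written synchronously on the request path.
litellm.success_callback = ["s3"]
litellm.s3_callback_params = {
    "s3_bucket_name": "my-litellm-logs",
    "s3_region_name": "us-west-2",
    "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
    "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
}

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "This is a test"}],
)
# Expect the log object to land in the bucket shortly after the call returns,
# not inline with it (the new test sleeps ~12s before checking the bucket).
```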

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Willy Douhard <willy.douhard@gmail.com>
Co-authored-by: yujonglee <yujonglee.dev@gmail.com>
Co-authored-by: Ali Waleed <ali@scale3labs.com>
Krish Dholakia 2024-10-11 23:04:36 -07:00 committed by GitHub
parent 9db4ccca9f
commit 11f9df923a
28 changed files with 966 additions and 760 deletions


@@ -296,7 +296,7 @@ def test_all_model_configs():
         optional_params={},
     ) == {"max_tokens": 10}
-    from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_anthropic import (
+    from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.anthropic.transformation import (
         VertexAIAnthropicConfig,
     )


@@ -12,7 +12,70 @@ import litellm
 litellm.num_retries = 3
 import time, random
 from litellm._logging import verbose_logger
+import logging
+import pytest
+import boto3
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("sync_mode", [True, False])
+async def test_basic_s3_logging(sync_mode):
+    verbose_logger.setLevel(level=logging.DEBUG)
+    litellm.success_callback = ["s3"]
+    litellm.s3_callback_params = {
+        "s3_bucket_name": "load-testing-oct",
+        "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
+        "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
+        "s3_region_name": "us-west-2",
+    }
+    litellm.set_verbose = True
+
+    if sync_mode is True:
+        response = litellm.completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "This is a test"}],
+            mock_response="It's simple to use and easy to get started",
+        )
+    else:
+        response = await litellm.acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "This is a test"}],
+            mock_response="It's simple to use and easy to get started",
+        )
+    print(f"response: {response}")
+
+    await asyncio.sleep(12)
+
+    total_objects, all_s3_keys = list_all_s3_objects("load-testing-oct")
+
+    # assert that at least one key has response.id in it
+    assert any(response.id in key for key in all_s3_keys)
+
+    s3 = boto3.client("s3")
+    # delete all objects
+    for key in all_s3_keys:
+        s3.delete_object(Bucket="load-testing-oct", Key=key)
+
+
+def list_all_s3_objects(bucket_name):
+    s3 = boto3.client("s3")
+
+    all_s3_keys = []
+
+    paginator = s3.get_paginator("list_objects_v2")
+    total_objects = 0
+
+    for page in paginator.paginate(Bucket=bucket_name):
+        if "Contents" in page:
+            total_objects += len(page["Contents"])
+            all_s3_keys.extend([obj["Key"] for obj in page["Contents"]])
+
+    print(f"Total number of objects in {bucket_name}: {total_objects}")
+    print(all_s3_keys)
+
+    return total_objects, all_s3_keys
+
+
+list_all_s3_objects("load-testing-oct")
+
+
 @pytest.mark.skip(reason="AWS Suspended Account")


@@ -1616,9 +1616,11 @@ async def test_gemini_pro_json_schema_args_sent_httpx_openai_schema(
     )


-@pytest.mark.parametrize("provider", ["vertex_ai_beta"])  # "vertex_ai",
+@pytest.mark.parametrize(
+    "model", ["gemini-1.5-flash", "claude-3-sonnet@20240229"]
+)  # "vertex_ai",
 @pytest.mark.asyncio
-async def test_gemini_pro_httpx_custom_api_base(provider):
+async def test_gemini_pro_httpx_custom_api_base(model):
     load_vertex_ai_credentials()
     litellm.set_verbose = True
     messages = [
@@ -1634,7 +1636,7 @@ async def test_gemini_pro_httpx_custom_api_base(provider):
     with patch.object(client, "post", new=MagicMock()) as mock_call:
         try:
             response = completion(
-                model="vertex_ai_beta/gemini-1.5-flash",
+                model="vertex_ai/{}".format(model),
                 messages=messages,
                 response_format={"type": "json_object"},
                 client=client,
@@ -1647,8 +1649,17 @@ async def test_gemini_pro_httpx_custom_api_base(provider):
         mock_call.assert_called_once()
-        assert "my-custom-api-base:generateContent" == mock_call.call_args.kwargs["url"]
-        assert "hello" in mock_call.call_args.kwargs["headers"]
+        print(f"mock_call.call_args: {mock_call.call_args}")
+        print(f"mock_call.call_args.kwargs: {mock_call.call_args.kwargs}")
+        if "url" in mock_call.call_args.kwargs:
+            assert (
+                "my-custom-api-base:generateContent"
+                == mock_call.call_args.kwargs["url"]
+            )
+        else:
+            assert "my-custom-api-base:rawPredict" == mock_call.call_args[0][0]
+
+        if "headers" in mock_call.call_args.kwargs:
+            assert "hello" in mock_call.call_args.kwargs["headers"]


 # @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")


@@ -28,7 +28,6 @@ from typing import (
     Union,
 )
 from unittest.mock import AsyncMock, MagicMock, patch
-import httpx
 from dotenv import load_dotenv
@@ -226,6 +225,8 @@ class MyCustomLLM(CustomLLM):
         self,
         model: str,
         prompt: str,
+        api_key: Optional[str],
+        api_base: Optional[str],
         model_response: ImageResponse,
         optional_params: dict,
         logging_obj: Any,
@@ -242,6 +243,8 @@ class MyCustomLLM(CustomLLM):
         self,
         model: str,
         prompt: str,
+        api_key: Optional[str],
+        api_base: Optional[str],
         model_response: ImageResponse,
         optional_params: dict,
         logging_obj: Any,
@@ -362,3 +365,31 @@ async def test_simple_image_generation_async():
     )
     print(resp)
+
+
+@pytest.mark.asyncio
+async def test_image_generation_async_with_api_key_and_api_base():
+    my_custom_llm = MyCustomLLM()
+    litellm.custom_provider_map = [
+        {"provider": "custom_llm", "custom_handler": my_custom_llm}
+    ]
+
+    with patch.object(
+        my_custom_llm, "aimage_generation", new=AsyncMock()
+    ) as mock_client:
+        try:
+            resp = await litellm.aimage_generation(
+                model="custom_llm/my-fake-model",
+                prompt="Hello world",
+                api_key="my-api-key",
+                api_base="my-api-base",
+            )
+
+            print(resp)
+        except Exception as e:
+            print(e)
+
+        mock_client.assert_awaited_once()
+        assert mock_client.call_args.kwargs["api_key"] == "my-api-key"
+        assert mock_client.call_args.kwargs["api_base"] == "my-api-base"