forked from phoenix/litellm-mirror
LiteLLM Minor Fixes & Improvements (10/10/2024) (#6158)
* refactor(vertex_ai_partner_models/anthropic): refactor anthropic to use partner model logic * fix(vertex_ai/): support passing custom api base to partner models Fixes https://github.com/BerriAI/litellm/issues/4317 * fix(proxy_server.py): Fix prometheus premium user check logic * docs(prometheus.md): update quick start docs * fix(custom_llm.py): support passing dynamic api key + api base * fix(realtime_api/main.py): Add request/response logging for realtime api endpoints Closes https://github.com/BerriAI/litellm/issues/6081 * feat(openai/realtime): add openai realtime api logging Closes https://github.com/BerriAI/litellm/issues/6081 * fix(realtime_streaming.py): fix linting errors * fix(realtime_streaming.py): fix linting errors * fix: fix linting errors * fix pattern match router * Add literalai in the sidebar observability category (#6163) * fix: add literalai in the sidebar * fix: typo * update (#6160) * Feat: Add Langtrace integration (#5341) * Feat: Add Langtrace integration * add langtrace service name * fix timestamps for traces * add tests * Discard Callback + use existing otel logger * cleanup * remove print statments * remove callback * add docs * docs * add logging docs * format logging * remove emoji and add litellm proxy example * format logging * format `logging.md` * add langtrace docs to logging.md * sync conflict * docs fix * (perf) move s3 logging to Batch logging + async [94% faster perf under 100 RPS on 1 litellm instance] (#6165) * fix move s3 to use customLogger * add basic s3 logging test * add s3 to custom logger compatible * use batch logger for s3 * s3 set flush interval and batch size * fix s3 logging * add notes on s3 logging * fix s3 logging * add basic s3 logging test * fix s3 type errors * add test for sync logging on s3 * fix: fix to debug log --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: Willy Douhard <willy.douhard@gmail.com> Co-authored-by: yujonglee <yujonglee.dev@gmail.com> Co-authored-by: 
Ali Waleed <ali@scale3labs.com>
This commit is contained in:
parent
9db4ccca9f
commit
11f9df923a
28 changed files with 966 additions and 760 deletions
|
@ -296,7 +296,7 @@ def test_all_model_configs():
|
|||
optional_params={},
|
||||
) == {"max_tokens": 10}
|
||||
|
||||
from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_anthropic import (
|
||||
from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.anthropic.transformation import (
|
||||
VertexAIAnthropicConfig,
|
||||
)
|
||||
|
||||
|
|
|
@ -12,7 +12,70 @@ import litellm
|
|||
litellm.num_retries = 3
|
||||
|
||||
import time, random
|
||||
from litellm._logging import verbose_logger
|
||||
import logging
|
||||
import pytest
|
||||
import boto3
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("sync_mode", [True, False])
async def test_basic_s3_logging(sync_mode):
    """Check that a mocked completion is written to S3 by the "s3" success callback.

    Runs once through ``litellm.completion`` (``sync_mode=True``) and once
    through ``litellm.acompletion`` (``sync_mode=False``), then lists the
    bucket and asserts that at least one object key contains the response id.

    All listed objects are deleted afterwards, even when the assertion fails,
    so a failing run does not leave stale objects behind for the next one.
    """
    bucket_name = "load-testing-oct"

    verbose_logger.setLevel(level=logging.DEBUG)
    litellm.success_callback = ["s3"]
    litellm.s3_callback_params = {
        "s3_bucket_name": bucket_name,
        "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
        "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
        "s3_region_name": "us-west-2",
    }
    litellm.set_verbose = True

    # Both code paths send the exact same mocked request.
    request_kwargs = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "This is a test"}],
        "mock_response": "It's simple to use and easy to get started",
    }
    if sync_mode is True:
        response = litellm.completion(**request_kwargs)
    else:
        response = await litellm.acompletion(**request_kwargs)
    print(f"response: {response}")

    # NOTE(review): presumably this wait has to exceed the S3 logger's flush
    # interval so the batched upload has happened before listing - confirm.
    await asyncio.sleep(12)

    total_objects, all_s3_keys = list_all_s3_objects(bucket_name)

    try:
        # assert that at least one key has response.id in it
        assert any(response.id in key for key in all_s3_keys)
    finally:
        # delete all objects, regardless of the assertion outcome
        s3 = boto3.client("s3")
        for key in all_s3_keys:
            s3.delete_object(Bucket=bucket_name, Key=key)
|
||||
|
||||
|
||||
def list_all_s3_objects(bucket_name):
    """Return ``(total_objects, all_s3_keys)`` for every object in *bucket_name*.

    Walks all pages of the ``list_objects_v2`` paginator, collects each
    object's ``Key``, and prints the object count and the collected keys
    before returning them.
    """
    client = boto3.client("s3")
    pages = client.get_paginator("list_objects_v2").paginate(Bucket=bucket_name)

    all_s3_keys = []
    for page in pages:
        # Pages without a "Contents" entry contribute nothing.
        if "Contents" not in page:
            continue
        all_s3_keys.extend(entry["Key"] for entry in page["Contents"])

    # Every listed object contributes exactly one key.
    total_objects = len(all_s3_keys)

    print(f"Total number of objects in {bucket_name}: {total_objects}")
    print(all_s3_keys)
    return total_objects, all_s3_keys
|
||||
|
||||
|
||||
# NOTE(review): this call executes at import time, so merely importing or
# collecting this test module lists the "load-testing-oct" bucket via boto3.
# It looks like a debugging leftover - confirm whether it should be removed
# or guarded behind `if __name__ == "__main__":`.
list_all_s3_objects("load-testing-oct")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
|
|
|
@ -1616,9 +1616,11 @@ async def test_gemini_pro_json_schema_args_sent_httpx_openai_schema(
|
|||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # "vertex_ai",
|
||||
@pytest.mark.parametrize(
|
||||
"model", ["gemini-1.5-flash", "claude-3-sonnet@20240229"]
|
||||
) # "vertex_ai",
|
||||
@pytest.mark.asyncio
|
||||
async def test_gemini_pro_httpx_custom_api_base(provider):
|
||||
async def test_gemini_pro_httpx_custom_api_base(model):
|
||||
load_vertex_ai_credentials()
|
||||
litellm.set_verbose = True
|
||||
messages = [
|
||||
|
@ -1634,7 +1636,7 @@ async def test_gemini_pro_httpx_custom_api_base(provider):
|
|||
with patch.object(client, "post", new=MagicMock()) as mock_call:
|
||||
try:
|
||||
response = completion(
|
||||
model="vertex_ai_beta/gemini-1.5-flash",
|
||||
model="vertex_ai/{}".format(model),
|
||||
messages=messages,
|
||||
response_format={"type": "json_object"},
|
||||
client=client,
|
||||
|
@ -1647,8 +1649,17 @@ async def test_gemini_pro_httpx_custom_api_base(provider):
|
|||
|
||||
mock_call.assert_called_once()
|
||||
|
||||
assert "my-custom-api-base:generateContent" == mock_call.call_args.kwargs["url"]
|
||||
assert "hello" in mock_call.call_args.kwargs["headers"]
|
||||
print(f"mock_call.call_args: {mock_call.call_args}")
|
||||
print(f"mock_call.call_args.kwargs: {mock_call.call_args.kwargs}")
|
||||
if "url" in mock_call.call_args.kwargs:
|
||||
assert (
|
||||
"my-custom-api-base:generateContent"
|
||||
== mock_call.call_args.kwargs["url"]
|
||||
)
|
||||
else:
|
||||
assert "my-custom-api-base:rawPredict" == mock_call.call_args[0][0]
|
||||
if "headers" in mock_call.call_args.kwargs:
|
||||
assert "hello" in mock_call.call_args.kwargs["headers"]
|
||||
|
||||
|
||||
# @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
|
||||
|
|
|
@ -28,7 +28,6 @@ from typing import (
|
|||
Union,
|
||||
)
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import httpx
|
||||
from dotenv import load_dotenv
|
||||
|
||||
|
@ -226,6 +225,8 @@ class MyCustomLLM(CustomLLM):
|
|||
self,
|
||||
model: str,
|
||||
prompt: str,
|
||||
api_key: Optional[str],
|
||||
api_base: Optional[str],
|
||||
model_response: ImageResponse,
|
||||
optional_params: dict,
|
||||
logging_obj: Any,
|
||||
|
@ -242,6 +243,8 @@ class MyCustomLLM(CustomLLM):
|
|||
self,
|
||||
model: str,
|
||||
prompt: str,
|
||||
api_key: Optional[str],
|
||||
api_base: Optional[str],
|
||||
model_response: ImageResponse,
|
||||
optional_params: dict,
|
||||
logging_obj: Any,
|
||||
|
@ -362,3 +365,31 @@ async def test_simple_image_generation_async():
|
|||
)
|
||||
|
||||
print(resp)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_image_generation_async_with_api_key_and_api_base():
    """Check that ``api_key`` / ``api_base`` reach a custom handler's ``aimage_generation``.

    Registers ``MyCustomLLM`` under the ``custom_llm`` provider, replaces its
    ``aimage_generation`` with an ``AsyncMock``, calls
    ``litellm.aimage_generation`` with an explicit key and base, and asserts
    the mock was awaited exactly once with those two keyword arguments.
    """
    my_custom_llm = MyCustomLLM()
    litellm.custom_provider_map = [
        {"provider": "custom_llm", "custom_handler": my_custom_llm}
    ]

    with patch.object(
        my_custom_llm, "aimage_generation", new=AsyncMock()
    ) as mock_client:
        try:
            resp = await litellm.aimage_generation(
                model="custom_llm/my-fake-model",
                prompt="Hello world",
                api_key="my-api-key",
                api_base="my-api-base",
            )

            print(resp)
        except Exception as e:
            # NOTE(review): errors are deliberately tolerated here, presumably
            # because the mocked handler does not return a real image
            # response; only the way the handler was invoked is checked below.
            print(e)

        mock_client.assert_awaited_once()

        # These comparisons were previously bare expressions (no `assert`),
        # so they could never fail the test.
        assert mock_client.call_args.kwargs["api_key"] == "my-api-key"
        assert mock_client.call_args.kwargs["api_base"] == "my-api-base"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue