fix(pattern_match_deployments.py): default to user input if unable to map based on wildcards (#6632)

* fix(pattern_match_deployments.py): default to user input if unable to map based on wildcards (a sketch of this fallback follows the commit header below)

* test: fix test

* test: reset test name

* test: update conftest to reload proxy server module between tests

* ci(config.yml): move langfuse out of local_testing

reduce ci/cd time

* ci(config.yml): cleanup langfuse ci/cd tests

* fix: update test to not use global proxy_server app module

* ci: move caching to a separate test pipeline

speed up ci pipeline

* test: update conftest to check if proxy_server attr exists before reloading

* build(conftest.py): don't block on inability to reload proxy_server

* ci(config.yml): update caching unit test filter to work on 'cache' keyword as well

* fix(encrypt_decrypt_utils.py): use function to get salt key

* test: mark flaky test

* test: handle anthropic overloaded errors

* refactor: create separate ci/cd pipeline for proxy unit tests

make ci/cd faster

* ci(config.yml): add litellm_proxy_unit_testing to build_and_test jobs

* ci(config.yml): generate prisma binaries for proxy unit tests

* test: readd vertex_key.json

* ci(config.yml): remove `-s` from proxy_unit_test cmd

speed up test

* ci: remove any 'debug' logging flag

speed up ci pipeline

* test: fix test

* test(test_braintrust.py): rerun

* test: add delay for braintrust test
Krish Dholakia 2024-11-08 00:55:57 +05:30 committed by GitHub
parent 44840d615d
commit 27e18358ab
77 changed files with 2861 additions and 76 deletions
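The pattern_match_deployments.py change itself is not shown in the excerpt below, so here is a minimal, hypothetical sketch of the fallback the first bullet describes: try to splice the user's requested model into the wildcard deployment name, and default to the user's original input if the wildcard mapping cannot be applied. Function and parameter names are illustrative, not the actual litellm implementation.

import re


def map_pattern_to_deployment(pattern: str, deployment_model: str, user_model: str) -> str:
    """Hypothetical sketch: resolve a wildcard deployment name against the user's request."""
    try:
        # e.g. pattern "openai/*" matched against user_model "openai/gpt-4o"
        regex = re.escape(pattern).replace(r"\*", "(.*)")
        match = re.match(regex, user_model)
        if match is None:
            return user_model  # no wildcard match -> default to the user's input
        # splice the captured wildcard segment into the deployment's model name
        return deployment_model.replace("*", match.group(1))
    except Exception:
        return user_model  # any mapping error -> default to the user's input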

View file

@ -103,7 +103,7 @@ jobs:
command: |
pwd
ls
python -m pytest -vv tests/local_testing --cov=litellm --cov-report=xml -x --junitxml=test-results/junit.xml --durations=5 -k "not test_python_38.py and not router and not assistants"
python -m pytest -vv tests/local_testing --cov=litellm --cov-report=xml -x --junitxml=test-results/junit.xml --durations=5 -k "not test_python_38.py and not router and not assistants and not langfuse and not caching and not cache"
no_output_timeout: 120m
- run:
name: Rename the coverage files
@ -119,6 +119,204 @@ jobs:
paths:
- local_testing_coverage.xml
- local_testing_coverage
langfuse_logging_unit_tests:
docker:
- image: cimg/python:3.11
auth:
username: ${DOCKERHUB_USERNAME}
password: ${DOCKERHUB_PASSWORD}
working_directory: ~/project
steps:
- checkout
- run:
name: Show git commit hash
command: |
echo "Git commit hash: $CIRCLE_SHA1"
- restore_cache:
keys:
- v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
- run:
name: Install Dependencies
command: |
python -m pip install --upgrade pip
python -m pip install -r .circleci/requirements.txt
pip install "pytest==7.3.1"
pip install "pytest-retry==1.6.3"
pip install "pytest-asyncio==0.21.1"
pip install "pytest-cov==5.0.0"
pip install mypy
pip install "google-generativeai==0.3.2"
pip install "google-cloud-aiplatform==1.43.0"
pip install pyarrow
pip install "boto3==1.34.34"
pip install "aioboto3==12.3.0"
pip install langchain
pip install lunary==0.2.5
pip install "azure-identity==1.16.1"
pip install "langfuse==2.45.0"
pip install "logfire==0.29.0"
pip install numpydoc
pip install traceloop-sdk==0.21.1
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.54.0
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
pip install "respx==0.21.1"
pip install fastapi
pip install "gunicorn==21.2.0"
pip install "anyio==4.2.0"
pip install "aiodynamo==23.10.1"
pip install "asyncio==3.4.3"
pip install "apscheduler==3.10.4"
pip install "PyGithub==1.59.1"
pip install argon2-cffi
pip install "pytest-mock==3.12.0"
pip install python-multipart
pip install google-cloud-aiplatform
pip install prometheus-client==0.20.0
pip install "pydantic==2.7.1"
pip install "diskcache==5.6.1"
pip install "Pillow==10.3.0"
pip install "jsonschema==4.22.0"
- save_cache:
paths:
- ./venv
key: v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
- run:
name: Run prisma ./docker/entrypoint.sh
command: |
set +e
chmod +x docker/entrypoint.sh
./docker/entrypoint.sh
set -e
# Run pytest and generate JUnit XML report
- run:
name: Run tests
command: |
pwd
ls
python -m pytest -vv tests/local_testing --cov=litellm --cov-report=xml -x --junitxml=test-results/junit.xml --durations=5 -k "langfuse"
no_output_timeout: 120m
- run:
name: Rename the coverage files
command: |
mv coverage.xml langfuse_coverage.xml
mv .coverage langfuse_coverage
# Store test results
- store_test_results:
path: test-results
- persist_to_workspace:
root: .
paths:
- langfuse_coverage.xml
- langfuse_coverage
caching_unit_tests:
docker:
- image: cimg/python:3.11
auth:
username: ${DOCKERHUB_USERNAME}
password: ${DOCKERHUB_PASSWORD}
working_directory: ~/project
steps:
- checkout
- run:
name: Show git commit hash
command: |
echo "Git commit hash: $CIRCLE_SHA1"
- restore_cache:
keys:
- v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
- run:
name: Install Dependencies
command: |
python -m pip install --upgrade pip
python -m pip install -r .circleci/requirements.txt
pip install "pytest==7.3.1"
pip install "pytest-retry==1.6.3"
pip install "pytest-asyncio==0.21.1"
pip install "pytest-cov==5.0.0"
pip install mypy
pip install "google-generativeai==0.3.2"
pip install "google-cloud-aiplatform==1.43.0"
pip install pyarrow
pip install "boto3==1.34.34"
pip install "aioboto3==12.3.0"
pip install langchain
pip install lunary==0.2.5
pip install "azure-identity==1.16.1"
pip install "langfuse==2.45.0"
pip install "logfire==0.29.0"
pip install numpydoc
pip install traceloop-sdk==0.21.1
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.54.0
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
pip install "respx==0.21.1"
pip install fastapi
pip install "gunicorn==21.2.0"
pip install "anyio==4.2.0"
pip install "aiodynamo==23.10.1"
pip install "asyncio==3.4.3"
pip install "apscheduler==3.10.4"
pip install "PyGithub==1.59.1"
pip install argon2-cffi
pip install "pytest-mock==3.12.0"
pip install python-multipart
pip install google-cloud-aiplatform
pip install prometheus-client==0.20.0
pip install "pydantic==2.7.1"
pip install "diskcache==5.6.1"
pip install "Pillow==10.3.0"
pip install "jsonschema==4.22.0"
- save_cache:
paths:
- ./venv
key: v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
- run:
name: Run prisma ./docker/entrypoint.sh
command: |
set +e
chmod +x docker/entrypoint.sh
./docker/entrypoint.sh
set -e
# Run pytest and generate JUnit XML report
- run:
name: Run tests
command: |
pwd
ls
python -m pytest -vv tests/local_testing --cov=litellm --cov-report=xml -x --junitxml=test-results/junit.xml --durations=5 -k "caching or cache"
no_output_timeout: 120m
- run:
name: Rename the coverage files
command: |
mv coverage.xml caching_coverage.xml
mv .coverage caching_coverage
# Store test results
- store_test_results:
path: test-results
- persist_to_workspace:
root: .
paths:
- caching_coverage.xml
- caching_coverage
auth_ui_unit_tests:
docker:
- image: cimg/python:3.11
@ -215,6 +413,105 @@ jobs:
paths:
- litellm_router_coverage.xml
- litellm_router_coverage
litellm_proxy_unit_testing: # Runs all tests with the "proxy", "key", "jwt" filenames
docker:
- image: cimg/python:3.11
auth:
username: ${DOCKERHUB_USERNAME}
password: ${DOCKERHUB_PASSWORD}
working_directory: ~/project
steps:
- checkout
- run:
name: Show git commit hash
command: |
echo "Git commit hash: $CIRCLE_SHA1"
- restore_cache:
keys:
- v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
- run:
name: Install Dependencies
command: |
python -m pip install --upgrade pip
python -m pip install -r .circleci/requirements.txt
pip install "pytest==7.3.1"
pip install "pytest-retry==1.6.3"
pip install "pytest-asyncio==0.21.1"
pip install "pytest-cov==5.0.0"
pip install mypy
pip install "google-generativeai==0.3.2"
pip install "google-cloud-aiplatform==1.43.0"
pip install pyarrow
pip install "boto3==1.34.34"
pip install "aioboto3==12.3.0"
pip install langchain
pip install lunary==0.2.5
pip install "azure-identity==1.16.1"
pip install "langfuse==2.45.0"
pip install "logfire==0.29.0"
pip install numpydoc
pip install traceloop-sdk==0.21.1
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.54.0
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
pip install "respx==0.21.1"
pip install fastapi
pip install "gunicorn==21.2.0"
pip install "anyio==4.2.0"
pip install "aiodynamo==23.10.1"
pip install "asyncio==3.4.3"
pip install "apscheduler==3.10.4"
pip install "PyGithub==1.59.1"
pip install argon2-cffi
pip install "pytest-mock==3.12.0"
pip install python-multipart
pip install google-cloud-aiplatform
pip install prometheus-client==0.20.0
pip install "pydantic==2.7.1"
pip install "diskcache==5.6.1"
pip install "Pillow==10.3.0"
pip install "jsonschema==4.22.0"
- save_cache:
paths:
- ./venv
key: v1-dependencies-{{ checksum ".circleci/requirements.txt" }}
- run:
name: Run prisma ./docker/entrypoint.sh
command: |
set +e
chmod +x docker/entrypoint.sh
./docker/entrypoint.sh
set -e
# Run pytest and generate JUnit XML report
- run:
name: Run tests
command: |
pwd
ls
python -m pytest tests/proxy_unit_tests --cov=litellm --cov-report=xml -vv -x -v --junitxml=test-results/junit.xml --durations=5
no_output_timeout: 120m
- run:
name: Rename the coverage files
command: |
mv coverage.xml litellm_proxy_unit_tests_coverage.xml
mv .coverage litellm_proxy_unit_tests_coverage
# Store test results
- store_test_results:
path: test-results
- persist_to_workspace:
root: .
paths:
- litellm_proxy_unit_tests_coverage.xml
- litellm_proxy_unit_tests_coverage
litellm_assistants_api_testing: # Runs all tests with the "assistants" keyword
docker:
- image: cimg/python:3.11
@ -814,7 +1111,7 @@ jobs:
python -m venv venv
. venv/bin/activate
pip install coverage
coverage combine llm_translation_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage
coverage combine llm_translation_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage
coverage xml
- codecov/upload:
file: ./coverage.xml
@ -1031,6 +1328,24 @@ workflows:
only:
- main
- /litellm_.*/
- langfuse_logging_unit_tests:
filters:
branches:
only:
- main
- /litellm_.*/
- caching_unit_tests:
filters:
branches:
only:
- main
- /litellm_.*/
- litellm_proxy_unit_testing:
filters:
branches:
only:
- main
- /litellm_.*/
- litellm_assistants_api_testing:
filters:
branches:
@ -1096,6 +1411,9 @@ workflows:
- llm_translation_testing
- logging_testing
- litellm_router_testing
- caching_unit_tests
- litellm_proxy_unit_testing
- langfuse_logging_unit_tests
- local_testing
- litellm_assistants_api_testing
- auth_ui_unit_tests
@ -1132,10 +1450,13 @@ workflows:
- llm_translation_testing
- logging_testing
- litellm_router_testing
- caching_unit_tests
- langfuse_logging_unit_tests
- litellm_assistants_api_testing
- auth_ui_unit_tests
- db_migration_disable_update_check
- e2e_ui_testing
- litellm_proxy_unit_testing
- installing_litellm_on_python
- proxy_logging_guardrails_model_info_tests
- proxy_pass_through_endpoint_tests

View file

@ -23,7 +23,7 @@ from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.utils import get_formatted_prompt
from litellm.utils import get_formatted_prompt, print_verbose
global_braintrust_http_handler = get_async_httpx_client(
llm_provider=httpxSpecialProvider.LoggingCallback
@ -229,6 +229,9 @@ class BraintrustLogger(CustomLogger):
request_data["metrics"] = metrics
try:
print_verbose(
f"global_braintrust_sync_http_handler.post: {global_braintrust_sync_http_handler.post}"
)
global_braintrust_sync_http_handler.post(
url=f"{self.api_base}/project_logs/{project_id}/insert",
json={"events": [request_data]},

View file

@ -3,18 +3,25 @@ import os
from litellm._logging import verbose_proxy_logger
LITELLM_SALT_KEY = os.getenv("LITELLM_SALT_KEY", None)
verbose_proxy_logger.debug(
"LITELLM_SALT_KEY is None using master_key to encrypt/decrypt secrets stored in DB"
)
def _get_salt_key():
from litellm.proxy.proxy_server import master_key
salt_key = os.getenv("LITELLM_SALT_KEY", None)
if salt_key is None:
verbose_proxy_logger.debug(
"LITELLM_SALT_KEY is None using master_key to encrypt/decrypt secrets stored in DB"
)
salt_key = master_key
return salt_key
def encrypt_value_helper(value: str):
from litellm.proxy.proxy_server import master_key
signing_key = LITELLM_SALT_KEY
if LITELLM_SALT_KEY is None:
signing_key = master_key
signing_key = _get_salt_key()
try:
if isinstance(value, str):
@ -35,9 +42,7 @@ def encrypt_value_helper(value: str):
def decrypt_value_helper(value: str):
from litellm.proxy.proxy_server import master_key
signing_key = LITELLM_SALT_KEY
if LITELLM_SALT_KEY is None:
signing_key = master_key
signing_key = _get_salt_key()
try:
if isinstance(value, str):

View file

@ -548,14 +548,16 @@ def test_anthropic_computer_tool_use():
model = "claude-3-5-sonnet-20241022"
messages = [{"role": "user", "content": "Save a picture of a cat to my desktop."}]
resp = completion(
model=model,
messages=messages,
tools=tools,
# headers={"anthropic-beta": "computer-use-2024-10-22"},
)
print(resp)
try:
resp = completion(
model=model,
messages=messages,
tools=tools,
# headers={"anthropic-beta": "computer-use-2024-10-22"},
)
print(resp)
except litellm.InternalServerError:
pass
@pytest.mark.parametrize(

View file

@ -26,6 +26,12 @@ def setup_and_teardown():
from litellm import Router
importlib.reload(litellm)
try:
if hasattr(litellm, "proxy") and hasattr(litellm.proxy, "proxy_server"):
importlib.reload(litellm.proxy.proxy_server)
except Exception as e:
print(f"Error reloading litellm.proxy.proxy_server: {e}")
import asyncio
loop = asyncio.get_event_loop_policy().new_event_loop()

View file

@ -131,7 +131,6 @@ def test_multiple_deployments_parallel():
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_cooldown_same_model_name(sync_mode):
litellm._turn_on_debug()
# users could have the same model with different api_base
# example
# azure/chatgpt, api_base: 1234

View file

@ -31,16 +31,15 @@ from litellm.llms.custom_httpx.http_handler import HTTPHandler
def test_braintrust_logging():
import litellm
litellm.set_verbose = True
http_client = HTTPHandler()
setattr(
litellm.integrations.braintrust_logging,
"global_braintrust_sync_http_handler",
http_client,
)
with patch.object(http_client, "post", new=MagicMock()) as mock_client:
with patch.object(
litellm.integrations.braintrust_logging.global_braintrust_sync_http_handler,
"post",
new=MagicMock(),
) as mock_client:
# set braintrust as a callback, litellm will send the data to braintrust
litellm.callbacks = ["braintrust"]
@ -50,4 +49,5 @@ def test_braintrust_logging():
messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
)
time.sleep(2)
mock_client.assert_called()

View file

@ -329,36 +329,6 @@ async def test_completion_predibase():
# test_completion_predibase()
def test_completion_claude():
litellm.set_verbose = True
litellm.cache = None
litellm.AnthropicTextConfig(max_tokens_to_sample=200, metadata={"user_id": "1224"})
messages = [
{
"role": "system",
"content": """You are an upbeat, enthusiastic personal fitness coach named Sam. Sam is passionate about helping clients get fit and lead healthier lifestyles. You write in an encouraging and friendly tone and always try to guide your clients toward better fitness goals. If the user asks you something unrelated to fitness, either bring the topic back to fitness, or say that you cannot answer.""",
},
{"content": user_message, "role": "user"},
]
try:
# test without max tokens
response = completion(
model="claude-3-5-haiku-20241022", messages=messages, request_timeout=10
)
# Add any assertions here to check response args
print(response)
print(response.usage)
print(response.usage.completion_tokens)
print(response["usage"]["completion_tokens"])
# print("new cost tracking")
except litellm.RateLimitError as e:
pass
except Exception as e:
if "overloaded_error" in str(e):
pass
pytest.fail(f"Error occurred: {e}")
# test_completion_claude()

View file

@ -15,7 +15,7 @@ from unittest.mock import Mock
import httpx
from litellm.proxy.proxy_server import app, initialize_pass_through_endpoints
from litellm.proxy.proxy_server import initialize_pass_through_endpoints
# Mock the async_client used in the pass_through_request function
@ -25,7 +25,8 @@ async def mock_request(*args, **kwargs):
return mock_response
def remove_rerank_route():
def remove_rerank_route(app):
for route in app.routes:
if route.path == "/v1/rerank" and "POST" in route.methods:
app.routes.remove(route)
@ -35,7 +36,11 @@ def remove_rerank_route():
@pytest.fixture
def client():
remove_rerank_route() # remove the native rerank route on the litellm proxy - since we're testing the pass through endpoints
from litellm.proxy.proxy_server import app
remove_rerank_route(
app=app
) # remove the native rerank route on the litellm proxy - since we're testing the pass through endpoints
return TestClient(app)
@ -145,8 +150,9 @@ async def test_pass_through_endpoint_rerank(client):
[(True, 0, 429), (True, 1, 200), (False, 0, 200)],
)
@pytest.mark.asyncio
async def test_pass_through_endpoint_rpm_limit(auth, expected_error_code, rpm_limit):
client = TestClient(app)
async def test_pass_through_endpoint_rpm_limit(
client, auth, expected_error_code, rpm_limit
):
import litellm
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.proxy_server import ProxyLogging, hash_token, user_api_key_cache
@ -214,9 +220,11 @@ async def test_pass_through_endpoint_rpm_limit(auth, expected_error_code, rpm_li
async def test_aaapass_through_endpoint_pass_through_keys_langfuse(
auth, expected_error_code, rpm_limit
):
from litellm.proxy.proxy_server import app
client = TestClient(app)
import litellm
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.proxy_server import ProxyLogging, hash_token, user_api_key_cache

View file

@ -149,7 +149,6 @@ def test_router_timeout_with_retries_anthropic_model(num_retries, expected_call_
"""
If request hits custom timeout, ensure it's retried.
"""
litellm._turn_on_debug()
from litellm.llms.custom_httpx.http_handler import HTTPHandler
import time

View file

@ -8,7 +8,7 @@ sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from typing import Dict, List, Optional
from unittest.mock import MagicMock
from unittest.mock import MagicMock, patch, AsyncMock
import pytest
from starlette.datastructures import URL
@ -157,7 +157,7 @@ def test_returned_user_api_key_auth(user_role, expected_role):
@pytest.mark.parametrize("key_ownership", ["user_key", "team_key"])
@pytest.mark.asyncio
async def test_user_personal_budgets(key_ownership):
async def test_aaauser_personal_budgets(key_ownership):
"""
Set a personal budget on a user
@ -169,6 +169,7 @@ async def test_user_personal_budgets(key_ownership):
from fastapi import Request
from starlette.datastructures import URL
import litellm
from litellm.proxy._types import LiteLLM_UserTable, UserAPIKeyAuth
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
@ -193,7 +194,7 @@ async def test_user_personal_budgets(key_ownership):
team_max_budget=100,
spend=20,
)
await asyncio.sleep(1)
user_obj = LiteLLM_UserTable(
user_id=_user_id, spend=11, max_budget=10, user_email=""
)
@ -207,6 +208,10 @@ async def test_user_personal_budgets(key_ownership):
request = Request(scope={"type": "http"})
request._url = URL(url="/chat/completions")
test_user_cache = getattr(litellm.proxy.proxy_server, "user_api_key_cache")
assert test_user_cache.get_cache(key=hash_token(user_key)) == valid_token
try:
await user_api_key_auth(request=request, api_key="Bearer " + user_key)

View file

@ -0,0 +1,13 @@
{
"type": "service_account",
"project_id": "adroit-crow-413218",
"private_key_id": "",
"private_key": "",
"client_email": "test-adroit-crow@adroit-crow-413218.iam.gserviceaccount.com",
"client_id": "104886546564708740969",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test-adroit-crow%40adroit-crow-413218.iam.gserviceaccount.com",
"universe_domain": "googleapis.com"
}

View file

@ -0,0 +1,12 @@
{"messages": [{"role": "system", "content": "Clippy is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "What's the capital of France?"}, {"role": "assistant", "content": "Paris, as if everyone doesn't know that already."}]}
{"messages": [{"role": "system", "content": "Clippy is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "Who wrote 'Romeo and Juliet'?"}, {"role": "assistant", "content": "Oh, just some guy named William Shakespeare. Ever heard of him?"}]}
{"messages": [{"role": "system", "content": "Clippy is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "How far is the Moon from Earth?"}, {"role": "assistant", "content": "Around 384,400 kilometers. Give or take a few, like that really matters."}]}
{"messages": [{"role": "system", "content": "Clippy is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "What's the capital of France?"}, {"role": "assistant", "content": "Paris, as if everyone doesn't know that already."}]}
{"messages": [{"role": "system", "content": "Clippy is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "Who wrote 'Romeo and Juliet'?"}, {"role": "assistant", "content": "Oh, just some guy named William Shakespeare. Ever heard of him?"}]}
{"messages": [{"role": "system", "content": "Clippy is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "How far is the Moon from Earth?"}, {"role": "assistant", "content": "Around 384,400 kilometers. Give or take a few, like that really matters."}]}
{"messages": [{"role": "system", "content": "Clippy is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "What's the capital of France?"}, {"role": "assistant", "content": "Paris, as if everyone doesn't know that already."}]}
{"messages": [{"role": "system", "content": "Clippy is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "Who wrote 'Romeo and Juliet'?"}, {"role": "assistant", "content": "Oh, just some guy named William Shakespeare. Ever heard of him?"}]}
{"messages": [{"role": "system", "content": "Clippy is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "How far is the Moon from Earth?"}, {"role": "assistant", "content": "Around 384,400 kilometers. Give or take a few, like that really matters."}]}
{"messages": [{"role": "system", "content": "Clippy is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "What's the capital of France?"}, {"role": "assistant", "content": "Paris, as if everyone doesn't know that already."}]}
{"messages": [{"role": "system", "content": "Clippy is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "Who wrote 'Romeo and Juliet'?"}, {"role": "assistant", "content": "Oh, just some guy named William Shakespeare. Ever heard of him?"}]}
{"messages": [{"role": "system", "content": "Clippy is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "How far is the Moon from Earth?"}, {"role": "assistant", "content": "Around 384,400 kilometers. Give or take a few, like that really matters."}]}

View file

@ -0,0 +1,2 @@
{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-3.5-turbo-0125", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 10}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-3.5-turbo-0125", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 10}}

View file

@ -0,0 +1,60 @@
# conftest.py
import importlib
import os
import sys
import pytest
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
@pytest.fixture(scope="function", autouse=True)
def setup_and_teardown():
"""
This fixture reloads litellm before every function. To speed up testing by removing callbacks being chained.
"""
curr_dir = os.getcwd() # Get the current working directory
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the project directory to the system path
import litellm
from litellm import Router
importlib.reload(litellm)
try:
if hasattr(litellm, "proxy") and hasattr(litellm.proxy, "proxy_server"):
importlib.reload(litellm.proxy.proxy_server)
except Exception as e:
print(f"Error reloading litellm.proxy.proxy_server: {e}")
import asyncio
loop = asyncio.get_event_loop_policy().new_event_loop()
asyncio.set_event_loop(loop)
print(litellm)
# from litellm import Router, completion, aembedding, acompletion, embedding
yield
# Teardown code (executes after the yield point)
loop.close() # Close the loop created earlier
asyncio.set_event_loop(None) # Remove the reference to the loop
def pytest_collection_modifyitems(config, items):
# Separate tests in 'test_amazing_proxy_custom_logger.py' and other tests
custom_logger_tests = [
item for item in items if "custom_logger" in item.parent.name
]
other_tests = [item for item in items if "custom_logger" not in item.parent.name]
# Sort tests based on their names
custom_logger_tests.sort(key=lambda x: x.name)
other_tests.sort(key=lambda x: x.name)
# Reorder the items list
items[:] = custom_logger_tests + other_tests

View file

@ -0,0 +1,60 @@
# conftest.py
import importlib
import os
import sys
import pytest
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
@pytest.fixture(scope="function", autouse=True)
def setup_and_teardown():
"""
This fixture reloads litellm before every function. To speed up testing by removing callbacks being chained.
"""
curr_dir = os.getcwd() # Get the current working directory
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the project directory to the system path
import litellm
from litellm import Router
importlib.reload(litellm)
try:
if hasattr(litellm, "proxy") and hasattr(litellm.proxy, "proxy_server"):
importlib.reload(litellm.proxy.proxy_server)
except Exception as e:
print(f"Error reloading litellm.proxy.proxy_server: {e}")
import asyncio
loop = asyncio.get_event_loop_policy().new_event_loop()
asyncio.set_event_loop(loop)
print(litellm)
# from litellm import Router, completion, aembedding, acompletion, embedding
yield
# Teardown code (executes after the yield point)
loop.close() # Close the loop created earlier
asyncio.set_event_loop(None) # Remove the reference to the loop
def pytest_collection_modifyitems(config, items):
# Separate tests in 'test_amazing_proxy_custom_logger.py' and other tests
custom_logger_tests = [
item for item in items if "custom_logger" in item.parent.name
]
other_tests = [item for item in items if "custom_logger" not in item.parent.name]
# Sort tests based on their names
custom_logger_tests.sort(key=lambda x: x.name)
other_tests.sort(key=lambda x: x.name)
# Reorder the items list
items[:] = custom_logger_tests + other_tests

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,30 @@
model_list:
- model_name: gpt-3.5-turbo-instruct
litellm_params:
model: ollama/zephyr
- model_name: gpt-4
litellm_params:
model: ollama/llama2
- model_name: gpt-3.5-turbo
litellm_params:
model: ollama/llama2
temperature: 0.1
max_tokens: 20
# request to gpt-4, response from ollama/llama2
# curl --location 'http://0.0.0.0:8000/chat/completions' \
# --header 'Content-Type: application/json' \
# --data ' {
# "model": "gpt-4",
# "messages": [
# {
# "role": "user",
# "content": "what llm are you"
# }
# ],
# }
# '
#
# {"id":"chatcmpl-27c85cf0-ab09-4bcf-8cb1-0ee950520743","choices":[{"finish_reason":"stop","index":0,"message":{"content":" Hello! I'm just an AI, I don't have personal experiences or emotions like humans do. However, I can help you with any questions or tasks you may have! Is there something specific you'd like to know or discuss?","role":"assistant","_logprobs":null}}],"created":1700094955.373751,"model":"ollama/llama2","object":"chat.completion","system_fingerprint":null,"usage":{"prompt_tokens":12,"completion_tokens":47,"total_tokens":59},"_response_ms":8028.017999999999}%

View file

@ -0,0 +1,15 @@
model_list:
- model_name: gpt-4-team1
litellm_params:
model: azure/chatgpt-v-2
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
api_version: "2023-05-15"
api_key: os.environ/AZURE_API_KEY
tpm: 20_000
- model_name: gpt-4-team2
litellm_params:
model: azure/gpt-4
api_key: os.environ/AZURE_API_KEY
api_base: https://openai-gpt-4-test-v-2.openai.azure.com/
tpm: 100_000

View file

@ -0,0 +1,7 @@
model_list:
- model_name: "openai-model"
litellm_params:
model: "gpt-3.5-turbo"
litellm_settings:
cache: True

View file

@ -0,0 +1,11 @@
model_list:
- model_name: "openai-model"
litellm_params:
model: "gpt-3.5-turbo"
litellm_settings:
cache: True
cache_params:
type: "redis"
supported_call_types: ["embedding", "aembedding"]
host: "os.environ/REDIS_HOST"

View file

@ -0,0 +1,48 @@
model_list:
################################################################################
# Azure
- model_name: gpt-4o-mini
litellm_params:
model: azure/gpt-4o-mini
api_base: https://amazin-prod.openai.azure.com
api_key: "os.environ/AZURE_GPT_4O"
deployment_id: gpt-4o-mini
- model_name: gpt-4o
litellm_params:
model: azure/gpt-4o
api_base: https://very-cool-prod.openai.azure.com
api_key: "os.environ/AZURE_GPT_4O"
deployment_id: gpt-4o
################################################################################
# Fireworks
- model_name: fireworks-llama-v3p1-405b-instruct
litellm_params:
model: fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct
api_key: "os.environ/FIREWORKS"
- model_name: fireworks-llama-v3p1-70b-instruct
litellm_params:
model: fireworks_ai/accounts/fireworks/models/llama-v3p1-70b-instruct
api_key: "os.environ/FIREWORKS"
general_settings:
alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+
litellm_settings: # module level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
success_callback: ["prometheus"]
service_callback: ["prometheus_system"]
drop_params: False # Raise an exception if the openai param being passed in isn't supported.
cache: false
default_internal_user_params:
user_role: os.environ/DEFAULT_USER_ROLE
success_callback: ["s3"]
s3_callback_params:
s3_bucket_name: logs-bucket-litellm # AWS Bucket Name for S3
s3_region_name: us-west-2 # AWS Region Name for S3
s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # us os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
s3_path: my-test-path # [OPTIONAL] set path in bucket you want to write logs to
s3_endpoint_url: https://s3.amazonaws.com # [OPTIONAL] S3 endpoint URL, if you want to use Backblaze/cloudflare s3 buckets
router_settings:
routing_strategy: simple-shuffle # "simple-shuffle" shown to result in highest throughput. https://docs.litellm.ai/docs/proxy/configs#load-balancing

View file

@ -0,0 +1,7 @@
model_list:
- model_name: gpt-3.5-turbo
litellm_settings:
drop_params: True
success_callback: ["langfuse"] # https://docs.litellm.ai/docs/observability/langfuse_integration

View file

@ -0,0 +1,28 @@
litellm_settings:
drop_params: True
# Model-specific settings
model_list: # use the same model_name for using the litellm router. LiteLLM will use the router between gpt-3.5-turbo
- model_name: gpt-3.5-turbo # litellm will
litellm_params:
model: gpt-3.5-turbo
api_key: sk-uj6F
tpm: 20000 # [OPTIONAL] REPLACE with your openai tpm
rpm: 3 # [OPTIONAL] REPLACE with your openai rpm
- model_name: gpt-3.5-turbo
litellm_params:
model: gpt-3.5-turbo
api_key: sk-Imn
tpm: 20000 # [OPTIONAL] REPLACE with your openai tpm
rpm: 3 # [OPTIONAL] REPLACE with your openai rpm
- model_name: gpt-3.5-turbo
litellm_params:
model: openrouter/gpt-3.5-turbo
- model_name: mistral-7b-instruct
litellm_params:
model: mistralai/mistral-7b-instruct
environment_variables:
REDIS_HOST: localhost
REDIS_PASSWORD:
REDIS_PORT:

View file

@ -0,0 +1,7 @@
model_list:
- model_name: gpt-3.5-turbo
litellm_params:
model: gpt-3.5-turbo
general_settings:
otel: True # OpenTelemetry Logger this logs OTEL data to your collector

View file

@ -0,0 +1,4 @@
model_list:
- model_name: gpt-3.5-turbo
litellm_params:
model: gpt-3.5-turbo

Binary file not shown.

View file

@ -0,0 +1,112 @@
text = """
Alexander the Great
This article is about the ancient king of Macedonia. For other uses, see Alexander the Great (disambiguation).
Alexander III of Macedon (Ancient Greek: Ἀλέξανδρος, romanized: Alexandros; 20/21 July 356 BC 10/11 June 323 BC), most commonly known as Alexander the Great,[c] was a king of the ancient Greek kingdom of Macedon.[d] He succeeded his father Philip II to the throne in 336 BC at the age of 20 and spent most of his ruling years conducting a lengthy military campaign throughout Western Asia, Central Asia, parts of South Asia, and Egypt. By the age of 30, he had created one of the largest empires in history, stretching from Greece to northwestern India.[1] He was undefeated in battle and is widely considered to be one of history's greatest and most successful military commanders.[2][3]
Until the age of 16, Alexander was tutored by Aristotle. In 335 BC, shortly after his assumption of kingship over Macedon, he campaigned in the Balkans and reasserted control over Thrace and parts of Illyria before marching on the city of Thebes, which was subsequently destroyed in battle. Alexander then led the League of Corinth, and used his authority to launch the pan-Hellenic project envisaged by his father, assuming leadership over all Greeks in their conquest of Persia.[4][5]
In 334 BC, he invaded the Achaemenid Persian Empire and began a series of campaigns that lasted for 10 years. Following his conquest of Asia Minor, Alexander broke the power of Achaemenid Persia in a series of decisive battles, including those at Issus and Gaugamela; he subsequently overthrew Darius III and conquered the Achaemenid Empire in its entirety.[e] After the fall of Persia, the Macedonian Empire held a vast swath of territory between the Adriatic Sea and the Indus River. Alexander endeavored to reach the "ends of the world and the Great Outer Sea" and invaded India in 326 BC, achieving an important victory over Porus, an ancient Indian king of present-day Punjab, at the Battle of the Hydaspes. Due to the demand of his homesick troops, he eventually turned back at the Beas River and later died in 323 BC in Babylon, the city of Mesopotamia that he had planned to establish as his empire's capital. Alexander's death left unexecuted an additional series of planned military and mercantile campaigns that would have begun with a Greek invasion of Arabia. In the years following his death, a series of civil wars broke out across the Macedonian Empire, eventually leading to its disintegration at the hands of the Diadochi.
With his death marking the start of the Hellenistic period, Alexander's legacy includes the cultural diffusion and syncretism that his conquests engendered, such as Greco-Buddhism and Hellenistic Judaism. He founded more than twenty cities, with the most prominent being the city of Alexandria in Egypt. Alexander's settlement of Greek colonists and the resulting spread of Greek culture led to the overwhelming dominance of Hellenistic civilization and influence as far east as the Indian subcontinent. The Hellenistic period developed through the Roman Empire into modern Western culture; the Greek language became the lingua franca of the region and was the predominant language of the Byzantine Empire up until its collapse in the mid-15th century AD. Alexander became legendary as a classical hero in the mould of Achilles, featuring prominently in the historical and mythical traditions of both Greek and non-Greek cultures. His military achievements and unprecedented enduring successes in battle made him the measure against which many later military leaders would compare themselves,[f] and his tactics remain a significant subject of study in military academies worldwide.[6] Legends of Alexander's exploits coalesced into the third-century Alexander Romance which, in the premodern period, went through over one hundred recensions, translations, and derivations and was translated into almost every European vernacular and every language of the Islamic world.[7] After the Bible, it was the most popular form of European literature.[8]
Early life
Lineage and childhood
Alexander III was born in Pella, the capital of the Kingdom of Macedon,[9] on the sixth day of the ancient Greek month of Hekatombaion, which probably corresponds to 20 July 356 BC (although the exact date is uncertain).[10][11] He was the son of the erstwhile king of Macedon, Philip II, and his fourth wife, Olympias (daughter of Neoptolemus I, king of Epirus).[12][g] Although Philip had seven or eight wives, Olympias was his principal wife for some time, likely because she gave birth to Alexander.[13]
Several legends surround Alexander's birth and childhood.[14] According to the ancient Greek biographer Plutarch, on the eve of the consummation of her marriage to Philip, Olympias dreamed that her womb was struck by a thunderbolt that caused a flame to spread "far and wide" before dying away. Sometime after the wedding, Philip is said to have seen himself, in a dream, securing his wife's womb with a seal engraved with a lion's image.[15] Plutarch offered a variety of interpretations for these dreams: that Olympias was pregnant before her marriage, indicated by the sealing of her womb; or that Alexander's father was Zeus. Ancient commentators were divided about whether the ambitious Olympias promulgated the story of Alexander's divine parentage, variously claiming that she had told Alexander, or that she dismissed the suggestion as impious.[15]
On the day Alexander was born, Philip was preparing a siege on the city of Potidea on the peninsula of Chalcidice. That same day, Philip received news that his general Parmenion had defeated the combined Illyrian and Paeonian armies and that his horses had won at the Olympic Games. It was also said that on this day, the Temple of Artemis in Ephesus, one of the Seven Wonders of the World, burnt down. This led Hegesias of Magnesia to say that it had burnt down because Artemis was away, attending the birth of Alexander.[16] Such legends may have emerged when Alexander was king, and possibly at his instigation, to show that he was superhuman and destined for greatness from conception.[14]
In his early years, Alexander was raised by a nurse, Lanike, sister of Alexander's future general Cleitus the Black. Later in his childhood, Alexander was tutored by the strict Leonidas, a relative of his mother, and by Lysimachus of Acarnania.[17] Alexander was raised in the manner of noble Macedonian youths, learning to read, play the lyre, ride, fight, and hunt.[18] When Alexander was ten years old, a trader from Thessaly brought Philip a horse, which he offered to sell for thirteen talents. The horse refused to be mounted, and Philip ordered it away. Alexander, however, detecting the horse's fear of its own shadow, asked to tame the horse, which he eventually managed.[14] Plutarch stated that Philip, overjoyed at this display of courage and ambition, kissed his son tearfully, declaring: "My boy, you must find a kingdom big enough for your ambitions. Macedon is too small for you", and bought the horse for him.[19] Alexander named it Bucephalas, meaning "ox-head". Bucephalas carried Alexander as far as India. When the animal died (because of old age, according to Plutarch, at age 30), Alexander named a city after him, Bucephala.[20]
Education
When Alexander was 13, Philip began to search for a tutor, and considered such academics as Isocrates and Speusippus, the latter offering to resign from his stewardship of the Academy to take up the post. In the end, Philip chose Aristotle and provided the Temple of the Nymphs at Mieza as a classroom. In return for teaching Alexander, Philip agreed to rebuild Aristotle's hometown of Stageira, which Philip had razed, and to repopulate it by buying and freeing the ex-citizens who were slaves, or pardoning those who were in exile.[21]
Mieza was like a boarding school for Alexander and the children of Macedonian nobles, such as Ptolemy, Hephaistion, and Cassander. Many of these students would become his friends and future generals, and are often known as the "Companions". Aristotle taught Alexander and his companions about medicine, philosophy, morals, religion, logic, and art. Under Aristotle's tutelage, Alexander developed a passion for the works of Homer, and in particular the Iliad; Aristotle gave him an annotated copy, which Alexander later carried on his campaigns.[22] Alexander was able to quote Euripides from memory.[23]
During his youth, Alexander was also acquainted with Persian exiles at the Macedonian court, who received the protection of Philip II for several years as they opposed Artaxerxes III.[24][25][26] Among them were Artabazos II and his daughter Barsine, possible future mistress of Alexander, who resided at the Macedonian court from 352 to 342 BC, as well as Amminapes, future satrap of Alexander, or a Persian nobleman named Sisines.[24][27][28][29] This gave the Macedonian court a good knowledge of Persian issues, and may even have influenced some of the innovations in the management of the Macedonian state.[27]
Suda writes that Anaximenes of Lampsacus was one of Alexander's teachers, and that Anaximenes also accompanied Alexander on his campaigns.[30]
Heir of Philip II
Regency and ascent of Macedon
Main articles: Philip II of Macedon and Rise of Macedon
Further information: History of Macedonia (ancient kingdom)
At the age of 16, Alexander's education under Aristotle ended. Philip II had waged war against the Thracians to the north, which left Alexander in charge as regent and heir apparent.[14] During Philip's absence, the Thracian tribe of Maedi revolted against Macedonia. Alexander responded quickly and drove them from their territory. The territory was colonized, and a city, named Alexandropolis, was founded.[31]
Upon Philip's return, Alexander was dispatched with a small force to subdue the revolts in southern Thrace. Campaigning against the Greek city of Perinthus, Alexander reportedly saved his father's life. Meanwhile, the city of Amphissa began to work lands that were sacred to Apollo near Delphi, a sacrilege that gave Philip the opportunity to further intervene in Greek affairs. While Philip was occupied in Thrace, Alexander was ordered to muster an army for a campaign in southern Greece. Concerned that other Greek states might intervene, Alexander made it look as though he was preparing to attack Illyria instead. During this turmoil, the Illyrians invaded Macedonia, only to be repelled by Alexander.[32]
Philip and his army joined his son in 338 BC, and they marched south through Thermopylae, taking it after stubborn resistance from its Theban garrison. They went on to occupy the city of Elatea, only a few days' march from both Athens and Thebes. The Athenians, led by Demosthenes, voted to seek alliance with Thebes against Macedonia. Both Athens and Philip sent embassies to win Thebes's favour, but Athens won the contest.[33] Philip marched on Amphissa (ostensibly acting on the request of the Amphictyonic League), capturing the mercenaries sent there by Demosthenes and accepting the city's surrender. Philip then returned to Elatea, sending a final offer of peace to Athens and Thebes, who both rejected it.[34]
As Philip marched south, his opponents blocked him near Chaeronea, Boeotia. During the ensuing Battle of Chaeronea, Philip commanded the right wing and Alexander the left, accompanied by a group of Philip's trusted generals. According to the ancient sources, the two sides fought bitterly for some time. Philip deliberately commanded his troops to retreat, counting on the untested Athenian hoplites to follow, thus breaking their line. Alexander was the first to break the Theban lines, followed by Philip's generals. Having damaged the enemy's cohesion, Philip ordered his troops to press forward and quickly routed them. With the Athenians lost, the Thebans were surrounded. Left to fight alone, they were defeated.[35]
After the victory at Chaeronea, Philip and Alexander marched unopposed into the Peloponnese, welcomed by all cities; however, when they reached Sparta, they were refused, but did not resort to war.[36] At Corinth, Philip established a "Hellenic Alliance" (modelled on the old anti-Persian alliance of the Greco-Persian Wars), which included most Greek city-states except Sparta. Philip was then named Hegemon (often translated as "Supreme Commander") of this league (known by modern scholars as the League of Corinth), and announced his plans to attack the Persian Empire.[37][38]
Exile and return
When Philip returned to Pella, he fell in love with and married Cleopatra Eurydice in 338 BC,[39] the niece of his general Attalus.[40] The marriage made Alexander's position as heir less secure, since any son of Cleopatra Eurydice would be a fully Macedonian heir, while Alexander was only half-Macedonian.[41] During the wedding banquet, a drunken Attalus publicly prayed to the gods that the union would produce a legitimate heir.[40]
At the wedding of Cleopatra, whom Philip fell in love with and married, she being much too young for him, her uncle Attalus in his drink desired the Macedonians would implore the gods to give them a lawful successor to the kingdom by his niece. This so irritated Alexander, that throwing one of the cups at his head, "You villain," said he, "what, am I then a bastard?" Then Philip, taking Attalus's part, rose up and would have run his son through; but by good fortune for them both, either his over-hasty rage, or the wine he had drunk, made his foot slip, so that he fell down on the floor. At which Alexander reproachfully insulted over him: "See there," said he, "the man who makes preparations to pass out of Europe into Asia, overturned in passing from one seat to another."
Plutarch, describing the feud at Philip's wedding.[42]none
In 337 BC, Alexander fled Macedon with his mother, dropping her off with her brother, King Alexander I of Epirus in Dodona, capital of the Molossians.[43] He continued to Illyria,[43] where he sought refuge with one or more Illyrian kings, perhaps with Glaucias, and was treated as a guest, despite having defeated them in battle a few years before.[44] However, it appears Philip never intended to disown his politically and militarily trained son.[43] Accordingly, Alexander returned to Macedon after six months due to the efforts of a family friend, Demaratus, who mediated between the two parties.[45]
In the following year, the Persian satrap (governor) of Caria, Pixodarus, offered his eldest daughter to Alexander's half-brother, Philip Arrhidaeus.[43] Olympias and several of Alexander's friends suggested this showed Philip intended to make Arrhidaeus his heir.[43] Alexander reacted by sending an actor, Thessalus of Corinth, to tell Pixodarus that he should not offer his daughter's hand to an illegitimate son, but instead to Alexander. When Philip heard of this, he stopped the negotiations and scolded Alexander for wishing to marry the daughter of a Carian, explaining that he wanted a better bride for him.[43] Philip exiled four of Alexander's friends, Harpalus, Nearchus, Ptolemy and Erigyius, and had the Corinthians bring Thessalus to him in chains.[46]
King of Macedon
Accession
Further information: Government of Macedonia (ancient kingdom)
In summer 336 BC, while at Aegae attending the wedding of his daughter Cleopatra to Olympias's brother, Alexander I of Epirus, Philip was assassinated by the captain of his bodyguards, Pausanias.[h] As Pausanias tried to escape, he tripped over a vine and was killed by his pursuers, including two of Alexander's companions, Perdiccas and Leonnatus. Alexander was proclaimed king on the spot by the nobles and army at the age of 20.[47][48][49]
Consolidation of power
Alexander began his reign by eliminating potential rivals to the throne. He had his cousin, the former Amyntas IV, executed.[51] He also had two Macedonian princes from the region of Lyncestis killed for having been involved in his father's assassination, but spared a third, Alexander Lyncestes. Olympias had Cleopatra Eurydice, and Europa, her daughter by Philip, burned alive. When Alexander learned about this, he was furious. Alexander also ordered the murder of Attalus,[51] who was in command of the advance guard of the army in Asia Minor and Cleopatra's uncle.[52]
Attalus was at that time corresponding with Demosthenes, regarding the possibility of defecting to Athens. Attalus also had severely insulted Alexander, and following Cleopatra's murder, Alexander may have considered him too dangerous to be left alive.[52] Alexander spared Arrhidaeus, who was by all accounts mentally disabled, possibly as a result of poisoning by Olympias.[47][49][53]
News of Philip's death roused many states into revolt, including Thebes, Athens, Thessaly, and the Thracian tribes north of Macedon. When news of the revolts reached Alexander, he responded quickly. Though advised to use diplomacy, Alexander mustered 3,000 Macedonian cavalry and rode south towards Thessaly. He found the Thessalian army occupying the pass between Mount Olympus and Mount Ossa, and ordered his men to ride over Mount Ossa. When the Thessalians awoke the next day, they found Alexander in their rear and promptly surrendered, adding their cavalry to Alexander's force. He then continued south towards the Peloponnese.[54]
Alexander stopped at Thermopylae, where he was recognized as the leader of the Amphictyonic League before heading south to Corinth. Athens sued for peace and Alexander pardoned the rebels. The famous encounter between Alexander and Diogenes the Cynic occurred during Alexander's stay in Corinth. When Alexander asked Diogenes what he could do for him, the philosopher disdainfully asked Alexander to stand a little to the side, as he was blocking the sunlight.[55] This reply apparently delighted Alexander, who is reported to have said "But verily, if I were not Alexander, I would like to be Diogenes."[56] At Corinth, Alexander took the title of Hegemon ("leader") and, like Philip, was appointed commander for the coming war against Persia. He also received news of a Thracian uprising.[57]
Balkan campaign
Main article: Alexander's Balkan campaign
Before crossing to Asia, Alexander wanted to safeguard his northern borders. In the spring of 335 BC, he advanced to suppress several revolts. Starting from Amphipolis, he travelled east into the country of the "Independent Thracians"; and at Mount Haemus, the Macedonian army attacked and defeated the Thracian forces manning the heights.[58] The Macedonians marched into the country of the Triballi, and defeated their army near the Lyginus river[59] (a tributary of the Danube). Alexander then marched for three days to the Danube, encountering the Getae tribe on the opposite shore. Crossing the river at night, he surprised them and forced their army to retreat after the first cavalry skirmish.[60]
News then reached Alexander that the Illyrian chieftain Cleitus and King Glaukias of the Taulantii were in open revolt against his authority. Marching west into Illyria, Alexander defeated each in turn, forcing the two rulers to flee with their troops. With these victories, he secured his northern frontier.[61]
Destruction of Thebes
While Alexander campaigned north, the Thebans and Athenians rebelled once again. Alexander immediately headed south.[62] While the other cities again hesitated, Thebes decided to fight. The Theban resistance was ineffective, and Alexander razed the city and divided its territory between the other Boeotian cities. The end of Thebes cowed Athens, leaving all of Greece temporarily at peace.[62] Alexander then set out on his Asian campaign, leaving Antipater as regent.[63]
Conquest of the Achaemenid Persian Empire
Main articles: Wars of Alexander the Great and Chronology of the expedition of Alexander the Great into Asia
Asia Minor
Further information: Battle of the Granicus, Siege of Halicarnassus, and Siege of Miletus
After his victory at the Battle of Chaeronea (338 BC), Philip II began the work of establishing himself as hēgemṓn (Greek: ἡγεμών) of a league which according to Diodorus was to wage a campaign against the Persians for the sundry grievances Greece suffered in 480 and free the Greek cities of the western coast and islands from Achaemenid rule. In 336 he sent Parmenion, Amyntas, Andromenes, Attalus, and an army of 10,000 men into Anatolia to make preparations for an invasion.[64][65] At first, all went well. The Greek cities on the western coast of Anatolia revolted until the news arrived that Philip had been murdered and had been succeeded by his young son Alexander. The Macedonians were demoralized by Philip's death and were subsequently defeated near Magnesia by the Achaemenids under the command of the mercenary Memnon of Rhodes.[64][65]
Taking over the invasion project of Philip II, Alexander's army crossed the Hellespont in 334 BC with approximately 48,100 soldiers, 6,100 cavalry and a fleet of 120 ships with crews numbering 38,000,[62] drawn from Macedon and various Greek city-states, mercenaries, and feudally raised soldiers from Thrace, Paionia, and Illyria.[66][i] He showed his intent to conquer the entirety of the Persian Empire by throwing a spear into Asian soil and saying he accepted Asia as a gift from the gods. This also showed Alexander's eagerness to fight, in contrast to his father's preference for diplomacy.[62]
After an initial victory against Persian forces at the Battle of the Granicus, Alexander accepted the surrender of the Persian provincial capital and treasury of Sardis; he then proceeded along the Ionian coast, granting autonomy and democracy to the cities. Miletus, held by Achaemenid forces, required a delicate siege operation, with Persian naval forces nearby. Further south, at Halicarnassus, in Caria, Alexander successfully waged his first large-scale siege, eventually forcing his opponents, the mercenary captain Memnon of Rhodes and the Persian satrap of Caria, Orontobates, to withdraw by sea.[67] Alexander left the government of Caria to a member of the Hecatomnid dynasty, Ada, who adopted Alexander.[68]
From Halicarnassus, Alexander proceeded into mountainous Lycia and the Pamphylian plain, asserting control over all coastal cities to deny the Persians naval bases. From Pamphylia onwards the coast held no major ports and Alexander moved inland. At Termessos, Alexander humbled but did not storm the Pisidian city.[69] At the ancient Phrygian capital of Gordium, Alexander "undid" the hitherto unsolvable Gordian Knot, a feat said to await the future "king of Asia".[70] According to the story, Alexander proclaimed that it did not matter how the knot was undone and hacked it apart with his sword.[71]
The Levant and Syria
Further information: Battle of Issus and Siege of Tyre (332 BC)
In spring 333 BC, Alexander crossed the Taurus into Cilicia. After a long pause due to an illness, he marched on towards Syria. Though outmanoeuvered by Darius's significantly larger army, he marched back to Cilicia, where he defeated Darius at Issus. Darius fled the battle, causing his army to collapse, and left behind his wife, his two daughters, his mother Sisygambis, and a fabulous treasure.[72] He offered a peace treaty that included the lands he had already lost, and a ransom of 10,000 talents for his family. Alexander replied that since he was now king of Asia, it was he alone who decided territorial divisions.[73] Alexander proceeded to take possession of Syria, and most of the coast of the Levant.[68] In the following year, 332 BC, he was forced to attack Tyre, which he captured after a long and difficult siege.[74][75] The men of military age were massacred and the women and children sold into slavery.[76]
Egypt
Further information: Siege of Gaza (332 BCE)
When Alexander destroyed Tyre, most of the towns on the route to Egypt quickly capitulated. However, Alexander was met with resistance at Gaza. The stronghold was heavily fortified and built on a hill, requiring a siege. When "his engineers pointed out to him that because of the height of the mound it would be impossible... this encouraged Alexander all the more to make the attempt".[77] After three unsuccessful assaults, the stronghold fell, but not before Alexander had received a serious shoulder wound. As in Tyre, men of military age were put to the sword and the women and children were sold into slavery.[78]
"""

View file

@ -0,0 +1,104 @@
============================= test session starts ==============================
platform darwin -- Python 3.11.4, pytest-8.3.2, pluggy-1.5.0 -- /Users/krrishdholakia/Documents/litellm/myenv/bin/python3.11
cachedir: .pytest_cache
rootdir: /Users/krrishdholakia/Documents/litellm
configfile: pyproject.toml
plugins: asyncio-0.23.8, respx-0.21.1, anyio-4.6.0
asyncio: mode=Mode.STRICT
collecting ... collected 1 item
test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307] <module 'litellm' from '/Users/krrishdholakia/Documents/litellm/litellm/__init__.py'>
Request to litellm:
litellm.completion(model='claude-3-haiku-20240307', messages=[{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}], tools=[{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], tool_choice='auto')
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}}
optional_params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}}
SENT optional_params: {'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], 'tool_choice': {'type': 'auto'}, 'max_tokens': 4096}
tool: {'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}

POST Request Sent from LiteLLM:
curl -X POST \
https://api.anthropic.com/v1/messages \
-H 'accept: *****' -H 'anthropic-version: *****' -H 'content-type: *****' -H 'x-api-key: sk-ant-api03-bJf1M8qp-JDptRcZRE5ve5efAfSIaL5u-SZ9vItIkvuFcV5cUsd********************************************' -H 'anthropic-beta: *****' \
-d '{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}]}], 'tools': [{'name': 'get_current_weather', 'description': 'Get the current weather in a given location', 'input_schema': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}], 'tool_choice': {'type': 'auto'}, 'max_tokens': 4096, 'model': 'claude-3-haiku-20240307'}'

_is_function_call: False
RAW RESPONSE:
{"id":"msg_01HRugqzL4WmcxMmbvDheTph","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"Okay, let's check the current weather in those three cities:"},{"type":"tool_use","id":"toolu_016U6G3kpxjHSiJLwVCrrScz","name":"get_current_weather","input":{"location":"San Francisco","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":379,"output_tokens":87}}
raw model_response: {"id":"msg_01HRugqzL4WmcxMmbvDheTph","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"Okay, let's check the current weather in those three cities:"},{"type":"tool_use","id":"toolu_016U6G3kpxjHSiJLwVCrrScz","name":"get_current_weather","input":{"location":"San Francisco","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":379,"output_tokens":87}}
Logging Details LiteLLM-Success Call: Cache_hit=None
Looking up model=claude-3-haiku-20240307 in model_cost_map
Looking up model=claude-3-haiku-20240307 in model_cost_map
Response
ModelResponse(id='chatcmpl-7222f6c2-962a-4776-8639-576723466cb7', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None))], created=1727897483, model='claude-3-haiku-20240307', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=87, prompt_tokens=379, total_tokens=466, completion_tokens_details=None))
length of tool calls 1
Expecting there to be 3 tool calls
tool_calls: [ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')]
Response message
Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None)
messages: [{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}, Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None), {'tool_call_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}]
Request to litellm:
litellm.completion(model='claude-3-haiku-20240307', messages=[{'role': 'user', 'content': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}, Message(content="Okay, let's check the current weather in those three cities:", role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "San Francisco", "unit": "celsius"}', name='get_current_weather'), id='toolu_016U6G3kpxjHSiJLwVCrrScz', type='function')], function_call=None), {'tool_call_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}], temperature=0.2, seed=22, drop_params=True)
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}]}
optional_params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}]}
SENT optional_params: {'temperature': 0.2, 'tools': [{'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}], 'max_tokens': 4096}
tool: {'type': 'function', 'function': {'name': 'dummy-tool', 'description': '', 'parameters': {'type': 'object', 'properties': {}}}}

POST Request Sent from LiteLLM:
curl -X POST \
https://api.anthropic.com/v1/messages \
-H 'accept: *****' -H 'anthropic-version: *****' -H 'content-type: *****' -H 'x-api-key: sk-ant-api03-bJf1M8qp-JDptRcZRE5ve5efAfSIaL5u-SZ9vItIkvuFcV5cUsd********************************************' -H 'anthropic-beta: *****' \
-d '{'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}]}, {'role': 'assistant', 'content': [{'type': 'tool_use', 'id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'name': 'get_current_weather', 'input': {'location': 'San Francisco', 'unit': 'celsius'}}]}, {'role': 'user', 'content': [{'type': 'tool_result', 'tool_use_id': 'toolu_016U6G3kpxjHSiJLwVCrrScz', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}'}]}], 'temperature': 0.2, 'tools': [{'name': 'dummy-tool', 'description': '', 'input_schema': {'type': 'object', 'properties': {}}}], 'max_tokens': 4096, 'model': 'claude-3-haiku-20240307'}'

_is_function_call: False
RAW RESPONSE:
{"id":"msg_01Wp8NVScugz6yAGsmB5trpZ","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"The current weather in San Francisco is 72°F (22°C)."},{"type":"tool_use","id":"toolu_01HTXEYDX4MspM76STtJqs1n","name":"get_current_weather","input":{"location":"Tokyo","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":426,"output_tokens":90}}
raw model_response: {"id":"msg_01Wp8NVScugz6yAGsmB5trpZ","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[{"type":"text","text":"The current weather in San Francisco is 72°F (22°C)."},{"type":"tool_use","id":"toolu_01HTXEYDX4MspM76STtJqs1n","name":"get_current_weather","input":{"location":"Tokyo","unit":"celsius"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":426,"output_tokens":90}}
Logging Details LiteLLM-Success Call: Cache_hit=None
Looking up model=claude-3-haiku-20240307 in model_cost_map
Looking up model=claude-3-haiku-20240307 in model_cost_map
second response
ModelResponse(id='chatcmpl-c4ed5c25-ba7c-49e5-a6be-5720ab25fff0', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content='The current weather in San Francisco is 72°F (22°C).', role='assistant', tool_calls=[ChatCompletionMessageToolCall(index=1, function=Function(arguments='{"location": "Tokyo", "unit": "celsius"}', name='get_current_weather'), id='toolu_01HTXEYDX4MspM76STtJqs1n', type='function')], function_call=None))], created=1727897484, model='claude-3-haiku-20240307', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=90, prompt_tokens=426, total_tokens=516, completion_tokens_details=None))
PASSED
=============================== warnings summary ===============================
../../myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284
/Users/krrishdholakia/Documents/litellm/myenv/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning)
../../litellm/utils.py:17
/Users/krrishdholakia/Documents/litellm/litellm/utils.py:17: DeprecationWarning: 'imghdr' is deprecated and slated for removal in Python 3.13
import imghdr
../../litellm/utils.py:124
/Users/krrishdholakia/Documents/litellm/litellm/utils.py:124: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:
test_function_calling.py:56
/Users/krrishdholakia/Documents/litellm/tests/local_testing/test_function_calling.py:56: PytestUnknownMarkWarning: Unknown pytest.mark.flaky - is this a typo? You can register custom marks to avoid this warning - for details, see https://docs.pytest.org/en/stable/how-to/mark.html
@pytest.mark.flaky(retries=3, delay=1)
tests/local_testing/test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307]
tests/local_testing/test_function_calling.py::test_aaparallel_function_call[claude-3-haiku-20240307]
/Users/krrishdholakia/Documents/litellm/myenv/lib/python3.11/site-packages/httpx/_content.py:202: DeprecationWarning: Use 'content=<...>' to upload raw bytes/text content.
warnings.warn(message, DeprecationWarning)
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
======================== 1 passed, 6 warnings in 1.89s =========================

View file

@ -0,0 +1,733 @@
system_message_short = {
"message": {
"role": "system",
"content": "You are a bot.",
},
"count": 12,
}
system_message = {
"message": {
"role": "system",
"content": "You are a helpful, pattern-following assistant that translates corporate jargon into plain English.",
},
"count": 25,
}
system_message_long = {
"message": {
"role": "system",
"content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.",
},
"count": 31,
}
system_message_unicode = {
"message": {
"role": "system",
"content": "á",
},
"count": 8,
}
system_message_with_name = {
"message": {
"role": "system",
"name": "example_user",
"content": "New synergies will help drive top-line growth.",
},
"count": 20,
}
user_message = {
"message": {
"role": "user",
"content": "Hello, how are you?",
},
"count": 13,
}
user_message_unicode = {
"message": {
"role": "user",
"content": "á",
},
"count": 8,
}
user_message_perf = {
"message": {
"role": "user",
"content": "What happens in a performance review?",
},
"count": 14,
}
assistant_message_perf = {
"message": {
"role": "assistant",
"content": "During the performance review at Contoso Electronics, the supervisor will discuss the employee's performance over the past year and provide feedback on areas for improvement. They will also provide an opportunity for the employee to discuss their goals and objectives for the upcoming year. The review is a two-way dialogue between managers and employees, and employees will receive a written summary of their performance review which will include a rating of their performance, feedback, and goals and objectives for the upcoming year [employee_handbook-3.pdf].",
},
"count": 106,
}
assistant_message_perf_short = {
"message": {
"role": "assistant",
"content": "The supervisor will discuss the employee's performance and provide feedback on areas for improvement. They will also provide an opportunity for the employee to discuss their goals and objectives for the upcoming year. The review is a two-way dialogue between managers and employees, and employees will receive a written summary of their performance review which will include a rating of their performance, feedback, and goals for the upcoming year [employee_handbook-3.pdf].",
},
"count": 91,
}
user_message_dresscode = {
"message": {
"role": "user",
"content": "Is there a dress code?",
},
"count": 13,
}
assistant_message_dresscode = {
"message": {
"role": "assistant",
"content": "Yes, there is a dress code at Contoso Electronics. Look sharp! [employee_handbook-1.pdf]",
},
"count": 30,
}
user_message_pm = {
"message": {
"role": "user",
"content": "What does a Product Manager do?",
},
"count": 14,
}
text_and_image_message = {
"message": {
"role": "user",
"content": [
{"type": "text", "text": "Describe this picture:"},
{
"type": "image_url",
"image_url": {
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z/C/HgAGgwJ/lK3Q6wAAAABJRU5ErkJggg==",
"detail": "high",
},
},
],
},
"count": 266,
}
search_sources_toolchoice_auto = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "search_sources",
"description": "Retrieve sources from the Azure AI Search index",
"parameters": {
"type": "object",
"properties": {
"search_query": {
"type": "string",
"description": "Query string to retrieve documents from azure search eg: 'Health care plan'",
}
},
"required": ["search_query"],
},
},
}
],
"tool_choice": "auto",
"count": 66,
}
search_sources_toolchoice_none = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "search_sources",
"description": "Retrieve sources from the Azure AI Search index",
"parameters": {
"type": "object",
"properties": {
"search_query": {
"type": "string",
"description": "Query string to retrieve documents from azure search eg: 'Health care plan'",
}
},
"required": ["search_query"],
},
},
}
],
"tool_choice": "none",
"count": 67,
}
search_sources_toolchoice_name = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "search_sources",
"description": "Retrieve sources from the Azure AI Search index",
"parameters": {
"type": "object",
"properties": {
"search_query": {
"type": "string",
"description": "Query string to retrieve documents from azure search eg: 'Health care plan'",
}
},
"required": ["search_query"],
},
},
}
],
"tool_choice": {"type": "function", "function": {"name": "search_sources"}},
"count": 75,
}
integer_enum = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "data_demonstration",
"description": "This is the main function description",
"parameters": {
"type": "object",
"properties": {
"integer_enum": {"type": "integer", "enum": [-1, 1]}
},
},
},
}
],
"tool_choice": "none",
"count": 54,
}
integer_enum_tool_choice_name = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "data_demonstration",
"description": "This is the main function description",
"parameters": {
"type": "object",
"properties": {
"integer_enum": {"type": "integer", "enum": [-1, 1]}
},
},
},
}
],
"tool_choice": {
"type": "function",
"function": {"name": "data_demonstration"},
}, # 4 tokens for "data_demonstration"
"count": 64,
}
no_parameters = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "search_sources",
"description": "Retrieve sources from the Azure AI Search index",
},
}
],
"tool_choice": "auto",
"count": 42,
}
no_parameters_tool_choice_name = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "search_sources",
"description": "Retrieve sources from the Azure AI Search index",
},
}
],
"tool_choice": {
"type": "function",
"function": {"name": "search_sources"},
}, # 2 tokens for "search_sources"
"count": 51,
}
no_parameter_description_or_required = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "search_sources",
"description": "Retrieve sources from the Azure AI Search index",
"parameters": {
"type": "object",
"properties": {"search_query": {"type": "string"}},
},
},
}
],
"tool_choice": "auto",
"count": 49,
}
no_parameter_description = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "search_sources",
"description": "Retrieve sources from the Azure AI Search index",
"parameters": {
"type": "object",
"properties": {"search_query": {"type": "string"}},
"required": ["search_query"],
},
},
}
],
"tool_choice": "auto",
"count": 49,
}
string_enum = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "summarize_order",
"description": "Summarize the customer order request",
"parameters": {
"type": "object",
"properties": {
"product_name": {
"type": "string",
"description": "Product name ordered by customer",
},
"quantity": {
"type": "integer",
"description": "Quantity ordered by customer",
},
"unit": {
"type": "string",
"enum": ["meals", "days"],
"description": "unit of measurement of the customer order",
},
},
"required": ["product_name", "quantity", "unit"],
},
},
}
],
"tool_choice": "none",
"count": 86,
}
inner_object = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "data_demonstration",
"description": "This is the main function description",
"parameters": {
"type": "object",
"properties": {
"object_1": {
"type": "object",
"description": "The object data type as a property",
"properties": {
"string1": {"type": "string"},
},
}
},
"required": ["object_1"],
},
},
}
],
"tool_choice": "none",
"count": 65, # counted 67, over by 2
}
"""
namespace functions {
// This is the main function description
type data_demonstration = (_: {
// The object data type as a property
object_1: {
string1?: string,
},
}) => any;
} // namespace functions
"""
inner_object_with_enum_only = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "data_demonstration",
"description": "This is the main function description",
"parameters": {
"type": "object",
"properties": {
"object_1": {
"type": "object",
"description": "The object data type as a property",
"properties": {
"string_2a": {
"type": "string",
"enum": ["Happy", "Sad"],
}
},
}
},
"required": ["object_1"],
},
},
}
],
"tool_choice": "none",
"count": 73, # counted 74, over by 1
}
"""
namespace functions {
// This is the main function description
type data_demonstration = (_: {
// The object data type as a property
object_1: {
string_2a?: "Happy" | "Sad",
},
}) => any;
} // namespace functions
"""
inner_object_with_enum = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "data_demonstration",
"description": "This is the main function description",
"parameters": {
"type": "object",
"properties": {
"object_1": {
"type": "object",
"description": "The object data type as a property",
"properties": {
"string_2a": {
"type": "string",
"enum": ["Happy", "Sad"],
},
"string_2b": {
"type": "string",
"description": "Description in a second object is lost",
},
},
}
},
"required": ["object_1"],
},
},
}
],
"tool_choice": "none",
"count": 89, # counted 92, over by 3
}
"""
namespace functions {
// This is the main function description
type data_demonstration = (_: {
// The object data type as a property
object_1: {
string_2a?: "Happy" | "Sad",
// Description in a second object is lost
string_2b?: string,
},
}) => any;
} // namespace functions
"""
inner_object_and_string = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "data_demonstration",
"description": "This is the main function description",
"parameters": {
"type": "object",
"properties": {
"object_1": {
"type": "object",
"description": "The object data type as a property",
"properties": {
"string_2a": {
"type": "string",
"enum": ["Happy", "Sad"],
},
"string_2b": {
"type": "string",
"description": "Description in a second object is lost",
},
},
},
"string_1": {
"type": "string",
"description": "Not required gets a question mark",
},
},
"required": ["object_1"],
},
},
}
],
"tool_choice": "none",
"count": 103, # counted 106, over by 3
}
"""
namespace functions {
// This is the main function description
type data_demonstration = (_: {
// The object data type as a property
object_1: {
string_2a?: "Happy" | "Sad",
// Description in a second object is lost
string_2b?: string,
},
// Not required gets a question mark
string_1?: string,
}) => any;
} // namespace functions
"""
boolean = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "human_escalation",
"description": "Check if user wants to escalate to a human",
"parameters": {
"type": "object",
"properties": {
"requires_escalation": {
"type": "boolean",
"description": "If user is showing signs of frustration or anger in the query. Also if the user says they want to talk to a real person and not a chat bot.",
}
},
"required": ["requires_escalation"],
},
},
}
],
"tool_choice": "none",
"count": 89, # over by 3
}
array = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "get_coordinates",
"description": "Get the latitude and longitude of multiple mailing addresses",
"parameters": {
"type": "object",
"properties": {
"addresses": {
"type": "array",
"description": "The mailing addresses to be located",
"items": {"type": "string"},
}
},
"required": ["addresses"],
},
},
}
],
"tool_choice": "none",
"count": 59,
}
null = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "get_null",
"description": "Get the null value",
"parameters": {
"type": "object",
"properties": {
"null_value": {
"type": "null",
"description": "The null value to be returned",
}
},
"required": ["null_value"],
},
},
}
],
"tool_choice": "none",
"count": 55,
}
no_type = {
"system_message": {
"role": "system",
"content": "You are a bot.",
},
"tools": [
{
"type": "function",
"function": {
"name": "get_no_type",
"description": "Get the no type value",
"parameters": {
"type": "object",
"properties": {
"no_type_value": {
"description": "The no type value to be returned",
}
},
"required": ["no_type_value"],
},
},
}
],
"tool_choice": "none",
"count": 59,
}
MESSAGES_TEXT = [
system_message,
system_message_short,
system_message_long,
system_message_unicode,
system_message_with_name,
user_message,
user_message_unicode,
user_message_perf,
user_message_dresscode,
user_message_pm,
assistant_message_perf,
assistant_message_perf_short,
assistant_message_dresscode,
]
MESSAGES_WITH_IMAGES = [text_and_image_message]
MESSAGES_WITH_TOOLS = [
inner_object,
inner_object_and_string,
inner_object_with_enum_only,
inner_object_with_enum,
search_sources_toolchoice_auto,
search_sources_toolchoice_none,
search_sources_toolchoice_name,
integer_enum,
integer_enum_tool_choice_name,
no_parameters,
no_parameters_tool_choice_name,
no_parameter_description_or_required,
no_parameter_description,
string_enum,
boolean,
array,
no_type,
null,
]

View file

@ -0,0 +1,3 @@
{
"gpt-3.5-turbo": 7.7e-05
}

View file

@ -0,0 +1,2 @@
{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-3.5-turbo-0125", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 10}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-3.5-turbo-0125", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 10}}

View file

@ -0,0 +1,3 @@
{"custom_id": "task-0", "method": "POST", "url": "/chat/completions", "body": {"model": "my-custom-name", "messages": [{"role": "system", "content": "You are an AI assistant that helps people find information."}, {"role": "user", "content": "When was Microsoft founded?"}]}}
{"custom_id": "task-1", "method": "POST", "url": "/chat/completions", "body": {"model": "my-custom-name", "messages": [{"role": "system", "content": "You are an AI assistant that helps people find information."}, {"role": "user", "content": "When was the first XBOX released?"}]}}
{"custom_id": "task-2", "method": "POST", "url": "/chat/completions", "body": {"model": "my-custom-name", "messages": [{"role": "system", "content": "You are an AI assistant that helps people find information."}, {"role": "user", "content": "What is Altair Basic?"}]}}

Binary file not shown.

View file

@ -0,0 +1,22 @@
from litellm.proxy._types import UserAPIKeyAuth
from fastapi import Request
from dotenv import load_dotenv
import os
load_dotenv()
async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
try:
print(f"api_key: {api_key}")
if api_key == "":
raise Exception(
f"CustomAuth - Malformed API Key passed in. Ensure Key has `Bearer` prefix"
)
if api_key == f"{os.getenv('PROXY_MASTER_KEY')}-1234":
return UserAPIKeyAuth(api_key=api_key)
raise Exception
except Exception as e:
if len(str(e)) > 0:
raise e
raise Exception("Failed custom auth")

View file

@ -0,0 +1,121 @@
from litellm.integrations.custom_logger import CustomLogger
import inspect
import litellm
class testCustomCallbackProxy(CustomLogger):
def __init__(self):
self.success: bool = False # type: ignore
self.failure: bool = False # type: ignore
self.async_success: bool = False # type: ignore
self.async_success_embedding: bool = False # type: ignore
self.async_failure: bool = False # type: ignore
self.async_failure_embedding: bool = False # type: ignore
self.async_completion_kwargs = None # type: ignore
self.async_embedding_kwargs = None # type: ignore
self.async_embedding_response = None # type: ignore
self.async_completion_kwargs_fail = None # type: ignore
self.async_embedding_kwargs_fail = None # type: ignore
self.streaming_response_obj = None # type: ignore
blue_color_code = "\033[94m"
reset_color_code = "\033[0m"
print(f"{blue_color_code}Initialized LiteLLM custom logger")
try:
print(f"Logger Initialized with following methods:")
methods = [
method
for method in dir(self)
if inspect.ismethod(getattr(self, method))
]
# Pretty print the methods
for method in methods:
print(f" - {method}")
print(f"{reset_color_code}")
except Exception:
pass
def log_pre_api_call(self, model, messages, kwargs):
print(f"Pre-API Call")
def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
print(f"Post-API Call")
def log_stream_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Stream")
def log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Success")
self.success = True
def log_failure_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Failure")
self.failure = True
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Async success")
self.async_success = True
print("Value of async success: ", self.async_success)
print("\n kwargs: ", kwargs)
if (
kwargs.get("model") == "azure-embedding-model"
or kwargs.get("model") == "ada"
):
print("Got an embedding model", kwargs.get("model"))
print("Setting embedding success to True")
self.async_success_embedding = True
print("Value of async success embedding: ", self.async_success_embedding)
self.async_embedding_kwargs = kwargs
self.async_embedding_response = response_obj
if kwargs.get("stream") == True:
self.streaming_response_obj = response_obj
self.async_completion_kwargs = kwargs
model = kwargs.get("model", None)
messages = kwargs.get("messages", None)
user = kwargs.get("user", None)
# Access litellm_params passed to litellm.completion(), example access `metadata`
litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get(
"metadata", {}
) # headers passed to LiteLLM proxy, can be found here
# Calculate cost using litellm.completion_cost()
cost = litellm.completion_cost(completion_response=response_obj)
response = response_obj
# tokens used in response
usage = response_obj["usage"]
print("\n\n in custom callback vars my custom logger, ", vars(my_custom_logger))
print(
f"""
Model: {model},
Messages: {messages},
User: {user},
Usage: {usage},
Cost: {cost},
Response: {response}
Proxy Metadata: {metadata}
"""
)
return
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Async Failure")
self.async_failure = True
print("Value of async failure: ", self.async_failure)
print("\n kwargs: ", kwargs)
if kwargs.get("model") == "text-embedding-ada-002":
self.async_failure_embedding = True
self.async_embedding_kwargs_fail = kwargs
self.async_completion_kwargs_fail = kwargs
my_custom_logger = testCustomCallbackProxy()

View file

@ -0,0 +1,21 @@
model_list:
- model_name: gpt-3.5-turbo
litellm_params:
api_key: bad-key
model: gpt-3.5-turbo
- model_name: working-azure-gpt-3.5-turbo
litellm_params:
model: azure/chatgpt-v-2
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
- model_name: azure-gpt-3.5-turbo
litellm_params:
model: azure/chatgpt-v-2
api_base: os.environ/AZURE_API_BASE
api_key: bad-key
- model_name: azure-embedding
litellm_params:
model: azure/azure-embedding-model
api_base: os.environ/AZURE_API_BASE
api_key: bad-key

View file

@ -0,0 +1,17 @@
model_list:
- model_name: azure-cloudflare
litellm_params:
model: azure/chatgpt-v-2
api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
api_key: os.environ/AZURE_API_KEY
api_version: 2023-07-01-preview
litellm_settings:
set_verbose: True
cache: True # set cache responses to True
cache_params: # set cache params for s3
type: s3
s3_bucket_name: litellm-my-test-bucket-2 # AWS Bucket Name for S3
s3_region_name: us-east-1 # AWS Region Name for S3
s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # AWS Access Key ID for S3
s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
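For reference, the cache block above amounts to roughly the following when set up directly in Python (a sketch only; it assumes litellm.Cache accepts these s3 parameters and that AWS credentials come from the environment, as in the YAML):

import litellm
from litellm import Cache

# Mirror of the litellm_settings above; bucket and region copied from the config.
litellm.set_verbose = True
litellm.cache = Cache(
    type="s3",
    s3_bucket_name="litellm-my-test-bucket-2",
    s3_region_name="us-east-1",
)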

View file

@ -0,0 +1,28 @@
general_settings:
database_url: os.environ/DATABASE_URL
master_key: os.environ/PROXY_MASTER_KEY
litellm_settings:
drop_params: true
success_callback: ["langfuse"]
model_list:
- litellm_params:
api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
api_key: os.environ/AZURE_EUROPE_API_KEY
model: azure/gpt-35-turbo
model_name: azure-model
- litellm_params:
api_base: https://my-endpoint-canada-berri992.openai.azure.com
api_key: os.environ/AZURE_CANADA_API_KEY
model: azure/gpt-35-turbo
model_name: azure-model
- litellm_params:
api_base: https://openai-france-1234.openai.azure.com
api_key: os.environ/AZURE_FRANCE_API_KEY
model: azure/gpt-turbo
model_name: azure-model
- litellm_params:
model: gpt-3.5-turbo
model_info:
description: this is a test openai model
model_name: test_openai_models

View file

@ -0,0 +1,11 @@
model_list:
- model_name: "openai-model"
litellm_params:
model: "gpt-3.5-turbo"
litellm_settings:
drop_params: True
set_verbose: True
general_settings:
custom_auth: custom_auth.user_api_key_auth

View file

@ -0,0 +1,127 @@
model_list:
- litellm_params:
api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
api_key: os.environ/AZURE_EUROPE_API_KEY
model: azure/gpt-35-turbo
model_name: azure-model
- litellm_params:
api_base: https://my-endpoint-canada-berri992.openai.azure.com
api_key: os.environ/AZURE_CANADA_API_KEY
model: azure/gpt-35-turbo
model_name: azure-model
- litellm_params:
api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
api_key: os.environ/AZURE_API_KEY
model: azure/chatgpt-v-2
model_name: azure-cloudflare-model
- litellm_params:
api_base: https://openai-france-1234.openai.azure.com
api_key: os.environ/AZURE_FRANCE_API_KEY
model: azure/gpt-turbo
model_name: azure-model
- litellm_params:
model: gpt-3.5-turbo
model_info:
description: this is a test openai model
model_name: test_openai_models
- litellm_params:
model: gpt-3.5-turbo
model_info:
description: this is a test openai model
id: 56f1bd94-3b54-4b67-9ea2-7c70e9a3a709
model_name: test_openai_models
- litellm_params:
model: gpt-3.5-turbo
model_info:
description: this is a test openai model
id: 4d1ee26c-abca-450c-8744-8e87fd6755e9
model_name: test_openai_models
- litellm_params:
model: gpt-3.5-turbo
model_info:
description: this is a test openai model
id: 00e19c0f-b63d-42bb-88e9-016fb0c60764
model_name: test_openai_models
- litellm_params:
model: gpt-3.5-turbo
model_info:
description: this is a test openai model
id: 79fc75bf-8e1b-47d5-8d24-9365a854af03
model_name: test_openai_models
- litellm_params:
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
api_version: 2023-07-01-preview
model: azure/azure-embedding-model
model_info:
mode: embedding
model_name: azure-embedding-model
- litellm_params:
model: gpt-3.5-turbo
model_info:
description: this is a test openai model
id: 55848c55-4162-40f9-a6e2-9a722b9ef404
model_name: test_openai_models
- litellm_params:
model: gpt-3.5-turbo
model_info:
description: this is a test openai model
id: 34339b1e-e030-4bcc-a531-c48559f10ce4
model_name: test_openai_models
- litellm_params:
model: gpt-3.5-turbo
model_info:
description: this is a test openai model
id: f6f74e14-ac64-4403-9365-319e584dcdc5
model_name: test_openai_models
- litellm_params:
model: gpt-3.5-turbo
model_info:
description: this is a test openai model
id: 9b1ef341-322c-410a-8992-903987fef439
model_name: test_openai_models
- litellm_params:
model: dall-e-3
model_info:
mode: image_generation
model_name: dall-e-3
- litellm_params:
api_base: os.environ/AZURE_SWEDEN_API_BASE
api_key: os.environ/AZURE_SWEDEN_API_KEY
api_version: 2023-12-01-preview
model: azure/dall-e-3-test
model_info:
mode: image_generation
model_name: dall-e-3
- litellm_params:
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
api_version: 2023-06-01-preview
model: azure/
model_info:
mode: image_generation
model_name: dall-e-2
- litellm_params:
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
api_version: 2023-07-01-preview
model: azure/azure-embedding-model
model_info:
base_model: text-embedding-ada-002
mode: embedding
model_name: text-embedding-ada-002
- litellm_params:
model: gpt-3.5-turbo
model_info:
description: this is a test openai model
id: 34cb2419-7c63-44ae-a189-53f1d1ce5953
model_name: test_openai_models
- litellm_params:
model: amazon.titan-embed-text-v1
model_name: amazon-embeddings
- litellm_params:
model: gpt-3.5-turbo
model_info:
description: this is a test openai model
id: 753dca9a-898d-4ff7-9961-5acf7cdf38cf
model_name: test_openai_models

View file

@ -0,0 +1,26 @@
model_list:
- model_name: Azure OpenAI GPT-4 Canada
litellm_params:
model: azure/chatgpt-v-2
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
api_version: "2023-07-01-preview"
model_info:
mode: chat
input_cost_per_token: 0.0002
id: gm
- model_name: azure-embedding-model
litellm_params:
model: azure/azure-embedding-model
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
api_version: "2023-07-01-preview"
model_info:
mode: embedding
input_cost_per_token: 0.002
id: hello
litellm_settings:
drop_params: True
set_verbose: True
callbacks: custom_callbacks.my_custom_logger

View file

@ -0,0 +1,32 @@
model_list:
- litellm_params:
api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
api_key: os.environ/AZURE_EUROPE_API_KEY
model: azure/gpt-35-turbo
model_name: azure-model
- litellm_params:
api_base: https://my-endpoint-canada-berri992.openai.azure.com
api_key: os.environ/AZURE_CANADA_API_KEY
model: azure/gpt-35-turbo
model_name: azure-model
- litellm_params:
api_base: https://openai-france-1234.openai.azure.com
api_key: os.environ/AZURE_FRANCE_API_KEY
model: azure/gpt-turbo
model_name: azure-model
litellm_settings:
guardrails:
- prompt_injection:
callbacks: [lakera_prompt_injection, detect_prompt_injection]
default_on: true
- hide_secrets:
callbacks: [hide_secrets]
default_on: true
- moderations:
callbacks: [openai_moderations]
default_on: false

View file

@ -0,0 +1,359 @@
### What this tests ####
## This test asserts the type of data passed into each method of the custom callback handler
import asyncio
import inspect
import json
import os
import sys
import time
import traceback
import uuid
from datetime import datetime
import pytest
from pydantic import BaseModel
sys.path.insert(0, os.path.abspath("../.."))
from typing import List, Literal, Optional, Union
from unittest.mock import AsyncMock, MagicMock, patch
import litellm
from litellm import Cache, completion, embedding
from litellm.integrations.custom_logger import CustomLogger
from litellm.types.utils import LiteLLMCommonStrings
# Test Scenarios (test across completion, streaming, embedding)
## 1: Pre-API-Call
## 2: Post-API-Call
## 3: On LiteLLM Call success
## 4: On LiteLLM Call failure
## 5. Caching
# Test models
## 1. OpenAI
## 2. Azure OpenAI
## 3. Non-OpenAI/Azure - e.g. Bedrock
# Test interfaces
## 1. litellm.completion() + litellm.embeddings()
## refer to test_custom_callback_input_router.py for the router + proxy tests
class CompletionCustomHandler(
CustomLogger
): # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
"""
The set of expected inputs to a custom handler for a litellm call.
"""
# Class variables or attributes
def __init__(self):
self.errors = []
self.states: List[
Literal[
"sync_pre_api_call",
"async_pre_api_call",
"post_api_call",
"sync_stream",
"async_stream",
"sync_success",
"async_success",
"sync_failure",
"async_failure",
]
] = []
def log_pre_api_call(self, model, messages, kwargs):
try:
self.states.append("sync_pre_api_call")
## MODEL
assert isinstance(model, str)
## MESSAGES
assert isinstance(messages, list)
## KWARGS
assert isinstance(kwargs["model"], str)
assert isinstance(kwargs["messages"], list)
assert isinstance(kwargs["optional_params"], dict)
assert isinstance(kwargs["litellm_params"], dict)
assert isinstance(kwargs["start_time"], (datetime, type(None)))
assert isinstance(kwargs["stream"], bool)
assert isinstance(kwargs["user"], (str, type(None)))
### METADATA
metadata_value = kwargs["litellm_params"].get("metadata")
assert metadata_value is None or isinstance(metadata_value, dict)
if metadata_value is not None:
if litellm.turn_off_message_logging is True:
assert (
metadata_value["raw_request"]
is LiteLLMCommonStrings.redacted_by_litellm.value
)
else:
assert "raw_request" not in metadata_value or isinstance(
metadata_value["raw_request"], str
)
except Exception:
print(f"Assertion Error: {traceback.format_exc()}")
self.errors.append(traceback.format_exc())
def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
try:
self.states.append("post_api_call")
## START TIME
assert isinstance(start_time, datetime)
## END TIME
assert end_time is None
## RESPONSE OBJECT
assert response_obj is None
## KWARGS
assert isinstance(kwargs["model"], str)
assert isinstance(kwargs["messages"], list)
assert isinstance(kwargs["optional_params"], dict)
assert isinstance(kwargs["litellm_params"], dict)
assert isinstance(kwargs["start_time"], (datetime, type(None)))
assert isinstance(kwargs["stream"], bool)
assert isinstance(kwargs["user"], (str, type(None)))
assert isinstance(kwargs["input"], (list, dict, str))
assert isinstance(kwargs["api_key"], (str, type(None)))
assert (
isinstance(
kwargs["original_response"],
(str, litellm.CustomStreamWrapper, BaseModel),
)
or inspect.iscoroutine(kwargs["original_response"])
or inspect.isasyncgen(kwargs["original_response"])
)
assert isinstance(kwargs["additional_args"], (dict, type(None)))
assert isinstance(kwargs["log_event_type"], str)
except Exception:
print(f"Assertion Error: {traceback.format_exc()}")
self.errors.append(traceback.format_exc())
async def async_log_stream_event(self, kwargs, response_obj, start_time, end_time):
try:
self.states.append("async_stream")
## START TIME
assert isinstance(start_time, datetime)
## END TIME
assert isinstance(end_time, datetime)
## RESPONSE OBJECT
assert isinstance(response_obj, litellm.ModelResponse)
## KWARGS
assert isinstance(kwargs["model"], str)
assert isinstance(kwargs["messages"], list) and isinstance(
kwargs["messages"][0], dict
)
assert isinstance(kwargs["optional_params"], dict)
assert isinstance(kwargs["litellm_params"], dict)
assert isinstance(kwargs["start_time"], (datetime, type(None)))
assert isinstance(kwargs["stream"], bool)
assert isinstance(kwargs["user"], (str, type(None)))
assert (
isinstance(kwargs["input"], list)
and isinstance(kwargs["input"][0], dict)
) or isinstance(kwargs["input"], (dict, str))
assert isinstance(kwargs["api_key"], (str, type(None)))
assert (
isinstance(
kwargs["original_response"], (str, litellm.CustomStreamWrapper)
)
or inspect.isasyncgen(kwargs["original_response"])
or inspect.iscoroutine(kwargs["original_response"])
)
assert isinstance(kwargs["additional_args"], (dict, type(None)))
assert isinstance(kwargs["log_event_type"], str)
except Exception:
print(f"Assertion Error: {traceback.format_exc()}")
self.errors.append(traceback.format_exc())
def log_success_event(self, kwargs, response_obj, start_time, end_time):
try:
print(f"\n\nkwargs={kwargs}\n\n")
print(
json.dumps(kwargs, default=str)
) # this is a test to confirm no circular references are in the logging object
self.states.append("sync_success")
## START TIME
assert isinstance(start_time, datetime)
## END TIME
assert isinstance(end_time, datetime)
## RESPONSE OBJECT
assert isinstance(
response_obj,
(
litellm.ModelResponse,
litellm.EmbeddingResponse,
litellm.ImageResponse,
),
)
## KWARGS
assert isinstance(kwargs["model"], str)
assert isinstance(kwargs["messages"], list) and isinstance(
kwargs["messages"][0], dict
)
assert isinstance(kwargs["optional_params"], dict)
assert isinstance(kwargs["litellm_params"], dict)
assert isinstance(kwargs["litellm_params"]["api_base"], str)
assert kwargs["cache_hit"] is None or isinstance(kwargs["cache_hit"], bool)
assert isinstance(kwargs["start_time"], (datetime, type(None)))
assert isinstance(kwargs["stream"], bool)
assert isinstance(kwargs["user"], (str, type(None)))
assert (
isinstance(kwargs["input"], list)
and (
isinstance(kwargs["input"][0], dict)
or isinstance(kwargs["input"][0], str)
)
) or isinstance(kwargs["input"], (dict, str))
assert isinstance(kwargs["api_key"], (str, type(None)))
assert isinstance(
kwargs["original_response"],
(str, litellm.CustomStreamWrapper, BaseModel),
), "Original Response={}. Allowed types=[str, litellm.CustomStreamWrapper, BaseModel]".format(
kwargs["original_response"]
)
assert isinstance(kwargs["additional_args"], (dict, type(None)))
assert isinstance(kwargs["log_event_type"], str)
assert isinstance(kwargs["response_cost"], (float, type(None)))
except Exception:
print(f"Assertion Error: {traceback.format_exc()}")
self.errors.append(traceback.format_exc())
def log_failure_event(self, kwargs, response_obj, start_time, end_time):
try:
print(f"kwargs: {kwargs}")
self.states.append("sync_failure")
## START TIME
assert isinstance(start_time, datetime)
## END TIME
assert isinstance(end_time, datetime)
## RESPONSE OBJECT
assert response_obj is None
## KWARGS
assert isinstance(kwargs["model"], str)
assert isinstance(kwargs["messages"], list) and isinstance(
kwargs["messages"][0], dict
)
assert isinstance(kwargs["optional_params"], dict)
assert isinstance(kwargs["litellm_params"], dict)
assert isinstance(kwargs["litellm_params"]["metadata"], Optional[dict])
assert isinstance(kwargs["start_time"], (datetime, type(None)))
assert isinstance(kwargs["stream"], bool)
assert isinstance(kwargs["user"], (str, type(None)))
assert (
isinstance(kwargs["input"], list)
and isinstance(kwargs["input"][0], dict)
) or isinstance(kwargs["input"], (dict, str))
assert isinstance(kwargs["api_key"], (str, type(None)))
assert (
isinstance(
kwargs["original_response"], (str, litellm.CustomStreamWrapper)
)
or kwargs["original_response"] == None
)
assert isinstance(kwargs["additional_args"], (dict, type(None)))
assert isinstance(kwargs["log_event_type"], str)
except Exception:
print(f"Assertion Error: {traceback.format_exc()}")
self.errors.append(traceback.format_exc())
async def async_log_pre_api_call(self, model, messages, kwargs):
try:
self.states.append("async_pre_api_call")
## MODEL
assert isinstance(model, str)
## MESSAGES
assert isinstance(messages, list) and isinstance(messages[0], dict)
## KWARGS
assert isinstance(kwargs["model"], str)
assert isinstance(kwargs["messages"], list) and isinstance(
kwargs["messages"][0], dict
)
assert isinstance(kwargs["optional_params"], dict)
assert isinstance(kwargs["litellm_params"], dict)
assert isinstance(kwargs["start_time"], (datetime, type(None)))
assert isinstance(kwargs["stream"], bool)
assert isinstance(kwargs["user"], (str, type(None)))
except Exception as e:
print(f"Assertion Error: {traceback.format_exc()}")
self.errors.append(traceback.format_exc())
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
try:
print(
"in async_log_success_event", kwargs, response_obj, start_time, end_time
)
self.states.append("async_success")
## START TIME
assert isinstance(start_time, datetime)
## END TIME
assert isinstance(end_time, datetime)
## RESPONSE OBJECT
assert isinstance(
response_obj,
(
litellm.ModelResponse,
litellm.EmbeddingResponse,
litellm.TextCompletionResponse,
),
)
## KWARGS
assert isinstance(kwargs["model"], str)
assert isinstance(kwargs["messages"], list)
assert isinstance(kwargs["optional_params"], dict)
assert isinstance(kwargs["litellm_params"], dict)
assert isinstance(kwargs["litellm_params"]["api_base"], str)
assert isinstance(kwargs["start_time"], (datetime, type(None)))
assert isinstance(kwargs["stream"], bool)
assert isinstance(kwargs["completion_start_time"], datetime)
assert kwargs["cache_hit"] is None or isinstance(kwargs["cache_hit"], bool)
assert isinstance(kwargs["user"], (str, type(None)))
assert isinstance(kwargs["input"], (list, dict, str))
assert isinstance(kwargs["api_key"], (str, type(None)))
assert (
isinstance(
kwargs["original_response"], (str, litellm.CustomStreamWrapper)
)
or inspect.isasyncgen(kwargs["original_response"])
or inspect.iscoroutine(kwargs["original_response"])
)
assert isinstance(kwargs["additional_args"], (dict, type(None)))
assert isinstance(kwargs["log_event_type"], str)
assert kwargs["cache_hit"] is None or isinstance(kwargs["cache_hit"], bool)
assert isinstance(kwargs["response_cost"], (float, type(None)))
except Exception:
print(f"Assertion Error: {traceback.format_exc()}")
self.errors.append(traceback.format_exc())
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
try:
self.states.append("async_failure")
## START TIME
assert isinstance(start_time, datetime)
## END TIME
assert isinstance(end_time, datetime)
## RESPONSE OBJECT
assert response_obj is None
## KWARGS
assert isinstance(kwargs["model"], str)
assert isinstance(kwargs["messages"], list)
assert isinstance(kwargs["optional_params"], dict)
assert isinstance(kwargs["litellm_params"], dict)
assert isinstance(kwargs["start_time"], (datetime, type(None)))
assert isinstance(kwargs["stream"], bool)
assert isinstance(kwargs["user"], (str, type(None)))
assert isinstance(kwargs["input"], (list, str, dict))
assert isinstance(kwargs["api_key"], (str, type(None)))
assert (
isinstance(
kwargs["original_response"], (str, litellm.CustomStreamWrapper)
)
or inspect.isasyncgen(kwargs["original_response"])
or inspect.iscoroutine(kwargs["original_response"])
or kwargs["original_response"] == None
)
assert isinstance(kwargs["additional_args"], (dict, type(None)))
assert isinstance(kwargs["log_event_type"], str)
except Exception:
print(f"Assertion Error: {traceback.format_exc()}")
self.errors.append(traceback.format_exc())
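A handler like this is typically wired up as shown below (a sketch; the model name and mock_response are placeholders used so the hooks fire without a live provider call):

customHandler = CompletionCustomHandler()
litellm.callbacks = [customHandler]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi"}],
    mock_response="Hello!",
)
# The handler records failed assertions instead of raising, so check them explicitly.
assert customHandler.errors == [], customHandler.errors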

View file

@ -147,7 +147,7 @@ async def test_valid_invalid_token(audience):
# VALID TOKEN
## GENERATE A TOKEN
# Assuming the current time is in UTC
expiration_time = int((datetime.utcnow() + timedelta(minutes=10)).timestamp())
expiration_time = int((datetime.now() + timedelta(minutes=10)).timestamp())
payload = {
"sub": "user123",
@ -175,7 +175,7 @@ async def test_valid_invalid_token(audience):
# INVALID TOKEN
## GENERATE A TOKEN
# Assuming the current time is in UTC
expiration_time = int((datetime.utcnow() + timedelta(minutes=10)).timestamp())
expiration_time = int((datetime.now() + timedelta(minutes=10)).timestamp())
payload = {
"sub": "user123",
@ -264,7 +264,7 @@ def team_token_tuple():
# VALID TOKEN
## GENERATE A TOKEN
# Assuming the current time is in UTC
expiration_time = int((datetime.utcnow() + timedelta(minutes=10)).timestamp())
expiration_time = int((datetime.now() + timedelta(minutes=10)).timestamp())
team_id = f"team123_{uuid.uuid4()}"
payload = {
@ -349,7 +349,7 @@ async def test_team_token_output(prisma_client, audience):
# VALID TOKEN
## GENERATE A TOKEN
# Assuming the current time is in UTC
expiration_time = int((datetime.utcnow() + timedelta(minutes=10)).timestamp())
expiration_time = int((datetime.now() + timedelta(minutes=10)).timestamp())
team_id = f"team123_{uuid.uuid4()}"
payload = {
@ -542,7 +542,7 @@ async def aaaatest_user_token_output(
# VALID TOKEN
## GENERATE A TOKEN
# Assuming the current time is in UTC
expiration_time = int((datetime.utcnow() + timedelta(minutes=10)).timestamp())
expiration_time = int((datetime.now() + timedelta(minutes=10)).timestamp())
team_id = f"team123_{uuid.uuid4()}"
user_id = f"user123_{uuid.uuid4()}"
@ -936,7 +936,7 @@ async def test_allow_access_by_email(public_jwt_key, user_email, should_work):
# VALID TOKEN
## GENERATE A TOKEN
# Assuming the current time is in UTC
expiration_time = int((datetime.utcnow() + timedelta(minutes=10)).timestamp())
expiration_time = int((datetime.now() + timedelta(minutes=10)).timestamp())
team_id = f"team123_{uuid.uuid4()}"
payload = {

View file

@ -0,0 +1,23 @@
# #### What this tests ####
# # This tests if the litellm model response type is returnable in a flask app
# import sys, os
# import traceback
# from flask import Flask, request, jsonify, abort, Response
# sys.path.insert(0, os.path.abspath('../../..')) # Adds the parent directory to the system path
# import litellm
# from litellm import completion
# litellm.set_verbose = False
# app = Flask(__name__)
# @app.route('/')
# def hello():
# data = request.json
# return completion(**data)
# if __name__ == '__main__':
# from waitress import serve
# serve(app, host='localhost', port=8080, threads=10)

View file

@ -0,0 +1,14 @@
# import requests, json
# BASE_URL = 'http://localhost:8080'
# def test_hello_route():
# data = {"model": "claude-3-5-haiku-20241022", "messages": [{"role": "user", "content": "hey, how's it going?"}]}
# headers = {'Content-Type': 'application/json'}
# response = requests.get(BASE_URL, headers=headers, data=json.dumps(data))
# print(response.text)
# assert response.status_code == 200
# print("Hello route test passed!")
# if __name__ == '__main__':
# test_hello_route()

View file

@ -33,6 +33,7 @@ def test_encrypt_decrypt_with_master_key():
def test_encrypt_decrypt_with_salt_key():
os.environ["LITELLM_SALT_KEY"] = "sk-salt-key2222"
print(f"LITELLM_SALT_KEY: {os.environ['LITELLM_SALT_KEY']}")
assert decrypt_value_helper(encrypt_value_helper("test")) == "test"
assert decrypt_value_helper(encrypt_value_helper(10)) == 10
assert decrypt_value_helper(encrypt_value_helper(True)) is True

View file

@ -45,6 +45,7 @@ def test_active_callbacks(client):
print("response.status_code", response.status_code)
json_response = response.json()
print(f"json_response={json_response}")
_active_callbacks = json_response["litellm.callbacks"]
expected_callback_names = [

View file

@ -0,0 +1,389 @@
# What is this?
## Unit tests for user_api_key_auth helper functions
import os
import sys
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from typing import Dict, List, Optional
from unittest.mock import MagicMock, patch, AsyncMock
import pytest
from starlette.datastructures import URL
import litellm
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
class Request:
def __init__(self, client_ip: Optional[str] = None, headers: Optional[dict] = None):
self.client = MagicMock()
self.client.host = client_ip
self.headers: Dict[str, str] = headers or {}
@pytest.mark.parametrize(
"allowed_ips, client_ip, expected_result",
[
(None, "127.0.0.1", True), # No IP restrictions, should be allowed
(["127.0.0.1"], "127.0.0.1", True), # IP in allowed list
(["192.168.1.1"], "127.0.0.1", False), # IP not in allowed list
([], "127.0.0.1", False), # Empty allowed list, no IP should be allowed
(["192.168.1.1", "10.0.0.1"], "10.0.0.1", True), # IP in allowed list
(
["192.168.1.1"],
None,
False,
), # Request with no client IP should not be allowed
],
)
def test_check_valid_ip(
allowed_ips: Optional[List[str]], client_ip: Optional[str], expected_result: bool
):
from litellm.proxy.auth.auth_utils import _check_valid_ip
request = Request(client_ip)
assert _check_valid_ip(allowed_ips, request)[0] == expected_result # type: ignore
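# _check_valid_ip returns a tuple; index 0 carries the allow/deny decision
# (assumed from how the result is indexed above).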
# test x-forwarded-for header is used when the user has opted in
@pytest.mark.parametrize(
"allowed_ips, client_ip, expected_result",
[
(None, "127.0.0.1", True), # No IP restrictions, should be allowed
(["127.0.0.1"], "127.0.0.1", True), # IP in allowed list
(["192.168.1.1"], "127.0.0.1", False), # IP not in allowed list
([], "127.0.0.1", False), # Empty allowed list, no IP should be allowed
(["192.168.1.1", "10.0.0.1"], "10.0.0.1", True), # IP in allowed list
(
["192.168.1.1"],
None,
False,
), # Request with no client IP should not be allowed
],
)
def test_check_valid_ip_sent_with_x_forwarded_for(
allowed_ips: Optional[List[str]], client_ip: Optional[str], expected_result: bool
):
from litellm.proxy.auth.auth_utils import _check_valid_ip
request = Request(client_ip, headers={"X-Forwarded-For": client_ip})
assert _check_valid_ip(allowed_ips, request, use_x_forwarded_for=True)[0] == expected_result # type: ignore
@pytest.mark.asyncio
async def test_check_blocked_team():
"""
cached valid_token obj has team_blocked = true
cached team obj has team_blocked = false
assert team is not blocked
"""
import asyncio
import time
from fastapi import Request
from starlette.datastructures import URL
from litellm.proxy._types import (
LiteLLM_TeamTable,
LiteLLM_TeamTableCachedObj,
UserAPIKeyAuth,
)
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy.proxy_server import hash_token, user_api_key_cache
_team_id = "1234"
user_key = "sk-12345678"
valid_token = UserAPIKeyAuth(
team_id=_team_id,
team_blocked=True,
token=hash_token(user_key),
last_refreshed_at=time.time(),
)
await asyncio.sleep(1)
team_obj = LiteLLM_TeamTableCachedObj(
team_id=_team_id, blocked=False, last_refreshed_at=time.time()
)
hashed_token = hash_token(user_key)
print(f"STORING TOKEN UNDER KEY={hashed_token}")
user_api_key_cache.set_cache(key=hashed_token, value=valid_token)
user_api_key_cache.set_cache(key="team_id:{}".format(_team_id), value=team_obj)
setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
setattr(litellm.proxy.proxy_server, "prisma_client", "hello-world")
request = Request(scope={"type": "http"})
request._url = URL(url="/chat/completions")
await user_api_key_auth(request=request, api_key="Bearer " + user_key)
@pytest.mark.parametrize(
"user_role, expected_role",
[
("app_user", "internal_user"),
("internal_user", "internal_user"),
("proxy_admin_viewer", "proxy_admin_viewer"),
],
)
def test_returned_user_api_key_auth(user_role, expected_role):
from litellm.proxy._types import LiteLLM_UserTable, LitellmUserRoles
from litellm.proxy.auth.user_api_key_auth import _return_user_api_key_auth_obj
from datetime import datetime
new_obj = _return_user_api_key_auth_obj(
user_obj=LiteLLM_UserTable(
user_role=user_role, user_id="", max_budget=None, user_email=""
),
api_key="hello-world",
parent_otel_span=None,
valid_token_dict={},
route="/chat/completion",
start_time=datetime.now(),
)
assert new_obj.user_role == expected_role
@pytest.mark.parametrize("key_ownership", ["user_key", "team_key"])
@pytest.mark.asyncio
async def test_aaauser_personal_budgets(key_ownership):
"""
Set a personal budget on a user
- have it only apply when key belongs to user -> raises BudgetExceededError
- if key belongs to team, have key respect team budget -> allows call to go through
"""
import asyncio
import time
from fastapi import Request
from starlette.datastructures import URL
import litellm
from litellm.proxy._types import LiteLLM_UserTable, UserAPIKeyAuth
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy.proxy_server import hash_token, user_api_key_cache
_user_id = "1234"
user_key = "sk-12345678"
if key_ownership == "user_key":
valid_token = UserAPIKeyAuth(
token=hash_token(user_key),
last_refreshed_at=time.time(),
user_id=_user_id,
spend=20,
)
elif key_ownership == "team_key":
valid_token = UserAPIKeyAuth(
token=hash_token(user_key),
last_refreshed_at=time.time(),
user_id=_user_id,
team_id="my-special-team",
team_max_budget=100,
spend=20,
)
user_obj = LiteLLM_UserTable(
user_id=_user_id, spend=11, max_budget=10, user_email=""
)
user_api_key_cache.set_cache(key=hash_token(user_key), value=valid_token)
user_api_key_cache.set_cache(key="{}".format(_user_id), value=user_obj)
setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
setattr(litellm.proxy.proxy_server, "prisma_client", "hello-world")
request = Request(scope={"type": "http"})
request._url = URL(url="/chat/completions")
test_user_cache = getattr(litellm.proxy.proxy_server, "user_api_key_cache")
assert test_user_cache.get_cache(key=hash_token(user_key)) == valid_token
try:
await user_api_key_auth(request=request, api_key="Bearer " + user_key)
if key_ownership == "user_key":
pytest.fail("Expected this call to fail. User is over limit.")
except Exception:
if key_ownership == "team_key":
pytest.fail("Expected this call to work. Key is below team budget.")
@pytest.mark.asyncio
@pytest.mark.parametrize("prohibited_param", ["api_base", "base_url"])
async def test_user_api_key_auth_fails_with_prohibited_params(prohibited_param):
"""
Relevant issue: https://huntr.com/bounties/4001e1a2-7b7a-4776-a3ae-e6692ec3d997
"""
import json
from fastapi import Request
# Setup
user_key = "sk-1234"
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
# Create request with prohibited parameter in body
request = Request(scope={"type": "http"})
request._url = URL(url="/chat/completions")
async def return_body():
body = {prohibited_param: "https://custom-api.com"}
return bytes(json.dumps(body), "utf-8")
request.body = return_body
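# Overriding the body() coroutine lets user_api_key_auth read the crafted payload
# without a real ASGI receive channel (minimal mock).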
try:
response = await user_api_key_auth(
request=request, api_key="Bearer " + user_key
)
pytest.fail("Expected this call to fail. Prohibited params in the request body should be rejected.")
except Exception as e:
print("error str=", str(e))
error_message = str(e.message)
print("error message=", error_message)
assert "is not allowed in request body" in error_message
@pytest.mark.asyncio()
@pytest.mark.parametrize(
"route, should_raise_error",
[
("/embeddings", False),
("/chat/completions", True),
("/completions", True),
("/models", True),
("/v1/embeddings", True),
],
)
async def test_auth_with_allowed_routes(route, should_raise_error):
# Setup
user_key = "sk-1234"
general_settings = {"allowed_routes": ["/embeddings"]}
from fastapi import Request
from litellm.proxy import proxy_server
initial_general_settings = getattr(proxy_server, "general_settings")
setattr(proxy_server, "master_key", "sk-1234")
setattr(proxy_server, "general_settings", general_settings)
request = Request(scope={"type": "http"})
request._url = URL(url=route)
if should_raise_error:
try:
await user_api_key_auth(request=request, api_key="Bearer " + user_key)
pytest.fail("Expected this call to fail. User is over limit.")
except Exception as e:
print("error str=", str(e.message))
error_str = str(e.message)
assert "Route" in error_str and "not allowed" in error_str
pass
else:
await user_api_key_auth(request=request, api_key="Bearer " + user_key)
setattr(proxy_server, "general_settings", initial_general_settings)
@pytest.mark.parametrize(
"route, user_role, expected_result",
[
# Proxy Admin checks
("/global/spend/logs", "proxy_admin", True),
("/key/delete", "proxy_admin", True),
("/key/generate", "proxy_admin", True),
("/key/regenerate", "proxy_admin", True),
# Internal User checks - allowed routes
("/global/spend/logs", "internal_user", True),
("/key/delete", "internal_user", True),
("/key/generate", "internal_user", True),
("/key/82akk800000000jjsk/regenerate", "internal_user", True),
# Internal User Viewer
("/key/generate", "internal_user_viewer", False),
# Internal User checks - disallowed routes
("/organization/member_add", "internal_user", False),
],
)
def test_is_ui_route_allowed(route, user_role, expected_result):
from litellm.proxy.auth.user_api_key_auth import _is_ui_route_allowed
from litellm.proxy._types import LiteLLM_UserTable
user_obj = LiteLLM_UserTable(
user_id="3b803c0e-666e-4e99-bd5c-6e534c07e297",
max_budget=None,
spend=0.0,
model_max_budget={},
model_spend={},
user_email="my-test-email@1234.com",
models=[],
tpm_limit=None,
rpm_limit=None,
user_role=user_role,
organization_memberships=[],
)
received_args: dict = {
"route": route,
"user_obj": user_obj,
}
try:
assert _is_ui_route_allowed(**received_args) == expected_result
except Exception as e:
# If expected result is False, we expect an error
if expected_result is False:
pass
else:
raise e
@pytest.mark.parametrize(
"route, user_role, expected_result",
[
("/key/generate", "internal_user_viewer", False),
],
)
def test_is_api_route_allowed(route, user_role, expected_result):
from litellm.proxy.auth.user_api_key_auth import _is_api_route_allowed
from litellm.proxy._types import LiteLLM_UserTable
user_obj = LiteLLM_UserTable(
user_id="3b803c0e-666e-4e99-bd5c-6e534c07e297",
max_budget=None,
spend=0.0,
model_max_budget={},
model_spend={},
user_email="my-test-email@1234.com",
models=[],
tpm_limit=None,
rpm_limit=None,
user_role=user_role,
organization_memberships=[],
)
received_args: dict = {
"route": route,
"user_obj": user_obj,
}
try:
assert _is_api_route_allowed(**received_args) == expected_result
except Exception as e:
# If expected result is False, we expect an error
if expected_result is False:
pass
else:
raise e

View file

@ -0,0 +1,13 @@
{
"type": "service_account",
"project_id": "adroit-crow-413218",
"private_key_id": "",
"private_key": "",
"client_email": "test-adroit-crow@adroit-crow-413218.iam.gserviceaccount.com",
"client_id": "104886546564708740969",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test-adroit-crow%40adroit-crow-413218.iam.gserviceaccount.com",
"universe_domain": "googleapis.com"
}

View file

@ -87,6 +87,7 @@ proxy_handler_instance = MyCustomHandler()
# Set litellm.callbacks = [proxy_handler_instance] on the proxy
# need to set litellm.callbacks = [proxy_handler_instance] # on the proxy
@pytest.mark.asyncio
@pytest.mark.flaky(retries=6, delay=10)
async def test_transcription_on_router():
litellm.set_verbose = True
litellm.callbacks = [proxy_handler_instance]