Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
* fix(azure/): support passing headers to azure openai endpoints
Fixes https://github.com/BerriAI/litellm/issues/6217 (usage sketch below)
* fix(utils.py): move the default tokenizer to just openai
The HF tokenizer makes network calls when fetching the tokenizer, which slows down execution (usage sketch below)
* fix(router.py): fix pattern-matching router - add the generic "*" pattern as well
Fixes an issue where the generic "*" model access group wouldn't show up
* fix(pattern_match_deployments.py): match to the more specific pattern
Allows setting a generic wildcard model access group while excluding specific models more easily (config sketch below)
* fix(proxy_server.py): fix _delete_deployment to handle the base case where the db_model list is empty
Don't delete all router models because of an empty list (guard sketch below)
Fixes https://github.com/BerriAI/litellm/issues/7196
* fix(anthropic/): fix handling of response_format for anthropic messages with the anthropic api (usage sketch below)
* fix(fireworks_ai/): support passing response_format + tool call in same message
Addresses https://github.com/BerriAI/litellm/issues/7135
* Revert "fix(fireworks_ai/): support passing response_format + tool call in same message"
This reverts commit 6a30dc6929.
* test: fix test
* fix(replicate/): fix replicate default retry/polling logic
* test: add unit testing for router pattern matching
* test: update test to use default oai tokenizer
* test: mark flaky test
* test: skip flaky test
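
The Azure header fix above is easiest to see with a usage sketch. This is not the actual change; it assumes `extra_headers` is the pass-through parameter, and the deployment name, api_base, api_version, and header values are placeholders.

import litellm

# Hedged sketch: assumes litellm.completion forwards `extra_headers` to the
# Azure OpenAI endpoint. Deployment, api_base, api_version, and header values
# are placeholders.
response = litellm.completion(
    model="azure/my-gpt-4o-deployment",
    api_base="https://my-resource.openai.azure.com",
    api_version="2024-02-15-preview",
    messages=[{"role": "user", "content": "hello"}],
    extra_headers={"X-Correlation-Id": "test-123"},
)
print(response.choices[0].message.content)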
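
For the default-tokenizer change above, a minimal sketch of the expected behaviour, assuming `litellm.token_counter` is called directly; the model name is a placeholder that is not in litellm's model map.

import litellm

# Hedged sketch: an unrecognized model should fall back to the default
# (OpenAI/tiktoken) tokenizer instead of fetching a Hugging Face tokenizer
# over the network. "my-unknown-model" is a placeholder.
num_tokens = litellm.token_counter(
    model="my-unknown-model",
    messages=[{"role": "user", "content": "hello"}],
)
print(num_tokens)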
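
The wildcard-routing fixes above can be illustrated with a config sketch: a generic "*" deployment alongside a more specific "openai/*" deployment, where the router is expected to prefer the more specific pattern. The api_key values are placeholders.

from litellm import Router

# Hedged sketch: one generic "*" wildcard deployment and one more specific
# "openai/*" deployment; the pattern-matching router should route e.g.
# "openai/gpt-4o" to the more specific entry. Keys are placeholders.
router = Router(
    model_list=[
        {
            "model_name": "openai/*",  # more specific wildcard
            "litellm_params": {"model": "openai/*", "api_key": "sk-openai-placeholder"},
        },
        {
            "model_name": "*",  # generic catch-all
            "litellm_params": {"model": "*", "api_key": "sk-generic-placeholder"},
        },
    ]
)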
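
The _delete_deployment fix above amounts to an early return when the DB model list is empty, so an empty query result doesn't wipe every router model. A rough sketch with hypothetical names (`router_models`, `db_models`), not the actual proxy_server code.

from typing import List


def _prune_deleted_deployments(router_models: List[dict], db_models: List[dict]) -> List[dict]:
    # Hypothetical sketch: an empty DB list is treated as "no information",
    # so every router model is kept instead of being deleted.
    if len(db_models) == 0:
        return router_models
    db_ids = {m["model_id"] for m in db_models}
    # Keep only router models that still exist in the DB.
    return [m for m in router_models if m.get("model_id") in db_ids]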
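
For the anthropic response_format fix above, a hedged usage sketch; the model name is only an example and the JSON-mode translation is assumed to happen inside litellm's Anthropic handler.

import litellm

# Hedged sketch: request JSON output from an Anthropic model via litellm,
# assuming response_format={"type": "json_object"} is translated for the
# Anthropic Messages API. Requires ANTHROPIC_API_KEY in the environment.
response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "Return a JSON object with a 'greeting' key."}],
    response_format={"type": "json_object"},
)
print(response.choices[0].message.content)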
138 lines · 3.5 KiB · Python
# Test the following scenarios:
# 1. Generate a Key, and use it to make a call

import sys, os
import traceback
from dotenv import load_dotenv
from fastapi import Request
from datetime import datetime

load_dotenv()
import os, io, time

# this file is to test litellm/proxy

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest, logging, asyncio
import litellm, asyncio
from litellm.proxy.proxy_server import token_counter
from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend
from litellm._logging import verbose_proxy_logger

verbose_proxy_logger.setLevel(level=logging.DEBUG)

from litellm.proxy._types import TokenCountRequest, TokenCountResponse

from litellm import Router


@pytest.mark.asyncio
async def test_vLLM_token_counting():
    """
    Test Token counter for vLLM models
    - User passes model="special-alias"
    - token_counter should infer that special-alias -> maps to wolfram/miquliz-120b-v2.0
    -> token counter should fall back to the default OpenAI tokenizer
    """
    llm_router = Router(
        model_list=[
            {
                "model_name": "special-alias",
                "litellm_params": {
                    "model": "openai/wolfram/miquliz-120b-v2.0",
                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                },
            }
        ]
    )

    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)

    response = await token_counter(
        request=TokenCountRequest(
            model="special-alias",
            messages=[{"role": "user", "content": "hello"}],
        )
    )

    print("response: ", response)

    assert (
        response.tokenizer_type == "openai_tokenizer"
    )  # SHOULD use the default tokenizer
    assert response.model_used == "wolfram/miquliz-120b-v2.0"


@pytest.mark.asyncio
async def test_token_counting_model_not_in_model_list():
    """
    Test Token counter - when a model is not in model_list
    -> should use the default OpenAI tokenizer
    """
    llm_router = Router(
        model_list=[
            {
                "model_name": "gpt-4",
                "litellm_params": {
                    "model": "gpt-4",
                },
            }
        ]
    )

    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)

    response = await token_counter(
        request=TokenCountRequest(
            model="special-alias",
            messages=[{"role": "user", "content": "hello"}],
        )
    )

    print("response: ", response)

    assert (
        response.tokenizer_type == "openai_tokenizer"
    )  # SHOULD use the OpenAI tokenizer
    assert response.model_used == "special-alias"


@pytest.mark.asyncio
async def test_gpt_token_counting():
    """
    Test Token counter
    -> should work for gpt-4
    """
    llm_router = Router(
        model_list=[
            {
                "model_name": "gpt-4",
                "litellm_params": {
                    "model": "gpt-4",
                },
            }
        ]
    )

    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)

    response = await token_counter(
        request=TokenCountRequest(
            model="gpt-4",
            messages=[{"role": "user", "content": "hello"}],
        )
    )

    print("response: ", response)

    assert (
        response.tokenizer_type == "openai_tokenizer"
    )  # SHOULD use the OpenAI tokenizer
    assert response.request_model == "gpt-4"