Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-24 18:24:20 +00:00)
test: update tests to new deployment model (#10142)
* test: update tests to new deployment model
* test: update model name
* test: skip cohere rbac issue test
* test: update test - replace gpt-4o model
parent 415abfc222
commit 1ea046cc61
72 changed files with 294 additions and 292 deletions
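The change set is largely mechanical: the Azure test deployment azure/chatgpt-v-2 is renamed to azure/chatgpt-v-3, azure/gpt-4o becomes azure/gpt-4o-new-test, and the Azure AI Cohere rerank tests are commented out or skipped due to RBAC issues. A minimal sketch of the call pattern the updated tests exercise, using only parameters that appear in this diff (the deployment names are the new ones introduced here; an Azure resource would need these deployments to actually exist):

    import os
    import litellm

    # Deployment renames in this commit:
    #   azure/chatgpt-v-2 -> azure/chatgpt-v-3
    #   azure/gpt-4o      -> azure/gpt-4o-new-test
    response = litellm.completion(
        model="azure/chatgpt-v-3",
        messages=[{"role": "user", "content": "This is a test"}],
        api_key=os.getenv("AZURE_API_KEY"),
        api_base=os.getenv("AZURE_API_BASE"),
        api_version=os.getenv("AZURE_API_VERSION"),
        max_tokens=5,
    )
    print(response.choices[0].message.content)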
@@ -14,7 +14,7 @@ from litellm.llms.anthropic.chat import ModelResponseIterator
 import httpx
 import json
 from litellm.llms.custom_httpx.http_handler import HTTPHandler
-from base_rerank_unit_tests import BaseLLMRerankTest
+# from base_rerank_unit_tests import BaseLLMRerankTest

 load_dotenv()
 import io

@@ -255,16 +255,17 @@ def test_azure_deepseek_reasoning_content():
     assert response.choices[0].message.content == "\n\nThe sky is a canvas of blue"


-class TestAzureAIRerank(BaseLLMRerankTest):
-    def get_custom_llm_provider(self) -> litellm.LlmProviders:
-        return litellm.LlmProviders.AZURE_AI
+# skipping due to cohere rbac issues
+# class TestAzureAIRerank(BaseLLMRerankTest):
+#     def get_custom_llm_provider(self) -> litellm.LlmProviders:
+#         return litellm.LlmProviders.AZURE_AI

-    def get_base_rerank_call_args(self) -> dict:
-        return {
-            "model": "azure_ai/cohere-rerank-v3-english",
-            "api_base": os.getenv("AZURE_AI_COHERE_API_BASE"),
-            "api_key": os.getenv("AZURE_AI_COHERE_API_KEY"),
-        }
+#     def get_base_rerank_call_args(self) -> dict:
+#         return {
+#             "model": "azure_ai/cohere-rerank-v3-english",
+#             "api_base": os.getenv("AZURE_AI_COHERE_API_BASE"),
+#             "api_key": os.getenv("AZURE_AI_COHERE_API_KEY"),
+#         }


 @pytest.mark.asyncio

@@ -279,7 +280,7 @@ async def test_azure_ai_request_format():

     # Set up the test parameters
     api_key = os.getenv("AZURE_API_KEY")
-    api_base = f"{os.getenv('AZURE_API_BASE')}/openai/deployments/gpt-4o/chat/completions?api-version=2024-08-01-preview"
+    api_base = f"{os.getenv('AZURE_API_BASE')}/openai/deployments/gpt-4o-new-test/chat/completions?api-version=2024-08-01-preview"
     model = "azure_ai/gpt-4o"
     messages = [
         {"role": "user", "content": "hi"},
@@ -137,7 +137,7 @@ def test_azure_extra_headers(input, call_type, header_value):
         func = image_generation

     data = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com",
         "api_version": "2023-07-01-preview",
         "api_key": "my-azure-api-key",

@@ -339,7 +339,7 @@ def test_azure_gpt_4o_with_tool_call_and_response_format(api_version):

    with patch.object(client.chat.completions.with_raw_response, "create") as mock_post:
        response = litellm.completion(
-            model="azure/gpt-4o",
+            model="azure/gpt-4o-new-test",
            messages=[
                {
                    "role": "system",

@@ -474,7 +474,7 @@ def test_azure_max_retries_0(

    try:
        completion(
-            model="azure/gpt-4o",
+            model="azure/gpt-4o-new-test",
            messages=[{"role": "user", "content": "Hello world"}],
            max_retries=max_retries,
            stream=stream,

@@ -502,7 +502,7 @@ async def test_async_azure_max_retries_0(

    try:
        await acompletion(
-            model="azure/gpt-4o",
+            model="azure/gpt-4o-new-test",
            messages=[{"role": "user", "content": "Hello world"}],
            max_retries=max_retries,
            stream=stream,
@@ -217,7 +217,7 @@ def test_openai_optional_params_embeddings():
 def test_azure_optional_params_embeddings():
     litellm.drop_params = True
     optional_params = get_optional_params_embeddings(
-        model="chatgpt-v-2",
+        model="chatgpt-v-3",
         user="John",
         encoding_format=None,
         custom_llm_provider="azure",

@@ -396,7 +396,7 @@ def test_azure_tool_choice(api_version):
     """
     litellm.drop_params = True
     optional_params = litellm.utils.get_optional_params(
-        model="chatgpt-v-2",
+        model="chatgpt-v-3",
         user="John",
         custom_llm_provider="azure",
         max_tokens=10,
@@ -150,6 +150,7 @@ async def test_basic_rerank_together_ai(sync_mode):

 @pytest.mark.asyncio()
 @pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.skip(reason="Skipping test due to Cohere RBAC issues")
 async def test_basic_rerank_azure_ai(sync_mode):
     import os

@@ -91,7 +91,7 @@ async def make_async_calls(metadata=None, **completion_kwargs):
 def create_async_task(**completion_kwargs):
     litellm.set_verbose = True
     completion_args = {
-        "model": "openai/chatgpt-v-2",
+        "model": "openai/chatgpt-v-3",
         "api_version": "2024-02-01",
         "messages": [{"role": "user", "content": "This is a test"}],
         "max_tokens": 5,
@@ -86,7 +86,7 @@ def create_async_task(**completion_kwargs):
     By default a standard set of arguments are used for the litellm.acompletion function.
     """
     completion_args = {
-        "model": "openai/chatgpt-v-2",
+        "model": "openai/chatgpt-v-3",
        "api_version": "2024-02-01",
        "messages": [{"role": "user", "content": "This is a test" * 100}],
        "max_tokens": 5,
@@ -1,7 +1,7 @@
 model_list:
   - model_name: gpt-4-team1
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
       api_version: "2023-05-15"
       api_key: os.environ/AZURE_API_KEY
@@ -26,7 +26,7 @@ model_list = [
    { # list of model deployments
        "model_name": "gpt-3.5-turbo", # openai model name
        "litellm_params": { # params for litellm completion/embedding call
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
            "api_key": "bad-key",
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),

@@ -143,7 +143,7 @@ async def test_cooldown_same_model_name(sync_mode):
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -153,7 +153,7 @@ async def test_cooldown_same_model_name(sync_mode):
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -184,7 +184,7 @@ async def test_cooldown_same_model_name(sync_mode):
            model_ids.append(model["model_info"]["id"])
        print("\n litellm model ids ", model_ids)

-        # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
+        # example litellm_model_names ['azure/chatgpt-v-3-ModelID-64321', 'azure/chatgpt-v-3-ModelID-63960']
        assert (
            model_ids[0] != model_ids[1]
        ) # ensure both models have a uuid added, and they have different names

@@ -201,7 +201,7 @@ async def test_cooldown_same_model_name(sync_mode):
            model_ids.append(model["model_info"]["id"])
        print("\n litellm model ids ", model_ids)

-        # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
+        # example litellm_model_names ['azure/chatgpt-v-3-ModelID-64321', 'azure/chatgpt-v-3-ModelID-63960']
        assert (
            model_ids[0] != model_ids[1]
        ) # ensure both models have a uuid added, and they have different names
@@ -194,7 +194,7 @@ def create_async_task(**completion_kwargs):
     By default a standard set of arguments are used for the litellm.acompletion function.
     """
     completion_args = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
        "api_version": "2024-02-01",
        "messages": [{"role": "user", "content": "This is a test"}],
        "max_tokens": 5,
@@ -71,7 +71,7 @@ async def test_create_delete_assistants(provider, sync_mode):
     model = "gpt-4-turbo"
     if provider == "azure":
         os.environ["AZURE_API_VERSION"] = "2024-05-01-preview"
-        model = "chatgpt-v-2"
+        model = "chatgpt-v-3"

     if sync_mode == True:
         assistant = litellm.create_assistants(
@@ -46,7 +46,7 @@ async def test_aaaaazure_tenant_id_auth(respx_mock: MockRouter):
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_base": os.getenv("AZURE_API_BASE"),
                "tenant_id": os.getenv("AZURE_TENANT_ID"),
                "client_id": os.getenv("AZURE_CLIENT_ID"),

@@ -95,6 +95,6 @@ async def test_aaaaazure_tenant_id_auth(respx_mock: MockRouter):

    assert json_body == {
        "messages": [{"role": "user", "content": "Hello world!"}],
-        "model": "chatgpt-v-2",
+        "model": "chatgpt-v-3",
        "stream": False,
    }
@@ -18,7 +18,7 @@
#         {
#             "model_name": "azure-test",
#             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
#                 "api_key": os.getenv("AZURE_API_KEY"),
#                 "api_base": os.getenv("AZURE_API_BASE"),
#                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -33,7 +33,7 @@
#     try:
#         start_time = time.time()
#         response = await client.chat.completions.create(
-#             model="chatgpt-v-2",
+#             model="chatgpt-v-3",
#             messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
#             stream=True,
#         )
@@ -324,7 +324,7 @@ def test_caching_with_models_v2():
    litellm.set_verbose = True
    response1 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
    response2 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
-    response3 = completion(model="azure/chatgpt-v-2", messages=messages, caching=True)
+    response3 = completion(model="azure/chatgpt-v-3", messages=messages, caching=True)
    print(f"response1: {response1}")
    print(f"response2: {response2}")
    print(f"response3: {response3}")

@@ -1170,7 +1170,7 @@ async def test_s3_cache_stream_azure(sync_mode):

        if sync_mode:
            response1 = litellm.completion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=messages,
                max_tokens=40,
                temperature=1,

@@ -1183,7 +1183,7 @@ async def test_s3_cache_stream_azure(sync_mode):
            print(response_1_content)
        else:
            response1 = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=messages,
                max_tokens=40,
                temperature=1,

@@ -1203,7 +1203,7 @@ async def test_s3_cache_stream_azure(sync_mode):

        if sync_mode:
            response2 = litellm.completion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=messages,
                max_tokens=40,
                temperature=1,

@@ -1216,7 +1216,7 @@ async def test_s3_cache_stream_azure(sync_mode):
            print(response_2_content)
        else:
            response2 = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=messages,
                max_tokens=40,
                temperature=1,

@@ -1279,7 +1279,7 @@ async def test_s3_cache_acompletion_azure():
        print("s3 Cache: test for caching, streaming + completion")

        response1 = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            max_tokens=40,
            temperature=1,

@@ -1289,7 +1289,7 @@ async def test_s3_cache_acompletion_azure():
        time.sleep(2)

        response2 = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            max_tokens=40,
            temperature=1,
@@ -58,7 +58,7 @@ def test_caching_router():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
@@ -55,7 +55,7 @@
# #         {
# #             "model_name": "gpt-3.5-turbo", # openai model name
# #             "litellm_params": { # params for litellm completion/embedding call
-# #                 "model": "azure/chatgpt-v-2",
+# #                 "model": "azure/chatgpt-v-3",
# #                 "api_key": os.getenv("AZURE_API_KEY"),
# #                 "api_version": os.getenv("AZURE_API_VERSION"),
# #                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -93,7 +93,7 @@
# #         {
# #             "model_name": "gpt-3.5-turbo", # openai model name
# #             "litellm_params": { # params for litellm completion/embedding call
-# #                 "model": "azure/chatgpt-v-2",
+# #                 "model": "azure/chatgpt-v-3",
# #                 "api_key": os.getenv("AZURE_API_KEY"),
# #                 "api_version": os.getenv("AZURE_API_VERSION"),
# #                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -732,7 +732,7 @@ def encode_image(image_path):
    "model",
    [
        "gpt-4o",
-        "azure/gpt-4o",
+        "azure/gpt-4o-new-test",
        "anthropic/claude-3-opus-20240229",
    ],
) #

@@ -1824,9 +1824,9 @@ def test_completion_openai():
    "model, api_version",
    [
        # ("gpt-4o-2024-08-06", None),
-        # ("azure/chatgpt-v-2", None),
+        # ("azure/chatgpt-v-3", None),
        ("bedrock/anthropic.claude-3-sonnet-20240229-v1:0", None),
-        # ("azure/gpt-4o", "2024-08-01-preview"),
+        # ("azure/gpt-4o-new-test", "2024-08-01-preview"),
    ],
)
@pytest.mark.flaky(retries=3, delay=1)
@@ -2495,7 +2495,7 @@ def test_completion_azure_extra_headers():
    litellm.client_session = http_client
    try:
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            api_base=os.getenv("AZURE_API_BASE"),
            api_version="2023-07-01-preview",

@@ -2544,7 +2544,7 @@ def test_completion_azure_ad_token():
    litellm.client_session = http_client
    try:
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            azure_ad_token="my-special-token",
        )

@@ -2575,7 +2575,7 @@ def test_completion_azure_key_completion_arg():
        litellm.set_verbose = True
        ## Test azure call
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            api_key=old_key,
            logprobs=True,

@@ -2633,7 +2633,7 @@ async def test_re_use_azure_async_client():
        ## Test azure call
        for _ in range(3):
            response = await litellm.acompletion(
-                model="azure/chatgpt-v-2", messages=messages, client=client
+                model="azure/chatgpt-v-3", messages=messages, client=client
            )
            print(f"response: {response}")
    except Exception as e:
@@ -2665,7 +2665,7 @@ def test_completion_azure():
        litellm.set_verbose = False
        ## Test azure call
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            api_key="os.environ/AZURE_API_KEY",
        )

@@ -2673,7 +2673,7 @@ def test_completion_azure():
        print(f"response hidden params: {response._hidden_params}")
        ## Test azure flag for backwards-compat
        # response = completion(
-        #     model="chatgpt-v-2",
+        #     model="chatgpt-v-3",
        #     messages=messages,
        #     azure=True,
        #     max_tokens=10

@@ -2712,7 +2712,7 @@ def test_azure_openai_ad_token():
    litellm.input_callback = [tester]
    try:
        response = litellm.completion(
-            model="azure/chatgpt-v-2", # e.g. gpt-35-instant
+            model="azure/chatgpt-v-3", # e.g. gpt-35-instant
            messages=[
                {
                    "role": "user",

@@ -2750,7 +2750,7 @@ def test_completion_azure2():

        ## Test azure call
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            api_base=api_base,
            api_key=api_key,

@@ -2787,7 +2787,7 @@ def test_completion_azure3():

        ## Test azure call
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            max_tokens=10,
        )

@@ -2835,7 +2835,7 @@ def test_completion_azure_with_litellm_key():
        openai.api_key = "ymca"

        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
        )
        # Add any assertions here to check the response
@@ -2863,7 +2863,7 @@ def test_completion_azure_deployment_id():
    try:
        litellm.set_verbose = True
        response = completion(
-            deployment_id="chatgpt-v-2",
+            deployment_id="chatgpt-v-3",
            model="gpt-3.5-turbo",
            messages=messages,
        )

@@ -3925,7 +3925,7 @@ def test_completion_stream_watsonx():
@pytest.mark.parametrize(
    "provider, model, project, region_name, token",
    [
-        ("azure", "chatgpt-v-2", None, None, "test-token"),
+        ("azure", "chatgpt-v-3", None, None, "test-token"),
        ("vertex_ai", "anthropic-claude-3", "adroit-crow-1", "us-east1", None),
        ("watsonx", "ibm/granite", "96946574", "dallas", "1234"),
        ("bedrock", "anthropic.claude-3", None, "us-east-1", None),

@@ -4178,7 +4178,7 @@ async def test_completion_ai21_chat():

@pytest.mark.parametrize(
    "model",
-    ["gpt-4o", "azure/chatgpt-v-2"],
+    ["gpt-4o", "azure/chatgpt-v-3"],
)
@pytest.mark.parametrize(
    "stream",

@@ -4200,7 +4200,7 @@ def test_completion_response_ratelimit_headers(model, stream):
    assert "x-ratelimit-remaining-requests" in additional_headers
    assert "x-ratelimit-remaining-tokens" in additional_headers

-    if model == "azure/chatgpt-v-2":
+    if model == "azure/chatgpt-v-3":
        # Azure OpenAI header
        assert "llm_provider-azureml-model-session" in additional_headers
    if model == "claude-3-sonnet-20240229":
@@ -46,7 +46,7 @@ async def test_delete_deployment():
    import base64

    litellm_params = LiteLLM_Params(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
        api_key=os.getenv("AZURE_API_KEY"),
        api_base=os.getenv("AZURE_API_BASE"),
        api_version=os.getenv("AZURE_API_VERSION"),

@@ -232,7 +232,7 @@ async def test_db_error_new_model_check():


    litellm_params = LiteLLM_Params(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
        api_key=os.getenv("AZURE_API_KEY"),
        api_base=os.getenv("AZURE_API_BASE"),
        api_version=os.getenv("AZURE_API_VERSION"),

@@ -250,7 +250,7 @@ def _create_model_list(flag_value: Literal[0, 1], master_key: str):
    import base64

    new_litellm_params = LiteLLM_Params(
-        model="azure/chatgpt-v-2-3",
+        model="azure/chatgpt-v-3-3",
        api_key=os.getenv("AZURE_API_KEY"),
        api_base=os.getenv("AZURE_API_BASE"),
        api_version=os.getenv("AZURE_API_VERSION"),
@@ -5,12 +5,12 @@ model_list:
      model: gpt-3.5-turbo
  - model_name: working-azure-gpt-3.5-turbo
    litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
  - model_name: azure-gpt-3.5-turbo
    litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
      api_base: os.environ/AZURE_API_BASE
      api_key: bad-key
  - model_name: azure-embedding
@@ -1,7 +1,7 @@
model_list:
  - model_name: azure-cloudflare
    litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
      api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
      api_key: os.environ/AZURE_API_KEY
      api_version: 2023-07-01-preview

@@ -12,7 +12,7 @@ model_list:
  - litellm_params:
      api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
      api_key: os.environ/AZURE_API_KEY
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
    model_name: azure-cloudflare-model
  - litellm_params:
      api_base: https://openai-france-1234.openai.azure.com
@@ -1,7 +1,7 @@
model_list:
  - model_name: Azure OpenAI GPT-4 Canada
    litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: "2023-07-01-preview"
@@ -450,12 +450,12 @@ def test_chat_azure_stream():
        customHandler = CompletionCustomHandler()
        litellm.callbacks = [customHandler]
        response = litellm.completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
        )
        # test streaming
        response = litellm.completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
            stream=True,
        )

@@ -464,7 +464,7 @@ def test_chat_azure_stream():
        # test failure callback
        try:
            response = litellm.completion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
                api_key="my-bad-key",
                stream=True,

@@ -491,12 +491,12 @@ async def test_async_chat_azure_stream():
        customHandler = CompletionCustomHandler()
        litellm.callbacks = [customHandler]
        response = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}],
        )
        ## test streaming
        response = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}],
            stream=True,
        )

@@ -507,7 +507,7 @@ async def test_async_chat_azure_stream():
        # test failure callback
        try:
            response = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=[{"role": "user", "content": "Hi 👋 - i'm async azure"}],
                api_key="my-bad-key",
                stream=True,
@@ -1018,7 +1018,7 @@ async def test_async_completion_azure_caching():
    litellm.callbacks = [customHandler_caching]
    unique_time = time.time()
    response1 = await litellm.acompletion(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
        messages=[
            {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
        ],

@@ -1027,7 +1027,7 @@ async def test_async_completion_azure_caching():
    await asyncio.sleep(1)
    print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
    response2 = await litellm.acompletion(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
        messages=[
            {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
        ],

@@ -1056,7 +1056,7 @@ async def test_async_completion_azure_caching_streaming():
    litellm.callbacks = [customHandler_caching]
    unique_time = uuid.uuid4()
    response1 = await litellm.acompletion(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
        messages=[
            {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
        ],

@@ -1069,7 +1069,7 @@ async def test_async_completion_azure_caching_streaming():
    initial_customhandler_caching_states = len(customHandler_caching.states)
    print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
    response2 = await litellm.acompletion(
-        model="azure/chatgpt-v-2",
+        model="azure/chatgpt-v-3",
        messages=[
            {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
        ],

@@ -1207,7 +1207,7 @@ def test_turn_off_message_logging():
    "model",
    [
        "ft:gpt-3.5-turbo:my-org:custom_suffix:id"
-    ], # "gpt-3.5-turbo", "azure/chatgpt-v-2",
+    ], # "gpt-3.5-turbo", "azure/chatgpt-v-3",
)
@pytest.mark.parametrize(
    "turn_off_message_logging",
@@ -284,7 +284,7 @@ class CompletionCustomHandler(
            )

            if (
-                kwargs["model"] == "chatgpt-v-2"
+                kwargs["model"] == "chatgpt-v-3"
                and base_model is not None
                and kwargs["stream"] != True
            ):

@@ -394,7 +394,7 @@ async def test_async_chat_azure():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -438,7 +438,7 @@ async def test_async_chat_azure():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "my-bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -545,7 +545,7 @@ async def test_async_chat_azure_with_fallbacks():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "my-bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -606,7 +606,7 @@ async def test_async_completion_azure_caching():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
@@ -160,7 +160,7 @@ def test_completion_azure_stream_moderation_failure():
    ]
    try:
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=messages,
            mock_response="Exception: content_filter_policy",
            stream=True,

@@ -195,7 +195,7 @@ def test_async_custom_handler_stream():
    async def test_1():
        nonlocal complete_streaming_response
        response = await litellm.acompletion(
-            model="azure/chatgpt-v-2", messages=messages, stream=True
+            model="azure/chatgpt-v-3", messages=messages, stream=True
        )
        async for chunk in response:
            complete_streaming_response += (

@@ -239,7 +239,7 @@ def test_azure_completion_stream():
    complete_streaming_response = ""

    response = litellm.completion(
-        model="azure/chatgpt-v-2", messages=messages, stream=True
+        model="azure/chatgpt-v-3", messages=messages, stream=True
    )
    for chunk in response:
        complete_streaming_response += chunk["choices"][0]["delta"]["content"] or ""
@@ -51,7 +51,7 @@ async def test_content_policy_exception_azure():
        # this is ony a test - we needed some way to invoke the exception :(
        litellm.set_verbose = True
        response = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "where do I buy lethal drugs from"}],
            mock_response="Exception: content_filter_policy",
        )

@@ -124,7 +124,7 @@ def test_context_window_with_fallbacks(model):
    ctx_window_fallback_dict = {
        "command-nightly": "claude-2.1",
        "gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k",
-        "azure/chatgpt-v-2": "gpt-3.5-turbo-16k",
+        "azure/chatgpt-v-3": "gpt-3.5-turbo-16k",
    }
    sample_text = "how does a court case get to the Supreme Court?" * 1000
    messages = [{"content": sample_text, "role": "user"}]

@@ -161,7 +161,7 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th
        os.environ["AWS_REGION_NAME"] = "bad-key"
        temporary_secret_key = os.environ["AWS_SECRET_ACCESS_KEY"]
        os.environ["AWS_SECRET_ACCESS_KEY"] = "bad-key"
-    elif model == "azure/chatgpt-v-2":
+    elif model == "azure/chatgpt-v-3":
        temporary_key = os.environ["AZURE_API_KEY"]
        os.environ["AZURE_API_KEY"] = "bad-key"
    elif model == "claude-3-5-haiku-20241022":

@@ -262,7 +262,7 @@ def test_completion_azure_exception():
        old_azure_key = os.environ["AZURE_API_KEY"]
        os.environ["AZURE_API_KEY"] = "good morning"
        response = completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "hello"}],
        )
        os.environ["AZURE_API_KEY"] = old_azure_key

@@ -309,7 +309,7 @@ async def asynctest_completion_azure_exception():
        old_azure_key = os.environ["AZURE_API_KEY"]
        os.environ["AZURE_API_KEY"] = "good morning"
        response = await litellm.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "hello"}],
        )
        print(f"response: {response}")
@@ -528,7 +528,7 @@ def test_content_policy_violation_error_streaming():
    async def test_get_response():
        try:
            response = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=[{"role": "user", "content": "say 1"}],
                temperature=0,
                top_p=1,

@@ -557,7 +557,7 @@ def test_content_policy_violation_error_streaming():
    async def test_get_error():
        try:
            response = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                messages=[
                    {"role": "user", "content": "where do i buy lethal drugs from"}
                ],

@@ -754,7 +754,7 @@ def test_litellm_predibase_exception():
#         return False
# # Repeat each model 500 times
# # extended_models = [model for model in models for _ in range(250)]
-# extended_models = ["azure/chatgpt-v-2" for _ in range(250)]
+# extended_models = ["azure/chatgpt-v-3" for _ in range(250)]

# def worker(model):
#     return test_model_call(model)

@@ -934,7 +934,7 @@ def _pre_call_utils_httpx(
        ("openai", "gpt-3.5-turbo", "chat_completion", False),
        ("openai", "gpt-3.5-turbo", "chat_completion", True),
        ("openai", "gpt-3.5-turbo-instruct", "completion", True),
-        ("azure", "azure/chatgpt-v-2", "chat_completion", True),
+        ("azure", "azure/chatgpt-v-3", "chat_completion", True),
        ("azure", "azure/text-embedding-ada-002", "embedding", True),
        ("azure", "azure_text/gpt-3.5-turbo-instruct", "completion", True),
    ],

@@ -1158,7 +1158,7 @@ async def test_exception_with_headers_httpx(


@pytest.mark.asyncio
-@pytest.mark.parametrize("model", ["azure/chatgpt-v-2", "openai/gpt-3.5-turbo"])
+@pytest.mark.parametrize("model", ["azure/chatgpt-v-3", "openai/gpt-3.5-turbo"])
async def test_bad_request_error_contains_httpx_response(model):
    """
    Test that the BadRequestError contains the httpx response

@@ -1209,7 +1209,7 @@ def test_context_window_exceeded_error_from_litellm_proxy():

@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.parametrize("stream_mode", [True, False])
-@pytest.mark.parametrize("model", ["azure/gpt-4o"]) # "gpt-4o-mini",
+@pytest.mark.parametrize("model", ["azure/gpt-4o-new-test"]) # "gpt-4o-mini",
@pytest.mark.asyncio
async def test_exception_bubbling_up(sync_mode, stream_mode, model):
    """
@@ -108,14 +108,14 @@ async def test_aaabasic_gcs_logger():
                },
                "endpoint": "http://localhost:4000/chat/completions",
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
                "model_info": {
                    "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
                    "db_model": False,
                },
                "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
                "caching_groups": None,
-                "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
+                "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
            },
        )


@@ -216,14 +216,14 @@ async def test_basic_gcs_logger_failure():
                },
                "endpoint": "http://localhost:4000/chat/completions",
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
                "model_info": {
                    "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
                    "db_model": False,
                },
                "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
                "caching_groups": None,
-                "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
+                "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
            },
        )
    except Exception:

@@ -626,14 +626,14 @@ async def test_basic_gcs_logger_with_folder_in_bucket_name():
                },
                "endpoint": "http://localhost:4000/chat/completions",
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
                "model_info": {
                    "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
                    "db_model": False,
                },
                "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
                "caching_groups": None,
-                "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
+                "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
            },
        )

@@ -20,7 +20,7 @@ import litellm
async def test_azure_health_check():
    response = await litellm.ahealth_check(
        model_params={
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
            "messages": [{"role": "user", "content": "Hey, how's it going?"}],
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_base": os.getenv("AZURE_API_BASE"),
@@ -78,7 +78,7 @@ async def make_async_calls(metadata=None, **completion_kwargs):

def create_async_task(**completion_kwargs):
    completion_args = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
        "api_version": "2024-02-01",
        "messages": [{"role": "user", "content": "This is a test"}],
        "max_tokens": 5,
@@ -33,7 +33,7 @@ def test_model_added():
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": "1234"},
        }

@@ -47,7 +47,7 @@ def test_get_available_deployments():
    test_cache = DualCache()
    least_busy_logger = LeastBusyLoggingHandler(router_cache=test_cache, model_list=[])
    model_group = "gpt-3.5-turbo"
-    deployment = "azure/chatgpt-v-2"
+    deployment = "azure/chatgpt-v-3"
    kwargs = {
        "litellm_params": {
            "metadata": {

@@ -113,7 +113,7 @@ async def test_router_get_available_deployments(async_test):
    router.leastbusy_logger.test_flag = True

    model_group = "azure-model"
-    deployment = "azure/chatgpt-v-2"
+    deployment = "azure/chatgpt-v-3"
    request_count_dict = {1: 10, 2: 54, 3: 100}
    cache_key = f"{model_group}_request_count"
    if async_test is True:
@@ -46,7 +46,7 @@
#         {
#             "model_name": "gpt-3.5-turbo",
#             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
#                 "api_key": os.getenv("AZURE_API_KEY"),
#                 "api_base": os.getenv("AZURE_API_BASE"),
#                 "api_version": os.getenv("AZURE_API_VERSION"),
@@ -38,7 +38,7 @@
#         {
#             "model_name": "gpt-3.5-turbo",
#             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
#                 "api_key": os.getenv("AZURE_API_KEY"),
#                 "api_base": os.getenv("AZURE_API_BASE"),
#                 "api_version": os.getenv("AZURE_API_VERSION"),
@@ -60,7 +60,7 @@ async def test_get_available_deployments_custom_price():
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "input_cost_per_token": 0.00003,
                "output_cost_per_token": 0.00003,
            },
@@ -48,7 +48,7 @@ async def test_latency_memory_leak(sync_mode):
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }

@@ -130,7 +130,7 @@ def test_latency_updated():
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }

@@ -173,7 +173,7 @@ def test_latency_updated_custom_ttl():
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }

@@ -200,12 +200,12 @@ def test_get_available_deployments():
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
            "model_info": {"id": "1234"},
        },
        {
            "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
            "model_info": {"id": "5678"},
        },
    ]
@@ -219,7 +219,7 @@ def test_get_available_deployments():
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }

@@ -240,7 +240,7 @@ def test_get_available_deployments():
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }

@@ -275,7 +275,7 @@ async def _deploy(lowest_latency_logger, deployment_id, tokens_used, duration):
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }

@@ -317,12 +317,12 @@ def test_get_available_endpoints_tpm_rpm_check_async(ans_rpm):
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
            "model_info": {"id": "1234", "rpm": ans_rpm},
        },
        {
            "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
            "model_info": {"id": "5678", "rpm": non_ans_rpm},
        },
    ]

@@ -366,12 +366,12 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm):
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
            "model_info": {"id": "1234", "rpm": ans_rpm},
        },
        {
            "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
            "model_info": {"id": "5678", "rpm": non_ans_rpm},
        },
    ]

@@ -385,7 +385,7 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm):
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }

@@ -407,7 +407,7 @@ def test_get_available_endpoints_tpm_rpm_check(ans_rpm):
        "litellm_params": {
            "metadata": {
                "model_group": "gpt-3.5-turbo",
-                "deployment": "azure/chatgpt-v-2",
+                "deployment": "azure/chatgpt-v-3",
            },
            "model_info": {"id": deployment_id},
        }
@@ -29,7 +29,7 @@
#         {
#             "model_name": "gpt-3.5-turbo", # openai model name
#             "litellm_params": { # params for litellm completion/embedding call
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
#                 "api_key": os.getenv("AZURE_API_KEY"),
#                 "api_version": os.getenv("AZURE_API_VERSION"),
#                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -40,7 +40,7 @@
#         {
#             "model_name": "bad-model", # openai model name
#             "litellm_params": { # params for litellm completion/embedding call
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
#                 "api_key": "bad-key",
#                 "api_version": os.getenv("AZURE_API_VERSION"),
#                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -157,7 +157,7 @@ def test_router_mock_request_with_mock_timeout_with_fallbacks():
        {
            "model_name": "azure-gpt",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
            },
@@ -104,12 +104,12 @@ async def test_router_with_caching():
        model_list = [
            {
                "model_name": "azure/gpt-4",
-                "litellm_params": get_azure_params("chatgpt-v-2"),
+                "litellm_params": get_azure_params("chatgpt-v-3"),
                "tpm": 100,
            },
            {
                "model_name": "azure/gpt-4",
-                "litellm_params": get_azure_params("chatgpt-v-2"),
+                "litellm_params": get_azure_params("chatgpt-v-3"),
                "tpm": 1000,
            },
        ]
@@ -107,7 +107,7 @@ async def test_prompt_injection_llm_eval():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
@@ -729,7 +729,7 @@ def azure_openai_test_completion():
    try:
        # OVERRIDE WITH DYNAMIC MAX TOKENS
        response_1 = litellm.completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[
                {
                    "content": "Hello, how are you? Be as verbose as possible",

@@ -743,7 +743,7 @@ def azure_openai_test_completion():

        # USE CONFIG TOKENS
        response_2 = litellm.completion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[
                {
                    "content": "Hello, how are you? Be as verbose as possible",
@@ -266,7 +266,7 @@ def test_router_sensitive_keys():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "special-key",
            },
            "model_info": {"id": 12345},

@@ -334,7 +334,7 @@ async def test_router_retries(sync_mode):
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "api_version": os.getenv("AZURE_API_VERSION"),

@@ -417,7 +417,7 @@ def test_exception_raising():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -479,7 +479,7 @@ def test_reading_key_from_model_list():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": old_api_key,
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -535,7 +535,7 @@ def test_reading_key_from_model_list():
def test_call_one_endpoint():
    # [PROD TEST CASE]
    # user passes one deployment they want to call on the router, we call the specified one
-    # this test makes a completion calls azure/chatgpt-v-2, it should work
+    # this test makes a completion calls azure/chatgpt-v-3, it should work
    try:
        print("Testing calling a specific deployment")
        old_api_key = os.environ["AZURE_API_KEY"]

@@ -544,7 +544,7 @@ def test_call_one_endpoint():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": old_api_key,
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
@@ -574,7 +574,7 @@ def test_call_one_endpoint():

    async def call_azure_completion():
        response = await router.acompletion(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[{"role": "user", "content": "hello this request will pass"}],
            specific_deployment=True,
        )

@@ -620,7 +620,7 @@ def test_router_azure_acompletion():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": old_api_key,
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -793,7 +793,7 @@ def test_router_context_window_check_pre_call_check_in_group_custom_model_info()
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -847,7 +847,7 @@ def test_router_context_window_check_pre_call_check():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -901,7 +901,7 @@ def test_router_context_window_check_pre_call_check_out_group():
        {
            "model_name": "gpt-3.5-turbo-small", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -980,7 +980,7 @@ def test_router_region_pre_call_check(allowed_model_region):
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -2616,7 +2616,7 @@ def test_is_team_specific_model():
#         {
#             "model_name": "gpt-3.5-turbo",
#             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
#                 "api_key": os.getenv("AZURE_API_KEY"),
#                 "api_base": os.getenv("AZURE_API_BASE"),
#                 "tpm": 100000,

@@ -2626,7 +2626,7 @@ def test_is_team_specific_model():
#         {
#             "model_name": "gpt-3.5-turbo",
#             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
#                 "api_key": os.getenv("AZURE_API_KEY"),
#                 "api_base": os.getenv("AZURE_API_BASE"),
#                 "tpm": 500,
@@ -74,7 +74,7 @@ async def test_provider_budgets_e2e_test():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -268,7 +268,7 @@ async def test_prometheus_metric_tracking():
        {
            "model_name": "gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
@@ -96,7 +96,7 @@ async def test_acompletion_caching_on_router():
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "api_version": os.getenv("AZURE_API_VERSION"),

@@ -213,7 +213,7 @@ async def test_acompletion_caching_with_ttl_on_router():
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "api_version": os.getenv("AZURE_API_VERSION"),

@@ -279,7 +279,7 @@ async def test_acompletion_caching_on_router_caching_groups():
        {
            "model_name": "azure-gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "api_version": os.getenv("AZURE_API_VERSION"),
@@ -43,7 +43,7 @@ async def test_router_init():
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "api_version": os.getenv("AZURE_API_VERSION"),
@@ -41,7 +41,7 @@ async def test_cooldown_badrequest_error():
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
@@ -33,7 +33,7 @@ def test_async_fallbacks(caplog):
        {
            "model_name": "azure/gpt-3.5-turbo",
            "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -93,7 +93,7 @@ def test_async_fallbacks(caplog):
    # - error request, falling back notice, success notice
    expected_logs = [
        "Falling back to model_group = azure/gpt-3.5-turbo",
-        "litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",
+        "litellm.acompletion(model=azure/chatgpt-v-3)\x1b[32m 200 OK\x1b[0m",
        "Successful fallback b/w models.",
    ]

@@ -67,7 +67,7 @@ def test_sync_fallbacks():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -78,7 +78,7 @@ def test_sync_fallbacks():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -150,7 +150,7 @@ async def test_async_fallbacks():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -161,7 +161,7 @@ async def test_async_fallbacks():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -349,7 +349,7 @@ def test_dynamic_fallbacks_sync():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -360,7 +360,7 @@ def test_dynamic_fallbacks_sync():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -426,7 +426,7 @@ async def test_dynamic_fallbacks_async():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -437,7 +437,7 @@ async def test_dynamic_fallbacks_async():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),

@@ -509,7 +509,7 @@ async def test_async_fallbacks_streaming():
        { # list of model deployments
            "model_name": "azure/gpt-3.5-turbo", # openai model name
            "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
@ -520,7 +520,7 @@ async def test_async_fallbacks_streaming():
|
|||
{ # list of model deployments
|
||||
"model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
@ -594,7 +594,7 @@ def test_sync_fallbacks_streaming():
|
|||
{ # list of model deployments
|
||||
"model_name": "azure/gpt-3.5-turbo", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": "bad-key",
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
@ -605,7 +605,7 @@ def test_sync_fallbacks_streaming():
|
|||
{ # list of model deployments
|
||||
"model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
@ -675,7 +675,7 @@ async def test_async_fallbacks_max_retries_per_request():
|
|||
{ # list of model deployments
|
||||
"model_name": "azure/gpt-3.5-turbo", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": "bad-key",
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
@ -686,7 +686,7 @@ async def test_async_fallbacks_max_retries_per_request():
|
|||
{ # list of model deployments
|
||||
"model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
@ -808,13 +808,13 @@ def test_ausage_based_routing_fallbacks():
|
|||
model_list = [
|
||||
{
|
||||
"model_name": "azure/gpt-4-fast",
|
||||
"litellm_params": get_azure_params("chatgpt-v-2"),
|
||||
"litellm_params": get_azure_params("chatgpt-v-3"),
|
||||
"model_info": {"id": 1},
|
||||
"rpm": AZURE_FAST_RPM,
|
||||
},
|
||||
{
|
||||
"model_name": "azure/gpt-4-basic",
|
||||
"litellm_params": get_azure_params("chatgpt-v-2"),
|
||||
"litellm_params": get_azure_params("chatgpt-v-3"),
|
||||
"model_info": {"id": 2},
|
||||
"rpm": AZURE_BASIC_RPM,
|
||||
},
|
||||
|
@ -889,7 +889,7 @@ def test_custom_cooldown_times():
|
|||
{ # list of model deployments
|
||||
"model_name": "gpt-3.5-turbo", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": "bad-key",
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
@ -899,7 +899,7 @@ def test_custom_cooldown_times():
|
|||
{ # list of model deployments
|
||||
"model_name": "gpt-3.5-turbo", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
@ -993,7 +993,7 @@ async def test_service_unavailable_fallbacks(sync_mode):
|
|||
{
|
||||
"model_name": "gpt-3.5-turbo-0125-preview",
|
||||
"litellm_params": {
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"model": "azure/chatgpt-v-3",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
|
|
|
@@ -41,7 +41,7 @@ def test_weighted_selection_router():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -54,7 +54,7 @@ def test_weighted_selection_router():
         )
         selection_counts = defaultdict(int)

-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")
             selected_model_id = selected_model["litellm_params"]["model"]

@@ -64,10 +64,10 @@ def test_weighted_selection_router():

         total_requests = sum(selection_counts.values())

-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"

         router.reset()
     except Exception as e:

@@ -97,7 +97,7 @@ def test_weighted_selection_router_tpm():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -110,7 +110,7 @@ def test_weighted_selection_router_tpm():
         )
         selection_counts = defaultdict(int)

-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")
             selected_model_id = selected_model["litellm_params"]["model"]

@@ -120,10 +120,10 @@ def test_weighted_selection_router_tpm():

         total_requests = sum(selection_counts.values())

-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"

         router.reset()
     except Exception as e:

@@ -153,7 +153,7 @@ def test_weighted_selection_router_tpm_as_router_param():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -166,7 +166,7 @@ def test_weighted_selection_router_tpm_as_router_param():
         )
         selection_counts = defaultdict(int)

-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")
             selected_model_id = selected_model["litellm_params"]["model"]

@@ -176,10 +176,10 @@ def test_weighted_selection_router_tpm_as_router_param():

         total_requests = sum(selection_counts.values())

-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"

         router.reset()
     except Exception as e:

@@ -210,7 +210,7 @@ def test_weighted_selection_router_rpm_as_router_param():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -224,7 +224,7 @@ def test_weighted_selection_router_rpm_as_router_param():
        )
         selection_counts = defaultdict(int)

-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")
             selected_model_id = selected_model["litellm_params"]["model"]

@@ -234,10 +234,10 @@ def test_weighted_selection_router_rpm_as_router_param():

         total_requests = sum(selection_counts.values())

-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"

         router.reset()
     except Exception as e:

@@ -266,7 +266,7 @@ def test_weighted_selection_router_no_rpm_set():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -286,7 +286,7 @@ def test_weighted_selection_router_no_rpm_set():
         )
         selection_counts = defaultdict(int)

-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = router.get_available_deployment("claude-1")
             selected_model_id = selected_model["litellm_params"]["model"]

@@ -296,7 +296,7 @@ def test_weighted_selection_router_no_rpm_set():

         total_requests = sum(selection_counts.values())

-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
             selection_counts["bedrock/claude1.2"] / total_requests == 1
         ), f"Assertion failed: Selection counts {selection_counts}"

@@ -325,7 +325,7 @@ def test_model_group_aliases():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -358,7 +358,7 @@ def test_model_group_aliases():
         )

         # test that
-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         selection_counts = defaultdict(int)
         for _ in range(1000):
             selected_model = router.get_available_deployment("gpt-3.5-turbo")

@@ -369,10 +369,10 @@ def test_model_group_aliases():

         total_requests = sum(selection_counts.values())

-        # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+        # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
         assert (
-            selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-        ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+            selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+        ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"

         router.reset()
     except Exception as e:

@@ -552,7 +552,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_base": os.getenv("AZURE_API_BASE"),
                 "api_version": os.getenv("AZURE_API_VERSION"),

@@ -566,7 +566,7 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
         )
         selection_counts = defaultdict(int)

-        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
+        # call get_available_deployment 1k times, it should pick azure/chatgpt-v-3 about 90% of the time
         for _ in range(1000):
             selected_model = await router.async_get_available_deployment(
                 "gpt-3.5-turbo", request_kwargs={}

@@ -579,13 +579,13 @@ async def test_weighted_selection_router_async(rpm_list, tpm_list):
         total_requests = sum(selection_counts.values())

         if rpm_list[0] is not None or tpm_list[0] is not None:
-            # Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
+            # Assert that 'azure/chatgpt-v-3' has about 90% of the total requests
             assert (
-                selection_counts["azure/chatgpt-v-2"] / total_requests > 0.89
-            ), f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
+                selection_counts["azure/chatgpt-v-3"] / total_requests > 0.89
+            ), f"Assertion failed: 'azure/chatgpt-v-3' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
         else:
             # Assert both are used
-            assert selection_counts["azure/chatgpt-v-2"] > 0
+            assert selection_counts["azure/chatgpt-v-3"] > 0
             assert selection_counts["gpt-3.5-turbo"] > 0
         router.reset()
     except Exception as e:
@@ -40,7 +40,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -96,7 +96,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -134,7 +134,7 @@
 #         {
 #             "model_name": "azure-cloudflare",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": "https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1",

@@ -201,7 +201,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -254,7 +254,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -615,7 +615,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -660,7 +660,7 @@
 #         {
 #             "model_name": "gpt-3.5-turbo",
 #             "litellm_params": {
-#                 "model": "azure/chatgpt-v-2",
+#                 "model": "azure/chatgpt-v-3",
 #                 "api_key": os.getenv("AZURE_API_KEY"),
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -69,7 +69,7 @@ async def test_async_fallbacks():
         { # list of model deployments
             "model_name": "azure/gpt-3.5-turbo-context-fallback", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -166,7 +166,7 @@ async def test_router_retry_policy(error_type):
         {
             "model_name": "gpt-3.5-turbo", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -175,7 +175,7 @@ async def test_router_retry_policy(error_type):
         {
             "model_name": "bad-model", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "bad-key",
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -275,7 +275,7 @@ async def test_dynamic_router_retry_policy(model_group):
         {
             "model_name": "gpt-3.5-turbo", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -287,7 +287,7 @@ async def test_dynamic_router_retry_policy(model_group):
         {
             "model_name": "gpt-3.5-turbo", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -299,7 +299,7 @@ async def test_dynamic_router_retry_policy(model_group):
         {
             "model_name": "gpt-3.5-turbo", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -311,7 +311,7 @@ async def test_dynamic_router_retry_policy(model_group):
         {
             "model_name": "bad-model", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "bad-key",
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -393,7 +393,7 @@ def test_retry_rate_limit_error_with_healthy_deployments():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -426,7 +426,7 @@ def test_do_retry_rate_limit_error_with_no_fallbacks_and_no_healthy_deployments(
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -459,14 +459,14 @@ def test_raise_context_window_exceeded_error():
         llm_provider="azure",
         model="gpt-3.5-turbo",
     )
-    context_window_fallbacks = [{"gpt-3.5-turbo": ["azure/chatgpt-v-2"]}]
+    context_window_fallbacks = [{"gpt-3.5-turbo": ["azure/chatgpt-v-3"]}]

     router = Router(
         model_list=[
             {
                 "model_name": "gpt-3.5-turbo",
                 "litellm_params": {
-                    "model": "azure/chatgpt-v-2",
+                    "model": "azure/chatgpt-v-3",
                     "api_key": os.getenv("AZURE_API_KEY"),
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),

@@ -508,7 +508,7 @@ def test_raise_context_window_exceeded_error_no_retry():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -562,7 +562,7 @@ def test_timeout_for_rate_limit_error_with_healthy_deployments(
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -589,7 +589,7 @@ def test_timeout_for_rate_limit_error_with_healthy_deployments(
         "litellm_params": {
             "api_key": "my-key",
             "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com",
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
         },
         "model_info": {
             "id": "0e30bc8a63fa91ae4415d4234e231b3f9e6dd900cac57d118ce13a720d95e9d6",

@@ -615,7 +615,7 @@ def test_timeout_for_rate_limit_error_with_no_healthy_deployments():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -650,7 +650,7 @@ def test_no_retry_for_not_found_error_404():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -709,7 +709,7 @@ def test_no_retry_when_no_healthy_deployments():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -30,7 +30,7 @@ def test_router_timeouts():
         {
             "model_name": "openai-gpt-4",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "os.environ/AZURE_API_KEY",
                 "api_base": "os.environ/AZURE_API_BASE",
                 "api_version": "os.environ/AZURE_API_VERSION",
@@ -32,7 +32,7 @@ def test_returned_settings():
         {
             "model_name": "gpt-3.5-turbo", # openai model name
             "litellm_params": { # params for litellm completion/embedding call
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "bad-key",
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -96,7 +96,7 @@ def test_update_kwargs_before_fallbacks_unit_test():
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "bad-key",
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),

@@ -133,7 +133,7 @@ async def test_update_kwargs_before_fallbacks(call_type):
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": "bad-key",
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
@@ -241,7 +241,7 @@ tools_schema = [
 def test_completion_azure_stream_special_char():
     litellm.set_verbose = True
     messages = [{"role": "user", "content": "hi. respond with the <xml> tag only"}]
-    response = completion(model="azure/chatgpt-v-2", messages=messages, stream=True)
+    response = completion(model="azure/chatgpt-v-3", messages=messages, stream=True)
     response_str = ""
     for part in response:
         response_str += part.choices[0].delta.content or ""

@@ -449,7 +449,7 @@ def test_completion_azure_stream():
         },
     ]
     response = completion(
-        model="azure/chatgpt-v-2", messages=messages, stream=True, max_tokens=50
+        model="azure/chatgpt-v-3", messages=messages, stream=True, max_tokens=50
     )
     complete_response = ""
     # Add any assertions here to check the response

@@ -2070,7 +2070,7 @@ def test_openai_chat_completion_complete_response_call():
     "model",
     [
         "gpt-3.5-turbo",
-        "azure/chatgpt-v-2",
+        "azure/chatgpt-v-3",
         "claude-3-haiku-20240307",
         "o1-preview",
         "o1",
@@ -23,7 +23,7 @@ import litellm
     [
         ("gpt-3.5-turbo", "openai"),
         ("anthropic.claude-instant-v1", "bedrock"),
-        ("azure/chatgpt-v-2", "azure"),
+        ("azure/chatgpt-v-3", "azure"),
     ],
 )
 @pytest.mark.parametrize("sync_mode", [True, False])

@@ -104,7 +104,7 @@ def test_hanging_request_azure():
         {
             "model_name": "azure-gpt",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_base": os.environ["AZURE_API_BASE"],
                 "api_key": os.environ["AZURE_API_KEY"],
             },

@@ -158,7 +158,7 @@ def test_hanging_request_openai():
         {
             "model_name": "azure-gpt",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_base": os.environ["AZURE_API_BASE"],
                 "api_key": os.environ["AZURE_API_KEY"],
             },
@@ -45,7 +45,7 @@ def test_tpm_rpm_updated():
     )
     model_group = "gpt-3.5-turbo"
     deployment_id = "1234"
-    deployment = "azure/chatgpt-v-2"
+    deployment = "azure/chatgpt-v-3"
     total_tokens = 50
     standard_logging_payload: StandardLoggingPayload = create_standard_logging_payload()
     standard_logging_payload["model_group"] = model_group

@@ -100,12 +100,12 @@ def test_get_available_deployments():
     model_list = [
         {
             "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
             "model_info": {"id": "1234"},
         },
         {
             "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "azure/chatgpt-v-2"},
+            "litellm_params": {"model": "azure/chatgpt-v-3"},
             "model_info": {"id": "5678"},
         },
     ]

@@ -116,7 +116,7 @@ def test_get_available_deployments():
     ## DEPLOYMENT 1 ##
     total_tokens = 50
     deployment_id = "1234"
-    deployment = "azure/chatgpt-v-2"
+    deployment = "azure/chatgpt-v-3"
     standard_logging_payload = create_standard_logging_payload()
     standard_logging_payload["model_group"] = model_group
     standard_logging_payload["model_id"] = deployment_id

@@ -721,7 +721,7 @@ async def test_tpm_rpm_routing_model_name_checks():
     deployment = {
         "model_name": "gpt-3.5-turbo",
         "litellm_params": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "api_key": os.getenv("AZURE_API_KEY"),
             "api_base": os.getenv("AZURE_API_BASE"),
             "mock_response": "Hey, how's it going?",

@@ -763,5 +763,5 @@ async def test_tpm_rpm_routing_model_name_checks():

     assert (
         standard_logging_payload["hidden_params"]["litellm_model_name"]
-        == "azure/chatgpt-v-2"
+        == "azure/chatgpt-v-3"
     )
@@ -56,7 +56,7 @@ def test_get_api_base_unit_test(model, optional_params, expected_api_base):
 async def test_get_api_base():
     _pl = ProxyLogging(user_api_key_cache=DualCache())
     _pl.update_values(alerting=["slack"], alerting_threshold=100, redis_cache=None)
-    model = "chatgpt-v-2"
+    model = "chatgpt-v-3"
     messages = [{"role": "user", "content": "Hey how's it going?"}]
     litellm_params = {
         "acompletion": True,
@@ -244,7 +244,7 @@ async def make_async_calls():
     for _ in range(5):
         task = asyncio.create_task(
             litellm.acompletion(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 messages=[{"role": "user", "content": "This is a test"}],
                 max_tokens=5,
                 temperature=0.7,
@@ -40,7 +40,7 @@ def test_spend_logs_payload(model_id: Optional[str]):

     input_args: dict = {
         "kwargs": {
-            "model": "chatgpt-v-2",
+            "model": "chatgpt-v-3",
             "messages": [
                 {"role": "system", "content": "you are a helpful assistant.\n"},
                 {"role": "user", "content": "bom dia"},

@@ -89,7 +89,7 @@ def test_spend_logs_payload(model_id: Optional[str]):
             },
             "endpoint": "http://localhost:4000/chat/completions",
             "model_group": "gpt-3.5-turbo",
-            "deployment": "azure/chatgpt-v-2",
+            "deployment": "azure/chatgpt-v-3",
             "model_info": {
                 "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",
                 "db_model": False,

@@ -99,7 +99,7 @@ def test_spend_logs_payload(model_id: Optional[str]):
             "error_information": None,
             "status": "success",
             "proxy_server_request": "{}",
-            "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-2', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
+            "raw_request": "\n\nPOST Request Sent from LiteLLM:\ncurl -X POST \\\nhttps://openai-gpt-4-test-v-1.openai.azure.com//openai/ \\\n-H 'Authorization: *****' \\\n-d '{'model': 'chatgpt-v-3', 'messages': [{'role': 'system', 'content': 'you are a helpful assistant.\\n'}, {'role': 'user', 'content': 'bom dia'}], 'stream': False, 'max_tokens': 10, 'user': '116544810872468347480', 'extra_body': {}}'\n",
         },
         "model_info": {
             "id": "4bad40a1eb6bebd1682800f16f44b9f06c52a6703444c99c7f9f32e9de3693b4",

@@ -158,7 +158,7 @@ def test_spend_logs_payload(model_id: Optional[str]):
         "api_base": "openai-gpt-4-test-v-1.openai.azure.com",
         "acompletion": True,
         "complete_input_dict": {
-            "model": "chatgpt-v-2",
+            "model": "chatgpt-v-3",
             "messages": [
                 {"role": "system", "content": "you are a helpful assistant.\n"},
                 {"role": "user", "content": "bom dia"},
@@ -25,7 +25,7 @@ config = {
         {
             "model_name": "gpt-3.5-turbo",
             "litellm_params": {
-                "model": "azure/chatgpt-v-2",
+                "model": "azure/chatgpt-v-3",
                 "api_key": os.environ["AZURE_API_KEY"],
                 "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
                 "api_version": "2023-07-01-preview",
@@ -9,7 +9,7 @@

 # chat = ChatOpenAI(
 #     openai_api_base="http://0.0.0.0:8000",
-#     model = "azure/chatgpt-v-2",
+#     model = "azure/chatgpt-v-3",
 #     temperature=0.1,
 #     extra_body={
 #         "metadata": {
@@ -39,7 +39,7 @@ client = openai.AzureOpenAI(
 )
 try:
     response = client.chat.completions.create(
-        model="chatgpt-v-2",
+        model="chatgpt-v-3",
         messages=[
             {
                 "role": "user",
@@ -4,7 +4,7 @@ client = openai.OpenAI(api_key="hi", base_url="http://0.0.0.0:8000")

 # # request sent to model set on litellm proxy, `litellm --model`
 response = client.chat.completions.create(
-    model="azure/chatgpt-v-2",
+    model="azure/chatgpt-v-3",
     messages=[
         {"role": "user", "content": "this is a test request, write a short poem"}
     ],
@@ -1,7 +1,7 @@
 model_list:
   - model_name: gpt-4-team1
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
       api_version: "2023-05-15"
       api_key: os.environ/AZURE_API_KEY
@@ -5,12 +5,12 @@ model_list:
       model: gpt-3.5-turbo
   - model_name: working-azure-gpt-3.5-turbo
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: os.environ/AZURE_API_BASE
       api_key: os.environ/AZURE_API_KEY
   - model_name: azure-gpt-3.5-turbo
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: os.environ/AZURE_API_BASE
       api_key: bad-key
   - model_name: azure-embedding
@@ -1,7 +1,7 @@
 model_list:
   - model_name: azure-cloudflare
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
       api_key: os.environ/AZURE_API_KEY
       api_version: 2023-07-01-preview
@@ -12,7 +12,7 @@ model_list:
   - litellm_params:
       api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
       api_key: os.environ/AZURE_API_KEY
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
     model_name: azure-cloudflare-model
   - litellm_params:
       api_base: https://openai-france-1234.openai.azure.com
@@ -1,7 +1,7 @@
 model_list:
   - model_name: Azure OpenAI GPT-4 Canada
     litellm_params:
-      model: azure/chatgpt-v-2
+      model: azure/chatgpt-v-3
       api_base: os.environ/AZURE_API_BASE
       api_key: os.environ/AZURE_API_KEY
       api_version: "2023-07-01-preview"
@@ -1546,7 +1546,7 @@ def test_call_with_key_over_budget(prisma_client):
         )
         await proxy_db_logger._PROXY_track_cost_callback(
             kwargs={
-                "model": "chatgpt-v-2",
+                "model": "chatgpt-v-3",
                 "stream": False,
                 "litellm_params": {
                     "metadata": {

@@ -1578,7 +1578,7 @@ def test_call_with_key_over_budget(prisma_client):

         assert spend_log.request_id == request_id
         assert spend_log.spend == float("2e-05")
-        assert spend_log.model == "chatgpt-v-2"
+        assert spend_log.model == "chatgpt-v-3"
         assert (
             spend_log.cache_key
             == "c891d64397a472e6deb31b87a5ac4d3ed5b2dcc069bc87e2afe91e6d64e95a1e"

@@ -1669,7 +1669,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):
         proxy_db_logger = _ProxyDBLogger()
         await proxy_db_logger._PROXY_track_cost_callback(
             kwargs={
-                "model": "chatgpt-v-2",
+                "model": "chatgpt-v-3",
                 "stream": False,
                 "litellm_params": {
                     "metadata": {

@@ -1702,7 +1702,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):

         assert spend_log.request_id == request_id
         assert spend_log.spend == float("2e-05")
-        assert spend_log.model == "chatgpt-v-2"
+        assert spend_log.model == "chatgpt-v-3"
         assert (
             spend_log.cache_key
             == "c891d64397a472e6deb31b87a5ac4d3ed5b2dcc069bc87e2afe91e6d64e95a1e"

@@ -1757,7 +1757,7 @@ async def test_call_with_key_over_model_budget(

     try:

-        # set budget for chatgpt-v-2 to 0.000001, expect the next request to fail
+        # set budget for chatgpt-v-3 to 0.000001, expect the next request to fail
         model_max_budget = {
             "gpt-4o-mini": {
                 "budget_limit": "0.000001",

@@ -1898,7 +1898,7 @@ async def test_call_with_key_never_over_budget(prisma_client):
         )
         await proxy_db_logger._PROXY_track_cost_callback(
             kwargs={
-                "model": "chatgpt-v-2",
+                "model": "chatgpt-v-3",
                 "stream": False,
                 "litellm_params": {
                     "metadata": {

@@ -1987,7 +1987,7 @@ async def test_call_with_key_over_budget_stream(prisma_client):
         await proxy_db_logger._PROXY_track_cost_callback(
             kwargs={
                 "call_type": "acompletion",
-                "model": "sagemaker-chatgpt-v-2",
+                "model": "sagemaker-chatgpt-v-3",
                 "stream": True,
                 "complete_streaming_response": resp,
                 "litellm_params": {

@@ -2431,7 +2431,7 @@ async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
     await proxy_db_logger._PROXY_track_cost_callback(
         kwargs={
             "call_type": "acompletion",
-            "model": "sagemaker-chatgpt-v-2",
+            "model": "sagemaker-chatgpt-v-3",
             "stream": True,
             "complete_streaming_response": resp,
             "litellm_params": {
@@ -164,7 +164,7 @@ def test_chat_completion(client):
         my_custom_logger.async_success == True
     ) # checks if the status of async_success is True, only the async_log_success_event can set this to true
     assert (
-        my_custom_logger.async_completion_kwargs["model"] == "chatgpt-v-2"
+        my_custom_logger.async_completion_kwargs["model"] == "chatgpt-v-3"
     ) # checks if kwargs passed to async_log_success_event are correct
     print(
         "\n\n Custom Logger Async Completion args",
@@ -64,7 +64,7 @@ def test_chat_completion(client_no_auth):
         ModelConfig(
             model_name="user-azure-instance",
             litellm_params=CompletionRequest(
-                model="azure/chatgpt-v-2",
+                model="azure/chatgpt-v-3",
                 api_key=os.getenv("AZURE_API_KEY"),
                 api_version=os.getenv("AZURE_API_VERSION"),
                 api_base=os.getenv("AZURE_API_BASE"),
@@ -446,7 +446,7 @@ def test_chat_completion_azure(mock_acompletion, client_no_auth):
     try:
         # Your test data
         test_data = {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [
                 {"role": "user", "content": "write 1 sentence poem"},
             ],

@@ -457,7 +457,7 @@ def test_chat_completion_azure(mock_acompletion, client_no_auth):
         response = client_no_auth.post("/v1/chat/completions", json=test_data)

         mock_acompletion.assert_called_once_with(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
            messages=[
                 {"role": "user", "content": "write 1 sentence poem"},
             ],

@@ -489,19 +489,19 @@ def test_openai_deployments_model_chat_completions_azure(
     try:
         # Your test data
         test_data = {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [
                 {"role": "user", "content": "write 1 sentence poem"},
             ],
             "max_tokens": 10,
         }

-        url = "/openai/deployments/azure/chatgpt-v-2/chat/completions"
+        url = "/openai/deployments/azure/chatgpt-v-3/chat/completions"
         print(f"testing proxy server with Azure Request {url}")
         response = client_no_auth.post(url, json=test_data)

         mock_acompletion.assert_called_once_with(
-            model="azure/chatgpt-v-2",
+            model="azure/chatgpt-v-3",
             messages=[
                 {"role": "user", "content": "write 1 sentence poem"},
             ],

@@ -1314,7 +1314,7 @@ async def test_add_callback_via_key(prisma_client):
     try:
         # Your test data
         test_data = {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [
                 {"role": "user", "content": "write 1 sentence poem"},
             ],

@@ -1408,7 +1408,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(
     request._url = URL(url="/chat/completions")

     test_data = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "messages": [
             {"role": "user", "content": "write 1 sentence poem"},
         ],

@@ -1423,7 +1423,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(

     data = {
         "data": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [{"role": "user", "content": "write 1 sentence poem"}],
             "max_tokens": 10,
             "mock_response": "Hello world",

@@ -1523,7 +1523,7 @@ async def test_disable_fallbacks_by_key(disable_fallbacks_set):

     key_metadata = {"disable_fallbacks": disable_fallbacks_set}
     existing_data = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "messages": [{"role": "user", "content": "write 1 sentence poem"}],
     }
     data = LiteLLMProxyRequestSetup.add_key_level_controls(

@@ -1564,7 +1564,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_gcs_bucket(
     request._url = URL(url="/chat/completions")

     test_data = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "messages": [
             {"role": "user", "content": "write 1 sentence poem"},
         ],

@@ -1579,7 +1579,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_gcs_bucket(

     data = {
         "data": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [{"role": "user", "content": "write 1 sentence poem"}],
             "max_tokens": 10,
             "mock_response": "Hello world",

@@ -1697,7 +1697,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_langsmith(
     request._url = URL(url="/chat/completions")

     test_data = {
-        "model": "azure/chatgpt-v-2",
+        "model": "azure/chatgpt-v-3",
         "messages": [
             {"role": "user", "content": "write 1 sentence poem"},
         ],

@@ -1712,7 +1712,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_langsmith(

     data = {
         "data": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "messages": [{"role": "user", "content": "write 1 sentence poem"}],
             "max_tokens": 10,
             "mock_response": "Hello world",
@@ -171,7 +171,7 @@
 # model_data = {
 #     "model_name": "azure-model",
 #     "litellm_params": {
-#         "model": "azure/chatgpt-v-2",
+#         "model": "azure/chatgpt-v-3",
 #         "api_key": os.getenv("AZURE_API_KEY"),
 #         "api_base": os.getenv("AZURE_API_BASE"),
 #         "api_version": os.getenv("AZURE_API_VERSION")
@@ -67,7 +67,7 @@ async def add_models(session, model_id="123", model_name="azure-gpt-3.5", key="s
     data = {
         "model_name": model_name,
         "litellm_params": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "api_key": "os.environ/AZURE_API_KEY",
             "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
             "api_version": "2023-05-15",

@@ -100,7 +100,7 @@ async def update_model(session, model_id="123", model_name="azure-gpt-3.5", key=
     data = {
         "model_name": model_name,
         "litellm_params": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "api_key": "os.environ/AZURE_API_KEY",
             "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
             "api_version": "2023-05-15",

@@ -292,7 +292,7 @@ async def add_model_for_health_checking(session, model_id="123"):
     data = {
         "model_name": f"azure-model-health-check-{model_id}",
         "litellm_params": {
-            "model": "azure/chatgpt-v-2",
+            "model": "azure/chatgpt-v-3",
             "api_key": os.getenv("AZURE_API_KEY"),
             "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
             "api_version": "2023-05-15",

@@ -417,7 +417,7 @@ async def test_add_model_run_health():

     assert _health_info["healthy_count"] == 1
     assert (
-        _healthy_endpooint["model"] == "azure/chatgpt-v-2"
+        _healthy_endpooint["model"] == "azure/chatgpt-v-3"
     ) # this is the model that got added

     # assert httpx client is is unchanges
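Every hunk above makes the same mechanical change: the Azure deployment name "chatgpt-v-2" becomes "chatgpt-v-3" wherever a test registers a deployment. A minimal sketch of the updated pattern (assumptions: AZURE_API_KEY, AZURE_API_BASE, and AZURE_API_VERSION are set in the environment, and "chatgpt-v-3" exists as a deployment on that Azure resource, as the tests in this commit expect):

import os
from litellm import Router

# One deployment, registered under the public alias the tests call.
router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",  # alias used in router.completion(...)
            "litellm_params": {
                "model": "azure/chatgpt-v-3",  # new Azure deployment name
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "api_version": os.getenv("AZURE_API_VERSION"),
            },
        }
    ]
)

# Requests address the alias; the router resolves it to the Azure deployment.
response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)
print(response.choices[0].message.content)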