test - wildcard OpenAI models on proxy

2024-04-15 14:05:26 -07:00 · 2024-04-15 14:05:26 -07:00 · 6df5337e65
commit 6df5337e65
parent 020fb54ea7
3 changed files with 34 additions and 7 deletions
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@ -4,6 +4,10 @@ model_list:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: "*"
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
 general_settings:
  store_model_in_db: true
  master_key: sk-1234
--- a/proxy_server_config.yaml
+++ b/proxy_server_config.yaml
@ -55,6 +55,10 @@ model_list:
      api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
      stream_timeout: 0.001
      rpm: 1
  - model_name: "*"
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
  - model_name: gpt-instruct # [PROD TEST] - tests if `/health` automatically infers this to be a text completion model
    litellm_params:
      model: text-completion-openai/gpt-3.5-turbo-instruct
--- a/tests/test_openai_endpoints.py
+++ b/tests/test_openai_endpoints.py
@ -14,16 +14,19 @@ def response_header_check(response):
    assert headers_size < 4096, "Response headers exceed the 4kb limit"
-async def generate_key(session):
+async def generate_key(
-    url = "http://0.0.0.0:4000/key/generate"
+    session,
-    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
+    models=[
    data = {
        "models": [
        "gpt-4",
        "text-embedding-ada-002",
        "dall-e-2",
        "fake-openai-endpoint-2",
    ],
 ):
    url = "http://0.0.0.0:4000/key/generate"
    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
    data = {
        "models": models,
        "duration": None,
    }
@ -294,3 +297,19 @@ async def test_image_generation():
        key_gen = await new_user(session=session)
        key_2 = key_gen["key"]
        await image_generation(session=session, key=key_2)
@pytest.mark.asyncio
async def test_openai_wildcard_chat_completion():
    """
    Wildcard ("*") model access for chat completions.

    - Generate a key with models=["*"] -> key is meant to have access to all models
    - proxy_server_config.yaml has a model_name = "*" entry
    - Send a chat completion request using that key
    """
    async with aiohttp.ClientSession() as client:
        wildcard_key = (await generate_key(session=client, models=["*"]))["key"]

        # the requested model was not named when the key was created,
        # and it is not listed on the config.yaml either
        await chat_completion(
            session=client,
            key=wildcard_key,
            model="gpt-3.5-turbo-0125",
        )