forked from phoenix/litellm-mirror
* fix(lowest_tpm_rpm_routing.py): fix parallel rate limit check * fix(lowest_tpm_rpm_v2.py): return headers in correct format * test: update test * build(deps): bump cookie and express in /docs/my-website (#6566) Bumps [cookie](https://github.com/jshttp/cookie) and [express](https://github.com/expressjs/express). These dependencies needed to be updated together. Updates `cookie` from 0.6.0 to 0.7.1 - [Release notes](https://github.com/jshttp/cookie/releases) - [Commits](https://github.com/jshttp/cookie/compare/v0.6.0...v0.7.1) Updates `express` from 4.20.0 to 4.21.1 - [Release notes](https://github.com/expressjs/express/releases) - [Changelog](https://github.com/expressjs/express/blob/4.21.1/History.md) - [Commits](https://github.com/expressjs/express/compare/4.20.0...4.21.1) --- updated-dependencies: - dependency-name: cookie dependency-type: indirect - dependency-name: express dependency-type: indirect ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * docs(virtual_keys.md): update Dockerfile reference (#6554) Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com> * (proxy fix) - call connect on prisma client when running setup (#6534) * critical fix - call connect on prisma client when running setup * fix test_proxy_server_prisma_setup * fix test_proxy_server_prisma_setup * Add 3.5 haiku (#6588) * feat: add claude-3-5-haiku-20241022 entries * feat: add claude-3-5-haiku-20241022 and vertex_ai/claude-3-5-haiku@20241022 models * add missing entries, remove vision * remove image token costs * Litellm perf improvements 3 (#6573) * perf: move writing key to cache, to background task * perf(litellm_pre_call_utils.py): add otel tracing for pre-call utils adds 200ms on calls with pgdb connected * fix(litellm_pre_call_utils.py'): rename call_type to actual call used * perf(proxy_server.py): remove db logic from _get_config_from_file was causing db calls to occur on every llm 
request, if team_id was set on key * fix(auth_checks.py): add check for reducing db calls if user/team id does not exist in db reduces latency/call by ~100ms * fix(proxy_server.py): minor fix on existing_settings not incl alerting * fix(exception_mapping_utils.py): map databricks exception string * fix(auth_checks.py): fix auth check logic * test: correctly mark flaky test * fix(utils.py): handle auth token error for tokenizers.from_pretrained * build: fix map * build: fix map * build: fix json for model map * test: remove eol model * fix(proxy_server.py): fix db config loading logic * fix(proxy_server.py): fix order of config / db updates, to ensure fields not overwritten * test: skip test if required env var is missing * test: fix test --------- Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Emmanuel Ferdman <emmanuelferdman@gmail.com> Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: paul-gauthier <69695708+paul-gauthier@users.noreply.github.com>
190 lines
5.5 KiB
Python
190 lines
5.5 KiB
Python
# What does this test?
|
|
## Tests team-based logging: a key created for a team should log its /chat/completions call to that team's Langfuse project
|
|
|
|
import pytest
|
|
import asyncio
|
|
import aiohttp
|
|
import os
|
|
import dotenv
|
|
from dotenv import load_dotenv
|
|
import pytest
|
|
|
|
# Populate the environment from a .env file (python-dotenv) before the tests
# below read the LANGFUSE_* credentials via os.getenv.
load_dotenv()
|
|
|
|
|
|
async def generate_key(session, models=None, team_id=None):
    """
    Create a key by POSTing to the proxy's /key/generate endpoint.

    Args:
        session: client session object; only ``session.post(...)`` is used,
            as an async context manager (the tests pass an
            ``aiohttp.ClientSession``).
        models: value sent as the ``models`` field of the request body.
            When omitted (or None) an empty list is sent.
        team_id: value sent as the ``team_id`` field of the request body.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: if the response status is not 200.
    """
    url = "http://0.0.0.0:4000/key/generate"
    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
    data = {
        # Build a fresh list per call instead of relying on a shared mutable
        # default argument.
        "models": [] if models is None else models,
        "duration": None,
        "team_id": team_id,
    }

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

        # Echo the raw body so a failing run shows what the proxy answered.
        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")
        return await response.json()
|
|
|
|
|
|
async def chat_completion(session, key, model="azure-gpt-3.5", request_metadata=None):
    """
    Send a fixed system + user chat to the proxy's /chat/completions endpoint.

    ``key`` is used as the bearer token and ``request_metadata`` is forwarded
    as the ``metadata`` field of the body. The request payload and the raw
    response text are printed. Nothing is returned.

    Raises:
        Exception: if the response status is not 200.
    """
    endpoint = "http://0.0.0.0:4000/chat/completions"
    auth_headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    payload = {
        "model": model,
        "messages": conversation,
        "metadata": request_metadata,
    }

    print("data sent in test=", payload)

    async with session.post(endpoint, headers=auth_headers, json=payload) as reply:
        status_code = reply.status
        body = await reply.text()

        print(body)
        print()

        # Success path exits early; anything else is reported as a failure.
        if status_code == 200:
            return
        raise Exception(f"Request did not return a 200 status code: {status_code}")
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.flaky(retries=6, delay=1)
async def test_team_logging():
    """
    Check that a team-1 key logs to Langfuse project 1.

    Steps:
    1. Create a key for team "team-1"
    2. Make a chat/completions call tagged with a unique trace id
    3. Fetch generations for that trace id from Langfuse (project 1
       credentials) and expect exactly one
    """
    try:
        async with aiohttp.ClientSession() as http:
            # team-1 logs to project 1
            key_response = await generate_key(
                http, models=["fake-openai-endpoint"], team_id="team-1"
            )

            import uuid

            trace_id = f"trace-{uuid.uuid4()}"

            await chat_completion(
                http,
                key_response["key"],
                model="fake-openai-endpoint",
                request_metadata={"trace_id": trace_id},
            )

            # Verify the log landed in the Langfuse project that belongs to team-1.
            import langfuse

            project1_client = langfuse.Langfuse(
                public_key=os.getenv("LANGFUSE_PROJECT1_PUBLIC"),
                secret_key=os.getenv("LANGFUSE_PROJECT1_SECRET"),
            )

            # Wait before querying Langfuse for the trace.
            await asyncio.sleep(10)

            print(f"searching for trace_id={trace_id} on langfuse")

            found = project1_client.get_generations(trace_id=trace_id).data
            print(found)
            assert len(found) == 1
    except Exception as err:
        pytest.fail(f"Unexpected error: {err!s}")
|
|
|
|
|
|
@pytest.mark.skip(reason="todo fix langfuse credential error")
@pytest.mark.asyncio
async def test_team_2logging():
    """
    -> Team 2 logs to project 2
    -> Create Key
    -> Make chat/completions call
    -> Fetch logs from langfuse

    Expects exactly one generation for the trace id in project 2 and none in
    project 1. Skipped when the project 2 credentials are not configured.
    """
    langfuse_public_key = os.getenv("LANGFUSE_PROJECT2_PUBLIC")
    langfuse_secret_key = os.getenv("LANGFUSE_PROJECT2_SECRET")
    langfuse_host = "https://us.cloud.langfuse.com"

    print(f"langfuse_public_key: {langfuse_public_key}")
    # Never echo the secret itself into test/CI logs - only whether it is set.
    print(f"langfuse_secret_key set: {langfuse_secret_key is not None}")

    # Report a real skip (not a silent pass) when credentials are missing, and
    # do not rely on `assert`, which is stripped under `python -O`.
    if langfuse_public_key is None or langfuse_secret_key is None:
        pytest.skip(
            "LANGFUSE_PROJECT2_PUBLIC / LANGFUSE_PROJECT2_SECRET are not set"
        )

    try:
        async with aiohttp.ClientSession() as session:
            key = await generate_key(
                session, models=["fake-openai-endpoint"], team_id="team-2"
            )  # team-2 logs to project 2

            import uuid

            _trace_id = f"trace-{uuid.uuid4()}"
            _request_metadata = {
                "trace_id": _trace_id,
            }

            await chat_completion(
                session,
                key["key"],
                model="fake-openai-endpoint",
                request_metadata=_request_metadata,
            )

            # Test - if the logs were sent to the correct team on langfuse
            import langfuse

            langfuse_client = langfuse.Langfuse(
                public_key=langfuse_public_key,
                secret_key=langfuse_secret_key,
                host=langfuse_host,
            )

            # Wait before querying Langfuse for the trace.
            await asyncio.sleep(10)

            print(f"searching for trace_id={_trace_id} on langfuse")

            generations = langfuse_client.get_generations(trace_id=_trace_id).data
            print("Team 2 generations", generations)

            # team-2 should have 1 generation with this trace id
            assert len(generations) == 1

            # team-1 should have 0 generations with this trace id
            langfuse_client_1 = langfuse.Langfuse(
                public_key=os.getenv("LANGFUSE_PROJECT1_PUBLIC"),
                secret_key=os.getenv("LANGFUSE_PROJECT1_SECRET"),
            )

            generations_team_1 = langfuse_client_1.get_generations(
                trace_id=_trace_id
            ).data
            print("Team 1 generations", generations_team_1)

            assert len(generations_team_1) == 0

    except Exception as e:
        pytest.fail("Team 2 logging failed: " + str(e))
|