# import openai, json, time, asyncio

# # client that talks to a locally running litellm proxy
# client = openai.AsyncOpenAI(
#     api_key="sk-1234",
#     base_url="http://0.0.0.0:8000"
# )

# # canned tool-calling conversation: one user turn, one assistant turn with
# # three parallel get_current_weather tool calls, and the three tool results.
# # time.time() in the prompt keeps every request unique (avoids cache hits).
# super_fake_messages = [
#     {
#         "role": "user",
#         "content": f"What's the weather like in San Francisco, Tokyo, and Paris? {time.time()}"
#     },
#     {
#         "content": None,
#         "role": "assistant",
#         "tool_calls": [
#             {
#                 "id": "1",
#                 "function": {
#                     "arguments": "{\"location\": \"San Francisco\", \"unit\": \"celsius\"}",
#                     "name": "get_current_weather"
#                 },
#                 "type": "function"
#             },
#             {
#                 "id": "2",
#                 "function": {
#                     "arguments": "{\"location\": \"Tokyo\", \"unit\": \"celsius\"}",
#                     "name": "get_current_weather"
#                 },
#                 "type": "function"
#             },
#             {
#                 "id": "3",
#                 "function": {
#                     "arguments": "{\"location\": \"Paris\", \"unit\": \"celsius\"}",
#                     "name": "get_current_weather"
#                 },
#                 "type": "function"
#             }
#         ]
#     },
#     {
#         "tool_call_id": "1",
#         "role": "tool",
#         "name": "get_current_weather",
#         "content": "{\"location\": \"San Francisco\", \"temperature\": \"90\", \"unit\": \"celsius\"}"
#     },
#     {
#         "tool_call_id": "2",
#         "role": "tool",
#         "name": "get_current_weather",
#         "content": "{\"location\": \"Tokyo\", \"temperature\": \"30\", \"unit\": \"celsius\"}"
#     },
#     {
#         "tool_call_id": "3",
#         "role": "tool",
#         "name": "get_current_weather",
#         "content": "{\"location\": \"Paris\", \"temperature\": \"50\", \"unit\": \"celsius\"}"
#     }
# ]


# async def chat_completions():
#     super_fake_response = await client.chat.completions.create(
#         model="gpt-3.5-turbo",
#         messages=super_fake_messages,
#         seed=1337,
#         stream=False
#     )  # get a new response from the model where it can see the function response
#     await asyncio.sleep(1)
#     return super_fake_response


# # fire n concurrent requests, then report concurrency, elapsed time, and
# # how many requests completed successfully
# async def loadtest_fn(n=1):
#     start = time.time()
#     tasks = [chat_completions() for _ in range(n)]
#     responses = await asyncio.gather(*tasks)
#     successful_completions = [c for c in responses if c is not None]
#     print(n, time.time() - start, len(successful_completions))


# # print(json.dumps(super_fake_response.model_dump(), indent=4))

# asyncio.run(loadtest_fn())
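
# # A possible way to sweep concurrency levels with loadtest_fn (a sketch, not
# # part of the original test; sweep() and the n values are hypothetical, and
# # it assumes the proxy at base_url above is running):
# #
# # async def sweep():
# #     for n in (1, 10, 50):
# #         await loadtest_fn(n)  # prints n, elapsed seconds, success count
# #
# # asyncio.run(sweep())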