forked from phoenix/litellm-mirror
coverage for custom api scenario in load testing
This commit is contained in:
parent 303ee905e6
commit 80d77fed71
8 changed files with 8 additions and 7 deletions
3 binary files not shown.
@@ -36,7 +36,7 @@ async def acompletion(*args, **kwargs):
 @client
 # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(2), reraise=True, retry_error_callback=lambda retry_state: setattr(retry_state.outcome, 'retry_variable', litellm.retry)) # retry call, turn this off by setting `litellm.retry = False`
-@timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
+@timeout(600) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
 def completion(
     messages, model="gpt-3.5-turbo",# required params
     # Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
@@ -44,7 +44,7 @@ def completion(
     temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'),
     presence_penalty=0, frequency_penalty=0, logit_bias={}, user="", deployment_id=None,
     # Optional liteLLM function params
-    *, return_async=False, api_key=None, force_timeout=60, azure=False, logger_fn=None, verbose=False,
+    *, return_async=False, api_key=None, force_timeout=600, azure=False, logger_fn=None, verbose=False,
     hugging_face = False, replicate=False,together_ai = False, custom_llm_provider=None, custom_api_base=None
 ):
     try:
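Net effect of the two hunks above: the default call timeout rises from 60s to 600s, while `force_timeout` still lets a caller override it per call. A minimal usage sketch under that reading; the API key setup is assumed and is not part of this commit:

import os
import litellm

# Minimal sketch: per-call force_timeout overrides the @timeout(600) default.
# Assumes an OpenAI key is already configured in the environment.
os.environ.setdefault("OPENAI_API_KEY", "sk-...")  # placeholder, not from this commit

response = litellm.completion(
    messages=[{"role": "user", "content": "Hey, how's it going"}],
    model="gpt-3.5-turbo",
    force_timeout=30,  # seconds; without it the call now waits up to 600s
)
print(response)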
@@ -4,5 +4,6 @@ sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the
 import litellm
 from litellm import load_test_model

-result = load_test_model(model="gpt-3.5-turbo", num_calls=5)
+model="gpt-3.5-turbo"
+result = load_test_model(model=model, num_calls=5)
 print(result)
@@ -38,7 +38,7 @@ def timeout(
            thread.start()
            future = asyncio.run_coroutine_threadsafe(async_func(), thread.loop)
            local_timeout_duration = timeout_duration
-           if "force_timeout" in kwargs:
+           if "force_timeout" in kwargs and kwargs["force_timeout"] is not None:
                local_timeout_duration = kwargs["force_timeout"]
            try:
                result = future.result(timeout=local_timeout_duration)
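The added `is not None` check matters because callers such as `load_test_model` now forward `force_timeout=None` by default; without the guard, that explicit None would replace the decorator's duration and `future.result(timeout=None)` would wait indefinitely. A hypothetical standalone sketch of the resolution logic (the function name is illustrative, not the library's code):

def resolve_timeout(timeout_duration, **kwargs):
    # Mirrors the decorator's locals: keep the default unless a real override arrives.
    local_timeout_duration = timeout_duration
    if "force_timeout" in kwargs and kwargs["force_timeout"] is not None:
        local_timeout_duration = kwargs["force_timeout"]
    return local_timeout_duration

assert resolve_timeout(600) == 600                      # no override
assert resolve_timeout(600, force_timeout=30) == 30     # explicit override
assert resolve_timeout(600, force_timeout=None) == 600  # forwarded None no longer wins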
@@ -302,7 +302,7 @@ def get_optional_params(
        return optional_params
    return optional_params

-def load_test_model(model: str, custom_llm_provider: str = None, prompt: str = None, num_calls: int = None):
+def load_test_model(model: str, custom_llm_provider: str = None, custom_api_base: str = None, prompt: str = None, num_calls: int = None, force_timeout: int = None):
    test_prompt = "Hey, how's it going"
    test_calls = 100
    if prompt:
@@ -312,7 +312,7 @@ def load_test_model(model: str, custom_llm_provider: str = None, prompt: str = N
    messages = [[{"role": "user", "content": test_prompt}] for _ in range(test_calls)]
    start_time = time.time()
    try:
-       litellm.batch_completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider)
+       litellm.batch_completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider, custom_api_base = custom_api_base, force_timeout=force_timeout)
        end_time = time.time()
        response_time = end_time - start_time
        return {"total_response_time": response_time, "calls_made": 100, "status": "success", "exception": None}
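With `custom_api_base` and `force_timeout` threaded through to `litellm.batch_completion`, the load test can target a custom OpenAI-compatible deployment. A minimal sketch under that assumption; the endpoint URL and the "custom" provider value are illustrative placeholders, not values taken from this commit:

from litellm import load_test_model

# Hedged sketch: drive the load test against a custom API base.
result = load_test_model(
    model="gpt-3.5-turbo",
    custom_llm_provider="custom",              # placeholder provider name
    custom_api_base="http://localhost:8000",   # placeholder endpoint
    num_calls=5,
    force_timeout=120,  # seconds, forwarded to each batched call
)
print(result)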
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.382"
+version = "0.1.383"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"