docs(routing.md): update routing docs
commit 826f56a6a0 (parent fd3895878d)
3 changed files with 15 additions and 11 deletions
@@ -247,16 +247,24 @@ print(f"response: {response}")
 If you want a server to just route requests to different LLM APIs, use our [OpenAI Proxy Server](./simple_proxy.md#multiple-instances-of-1-model)

-## Queuing
+## Queuing (Beta)

-### Quick Start

 This requires a [Redis DB](https://redis.com/) to work.

 Our implementation uses LiteLLM's proxy server + Celery workers to process up to 100 req./s

+:::info
+
+This is pretty new, and might have bugs. Any contributions to improving our implementation are welcome
+
+:::
+
 [**See Code**](https://github.com/BerriAI/litellm/blob/fbf9cab5b9e35df524e2c9953180c58d92e4cd97/litellm/proxy/proxy_server.py#L589)

+### Quick Start
+
 1. Add Redis credentials in a .env file

 ```python
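The ```python block that follows in routing.md is truncated in this view, so here is only an illustrative sketch of what "Add Redis credentials in a .env file" tends to mean for the proxy's Celery workers. The variable names REDIS_HOST / REDIS_PORT / REDIS_PASSWORD and the values are assumptions, not copied from the docs:

```python
# Hypothetical sketch: point the queue's Celery workers at your Redis instance.
# Exact variable names and loading mechanism may differ from the real docs.
import os

os.environ["REDIS_HOST"] = "your-redis-host"          # e.g. a hosted Redis endpoint
os.environ["REDIS_PORT"] = "6379"                     # default Redis port
os.environ["REDIS_PASSWORD"] = "your-redis-password"
```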
@@ -139,11 +139,6 @@ def run_server(host, port, api_base, api_version, model, alias, add_key, headers
             status = polling_response["status"]
             if status == "finished":
                 llm_response = polling_response["result"]
-                with open("response_log.txt", "a") as log_file:
-                    log_file.write(
-                        f"Response ID: {llm_response.get('id', 'NA')}\nLLM Response: {llm_response}\nTime: {end_time - start_time:.2f} seconds\n\n"
-                    )
-
                 break
             print(f"POLLING JOB{polling_url}\nSTATUS: {status}, \n Response {polling_response}")
             time.sleep(0.5)
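For context, the lines above belong to a client-side polling loop in the load-test helper: the request is queued by the proxy, and the client repeatedly fetches a polling URL until the job reports "finished". A minimal sketch of that pattern, under the assumption that the proxy serves the polling URL over HTTP and that the field names match the diff (the function name, use of requests, and timeout handling are placeholders, not copied from the file):

```python
import time
import requests  # assumed HTTP client; the real helper may use a different one

def poll_until_finished(base_url: str, polling_url: str, timeout: float = 60.0):
    """Poll a queued job until it finishes or the timeout expires."""
    start = time.time()
    while time.time() - start < timeout:
        polling_response = requests.get(base_url + polling_url).json()
        status = polling_response["status"]
        if status == "finished":
            return polling_response["result"]
        print(f"POLLING JOB {polling_url}\nSTATUS: {status}")
        time.sleep(0.5)  # same backoff interval as the snippet above
    raise TimeoutError(f"job {polling_url} did not finish in {timeout}s")
```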
@@ -156,7 +151,7 @@ def run_server(host, port, api_base, api_version, model, alias, add_key, headers

         # List to store the futures of concurrent calls
         futures = []
-
+        start_time = time.time()
         # Make concurrent calls
         with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_calls) as executor:
             for _ in range(concurrent_calls):
@@ -175,7 +170,8 @@ def run_server(host, port, api_base, api_version, model, alias, add_key, headers
                     successful_calls += 1
                 else:
                     failed_calls += 1
-
+        end_time = time.time()
+        print(f"Elapsed Time: {end_time-start_time}")
         print(f"Load test Summary:")
         print(f"Total Requests: {concurrent_calls}")
         print(f"Successful Calls: {successful_calls}")
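Taken together, these two hunks shift the timing from per-response logging to the whole batch: start_time is captured once before the ThreadPoolExecutor submits the calls, and the elapsed time plus a pass/fail summary is printed after all futures complete. A compressed, self-contained sketch of that shape (the load_test / make_request names are placeholders; only the counters, prints, and executor usage come from the diff):

```python
import concurrent.futures
import time

def load_test(make_request, concurrent_calls: int = 100):
    """Fire `concurrent_calls` requests in parallel and report timing and outcomes."""
    successful_calls, failed_calls = 0, 0
    futures = []

    start_time = time.time()
    with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_calls) as executor:
        for _ in range(concurrent_calls):
            futures.append(executor.submit(make_request))
        for future in concurrent.futures.as_completed(futures):
            if future.exception() is None:
                successful_calls += 1
            else:
                failed_calls += 1
    end_time = time.time()

    print(f"Elapsed Time: {end_time-start_time}")
    print(f"Load test Summary:")
    print(f"Total Requests: {concurrent_calls}")
    print(f"Successful Calls: {successful_calls}")
    print(f"Failed Calls: {failed_calls}")
```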
@@ -618,7 +618,7 @@ async def async_queue_response(request: Request, task_id: str):
     try:
         job = async_result(task_id, app=celery_app_conn)
         if job.ready():
-            return job.result
+            return {"status": "finished", "result": job.result}
         else:
             return {'status': 'queued'}
     except Exception as e:
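The proxy-side change makes the polling endpoint's response shape explicit: a finished job now comes back wrapped as {"status": "finished", "result": ...} instead of the raw Celery result, which is exactly what the client polling loop branches on. A hedged sketch of such an endpoint using Celery's standard AsyncResult (the route path, broker URLs, and FastAPI wiring are assumptions; only the handler name, celery_app_conn, and the returned payloads come from the diff):

```python
from celery import Celery
from celery.result import AsyncResult
from fastapi import FastAPI, Request

# Assumption: placeholder broker/backend URLs; the real proxy builds its
# Celery connection from the Redis credentials in the .env file.
celery_app_conn = Celery(broker="redis://localhost:6379/0", backend="redis://localhost:6379/0")
app = FastAPI()

@app.get("/queue/response/{task_id}")  # route path is an assumption, not taken from the diff
async def async_queue_response(request: Request, task_id: str):
    try:
        job = AsyncResult(task_id, app=celery_app_conn)
        if job.ready():
            # wrap the raw Celery result so clients can branch on "status"
            return {"status": "finished", "result": job.result}
        return {"status": "queued"}
    except Exception as e:
        return {"status": "error", "error": str(e)}
```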