mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
feat(router.py): Support Loadbalancing batch azure api endpoints (#5469)
* feat(router.py): initial commit for loadbalancing azure batch api endpoints. Closes https://github.com/BerriAI/litellm/issues/5396
* fix(router.py): working `router.acreate_file()`
* feat(router.py): working `router.acreate_batch` endpoint
* feat(router.py): expose `router.aretrieve_batch` function. Make it easy for user to retrieve the batch information
* feat(router.py): support `router.alist_batches` endpoint. Adds support for getting all batches across all endpoints
* feat(router.py): working loadbalancing on `/v1/files`
* feat(proxy_server.py): working loadbalancing on `/v1/batches`
* feat(proxy_server.py): working loadbalancing on Retrieve + List batch
This commit is contained in:
parent
7a22faaba4
commit
9f3fa29624
10 changed files with 667 additions and 37 deletions
59
litellm/router_utils/batch_utils.py
Normal file
59
litellm/router_utils/batch_utils.py
Normal file
|
@ -0,0 +1,59 @@
|
|||
import io
|
||||
import json
|
||||
from typing import IO, Optional, Tuple, Union
|
||||
|
||||
|
||||
class InMemoryFile(io.BytesIO):
    """An in-memory bytes buffer that also carries a `name` attribute."""

    def __init__(self, content: bytes, name: str) -> None:
        super().__init__(content)
        # A plain BytesIO has no name; store the one supplied by the caller.
        self.name = name
|
||||
|
||||
|
||||
def replace_model_in_jsonl(
    file_content: Union[bytes, IO, Tuple[str, bytes, str]], new_model_name: str
) -> "Optional[InMemoryFile]":
    """
    Rewrite the `body.model` field of every record in a JSONL payload.

    Args:
        file_content: The JSONL payload. Accepted forms are raw bytes, a
            file-like object exposing `read()`, or a tuple whose second
            element is the payload (presumably a
            `(filename, content, content_type)` upload tuple - confirm
            against callers).
        new_model_name: Value stored in `body["model"]` of every record that
            has a `"body"` key. Records without `"body"` are re-serialized
            unchanged.

    Returns:
        An `InMemoryFile` named "modified_file.jsonl" containing the rewritten
        records, one per line, UTF-8 encoded; or None when the payload cannot
        be read, decoded, or parsed.
    """
    try:
        # Reduce the accepted input shapes to the raw payload.
        if hasattr(file_content, "read"):
            raw_content = file_content.read()  # type: ignore
        elif isinstance(file_content, tuple):
            raw_content = file_content[1]
        else:
            raw_content = file_content

        # Text-mode file objects return `str`; only bytes-like payloads need
        # to be decoded.
        if isinstance(raw_content, str):
            text = raw_content
        else:
            text = raw_content.decode("utf-8")

        modified_lines = []
        # Split on "\n" only: str.splitlines() also breaks on characters such
        # as U+2028/U+2029, which may appear unescaped inside JSON strings and
        # would cut a valid record in two. strip() removes any trailing "\r".
        for line in text.split("\n"):
            record_text = line.strip()
            # Blank lines carry no record; json.loads("") would otherwise
            # cause the whole file to be rejected.
            if not record_text:
                continue

            json_object = json.loads(record_text)

            # Replace the model name if the record has a request body.
            if "body" in json_object:
                json_object["body"]["model"] = new_model_name

            modified_lines.append(json.dumps(json_object))

        modified_file_content = "\n".join(modified_lines).encode("utf-8")
        return InMemoryFile(modified_file_content, name="modified_file.jsonl")  # type: ignore
    except (
        json.JSONDecodeError,
        UnicodeDecodeError,
        TypeError,
        AttributeError,  # payload is neither str nor bytes-like
        IndexError,  # tuple too short to hold a payload
    ):
        return None
|
||||
|
||||
|
||||
def _get_router_metadata_variable_name(function_name) -> str:
|
||||
"""
|
||||
Helper to return what the "metadata" field should be called in the request data
|
||||
|
||||
For all /thread or /assistant endpoints we need to call this "litellm_metadata"
|
||||
|
||||
For ALL other endpoints we call this "metadata
|
||||
"""
|
||||
if "batch" in function_name:
|
||||
return "litellm_metadata"
|
||||
else:
|
||||
return "metadata"
|
Loading…
Add table
Add a link
Reference in a new issue