Mirror of https://github.com/BerriAI/litellm.git
feat(proxy_server.py): enable infinite retries on rate limited requests

parent e5268fa6bc · commit 4791dda66f
7 changed files with 255 additions and 186 deletions
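The retry behavior named in the commit message lives in proxy_server.py, one of the seven files touched; the hunks reproduced on this page are the commit's companion fix to the Ollama streaming handler. As a rough sketch of what "infinite retries on rate limited requests" can mean in practice (a hypothetical helper, not the commit's actual code):

```python
import asyncio
import random

async def call_with_infinite_retries(send):
    """Hypothetical helper: retry forever while the upstream answers
    HTTP 429, sleeping with jittered exponential backoff in between."""
    delay = 1.0
    while True:
        response = await send()  # `send` performs one request attempt
        if response.status_code != 429:  # not rate limited -> done
            return response
        await asyncio.sleep(delay + random.random())
        delay = min(delay * 2, 60.0)  # cap the backoff at 60s
```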
```diff
@@ -182,17 +182,11 @@ def ollama_completion_stream(url, data):
             traceback.print_exc()
         session.close()
 
-async def iter_lines(reader):
-    buffer = b""
-    async for chunk in reader.iter_any():
-        buffer += chunk
-        while b'\n' in buffer:
-            line, buffer = buffer.split(b'\n', 1)
-            yield line
 
 async def ollama_async_streaming(url, data, model_response, encoding, logging_obj):
     try:
-        with httpx.stream(
+        client = httpx.AsyncClient()
+        async with client.stream(
             url=f"{url}",
             json=data,
             method="POST",
```
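This hunk removes the hand-rolled iter_lines helper (written against aiohttp's reader.iter_any(), which httpx responses do not offer) and swaps the synchronous httpx.stream(...) context manager, which cannot be awaited inside an async generator, for an httpx.AsyncClient stream entered via async with. A minimal sketch of the pattern the new code adopts (the function name and the print are illustrative, not part of the commit):

```python
import httpx

async def stream_post(url: str, data: dict):
    # Open the request on an AsyncClient so the body can be consumed
    # with `async for` instead of blocking the event loop.
    client = httpx.AsyncClient()
    async with client.stream(method="POST", url=url, json=data) as response:
        async for line in response.aiter_lines():
            if line:
                print(line)
    await client.aclose()  # the sketch closes the client; the diff does not
```

The second hunk updates the consumption side of the same function to match: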
```diff
@@ -201,8 +195,8 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_obj):
         if response.status_code != 200:
             raise OllamaError(status_code=response.status_code, message=response.text)
 
-        streamwrapper = litellm.CustomStreamWrapper(completion_stream=response.iter_lines(), model=data['model'], custom_llm_provider="ollama",logging_obj=logging_obj)
-        for transformed_chunk in streamwrapper:
+        streamwrapper = litellm.CustomStreamWrapper(completion_stream=response.aiter_lines(), model=data['model'], custom_llm_provider="ollama",logging_obj=logging_obj)
+        async for transformed_chunk in streamwrapper:
             yield transformed_chunk
     except Exception as e:
         traceback.print_exc()
```
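Because the response now comes from AsyncClient.stream, the synchronous response.iter_lines() becomes response.aiter_lines(), an async iterator, and the CustomStreamWrapper built on it is drained with async for instead of a plain for; the except branch still only prints the traceback, unchanged from before. A hypothetical consumer of the resulting async generator (the import path, URL, payload, and None arguments are placeholders):

```python
import asyncio

from litellm.llms.ollama import ollama_async_streaming  # path assumed

async def main():
    # The function is an async generator, so it is driven with `async for`.
    async for chunk in ollama_async_streaming(
        url="http://localhost:11434/api/generate",
        data={"model": "llama2", "prompt": "Hello"},
        model_response=None,   # placeholder for litellm's ModelResponse
        encoding=None,         # placeholder tokenizer/encoding
        logging_obj=None,      # placeholder logging object
    ):
        print(chunk)

asyncio.run(main())
```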