forked from phoenix/litellm-mirror
fix(main.py): don't pass stream to petals
This commit is contained in:
parent 9780efca4b
commit c312ac4ca8
3 changed files with 19 additions and 13 deletions
@@ -1313,11 +1313,12 @@ def completion(
         or model in litellm.petals_models
     ):
         api_base = (
-            litellm.api_base or
-            api_base
+            api_base or
+            litellm.api_base
         )
 
         custom_llm_provider = "petals"
+        stream = optional_params.pop("stream", False)
         model_response = petals.completion(
             model=model,
             messages=messages,
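Two independent fixes in this hunk: `api_base` resolution now prefers the `api_base` argument passed into `completion()` over the module-level `litellm.api_base` default, and `stream` is popped out of `optional_params` so it is never forwarded to `petals.completion()`, which does not accept it. A minimal sketch of the pop-before-forward pattern; `call_backend` is a hypothetical stand-in for the provider call:

def call_backend(model, messages, **provider_kwargs):
    # Hypothetical stand-in for petals.completion; a real backend with no
    # "stream" parameter would raise TypeError if the kwarg leaked through.
    assert "stream" not in provider_kwargs
    return {"model": model, "echo": messages[-1]["content"]}

optional_params = {"max_new_tokens": 256, "stream": True}
stream = optional_params.pop("stream", False)  # strip before forwarding
response = call_backend(
    "petals/meta-llama/Llama-2-70b-chat-hf",
    [{"role": "user", "content": "Hello, how are you?"}],
    **optional_params,
)
# `stream` is still available here for litellm-side stream handling.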
@@ -403,7 +403,7 @@ class Router:
                 # if the function call is successful, no exception will be raised and we'll break out of the loop
                 response = await original_function(*args, **kwargs)
                 return response
-            except (Exception, asyncio.CancelledError) as e:
+            except Exception as e:
                 original_exception = e
                 ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR w/ fallbacks available
                 if ((isinstance(original_exception, litellm.ContextWindowExceededError) and context_window_fallbacks is None)
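Catching `asyncio.CancelledError` alongside `Exception` was harmful here: since Python 3.8, `asyncio.CancelledError` subclasses `BaseException` rather than `Exception`, and swallowing it pushed cancelled tasks into the retry path instead of letting cancellation propagate. A small self-contained demonstration:

import asyncio

print(issubclass(asyncio.CancelledError, Exception))  # False on Python 3.8+

async def worker():
    try:
        await asyncio.sleep(10)
    except Exception:
        print("retryable error")  # cancellation does NOT land here
        raise

async def main():
    task = asyncio.create_task(worker())
    await asyncio.sleep(0)  # let the worker start
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        print("cancelled cleanly, no retry")

asyncio.run(main())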
@@ -411,10 +411,7 @@ class Router:
                     raise original_exception
                 ### RETRY
                 #### check if it should retry + back-off if required
-                if isinstance(original_exception, asyncio.CancelledError):
-                    timeout = 0 # immediately retry
-                    await asyncio.sleep(timeout)
-                elif hasattr(original_exception, "status_code") and hasattr(original_exception, "response") and litellm._should_retry(status_code=original_exception.status_code):
+                if hasattr(original_exception, "status_code") and hasattr(original_exception, "response") and litellm._should_retry(status_code=original_exception.status_code):
                     if hasattr(original_exception.response, "headers"):
                         timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries, response_headers=original_exception.response.headers)
                     else:
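With the `CancelledError` branch gone, the retry decision rests entirely on the exception carrying an HTTP `status_code` and `response` that `litellm._should_retry` considers transient. A sketch of what such a status-code predicate typically checks; the real `litellm._should_retry` may differ in detail:

def should_retry(status_code: int) -> bool:
    # Sketch only: timeouts, conflicts, and rate limits are retryable,
    # as are server-side errors; client errors like 400/401 are not.
    if status_code in (408, 409, 429):
        return True
    return status_code >= 500

assert should_retry(429) and should_retry(503)
assert not should_retry(400) and not should_retry(401)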
@@ -432,11 +429,8 @@ class Router:
                     response = await response
                 return response
 
-            except (Exception, asyncio.CancelledError) as e:
-                if isinstance(original_exception, asyncio.CancelledError):
-                    timeout = 0 # immediately retry
-                    await asyncio.sleep(timeout)
-                elif hasattr(e, "status_code") and hasattr(e, "response") and litellm._should_retry(status_code=e.status_code):
+            except Exception as e:
+                if hasattr(e, "status_code") and hasattr(e, "response") and litellm._should_retry(status_code=e.status_code):
                     remaining_retries = num_retries - current_attempt
                     if hasattr(e.response, "headers"):
                         timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries, response_headers=e.response.headers)
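The second `except` block gets the same treatment. The back-off itself comes from `litellm._calculate_retry_after`, which is fed the response headers when they exist; a plausible sketch of a header-aware back-off with the usual fallback to jittered exponential delay (the details here are assumptions, not litellm's exact implementation):

import random

def calculate_retry_after(remaining_retries, max_retries, response_headers=None):
    # Honor an explicit Retry-After header if the server sent one.
    if response_headers is not None:
        retry_after = response_headers.get("retry-after")
        if retry_after is not None:
            try:
                return float(retry_after)
            except ValueError:
                pass  # e.g. an HTTP-date value; fall through to backoff
    # Otherwise: jittered exponential backoff, capped at 8 seconds.
    attempt = max_retries - remaining_retries
    return min(0.5 * (2 ** attempt), 8.0) * (1 + 0.25 * random.random())

print(calculate_retry_after(2, 2, {"retry-after": "3"}))  # -> 3.0
print(calculate_retry_after(0, 2))                        # backoff + jitter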
@@ -631,7 +631,7 @@ def test_completion_bedrock_ai21_stream():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_bedrock_ai21_stream()
+# test_completion_bedrock_ai21_stream()
 
 # def test_completion_sagemaker_stream():
 #     try:
@@ -760,6 +760,17 @@ def hf_test_completion_tgi_stream():
         pytest.fail(f"Error occurred: {e}")
 # hf_test_completion_tgi_stream()
 
+def test_petals():
+    print(f"making petals call")
+    response = completion(
+        model="petals/meta-llama/Llama-2-70b-chat-hf",
+        messages=[{ "content": "Hello, how are you?","role": "user"}],
+        # stream=True,
+        base_url="https://chat.petals.dev/api/v1/generate"
+    )
+
+    print(f"response: {response}")
+test_petals()
 # def test_completion_aleph_alpha():
 #     try:
 #         response = completion(
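The new `test_petals` exercises the public Petals endpoint end to end; with the `main.py` fix above, un-commenting `stream=True` would no longer leak the kwarg into `petals.completion()`. Since litellm responses follow the OpenAI schema, the bare `print` could later be tightened into an assertion along these lines (a suggestion, not part of this commit):

content = response["choices"][0]["message"]["content"]
assert isinstance(content, str) and len(content) > 0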