forked from phoenix/litellm-mirror
fix(proxy_server.py): raise streaming exceptions
parent 0e08a0082b
commit 343a06fd84
4 changed files with 25 additions and 17 deletions
@@ -213,10 +213,7 @@ async def _async_streaming(response, model, custom_llm_provider, args):
             print_verbose(f"line in async streaming: {line}")
             yield line
     except Exception as e:
-        print_verbose(f"error raised _async_streaming: {traceback.format_exc()}")
-        raise exception_type(
-            model=model, custom_llm_provider=custom_llm_provider, original_exception=e, completion_kwargs=args,
-        )
+        raise e
 
 def mock_completion(model: str, messages: List, stream: Optional[bool] = False, mock_response: str = "This is a mock request", **kwargs):
     """
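Note (not part of the diff): the change above stops remapping errors inside the async generator and simply re-raises them, so the caller, rather than the generator, decides how to handle a failed stream. A minimal, self-contained sketch of that behaviour; the names here are illustrative, not litellm's:

import asyncio

async def fake_stream():
    # stands in for a provider's streaming response
    yield "chunk-1"
    raise RuntimeError("provider dropped the connection")

async def passthrough_stream(response):
    # mirrors the new _async_streaming behaviour: yield chunks, re-raise the original error
    try:
        async for line in response:
            yield line
    except Exception as e:
        raise e

async def main():
    try:
        async for line in passthrough_stream(fake_stream()):
            print(line)
    except Exception as e:
        print(f"caller sees the original error: {e!r}")

asyncio.run(main())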
@@ -826,13 +826,15 @@ def data_generator(response):
 
 async def async_data_generator(response, user_api_key_dict):
     print_verbose("inside generator")
-    async for chunk in response:
-        print_verbose(f"returned chunk: {chunk}")
-        try:
-            yield f"data: {json.dumps(chunk.dict())}\n\n"
-        except:
-            yield f"data: {json.dumps(chunk)}\n\n"
+    try:
+        async for chunk in response:
+            print_verbose(f"returned chunk: {chunk}")
+            try:
+                yield f"data: {json.dumps(chunk.dict())}\n\n"
+            except Exception as e:
+                yield f"data: {str(e)}\n\n"
+    except Exception as e:
+        yield f"data: {str(e)}\n\n"
 
 def get_litellm_model_info(model: dict = {}):
     model_info = model.get("model_info", {})
     model_to_lookup = model.get("litellm_params", {}).get("model", None)
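Note (illustrative, not from the diff): a generator like async_data_generator is normally handed to FastAPI's StreamingResponse. With the outer try/except added above, a failure raised mid-stream is sent to the client as a final data: line instead of tearing down the SSE connection without explanation. A hedged sketch under that assumption; the route and demo stream below are made up for illustration:

import json
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

async def demo_stream():
    # plain dicts stand in for the chunk objects the proxy actually streams
    yield {"choices": [{"delta": {"content": "hello"}}]}
    raise RuntimeError("upstream provider error")

async def sse_generator(response):
    # same shape as async_data_generator: per-chunk and whole-stream error handling
    try:
        async for chunk in response:
            try:
                yield f"data: {json.dumps(chunk)}\n\n"
            except Exception as e:
                yield f"data: {str(e)}\n\n"
    except Exception as e:
        yield f"data: {str(e)}\n\n"

@app.get("/demo")
async def demo():
    return StreamingResponse(sse_generator(demo_stream()), media_type="text/event-stream")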
@@ -971,6 +973,7 @@ async def completion(request: Request, model: Optional[str] = None, user_api_key
         background_tasks.add_task(log_input_output, request, response) # background task for logging to OTEL
         return response
     except Exception as e:
+        print(f"EXCEPTION RAISED IN PROXY MAIN.PY")
         print(f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`")
         traceback.print_exc()
         error_traceback = traceback.format_exc()
@@ -975,11 +975,12 @@ def test_openai_text_completion_call():
 # test_openai_text_completion_call()
 
 # # test on together ai completion call - starcoder
-def test_together_ai_completion_call_starcoder():
+def test_together_ai_completion_call_mistral():
     try:
+        litellm.set_verbose = False
         start_time = time.time()
         response = completion(
-            model="together_ai/bigcode/starcoder",
+            model="together_ai/mistralai/Mistral-7B-Instruct-v0.2",
             messages=messages,
             logger_fn=logger_fn,
             stream=True,
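Note (illustrative): the rest of a streaming test like this usually just drains the iterator and checks that tokens arrived. The helper below is a hedged sketch, not code from the test file; the chunk attributes assume litellm's OpenAI-style streaming deltas:

def consume_stream(response):
    # gather streamed deltas and make sure at least one token came back
    collected = ""
    for chunk in response:
        delta = chunk.choices[0].delta
        collected += getattr(delta, "content", None) or ""
    assert len(collected) > 0, "expected at least one streamed token"
    return collected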
@@ -1002,7 +1003,7 @@ def test_together_ai_completion_call_starcoder():
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
-# test_together_ai_completion_call_starcoder()
+test_together_ai_completion_call_starcoder()
 
 def test_together_ai_completion_call_starcoder_bad_key():
     try:
@@ -5146,6 +5146,14 @@ def exception_type(
                     llm_provider="together_ai",
                     request=original_exception.request
                 )
+            elif original_exception.status_code == 422:
+                exception_mapping_worked = True
+                raise BadRequestError(
+                    message=f"TogetherAIException - {error_response['error']}",
+                    model=model,
+                    llm_provider="together_ai",
+                    response=original_exception.response
+                )
             elif original_exception.status_code == 429:
                 exception_mapping_worked = True
                 raise RateLimitError(
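Note (illustrative): with the new 422 branch, a malformed Together AI request is surfaced as a mapped, OpenAI-style exception instead of an unmapped provider error. A minimal usage sketch, assuming litellm re-exports BadRequestError the way it does the other mapped exception types:

import litellm

try:
    response = litellm.completion(
        model="together_ai/mistralai/Mistral-7B-Instruct-v0.2",
        messages=[{"role": "user", "content": "hi"}],
        stream=True,
    )
    for chunk in response:
        pass
except litellm.BadRequestError as e:  # assumed export; Together AI 422s should land here
    print(f"bad request: {e}")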
@@ -5584,7 +5592,7 @@ class CustomStreamWrapper:
             elif "[DONE]" in chunk:
                 return {"text": text, "is_finished": True, "finish_reason": "stop"}
             elif "error" in chunk:
-                raise ValueError(chunk)
+                raise litellm.together_ai.TogetherAIError(status_code=422, message=f"{str(chunk)}")
             else:
                 return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
 
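Note (illustrative): this pairs with the 422 branch added to exception_type above. The stream handler now raises a provider-typed error that carries a status code instead of a bare ValueError, so the mapping layer can translate it. A rough sketch of that handoff with simplified stand-in classes, not litellm's real ones:

class TogetherAIError(Exception):
    # simplified stand-in for litellm's Together AI error type
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        super().__init__(message)

class BadRequestError(Exception):
    # simplified stand-in for the OpenAI-style 400/422 error
    pass

def map_provider_error(e):
    # mirrors the new exception_type branch: Together AI 422 -> BadRequestError
    if isinstance(e, TogetherAIError) and e.status_code == 422:
        return BadRequestError(f"TogetherAIException - {e.message}")
    return e

err = TogetherAIError(status_code=422, message='{"error": "input too long"}')
print(type(map_provider_error(err)).__name__)  # BadRequestError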
@@ -6131,7 +6139,6 @@ class CustomStreamWrapper:
         except StopIteration:
             raise  # Re-raise StopIteration
         except Exception as e:
-            print_verbose(f"HITS AN ERROR: {str(e)}\n\n {traceback.format_exc()}")
             traceback_exception = traceback.format_exc()
             # LOG FAILURE - handle streaming failure logging in the _next_ object, remove `handle_failure` once it's deprecated
             threading.Thread(target=self.logging_obj.failure_handler, args=(e, traceback_exception)).start()
@@ -6180,7 +6187,7 @@ class CustomStreamWrapper:
             traceback_exception = traceback.format_exc()
             # Handle any exceptions that might occur during streaming
             asyncio.create_task(self.logging_obj.async_failure_handler(e, traceback_exception))
-            raise StopAsyncIteration
+            raise e
 
 class TextCompletionStreamWrapper:
     def __init__(self, completion_stream, model):
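Note (illustrative): this last hunk is what the commit title points at. The async stream wrapper used to convert failures into StopAsyncIteration, which silently ended the stream; now the original exception propagates to whoever is consuming it. A hedged usage sketch, with an illustrative model name and handler:

import asyncio
import litellm

async def consume():
    try:
        response = await litellm.acompletion(
            model="together_ai/mistralai/Mistral-7B-Instruct-v0.2",  # illustrative
            messages=[{"role": "user", "content": "hello"}],
            stream=True,
        )
        async for chunk in response:
            print(chunk)
    except Exception as e:
        # before this change the loop would simply stop; now the error is visible here
        print(f"stream failed: {e}")

asyncio.run(consume())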