From 343a06fd844f25fc619c73b7b7ba9d0ed3d03136 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Mon, 25 Dec 2023 07:17:54 +0530
Subject: [PATCH] fix(proxy_server.py): raise streaming exceptions

---
 litellm/main.py                 |  5 +----
 litellm/proxy/proxy_server.py   | 17 ++++++++++-------
 litellm/tests/test_streaming.py |  7 ++++---
 litellm/utils.py                | 13 ++++++++++---
 4 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/litellm/main.py b/litellm/main.py
index 0c3ab4562..fb3be6233 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -213,10 +213,7 @@ async def _async_streaming(response, model, custom_llm_provider, args):
             print_verbose(f"line in async streaming: {line}")
             yield line
     except Exception as e:
-        print_verbose(f"error raised _async_streaming: {traceback.format_exc()}")
-        raise exception_type(
-                model=model, custom_llm_provider=custom_llm_provider, original_exception=e, completion_kwargs=args,
-            )
+        raise e
 
 def mock_completion(model: str, messages: List, stream: Optional[bool] = False, mock_response: str = "This is a mock request", **kwargs):
     """
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index effc8297d..3e4e7f25d 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -826,13 +826,15 @@ def data_generator(response):
 
 async def async_data_generator(response, user_api_key_dict):
     print_verbose("inside generator")
-    async for chunk in response:
-        print_verbose(f"returned chunk: {chunk}")
-        try:
-            yield f"data: {json.dumps(chunk.dict())}\n\n"
-        except:
-            yield f"data: {json.dumps(chunk)}\n\n"
-
+    try:
+        async for chunk in response:
+            print_verbose(f"returned chunk: {chunk}")
+            try:
+                yield f"data: {json.dumps(chunk.dict())}\n\n"
+            except Exception as e:
+                yield f"data: {str(e)}\n\n"
+    except Exception as e:
+        yield f"data: {str(e)}\n\n"
 def get_litellm_model_info(model: dict = {}):
     model_info = model.get("model_info", {})
     model_to_lookup = model.get("litellm_params", {}).get("model", None)
@@ -971,6 +973,7 @@ async def completion(request: Request, model: Optional[str] = None, user_api_key
         background_tasks.add_task(log_input_output, request, response) # background task for logging to OTEL
         return response
     except Exception as e:
+        print(f"EXCEPTION RAISED IN PROXY MAIN.PY")
         print(f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`")
         traceback.print_exc()
         error_traceback = traceback.format_exc()
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 2dfd5778a..e02440e8d 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -975,11 +975,12 @@ def test_openai_text_completion_call():
 # test_openai_text_completion_call()
 
 # # test on together ai completion call - starcoder
-def test_together_ai_completion_call_starcoder():
+def test_together_ai_completion_call_mistral():
     try:
+        litellm.set_verbose = False
         start_time = time.time()
         response = completion(
-            model="together_ai/bigcode/starcoder",
+            model="together_ai/mistralai/Mistral-7B-Instruct-v0.2",
             messages=messages,
             logger_fn=logger_fn,
             stream=True,
@@ -1002,7 +1003,7 @@
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
-# test_together_ai_completion_call_starcoder()
+test_together_ai_completion_call_mistral()
 
 def test_together_ai_completion_call_starcoder_bad_key():
     try:
diff --git a/litellm/utils.py b/litellm/utils.py
index 767bc0b07..94fc46039 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5146,6 +5146,14 @@ def exception_type(
                         llm_provider="together_ai",
                         request=original_exception.request
                     )
+                elif original_exception.status_code == 422:
+                    exception_mapping_worked = True
+                    raise BadRequestError(
+                        message=f"TogetherAIException - {error_response['error']}",
+                        model=model,
+                        llm_provider="together_ai",
+                        response=original_exception.response
+                    )
                 elif original_exception.status_code == 429:
                     exception_mapping_worked = True
                     raise RateLimitError(
@@ -5584,7 +5592,7 @@ class CustomStreamWrapper:
             elif "[DONE]" in chunk:
                 return {"text": text, "is_finished": True, "finish_reason": "stop"}
             elif "error" in chunk:
-                raise ValueError(chunk)
+                raise litellm.together_ai.TogetherAIError(status_code=422, message=f"{str(chunk)}")
             else:
                 return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
 
@@ -6131,7 +6139,6 @@ class CustomStreamWrapper:
         except StopIteration:
             raise # Re-raise StopIteration
         except Exception as e:
-            print_verbose(f"HITS AN ERROR: {str(e)}\n\n {traceback.format_exc()}")
            traceback_exception = traceback.format_exc()
             # LOG FAILURE - handle streaming failure logging in the _next_ object, remove `handle_failure` once it's deprecated
             threading.Thread(target=self.logging_obj.failure_handler, args=(e, traceback_exception)).start()
@@ -6180,7 +6187,7 @@ class CustomStreamWrapper:
             traceback_exception = traceback.format_exc()
             # Handle any exceptions that might occur during streaming
             asyncio.create_task(self.logging_obj.async_failure_handler(e, traceback_exception))
-            raise StopAsyncIteration
+            raise e
 
 class TextCompletionStreamWrapper:
     def __init__(self, completion_stream, model):
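
Note (not part of the patch): the behavioral change above is that mid-stream exceptions are re-raised by CustomStreamWrapper.__anext__ and forwarded to SSE clients by async_data_generator, instead of being swallowed as StopAsyncIteration. The following is a minimal, self-contained sketch of that forwarding pattern under assumed names; fake_llm_stream and forward_as_sse are hypothetical and are not litellm code.

# Sketch only: illustrates the error-forwarding pattern the patch applies,
# not litellm's actual implementation.
import asyncio
import json

async def fake_llm_stream():
    # Hypothetical upstream stream that fails partway through.
    yield {"choices": [{"delta": {"content": "Hello"}}]}
    raise RuntimeError("TogetherAIException - model is overloaded")

async def forward_as_sse(stream):
    # Wrap the whole async iteration in try/except and emit the error text as
    # a final SSE "data:" line instead of ending the stream silently.
    try:
        async for chunk in stream:
            yield f"data: {json.dumps(chunk)}\n\n"
    except Exception as e:
        yield f"data: {str(e)}\n\n"

async def main():
    async for line in forward_as_sse(fake_llm_stream()):
        print(line, end="")

asyncio.run(main())

Run as a script, this prints one normal data event followed by a data event carrying the error text, which is what a proxy client now observes when a Together AI stream errors; on the non-streaming path, the new 422 mapping in exception_type converts the raised TogetherAIError into a BadRequestError.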