diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index d1d221b45..1441d92a2 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -865,7 +865,7 @@ }, "deepseek-coder": { "max_tokens": 4096, - "max_input_tokens": 16000, + "max_input_tokens": 32000, "max_output_tokens": 4096, "input_cost_per_token": 0.00000014, "output_cost_per_token": 0.00000028, @@ -1984,6 +1984,15 @@ "litellm_provider": "replicate", "mode": "chat" }, + "openrouter/deepseek/deepseek-coder": { + "max_tokens": 4096, + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000014, + "output_cost_per_token": 0.00000028, + "litellm_provider": "openrouter", + "mode": "chat" + }, "openrouter/microsoft/wizardlm-2-8x22b:nitro": { "max_tokens": 65536, "input_cost_per_token": 0.000001, diff --git a/litellm/utils.py b/litellm/utils.py index 28cf7464b..6b958a580 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -5741,7 +5741,15 @@ def exception_type( response=original_exception.response, litellm_debug_info=extra_information, ) + elif "Web server is returning an unknown error" in error_str: + exception_mapping_worked = True + raise litellm.InternalServerError( + message=f"{exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + ) elif "Request too large" in error_str: + exception_mapping_worked = True raise RateLimitError( message=f"RateLimitError: {exception_provider} - {message}", model=model,