diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py
index 05eece834..1d79653ea 100644
--- a/litellm/tests/test_amazing_vertex_completion.py
+++ b/litellm/tests/test_amazing_vertex_completion.py
@@ -548,42 +548,6 @@ def test_gemini_pro_vision_base64():
 
 
 def test_gemini_pro_function_calling():
-    load_vertex_ai_credentials()
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "get_current_weather",
-                "description": "Get the current weather in a given location",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "location": {
-                            "type": "string",
-                            "description": "The city and state, e.g. San Francisco, CA",
-                        },
-                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
-                    },
-                    "required": ["location"],
-                },
-            },
-        }
-    ]
-
-    messages = [
-        {
-            "role": "user",
-            "content": "What's the weather like in Boston today in fahrenheit?",
-        }
-    ]
-    completion = litellm.completion(
-        model="gemini-pro", messages=messages, tools=tools, tool_choice="auto"
-    )
-    print(f"completion: {completion}")
-    if hasattr(completion.choices[0].message, "tool_calls") and isinstance(
-        completion.choices[0].message.tool_calls, list
-    ):
-        assert len(completion.choices[0].message.tool_calls) == 1
     try:
         load_vertex_ai_credentials()
         tools = [
diff --git a/litellm/utils.py b/litellm/utils.py
index ec296e9dc..80d26f58b 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3974,12 +3974,10 @@ def calculage_img_tokens(
     tile_tokens = (base_tokens * 2) * tiles_needed_high_res
     total_tokens = base_tokens + tile_tokens
     return total_tokens
-    
+
 
 def create_pretrained_tokenizer(
-    identifier: str,
-    revision="main",
-    auth_token: Optional[str] = None
+    identifier: str, revision="main", auth_token: Optional[str] = None
 ):
     """
     Creates a tokenizer from an existing file on a HuggingFace repository to be used with `token_counter`.
@@ -3993,7 +3991,9 @@ def create_pretrained_tokenizer(
     dict: A dictionary with the tokenizer and its type.
     """
 
-    tokenizer = Tokenizer.from_pretrained(identifier, revision=revision, auth_token=auth_token)
+    tokenizer = Tokenizer.from_pretrained(
+        identifier, revision=revision, auth_token=auth_token
+    )
 
     return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
 
@@ -9001,7 +9001,16 @@ def exception_type(
                        request=original_exception.request,
                    )
                elif custom_llm_provider == "azure":
-                    if "This model's maximum context length is" in error_str:
+                    if "Internal server error" in error_str:
+                        exception_mapping_worked = True
+                        raise APIError(
+                            status_code=500,
+                            message=f"AzureException - {original_exception.message}",
+                            llm_provider="azure",
+                            model=model,
+                            request=httpx.Request(method="POST", url="https://openai.com/"),
+                        )
+                    elif "This model's maximum context length is" in error_str:
                        exception_mapping_worked = True
                        raise ContextWindowExceededError(
                            message=f"AzureException - {original_exception.message}",
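Context for the `create_pretrained_tokenizer` hunk above: the change is purely a reformat, but for readers unfamiliar with the helper, below is a minimal usage sketch. It assumes litellm exposes `create_pretrained_tokenizer` and `token_counter` at the package top level and that `token_counter` accepts the returned dict via a `custom_tokenizer` argument, as the docstring ("to be used with `token_counter`") suggests; the HuggingFace repo id is illustrative, not part of this diff.

import litellm

# Wraps tokenizers.Tokenizer.from_pretrained and tags the result as a
# HuggingFace tokenizer so token_counter knows how to invoke it.
# "bigcode/starcoder" is an illustrative repo id.
tokenizer = litellm.create_pretrained_tokenizer("bigcode/starcoder")

# Count tokens with the custom tokenizer instead of a model's default one
# (custom_tokenizer is assumed here based on the docstring above).
num_tokens = litellm.token_counter(
    custom_tokenizer=tokenizer,
    text="What's the weather like in Boston today in fahrenheit?",
)
print(num_tokens)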