Litellm dev 11 21 2024 (#6837)

* Fix Vertex AI function calling invoke: use JSON format instead of protobuf text format. (#6702)

* test: test tool_call conversion when arguments is empty dict

Fixes https://github.com/BerriAI/litellm/issues/6833
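
A minimal sketch of the idea behind the Vertex AI fix and the empty-arguments test (hypothetical helper name, not litellm's actual code): tool-call arguments are serialized as plain JSON rather than protobuf text format, so an empty arguments dict survives the round trip.

```python
import json


def tool_args_to_json_dict(arguments: str) -> dict:
    """Hypothetical helper: parse an OpenAI-style tool_call 'arguments'
    string into the dict sent to Vertex AI, using JSON instead of
    protobuf text format."""
    # An empty payload ("" or "{}") should yield an empty dict, not an error.
    if not arguments or not arguments.strip():
        return {}
    return json.loads(arguments)


assert tool_args_to_json_dict("{}") == {}
assert tool_args_to_json_dict('{"city": "Paris"}') == {"city": "Paris"}
```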

* fix(openai_like/handler.py): return more descriptive error message

Fixes https://github.com/BerriAI/litellm/issues/6812

* test: skip overloaded model

* docs(anthropic.md): update anthropic docs to show how to route to any new model

* feat(groq/): fake stream when 'response_format' param is passed

Groq doesn't support streaming when response_format is set
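
A rough sketch of what "fake stream" means here (hypothetical function, not the actual groq handler): issue one blocking request and replay it to the caller as a single-chunk stream.

```python
from typing import Any, Iterator


def fake_stream(client: Any, **request_kwargs: Any) -> Iterator[Any]:
    """Hypothetical sketch: Groq rejects streaming when response_format is
    set, so make one non-streaming call and yield it as a lone 'chunk'."""
    request_kwargs.pop("stream", None)  # force a non-streaming request
    full_response = client.create(**request_kwargs)  # assumed client API
    yield full_response  # caller keeps iterating as if it were a stream
```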

* feat(groq/): add response_format support for groq

Closes https://github.com/BerriAI/litellm/issues/6845
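
With this in place, a request along these lines should work (model name is only an example):

```python
import litellm

# Groq JSON mode; litellm now maps response_format through GroqChatConfig.
response = litellm.completion(
    model="groq/llama3-70b-8192",  # example model name
    messages=[{"role": "user", "content": "Return the answer as a JSON object."}],
    response_format={"type": "json_object"},
)
print(response.choices[0].message.content)
```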

* fix(o1_handler.py): remove fake streaming for o1

Closes https://github.com/BerriAI/litellm/issues/6801

* build(model_prices_and_context_window.json): add groq llama3.2b model pricing

Closes https://github.com/BerriAI/litellm/issues/6807

* fix(utils.py): fix handling ollama response format param

Fixes https://github.com/BerriAI/litellm/issues/6848#issuecomment-2491215485
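
The intent, per the Ollama mapping shown in the diff below, is that an OpenAI-style `{"type": "json_object"}` response_format maps to Ollama's native `format: "json"` option. A hedged usage sketch (model name and endpoint are examples):

```python
import litellm

# JSON mode against a local Ollama model; litellm maps response_format to
# Ollama's "format" option under the hood.
response = litellm.completion(
    model="ollama/llama3",  # example local model
    messages=[{"role": "user", "content": "Reply with a JSON object."}],
    response_format={"type": "json_object"},
    api_base="http://localhost:11434",  # default Ollama endpoint
)
print(response.choices[0].message.content)
```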

* docs(sidebars.js): refactor chat endpoint placement

* fix: fix linting errors

* test: fix test

* test: fix test

* fix(openai_like/handler): handle max retries

* fix(streaming_handler.py): fix streaming check for openai-compatible providers

* test: update test

* test: correctly handle model is overloaded error

* test: update test

* test: fix test

* test: mark flaky test

---------

Co-authored-by: Guowang Li <Guowang@users.noreply.github.com>
Krish Dholakia 2024-11-22 01:53:52 +05:30 committed by GitHub
parent 9ef254ff35
commit 4eca6ede4e
31 changed files with 747 additions and 403 deletions

litellm/utils.py

```diff
@@ -1739,15 +1739,15 @@ def supports_response_schema(model: str, custom_llm_provider: Optional[str]) ->
     Does not raise error. Defaults to 'False'. Outputs logging.error.
     """
-    ## GET LLM PROVIDER ##
-    model, custom_llm_provider, _, _ = get_llm_provider(
-        model=model, custom_llm_provider=custom_llm_provider
-    )
-    if custom_llm_provider == "predibase":  # predibase supports this globally
-        return True
     try:
+        ## GET LLM PROVIDER ##
+        model, custom_llm_provider, _, _ = get_llm_provider(
+            model=model, custom_llm_provider=custom_llm_provider
+        )
+        if custom_llm_provider == "predibase":  # predibase supports this globally
+            return True
         ## GET MODEL INFO
         model_info = litellm.get_model_info(
             model=model, custom_llm_provider=custom_llm_provider
@@ -1755,12 +1755,17 @@ def supports_response_schema(model: str, custom_llm_provider: Optional[str]) ->
         if model_info.get("supports_response_schema", False) is True:
             return True
         return False
     except Exception:
-        verbose_logger.error(
-            f"Model not supports response_schema. You passed model={model}, custom_llm_provider={custom_llm_provider}."
+        ## check if provider supports response schema globally
+        supported_params = get_supported_openai_params(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            request_type="chat_completion",
         )
-        return False
+        if supported_params is not None and "response_schema" in supported_params:
+            return True
+        return False


 def supports_function_calling(
```
```diff
@@ -2710,6 +2715,7 @@ def get_optional_params(  # noqa: PLR0915
         non_default_params["response_format"] = type_to_response_format_param(
             response_format=non_default_params["response_format"]
         )
+
     if "tools" in non_default_params and isinstance(
         non_default_params, list
     ):  # fixes https://github.com/BerriAI/litellm/issues/4933
```
```diff
@@ -3259,24 +3265,14 @@ def get_optional_params(  # noqa: PLR0915
         )
         _check_valid_arg(supported_params=supported_params)
-        if max_tokens is not None:
-            optional_params["num_predict"] = max_tokens
-        if stream:
-            optional_params["stream"] = stream
-        if temperature is not None:
-            optional_params["temperature"] = temperature
-        if seed is not None:
-            optional_params["seed"] = seed
-        if top_p is not None:
-            optional_params["top_p"] = top_p
-        if frequency_penalty is not None:
-            optional_params["repeat_penalty"] = frequency_penalty
-        if stop is not None:
-            optional_params["stop"] = stop
-        if response_format is not None and response_format["type"] == "json_object":
-            optional_params["format"] = "json"
+        optional_params = litellm.OllamaConfig().map_openai_params(
+            non_default_params=non_default_params,
+            optional_params=optional_params,
+        )
     elif custom_llm_provider == "ollama_chat":
-        supported_params = litellm.OllamaChatConfig().get_supported_openai_params()
+        supported_params = get_supported_openai_params(
+            model=model, custom_llm_provider=custom_llm_provider
+        )
         _check_valid_arg(supported_params=supported_params)
```
```diff
@@ -3494,24 +3490,16 @@ def get_optional_params(  # noqa: PLR0915
         )
         _check_valid_arg(supported_params=supported_params)
-        if temperature is not None:
-            optional_params["temperature"] = temperature
-        if max_tokens is not None:
-            optional_params["max_tokens"] = max_tokens
-        if top_p is not None:
-            optional_params["top_p"] = top_p
-        if stream is not None:
-            optional_params["stream"] = stream
-        if stop is not None:
-            optional_params["stop"] = stop
-        if tools is not None:
-            optional_params["tools"] = tools
-        if tool_choice is not None:
-            optional_params["tool_choice"] = tool_choice
-        if response_format is not None:
-            optional_params["response_format"] = response_format
-        if seed is not None:
-            optional_params["seed"] = seed
+        optional_params = litellm.GroqChatConfig().map_openai_params(
+            non_default_params=non_default_params,
+            optional_params=optional_params,
+            model=model,
+            drop_params=(
+                drop_params
+                if drop_params is not None and isinstance(drop_params, bool)
+                else False
+            ),
+        )
     elif custom_llm_provider == "deepseek":
         supported_params = get_supported_openai_params(
             model=model, custom_llm_provider=custom_llm_provider
```
```diff
@@ -6178,5 +6166,7 @@ class ProviderConfigManager:
             return litellm.OpenAIO1Config()
         elif litellm.LlmProviders.DEEPSEEK == provider:
             return litellm.DeepSeekChatConfig()
+        elif litellm.LlmProviders.GROQ == provider:
+            return litellm.GroqChatConfig()
         return OpenAIGPTConfig()
```
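
Taken together, the utils.py hunks above move provider behavior into config classes. A hedged sketch of how the new paths might be exercised (names taken from the diff; treat the exact call signatures as assumptions):

```python
import litellm
from litellm.utils import ProviderConfigManager, supports_response_schema

# Falls back to the provider-level supported-params check added above when
# per-model info is unavailable.
print(supports_response_schema(model="llama3-70b-8192", custom_llm_provider="groq"))

# Groq now resolves to its own chat config instead of the generic OpenAI one.
config = ProviderConfigManager.get_provider_chat_config(  # assumed method name
    model="llama3-70b-8192", provider=litellm.LlmProviders.GROQ
)
print(type(config).__name__)  # expected: GroqChatConfig
```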