mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
Add /vllm/* and /mistral/* passthrough endpoints (adds support for Mistral OCR via passthrough)
* feat(llm_passthrough_endpoints.py): support mistral passthrough
  Closes https://github.com/BerriAI/litellm/issues/9051
* feat(llm_passthrough_endpoints.py): initial commit for adding vllm passthrough route
* feat(vllm/common_utils.py): add new vllm model info route
  make it possible to use vllm passthrough route via factory function
* fix(llm_passthrough_endpoints.py): add all methods to vllm passthrough route
* fix: fix linting error
* fix: fix linting error
* fix: fix ruff check
* fix(proxy/_types.py): add new passthrough routes
* docs(config_settings.md): add mistral env vars to docs
This commit is contained in:
parent
5fcdf4becf
commit
3031fff297
12 changed files with 450 additions and 176 deletions
|
@ -516,9 +516,9 @@ def function_setup( # noqa: PLR0915
|
|||
function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None
|
||||
|
||||
## DYNAMIC CALLBACKS ##
|
||||
dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = (
|
||||
kwargs.pop("callbacks", None)
|
||||
)
|
||||
dynamic_callbacks: Optional[
|
||||
List[Union[str, Callable, CustomLogger]]
|
||||
] = kwargs.pop("callbacks", None)
|
||||
all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)
|
||||
|
||||
if len(all_callbacks) > 0:
|
||||
|
@ -1202,9 +1202,9 @@ def client(original_function): # noqa: PLR0915
|
|||
exception=e,
|
||||
retry_policy=kwargs.get("retry_policy"),
|
||||
)
|
||||
kwargs["retry_policy"] = (
|
||||
reset_retry_policy()
|
||||
) # prevent infinite loops
|
||||
kwargs[
|
||||
"retry_policy"
|
||||
] = reset_retry_policy() # prevent infinite loops
|
||||
litellm.num_retries = (
|
||||
None # set retries to None to prevent infinite loops
|
||||
)
|
||||
|
@ -3013,16 +3013,16 @@ def get_optional_params( # noqa: PLR0915
|
|||
True # so that main.py adds the function call to the prompt
|
||||
)
|
||||
if "tools" in non_default_params:
|
||||
optional_params["functions_unsupported_model"] = (
|
||||
non_default_params.pop("tools")
|
||||
)
|
||||
optional_params[
|
||||
"functions_unsupported_model"
|
||||
] = non_default_params.pop("tools")
|
||||
non_default_params.pop(
|
||||
"tool_choice", None
|
||||
) # causes ollama requests to hang
|
||||
elif "functions" in non_default_params:
|
||||
optional_params["functions_unsupported_model"] = (
|
||||
non_default_params.pop("functions")
|
||||
)
|
||||
optional_params[
|
||||
"functions_unsupported_model"
|
||||
] = non_default_params.pop("functions")
|
||||
elif (
|
||||
litellm.add_function_to_prompt
|
||||
): # if user opts to add it to prompt instead
|
||||
|
@ -3045,10 +3045,10 @@ def get_optional_params( # noqa: PLR0915
|
|||
|
||||
if "response_format" in non_default_params:
|
||||
if provider_config is not None:
|
||||
non_default_params["response_format"] = (
|
||||
provider_config.get_json_schema_from_pydantic_object(
|
||||
response_format=non_default_params["response_format"]
|
||||
)
|
||||
non_default_params[
|
||||
"response_format"
|
||||
] = provider_config.get_json_schema_from_pydantic_object(
|
||||
response_format=non_default_params["response_format"]
|
||||
)
|
||||
else:
|
||||
non_default_params["response_format"] = type_to_response_format_param(
|
||||
|
@ -4064,9 +4064,9 @@ def _count_characters(text: str) -> int:
|
|||
|
||||
|
||||
def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) -> str:
|
||||
_choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = (
|
||||
response_obj.choices
|
||||
)
|
||||
_choices: Union[
|
||||
List[Union[Choices, StreamingChoices]], List[StreamingChoices]
|
||||
] = response_obj.choices
|
||||
|
||||
response_str = ""
|
||||
for choice in _choices:
|
||||
|
@ -4458,14 +4458,14 @@ def _get_model_info_helper( # noqa: PLR0915
|
|||
|
||||
if combined_model_name in litellm.model_cost:
|
||||
key = combined_model_name
|
||||
_model_info = _get_model_info_from_model_cost(key=key)
|
||||
_model_info = _get_model_info_from_model_cost(key=cast(str, key))
|
||||
if not _check_provider_match(
|
||||
model_info=_model_info, custom_llm_provider=custom_llm_provider
|
||||
):
|
||||
_model_info = None
|
||||
if _model_info is None and model in litellm.model_cost:
|
||||
key = model
|
||||
_model_info = _get_model_info_from_model_cost(key=key)
|
||||
_model_info = _get_model_info_from_model_cost(key=cast(str, key))
|
||||
if not _check_provider_match(
|
||||
model_info=_model_info, custom_llm_provider=custom_llm_provider
|
||||
):
|
||||
|
@ -4475,21 +4475,21 @@ def _get_model_info_helper( # noqa: PLR0915
|
|||
and combined_stripped_model_name in litellm.model_cost
|
||||
):
|
||||
key = combined_stripped_model_name
|
||||
_model_info = _get_model_info_from_model_cost(key=key)
|
||||
_model_info = _get_model_info_from_model_cost(key=cast(str, key))
|
||||
if not _check_provider_match(
|
||||
model_info=_model_info, custom_llm_provider=custom_llm_provider
|
||||
):
|
||||
_model_info = None
|
||||
if _model_info is None and stripped_model_name in litellm.model_cost:
|
||||
key = stripped_model_name
|
||||
_model_info = _get_model_info_from_model_cost(key=key)
|
||||
_model_info = _get_model_info_from_model_cost(key=cast(str, key))
|
||||
if not _check_provider_match(
|
||||
model_info=_model_info, custom_llm_provider=custom_llm_provider
|
||||
):
|
||||
_model_info = None
|
||||
if _model_info is None and split_model in litellm.model_cost:
|
||||
key = split_model
|
||||
_model_info = _get_model_info_from_model_cost(key=key)
|
||||
_model_info = _get_model_info_from_model_cost(key=cast(str, key))
|
||||
if not _check_provider_match(
|
||||
model_info=_model_info, custom_llm_provider=custom_llm_provider
|
||||
):
|
||||
|
@ -6510,7 +6510,12 @@ class ProviderConfigManager:
|
|||
return litellm.AnthropicModelInfo()
|
||||
elif LlmProviders.XAI == provider:
|
||||
return litellm.XAIModelInfo()
|
||||
elif LlmProviders.VLLM == provider:
|
||||
from litellm.llms.vllm.common_utils import (
|
||||
VLLMModelInfo, # experimental approach, to reduce bloat on __init__.py
|
||||
)
|
||||
|
||||
return VLLMModelInfo()
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue