Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 11:14:04 +00:00
* fix(utils.py): initial commit to remove circular imports - moves llmproviders to utils.py
* fix(router.py): fix 'litellm.EmbeddingResponse' import from router.py
* refactor: fix litellm.ModelResponse import on pass-through endpoints
* refactor(litellm_logging.py): fix circular import for custom callbacks literal
* fix(factory.py): fix circular imports inside prompt factory
* fix(cost_calculator.py): fix circular import for 'litellm.Usage'
* fix(proxy_server.py): fix potential circular import with `litellm.Router`
* fix(proxy/utils.py): fix potential circular import in `litellm.Router`
* fix: remove circular imports in 'auth_checks' and 'guardrails/'
* fix(prompt_injection_detection.py): fix router import
* fix(vertex_passthrough_logging_handler.py): fix potential circular imports in vertex pass-through
* fix(anthropic_pass_through_logging_handler.py): fix potential circular imports
* fix(slack_alerting.py-+-ollama_chat.py): fix ModelResponse import
* fix(base.py): fix potential circular import
* fix(handler.py): fix potential circular ref in codestral + cohere handlers
* fix(azure.py): fix potential circular imports
* fix(gpt_transformation.py): fix ModelResponse import
* fix(litellm_logging.py): add logging base class - simplified typing makes it easy for other files to type-check the logging obj without introducing circular imports
* fix(azure_ai/embed): fix potential circular import in handler.py
* fix(databricks/): fix potential circular imports in databricks/
* fix(vertex_ai/): fix potential circular imports in vertex ai embeddings
* fix(vertex_ai/image_gen): fix import
* fix(watsonx-+-bedrock): cleanup imports
* refactor(anthropic-pass-through-+-petals): cleanup imports
* refactor(huggingface/): cleanup imports
* fix(ollama-+-clarifai): cleanup circular imports
* fix(openai_like/): fix import
* fix(openai_like/): fix embedding handler, cleanup imports
* refactor(openai.py): cleanup imports
* fix(sagemaker/transformation.py): fix import
* ci(config.yml): add circular import test to ci/cd
130 lines
4.2 KiB
Python
"""
|
|
OpenAI-like chat completion transformation
|
|
"""
|
|
|
|
import types
|
|
from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union
|
|
|
|
import httpx
|
|
from pydantic import BaseModel
|
|
|
|
import litellm
|
|
from litellm.secret_managers.main import get_secret_str
|
|
from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage
|
|
from litellm.types.utils import ModelResponse
|
|
|
|
from ....utils import _remove_additional_properties, _remove_strict_from_schema
|
|
from ...openai.chat.gpt_transformation import OpenAIGPTConfig
|
|
|
|
if TYPE_CHECKING:
    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj

    LiteLLMLoggingObj = _LiteLLMLoggingObj
else:
    LiteLLMLoggingObj = Any
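
# NOTE: the TYPE_CHECKING block above is the standard way to break a runtime
# circular import (the theme of this commit): the real `Logging` class is
# imported only while type checking, and at runtime `LiteLLMLoggingObj` falls
# back to `Any`, so annotations such as `logging_obj: LiteLLMLoggingObj` below
# still type-check without pulling litellm_logging into this module's import
# graph.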


class OpenAILikeChatConfig(OpenAIGPTConfig):
    def _get_openai_compatible_provider_info(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: Optional[str] = None,
    ) -> Tuple[Optional[str], Optional[str]]:
        api_base = api_base or get_secret_str("OPENAI_LIKE_API_BASE")  # type: ignore
        dynamic_api_key = (
            api_key or get_secret_str("OPENAI_LIKE_API_KEY") or ""
        )  # vllm does not require an api key
        return api_base, dynamic_api_key
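
    # Illustration (not part of the original source), assuming a local
    # OpenAI-compatible server: with OPENAI_LIKE_API_BASE="http://localhost:8000/v1"
    # exported and no API key configured anywhere, this resolves to
    # ("http://localhost:8000/v1", ""); the empty key suits vllm-style servers
    # that do not require one.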

    @staticmethod
    def _convert_tool_response_to_message(
        message: ChatCompletionAssistantMessage, json_mode: bool
    ) -> ChatCompletionAssistantMessage:
        """
        If json_mode is true, convert the returned tool-call response into a
        plain content message holding the JSON string.

        e.g. input:

        {"role": "assistant", "tool_calls": [{"id": "call_5ms4", "type": "function", "function": {"name": "json_tool_call", "arguments": "{\"key\": \"question\", \"value\": \"What is the capital of France?\"}"}}]}

        output:

        {"role": "assistant", "content": "{\"key\": \"question\", \"value\": \"What is the capital of France?\"}"}
        """
        if not json_mode:
            return message

        _tool_calls = message.get("tool_calls")

        # only convert when there is exactly one tool call to flatten
        if _tool_calls is None or len(_tool_calls) != 1:
            return message

        message["content"] = _tool_calls[0]["function"].get("arguments") or ""
        message["tool_calls"] = None

        return message
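
    # Hedged usage sketch (not part of the original module), assuming
    # ChatCompletionAssistantMessage is dict-like, as litellm's TypedDicts are:
    #
    #   msg = {
    #       "role": "assistant",
    #       "tool_calls": [{
    #           "id": "call_5ms4",
    #           "type": "function",
    #           "function": {"name": "json_tool_call", "arguments": '{"key": "value"}'},
    #       }],
    #   }
    #   out = OpenAILikeChatConfig._convert_tool_response_to_message(msg, json_mode=True)
    #   # -> {"role": "assistant", "content": '{"key": "value"}', "tool_calls": None}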

    @staticmethod
    def _transform_response(
        model: str,
        response: httpx.Response,
        model_response: ModelResponse,
        stream: bool,
        logging_obj: LiteLLMLoggingObj,
        optional_params: dict,
        api_key: Optional[str],
        data: Union[dict, str],
        messages: List,
        print_verbose,
        encoding,
        json_mode: bool,
        custom_llm_provider: str,
        base_model: Optional[str],
    ) -> ModelResponse:
        response_json = response.json()
        # log the raw provider response before any transformation
        logging_obj.post_call(
            input=messages,
            api_key="",
            original_response=response_json,
            additional_args={"complete_input_dict": data},
        )

        if json_mode:
            # flatten each choice's forced tool call into plain JSON content
            for choice in response_json["choices"]:
                message = OpenAILikeChatConfig._convert_tool_response_to_message(
                    choice.get("message"), json_mode
                )
                choice["message"] = message

        returned_response = ModelResponse(**response_json)

        # prefix the model name with the calling provider
        returned_response.model = (
            custom_llm_provider + "/" + (returned_response.model or "")
        )

        if base_model is not None:
            returned_response._hidden_params["model"] = base_model
        return returned_response
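
    # Illustration (not part of the original source): if the provider returned
    # {"model": "gpt-4o", ...} and custom_llm_provider == "openai_like", the
    # returned ModelResponse carries model == "openai_like/gpt-4o"; a non-None
    # base_model is additionally recorded under _hidden_params["model"].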

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
        replace_max_completion_tokens_with_max_tokens: bool = True,
    ) -> dict:
        mapped_params = super().map_openai_params(
            non_default_params, optional_params, model, drop_params
        )
        if (
            "max_completion_tokens" in non_default_params
            and replace_max_completion_tokens_with_max_tokens
        ):
            mapped_params["max_tokens"] = non_default_params[
                "max_completion_tokens"
            ]  # most openai-compatible providers support 'max_tokens' not 'max_completion_tokens'
            mapped_params.pop("max_completion_tokens", None)

        return mapped_params
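
# Hedged usage sketch (not part of the original module), assuming
# OpenAILikeChatConfig can be constructed with no arguments:
#
#   config = OpenAILikeChatConfig()
#   params = config.map_openai_params(
#       non_default_params={"max_completion_tokens": 256, "temperature": 0.2},
#       optional_params={},
#       model="my-openai-compatible-model",
#       drop_params=False,
#   )
#   # params["max_tokens"] == 256; "max_completion_tokens" has been removed,
#   # and "temperature" is expected to pass through the parent OpenAI mapping.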