LiteLLM Minor Fixes & Improvements (10/02/2024) (#6023)

* feat(together_ai/completion): handle together ai completion calls

* fix: handle list of int / list of list of int for text completion calls
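
A minimal sketch of the call shapes now handled (model name illustrative):

    import litellm

    # a single prompt sent as a list of token ids
    litellm.text_completion(model="gpt-3.5-turbo-instruct", prompt=[1, 2, 3])

    # a batch of prompts, each a list of token ids
    litellm.text_completion(
        model="gpt-3.5-turbo-instruct", prompt=[[1, 2, 3], [4, 5, 6]]
    )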

* fix(utils.py): check if base model in bedrock converse model list

Fixes https://github.com/BerriAI/litellm/issues/6003
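
An illustrative repro, assuming a region-prefixed Bedrock inference profile: the base model (anthropic.claude-3-haiku-20240307-v1:0) is now looked up in the Converse model list so optional params map correctly.

    import litellm

    # "us." cross-region prefix; optional params such as max_tokens are now
    # mapped via the underlying converse base model
    litellm.completion(
        model="bedrock/us.anthropic.claude-3-haiku-20240307-v1:0",
        messages=[{"role": "user", "content": "Hello"}],
        max_tokens=100,
    )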

* test(test_optional_params.py): add unit tests for bedrock optional param mapping

Fixes https://github.com/BerriAI/litellm/issues/6003

* feat(utils.py): enable passing dummy tool call for anthropic/bedrock calls if tool_use blocks exist

Fixes https://github.com/BerriAI/litellm/issues/5388
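
A sketch of the request shape this addresses (tool and model names hypothetical): a history that already contains tool calls/results, sent without a `tools` param; litellm now injects a dummy tool definition so Anthropic/Bedrock accept it.

    import litellm

    messages = [
        {"role": "user", "content": "What's the weather in SF?"},
        {  # assistant turn with a prior tool call
            "role": "assistant",
            "tool_calls": [{
                "id": "call_1",
                "type": "function",
                "function": {"name": "get_weather", "arguments": '{"city": "SF"}'},
            }],
        },
        {"role": "tool", "tool_call_id": "call_1", "content": "65F and sunny"},
    ]
    # no `tools` param passed - previously rejected by Anthropic/Bedrock
    litellm.completion(
        model="anthropic/claude-3-5-sonnet-20240620", messages=messages
    )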

* fixed an issue with tool use of Claude models with Anthropic and Bedrock (#6013)

* fix(utils.py): handle empty schema for anthropic/bedrock

Fixes https://github.com/BerriAI/litellm/issues/6012
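
An illustrative tool definition with an empty parameter schema (names hypothetical), which previously produced an invalid Anthropic/Bedrock payload:

    import litellm

    tools = [{
        "type": "function",
        "function": {
            "name": "get_current_time",
            "description": "Return the current server time.",
            "parameters": {},  # empty schema - now handled
        },
    }]
    litellm.completion(
        model="anthropic/claude-3-haiku-20240307",
        messages=[{"role": "user", "content": "What time is it?"}],
        tools=tools,
    )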

* fix: fix linting errors

* fix(proxy_cli.py): fix import route for app + health checks path (#6026)

* (testing): Enable testing us.anthropic.claude-3-haiku-20240307-v1:0. (#6018)

* fix(proxy_cli.py): fix import route for app + health checks gettysburg.wav path

Fixes https://github.com/BerriAI/litellm/issues/5999

---------

Co-authored-by: Ved Patwardhan <54766411+vedpatwardhan@users.noreply.github.com>
Co-authored-by: David Manouchehri <david.manouchehri@ai.moda>
Krish Dholakia authored 2024-10-02 22:00:28 -04:00; committed by GitHub
commit 14165d3648 (parent 8995ff49ae)
20 changed files with 443 additions and 125 deletions

@@ -4,7 +4,7 @@ import os
 import time
 import traceback
 import types
-from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union
+from typing import Any, Callable, Coroutine, Iterable, Literal, Optional, Union, cast
 import httpx
 import openai
@@ -30,8 +30,10 @@ from litellm.utils import (
 from ...types.llms.openai import *
 from ..base import BaseLLM
+from ..prompt_templates.common_utils import convert_content_list_to_str
 from ..prompt_templates.factory import custom_prompt, prompt_factory
 from .common_utils import drop_params_from_unprocessable_entity_error
+from .completion.utils import is_tokens_or_list_of_tokens
 
 class OpenAIError(Exception):
@@ -420,6 +422,35 @@ class OpenAITextCompletionConfig:
             and v is not None
         }
 
+    def _transform_prompt(
+        self,
+        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
+    ) -> AllPromptValues:
+        if len(messages) == 1:  # base case
+            message_content = messages[0].get("content")
+            if (
+                message_content
+                and isinstance(message_content, list)
+                and is_tokens_or_list_of_tokens(message_content)
+            ):
+                openai_prompt: AllPromptValues = cast(AllPromptValues, message_content)
+            else:
+                openai_prompt = ""
+                content = convert_content_list_to_str(
+                    cast(AllMessageValues, messages[0])
+                )
+                openai_prompt += content
+        else:
+            prompt_str_list: List[str] = []
+            for m in messages:
+                try:  # expect list of int/list of list of int to be a 1 message array only.
+                    content = convert_content_list_to_str(cast(AllMessageValues, m))
+                    prompt_str_list.append(content)
+                except Exception as e:
+                    raise e
+            openai_prompt = prompt_str_list
+        return openai_prompt
+
     def convert_to_chat_model_response_object(
         self,
         response_object: Optional[TextCompletionResponse] = None,
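
A usage sketch of the new _transform_prompt, inferred from the hunk above (not part of the diff):

    config = OpenAITextCompletionConfig()

    # a lone token-array message passes through as-is
    config._transform_prompt([{"role": "user", "content": [1, 2, 3]}])  # -> [1, 2, 3]

    # a lone text message collapses to a plain string
    config._transform_prompt([{"role": "user", "content": "hello"}])  # -> "hello"

    # multiple messages become a list of prompt strings
    config._transform_prompt(
        [{"role": "user", "content": "a"}, {"role": "user", "content": "b"}]
    )  # -> ["a", "b"]
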
@@ -459,6 +490,7 @@
 class OpenAIChatCompletion(BaseLLM):
     def __init__(self) -> None:
         super().__init__()
@@ -1466,7 +1498,9 @@
         elif mode == "audio_transcription":
             # Get the current directory of the file being run
             pwd = os.path.dirname(os.path.realpath(__file__))
-            file_path = os.path.join(pwd, "../tests/gettysburg.wav")
+            file_path = os.path.join(
+                pwd, "../../../tests/gettysburg.wav"
+            )  # proxy address
             audio_file = open(file_path, "rb")
             completion = await client.audio.transcriptions.with_raw_response.create(
                 file=audio_file,
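
Why the extra parent hops: the module now sits deeper in the package tree, so the health-check fixture is resolved relative to __file__; a quick sanity check (directory layout assumed):

    import os

    pwd = os.path.dirname(os.path.realpath(__file__))  # e.g. litellm/llms/OpenAI
    file_path = os.path.abspath(os.path.join(pwd, "../../../tests/gettysburg.wav"))
    assert os.path.exists(file_path), file_path
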
@@ -1502,6 +1536,8 @@
 class OpenAITextCompletion(BaseLLM):
+    openai_text_completion_global_config = OpenAITextCompletionConfig()
+
     def __init__(self) -> None:
         super().__init__()
@@ -1518,7 +1554,7 @@
         model_response: ModelResponse,
         api_key: str,
         model: str,
-        messages: list,
+        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
         timeout: float,
         logging_obj: LiteLLMLoggingObj,
         optional_params: dict,
@@ -1531,24 +1567,18 @@
         organization: Optional[str] = None,
         headers: Optional[dict] = None,
     ):
         super().completion()
         try:
             if headers is None:
                 headers = self.validate_environment(api_key=api_key)
             if model is None or messages is None:
                 raise OpenAIError(status_code=422, message="Missing model or messages")
-            if (
-                len(messages) > 0
-                and "content" in messages[0]
-                and isinstance(messages[0]["content"], list)
-            ):
-                prompt = messages[0]["content"]
-            else:
-                prompt = [message["content"] for message in messages]  # type: ignore
             # don't send max retries to the api, if set
+            prompt = self.openai_text_completion_global_config._transform_prompt(
+                messages
+            )
             data = {"model": model, "prompt": prompt, **optional_params}
             max_retries = data.pop("max_retries", 2)
             ## LOGGING