LiteLLM Minor Fixes & Improvements (11/01/2024) (#6551)

* fix: add lm_studio support

* fix(cohere_transformation.py): fix transformation logic for azure cohere embedding model name

Fixes https://github.com/BerriAI/litellm/issues/6540

* fix(utils.py): require base64 str to begin with `data:`

Fixes https://github.com/BerriAI/litellm/issues/6541

* fix: cleanup tests

* docs(guardrails.md): fix typo

* fix(opentelemetry.py): move to `.exception` and update 'response_obj' value to handle 'None' case

Fixes https://github.com/BerriAI/litellm/issues/6510

* fix: fix linting noqa placement
commit 22b8f93f53
parent bac2ac2a49
Author: Krish Dholakia (committed by GitHub)
Date:   2024-11-02 00:39:31 +04:00
12 changed files with 123 additions and 17 deletions


@@ -349,7 +349,7 @@ litellm_settings:
         callbacks: [hide_secrets]
         default_on: true
     - pii_masking:
-        callback: ["presidio"]
+        callbacks: ["presidio"]
         default_on: true
         logging_only: true
     - your-custom-guardrail


@@ -517,6 +517,7 @@ openai_compatible_providers: List = [
     "github",
     "litellm_proxy",
     "hosted_vllm",
+    "lm_studio",
 ]
 openai_text_completion_compatible_providers: List = (
     [  # providers that support `/v1/completions`
@@ -776,6 +777,7 @@ class LlmProviders(str, Enum):
     CUSTOM = "custom"
     LITELLM_PROXY = "litellm_proxy"
     HOSTED_VLLM = "hosted_vllm"
+    LM_STUDIO = "lm_studio"

 provider_list: List[Union[LlmProviders, str]] = list(LlmProviders)
@@ -1034,6 +1036,7 @@ from .llms.AzureOpenAI.azure import (
 from .llms.AzureOpenAI.chat.gpt_transformation import AzureOpenAIConfig
 from .llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig
+from .llms.lm_studio.chat.transformation import LMStudioChatConfig
 from .llms.perplexity.chat.transformation import PerplexityChatConfig
 from .llms.AzureOpenAI.chat.o1_transformation import AzureOpenAIO1Config
 from .llms.watsonx import IBMWatsonXAIConfig
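
Once registered, the new provider is visible through the public enum and compatibility list. A quick sanity check, as a sketch assuming a litellm install that includes this change:

    import litellm

    # Both reflect the additions in this hunk.
    print(litellm.LlmProviders.LM_STUDIO.value)                # -> "lm_studio"
    print("lm_studio" in litellm.openai_compatible_providers)  # -> True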


@@ -413,7 +413,9 @@ class OpenTelemetry(CustomLogger):
         except Exception:
             return ""

-    def set_attributes(self, span: Span, kwargs, response_obj):  # noqa: PLR0915
+    def set_attributes(  # noqa: PLR0915
+        self, span: Span, kwargs, response_obj: Optional[Any]
+    ):
         try:
             if self.callback_name == "arize":
                 from litellm.integrations.arize_ai import ArizeLogger
@@ -505,20 +507,20 @@ class OpenTelemetry(CustomLogger):
             )

             # The unique identifier for the completion.
-            if response_obj.get("id"):
+            if response_obj and response_obj.get("id"):
                 self.safe_set_attribute(
                     span=span, key="gen_ai.response.id", value=response_obj.get("id")
                 )

             # The model used to generate the response.
-            if response_obj.get("model"):
+            if response_obj and response_obj.get("model"):
                 self.safe_set_attribute(
                     span=span,
                     key=SpanAttributes.LLM_RESPONSE_MODEL,
                     value=response_obj.get("model"),
                 )

-            usage = response_obj.get("usage")
+            usage = response_obj and response_obj.get("usage")
             if usage:
                 self.safe_set_attribute(
                     span=span,
@@ -619,7 +621,7 @@ class OpenTelemetry(CustomLogger):
             )
         except Exception as e:
-            verbose_logger.error(
+            verbose_logger.exception(
                 "OpenTelemetry logging error in set_attributes %s", str(e)
             )
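
Two behavioral notes on this hunk: `response_obj and response_obj.get(...)` short-circuits to `None` when `response_obj` is `None`, so each attribute block is skipped safely, and `logger.exception` records the full traceback where `logger.error` records only the message. A minimal illustration with the standard `logging` module:

    import logging

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger(__name__)

    response_obj = None
    usage = response_obj and response_obj.get("usage")  # short-circuits to None
    if usage:
        pass  # never reached when response_obj is None

    try:
        raise ValueError("boom")
    except Exception as e:
        # Unlike logger.error, logger.exception appends the active traceback.
        logger.exception("OpenTelemetry logging error in set_attributes %s", str(e))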


@@ -429,6 +429,14 @@ def _get_openai_compatible_provider_info(  # noqa: PLR0915
         ) = litellm.HostedVLLMChatConfig()._get_openai_compatible_provider_info(
             api_base, api_key
         )
+    elif custom_llm_provider == "lm_studio":
+        # lm_studio is openai compatible, we just need to set this to custom_openai
+        (
+            api_base,
+            dynamic_api_key,
+        ) = litellm.LMStudioChatConfig()._get_openai_compatible_provider_info(
+            api_base, api_key
+        )
     elif custom_llm_provider == "deepseek":
         # deepseek is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.deepseek.com/v1
         api_base = (
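
With this branch in place, resolution for `lm_studio/...` model strings flows through the new config. A hedged sketch of how it can be exercised — the model name and URL are made up, and the 4-tuple shape follows litellm's `get_llm_provider` as I understand it:

    import litellm

    model, provider, dynamic_api_key, api_base = litellm.get_llm_provider(
        model="lm_studio/my-local-model",     # hypothetical model name
        api_base="http://localhost:1234/v1",  # assumed LM Studio server URL
    )
    assert provider == "lm_studio"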


@@ -22,9 +22,10 @@ class AzureAICohereConfig:
         pass

     def _map_azure_model_group(self, model: str) -> str:
-        if "model=offer-cohere-embed-multili-paygo":
+        if model == "offer-cohere-embed-multili-paygo":
             return "Cohere-embed-v3-multilingual"
-        elif "model=offer-cohere-embed-english-paygo":
+        elif model == "offer-cohere-embed-english-paygo":
             return "Cohere-embed-v3-english"
         return model
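
The removed branches compared nothing: a non-empty string literal is always truthy in Python, so the first branch matched every input and every Azure model group mapped to the multilingual variant. A one-liner shows the pitfall:

    # Always True: the literal itself is tested, not the `model` argument.
    if "model=offer-cohere-embed-multili-paygo":
        print("this branch runs for any input")

    # The fix compares the argument to the expected value instead.
    model = "offer-cohere-embed-english-paygo"
    print(model == "offer-cohere-embed-multili-paygo")  # -> False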


@@ -0,0 +1,26 @@
+"""
+Translate from OpenAI's `/v1/chat/completions` to LM Studio's `/chat/completions`
+"""
+
+import types
+from typing import List, Optional, Tuple, Union
+
+from pydantic import BaseModel
+
+import litellm
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage
+
+from ....utils import _remove_additional_properties, _remove_strict_from_schema
+from ...OpenAI.chat.gpt_transformation import OpenAIGPTConfig
+
+
+class LMStudioChatConfig(OpenAIGPTConfig):
+    def _get_openai_compatible_provider_info(
+        self, api_base: Optional[str], api_key: Optional[str]
+    ) -> Tuple[Optional[str], Optional[str]]:
+        api_base = api_base or get_secret_str("LM_STUDIO_API_BASE")  # type: ignore
+        dynamic_api_key = (
+            api_key or get_secret_str("LM_STUDIO_API_KEY") or ""
+        )  # LM Studio does not require an api key
+        return api_base, dynamic_api_key
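
For context, a minimal usage sketch of the new provider. The model name and server URL are assumptions (LM Studio's local server conventionally listens on http://localhost:1234/v1), and the base URL can equally be supplied via the `LM_STUDIO_API_BASE` env var read above:

    import os

    os.environ["LM_STUDIO_API_BASE"] = "http://localhost:1234/v1"  # assumed local server

    import litellm

    # "local-model" is a placeholder for whatever model LM Studio is serving.
    response = litellm.completion(
        model="lm_studio/local-model",
        messages=[{"role": "user", "content": "Hello from litellm"}],
    )
    print(response.choices[0].message.content)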


@@ -3,7 +3,8 @@ model_list:
     litellm_params:
       model: claude-3-5-sonnet-20240620
       api_key: os.environ/ANTHROPIC_API_KEY
-  - model_name: claude-3-5-sonnet-aihubmix
+      api_base: "http://0.0.0.0:8000"
+  - model_name: my-fallback-openai-model
     litellm_params:
       model: openai/claude-3-5-sonnet-20240620
       input_cost_per_token: 0.000003 # 3$/M
@@ -15,7 +16,7 @@ model_list:
       model: gemini/gemini-1.5-flash-002

 litellm_settings:
-  fallbacks: [{ "claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"] }]
+  fallbacks: [{ "claude-3-5-sonnet-20240620": ["my-fallback-openai-model"] }]
   callbacks: ["otel", "prometheus"]

 router_settings:


@@ -8631,11 +8631,16 @@ def is_cached_message(message: AllMessageValues) -> bool:

 def is_base64_encoded(s: str) -> bool:
     try:
         # Strip out the prefix if it exists
-        if s.startswith("data:"):
-            s = s.split(",")[1]
+        if not s.startswith(
+            "data:"
+        ):  # require `data:` for base64 str, like openai. Prevents false positives like s='Dog'
+            return False
+
+        s = s.split(",")[1]

         # Try to decode the string
         decoded_bytes = base64.b64decode(s, validate=True)

         # Check if the original string can be re-encoded to the same string
         return base64.b64encode(decoded_bytes).decode("utf-8") == s
     except Exception:
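
The round-trip at the end is a strictness check: `b64decode(..., validate=True)` rejects non-alphabet characters, and re-encoding confirms the payload is canonical base64 (correct padding, no stray trailing bits). A small worked example of the accepted path, using only the stdlib:

    import base64

    s = "data:text/plain;base64,aGVsbG8="  # canonical encoding of b"hello"
    payload = s.split(",")[1]
    decoded = base64.b64decode(payload, validate=True)
    assert base64.b64encode(decoded).decode("utf-8") == payload  # round-trips cleanly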


@@ -0,0 +1,41 @@
+# What is this?
+## Unit tests for Azure AI integration
+
+import asyncio
+import os
+import sys
+import traceback
+
+from dotenv import load_dotenv
+
+import litellm.types
+import litellm.types.utils
+from litellm.llms.anthropic.chat import ModelResponseIterator
+
+load_dotenv()
+import io
+import os
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+from typing import Optional
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+import litellm
+
+
+@pytest.mark.parametrize(
+    "model_group_header, expected_model",
+    [
+        ("offer-cohere-embed-multili-paygo", "Cohere-embed-v3-multilingual"),
+        ("offer-cohere-embed-english-paygo", "Cohere-embed-v3-english"),
+    ],
+)
+def test_map_azure_model_group(model_group_header, expected_model):
+    from litellm.llms.azure_ai.embed.cohere_transformation import AzureAICohereConfig
+
+    config = AzureAICohereConfig()
+    assert config._map_azure_model_group(model_group_header) == expected_model


@@ -1905,7 +1905,9 @@ def test_hf_test_completion_tgi():

 # hf_test_completion_tgi()

-@pytest.mark.parametrize("provider", ["openai", "hosted_vllm"])  # "vertex_ai",
+@pytest.mark.parametrize(
+    "provider", ["openai", "hosted_vllm", "lm_studio"]
+)  # "vertex_ai",
 @pytest.mark.asyncio
 async def test_openai_compatible_custom_api_base(provider):
     litellm.set_verbose = True
@@ -1931,8 +1933,8 @@ async def test_openai_compatible_custom_api_base(provider):
             api_base="my-custom-api-base",
             hello="world",
         )
-    except Exception:
-        pass
+    except Exception as e:
+        print(e)

     mock_call.assert_called_once()


@@ -194,7 +194,7 @@ def _azure_ai_image_mock_response(*args, **kwargs):
         )
     ],
 )
-@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.parametrize("sync_mode", [True])  # , False
 @pytest.mark.asyncio
 async def test_azure_ai_embedding_image(model, api_base, api_key, sync_mode):
     try:


@@ -839,7 +839,11 @@ def test_is_base64_encoded():

 @mock.patch("httpx.AsyncClient")
-@mock.patch.dict(os.environ, {"SSL_VERIFY": "/certificate.pem", "SSL_CERTIFICATE": "/client.pem"}, clear=True)
+@mock.patch.dict(
+    os.environ,
+    {"SSL_VERIFY": "/certificate.pem", "SSL_CERTIFICATE": "/client.pem"},
+    clear=True,
+)
 def test_async_http_handler(mock_async_client):
     import httpx
@@ -861,6 +865,7 @@ def test_async_http_handler(mock_async_client):
         verify="/certificate.pem",
     )

+
 @pytest.mark.parametrize(
     "model, expected_bool", [("gpt-3.5-turbo", False), ("gpt-4o-audio-preview", True)]
 )
@@ -874,3 +879,15 @@ def test_supports_audio_input(model, expected_bool):
     assert supports_pc == expected_bool

+
+def test_is_base64_encoded_2():
+    from litellm.utils import is_base64_encoded
+
+    assert (
+        is_base64_encoded(
+            s="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/x+AAwMCAO+ip1sAAAAASUVORK5CYII="
+        )
+        is True
+    )
+    assert is_base64_encoded(s="Dog") is False