Litellm contributor prs 02 24 2025 (#8775)

* Adding VertexAI Claude 3.7 Sonnet (#8774) Co-authored-by: Emerson Gomes <emerson.gomes@thalesgroup.com> * build(model_prices_and_context_window.json): add anthropic 3-7 models on vertex ai and bedrock * Support video_url (#8743) * Support video_url Support VLMs that work with video. Example implementation in vllm: https://github.com/vllm-project/vllm/pull/10020 * llms openai.py: Add ChatCompletionVideoObject Add data structures to support `video_url` in chat completion * test test_completion.py: add test for video_url * Arize Phoenix - ensure correct endpoint/protocol are used; and default to phoenix cloud (#8750) * minor fixes to default to http and to ensure that the correct endpoint is used * Update test_arize_phoenix.py * prioritize http over grpc --------- Co-authored-by: Emerson Gomes <emerson.gomes@gmail.com> Co-authored-by: Emerson Gomes <emerson.gomes@thalesgroup.com> Co-authored-by: Pang Wu <104795337+pang-wu@users.noreply.github.com> Co-authored-by: Nate Mar <67926244+nate-mar@users.noreply.github.com>
2025-04-26 03:04:13 +00:00 · 2025-02-24 18:55:48 -08:00 · 2025-02-24 18:55:48 -08:00 · 9914c166b7
commit 9914c166b7
parent 46cbaa8c0c
7 changed files with 135 additions and 14 deletions
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@ -338,7 +338,6 @@ config_path = None
 vertex_ai_safety_settings: Optional[dict] = None
 BEDROCK_CONVERSE_MODELS = [
    "anthropic.claude-3-5-haiku-20241022-v1:0",
    "anthropic.claude-3-7-sonnet-20250219-v1:0",
    "anthropic.claude-3-5-sonnet-20241022-v2:0",
    "anthropic.claude-3-5-sonnet-20240620-v1:0",
    "anthropic.claude-3-opus-20240229-v1:0",
--- a/litellm/integrations/arize/arize_phoenix.py
+++ b/litellm/integrations/arize/arize_phoenix.py
@ -34,24 +34,24 @@ class ArizePhoenixLogger:
        Returns:
            ArizePhoenixConfig: A Pydantic model containing Arize Phoenix configuration.
        """
-        api_key = os.environ.get("PHOENIX_API_KEY")
+        api_key = os.environ.get("PHOENIX_API_KEY", None)
-        grpc_endpoint = os.environ.get("PHOENIX_COLLECTOR_ENDPOINT")
+        grpc_endpoint = os.environ.get("PHOENIX_COLLECTOR_ENDPOINT", None)
-        http_endpoint = os.environ.get("PHOENIX_COLLECTOR_HTTP_ENDPOINT")
+        http_endpoint = os.environ.get("PHOENIX_COLLECTOR_HTTP_ENDPOINT", None)
        endpoint = None
-        protocol: Protocol = "otlp_grpc"
+        protocol: Protocol = "otlp_http"
-        if grpc_endpoint is not None:
+        if http_endpoint:
            endpoint = grpc_endpoint
            protocol = "otlp_grpc"
        elif http_endpoint is not None:
            endpoint = http_endpoint
            protocol = "otlp_http"
        elif grpc_endpoint:
            endpoint = grpc_endpoint
            protocol = "otlp_grpc"
        else:
            endpoint = ARIZE_HOSTED_PHOENIX_ENDPOINT
-            protocol = "otlp_grpc"       
+            protocol = "otlp_http"       
            verbose_logger.debug(
-                f"No PHOENIX_COLLECTOR_ENDPOINT or PHOENIX_COLLECTOR_HTTP_ENDPOINT found, using default endpoint: {ARIZE_HOSTED_PHOENIX_ENDPOINT}"
+                f"No PHOENIX_COLLECTOR_ENDPOINT or PHOENIX_COLLECTOR_HTTP_ENDPOINT found, using default endpoint with http: {ARIZE_HOSTED_PHOENIX_ENDPOINT}"
            )
        otlp_auth_headers = None
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@ -4074,6 +4074,25 @@
        "supports_assistant_prefill": true,
        "supports_tool_choice": true
    },
    "vertex_ai/claude-3-7-sonnet-20250219": {
        "max_tokens": 8192,
        "max_input_tokens": 200000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000015,
        "cache_creation_input_token_cost": 0.00000375,
        "cache_read_input_token_cost": 0.0000003,
        "litellm_provider": "vertex_ai-anthropic_models",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 159,
        "supports_assistant_prefill": true,
        "supports_prompt_caching": true,
        "supports_response_schema": true,
        "deprecation_date": "2025-06-01",
        "supports_tool_choice": true
    },
    "vertex_ai/claude-3-haiku": {
        "max_tokens": 4096, 
        "max_input_tokens": 200000,
@ -6338,6 +6357,21 @@
        "supports_vision": true,
        "supports_tool_choice": true
    },
    "anthropic.claude-3-7-sonnet-20250219-v1:0": {
        "max_tokens": 8192,
        "max_input_tokens": 200000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000015,
        "litellm_provider": "bedrock_converse",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_vision": true,
        "supports_assistant_prefill": true,
        "supports_prompt_caching": true, 
        "supports_response_schema": true,
        "supports_tool_choice": true
    },
    "anthropic.claude-3-5-sonnet-20241022-v2:0": {
        "max_tokens": 8192,
        "max_input_tokens": 200000,
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@ -377,6 +377,15 @@ class ChatCompletionImageObject(TypedDict):
    type: Literal["image_url"]
    image_url: Union[str, ChatCompletionImageUrlObject]
 # Object form of a video reference: `url` is marked Required, while `detail`
 # may be omitted because the TypedDict is declared with total=False.
 class ChatCompletionVideoUrlObject(TypedDict, total=False):
    url: Required[str]
    detail: str
 # Message content part tagged with type "video_url"; `video_url` holds either
 # a plain string or a ChatCompletionVideoUrlObject.
 class ChatCompletionVideoObject(TypedDict):
    type: Literal["video_url"]
    video_url: Union[str, ChatCompletionVideoUrlObject]
 class ChatCompletionAudioObject(ChatCompletionContentPartInputAudioParam):
    pass
@ -405,6 +414,7 @@ OpenAIMessageContentListBlock = Union[
    ChatCompletionImageObject,
    ChatCompletionAudioObject,
    ChatCompletionDocumentObject,
    ChatCompletionVideoObject,
 ]
 OpenAIMessageContent = Union[
@ -480,6 +490,7 @@ ValidUserMessageContentTypes = [
    "image_url",
    "input_audio",
    "document",
    "video_url",
 ]  # used for validating user messages. Prevent users from accidentally sending anthropic messages.
 AllMessageValues = Union[
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -4074,6 +4074,25 @@
        "supports_assistant_prefill": true,
        "supports_tool_choice": true
    },
    "vertex_ai/claude-3-7-sonnet-20250219": {
        "max_tokens": 8192,
        "max_input_tokens": 200000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000015,
        "cache_creation_input_token_cost": 0.00000375,
        "cache_read_input_token_cost": 0.0000003,
        "litellm_provider": "vertex_ai-anthropic_models",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 159,
        "supports_assistant_prefill": true,
        "supports_prompt_caching": true,
        "supports_response_schema": true,
        "deprecation_date": "2025-06-01",
        "supports_tool_choice": true
    },
    "vertex_ai/claude-3-haiku": {
        "max_tokens": 4096, 
        "max_input_tokens": 200000,
@ -6344,7 +6363,7 @@
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000015,
-        "litellm_provider": "bedrock",
+        "litellm_provider": "bedrock_converse",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_vision": true,
--- a/tests/local_testing/test_arize_phoenix.py
+++ b/tests/local_testing/test_arize_phoenix.py
@ -36,8 +36,22 @@ async def test_async_otel_callback():
            {"PHOENIX_API_KEY": "test_api_key"},
            "api_key=test_api_key",
            "https://app.phoenix.arize.com/v1/traces",
-            "otlp_grpc",
+            "otlp_http",
-            id="default to grpc protocol and Arize hosted Phoenix endpoint",
+            id="default to http protocol and Arize hosted Phoenix endpoint",
        ),
        pytest.param(
            {"PHOENIX_COLLECTOR_HTTP_ENDPOINT": "", "PHOENIX_API_KEY": "test_api_key"},
            "api_key=test_api_key",
            "https://app.phoenix.arize.com/v1/traces",
            "otlp_http",
            id="empty string/unset endpoint will default to http protocol and Arize hosted Phoenix endpoint",
        ),
        pytest.param(
            {"PHOENIX_COLLECTOR_HTTP_ENDPOINT": "http://localhost:4318", "PHOENIX_COLLECTOR_ENDPOINT": "http://localhost:4317", "PHOENIX_API_KEY": "test_api_key"},
            "Authorization=Bearer test_api_key",
            "http://localhost:4318",
            "otlp_http",
            id="prioritize http if both endpoints are set",
        ),
        pytest.param(
            {"PHOENIX_COLLECTOR_ENDPOINT": "https://localhost:6006", "PHOENIX_API_KEY": "test_api_key"},
--- a/tests/local_testing/test_completion.py
+++ b/tests/local_testing/test_completion.py
@ -1756,6 +1756,50 @@ async def test_openai_compatible_custom_api_base(provider):
        assert "hello" in mock_call.call_args.kwargs["extra_body"]
# Checks that a user message containing a `video_url` content part leads to
# exactly one call of the patched client's chat.completions.create, once per
# parametrized provider prefix.
@pytest.mark.parametrize(
    "provider", ["openai", "hosted_vllm",]
 )  # "vertex_ai",
@pytest.mark.asyncio
# NOTE(review): declared async, but nothing in the body is awaited.
 async def test_openai_compatible_custom_api_video(provider):
    litellm.set_verbose = True
    # One user turn mixing a text part with a video_url part.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What do you see in this video?",
                },
                {
                    "type": "video_url",
                    "video_url": {"url": "https://www.youtube.com/watch?v=29_ipKNI8I0"},
                }
            ],
        }
    ]
    from openai import OpenAI
    openai_client = OpenAI(api_key="fake-key")
    # Replace the client's `create` with a MagicMock so the call can be observed.
    with patch.object(
        openai_client.chat.completions, "create", new=MagicMock()
    ) as mock_call:
        try:
            completion(
                model="{provider}/my-vllm-model".format(provider=provider),
                messages=messages,
                response_format={"type": "json_object"},
                client=openai_client,
                api_base="my-custom-api-base",
            )
        except Exception as e:
            # Any exception from completion() is printed and swallowed; the
            # only assertion is the call count below.
            print(e)
        mock_call.assert_called_once()
 def test_lm_studio_completion(monkeypatch):
    monkeypatch.delenv("LM_STUDIO_API_KEY", raising=False)
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)