Litellm contributor prs 02 24 2025 (#8775)

* Adding VertexAI Claude 3.7 Sonnet (#8774)

Co-authored-by: Emerson Gomes <emerson.gomes@thalesgroup.com>

* build(model_prices_and_context_window.json): add anthropic 3-7 models on vertex ai and bedrock

* Support video_url (#8743)

* Support video_url

Support VLMs that work with video.
Example implementation in vllm: https://github.com/vllm-project/vllm/pull/10020

* llms openai.py: Add ChatCompletionVideoObject

Add data structures to support `video_url` in chat completion

* test test_completion.py: add test for video_url

* Arize Phoenix - ensure correct endpoint/protocol are used; and default to phoenix cloud (#8750)

* minor fixes to default to http and to ensure that the correct endpoint is used

* Update test_arize_phoenix.py

* prioritize http over grpc

---------

Co-authored-by: Emerson Gomes <emerson.gomes@gmail.com>
Co-authored-by: Emerson Gomes <emerson.gomes@thalesgroup.com>
Co-authored-by: Pang Wu <104795337+pang-wu@users.noreply.github.com>
Co-authored-by: Nate Mar <67926244+nate-mar@users.noreply.github.com>
Krish Dholakia 2025-02-24 18:55:48 -08:00 committed by GitHub
parent 46cbaa8c0c
commit 9914c166b7
7 changed files with 135 additions and 14 deletions


@@ -338,7 +338,6 @@ config_path = None
 vertex_ai_safety_settings: Optional[dict] = None
 BEDROCK_CONVERSE_MODELS = [
     "anthropic.claude-3-5-haiku-20241022-v1:0",
-    "anthropic.claude-3-7-sonnet-20250219-v1:0",
     "anthropic.claude-3-5-sonnet-20241022-v2:0",
     "anthropic.claude-3-5-sonnet-20240620-v1:0",
     "anthropic.claude-3-opus-20240229-v1:0",


@@ -34,24 +34,24 @@ class ArizePhoenixLogger:
         Returns:
             ArizePhoenixConfig: A Pydantic model containing Arize Phoenix configuration.
         """
-        api_key = os.environ.get("PHOENIX_API_KEY")
-        grpc_endpoint = os.environ.get("PHOENIX_COLLECTOR_ENDPOINT")
-        http_endpoint = os.environ.get("PHOENIX_COLLECTOR_HTTP_ENDPOINT")
+        api_key = os.environ.get("PHOENIX_API_KEY", None)
+        grpc_endpoint = os.environ.get("PHOENIX_COLLECTOR_ENDPOINT", None)
+        http_endpoint = os.environ.get("PHOENIX_COLLECTOR_HTTP_ENDPOINT", None)
 
         endpoint = None
-        protocol: Protocol = "otlp_grpc"
+        protocol: Protocol = "otlp_http"
 
-        if grpc_endpoint is not None:
-            endpoint = grpc_endpoint
-            protocol = "otlp_grpc"
-        elif http_endpoint is not None:
+        if http_endpoint:
             endpoint = http_endpoint
             protocol = "otlp_http"
+        elif grpc_endpoint:
+            endpoint = grpc_endpoint
+            protocol = "otlp_grpc"
         else:
             endpoint = ARIZE_HOSTED_PHOENIX_ENDPOINT
-            protocol = "otlp_grpc"
+            protocol = "otlp_http"
             verbose_logger.debug(
-                f"No PHOENIX_COLLECTOR_ENDPOINT or PHOENIX_COLLECTOR_HTTP_ENDPOINT found, using default endpoint: {ARIZE_HOSTED_PHOENIX_ENDPOINT}"
+                f"No PHOENIX_COLLECTOR_ENDPOINT or PHOENIX_COLLECTOR_HTTP_ENDPOINT found, using default endpoint with http: {ARIZE_HOSTED_PHOENIX_ENDPOINT}"
             )
 
         otlp_auth_headers = None
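Note that the reordered branches also swap `is not None` checks for truthiness checks, which is why an endpoint set to the empty string now falls back to the hosted Phoenix default instead of being used verbatim. A minimal sketch of the new resolution order; the import path and the config accessor names are assumed, not taken from this diff:

    import os
    from litellm.integrations.arize.arize_phoenix import ArizePhoenixLogger  # assumed path

    os.environ["PHOENIX_COLLECTOR_HTTP_ENDPOINT"] = "http://localhost:4318"
    os.environ["PHOENIX_COLLECTOR_ENDPOINT"] = "http://localhost:4317"  # grpc collector
    os.environ["PHOENIX_API_KEY"] = "test_api_key"

    config = ArizePhoenixLogger.get_arize_phoenix_config()  # assumed method name
    print(config.endpoint, config.protocol)  # expected: http://localhost:4318 otlp_http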


@@ -4074,6 +4074,25 @@
         "supports_assistant_prefill": true,
         "supports_tool_choice": true
     },
+    "vertex_ai/claude-3-7-sonnet-20250219": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "cache_creation_input_token_cost": 0.00000375,
+        "cache_read_input_token_cost": 0.0000003,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "deprecation_date": "2025-06-01",
+        "supports_tool_choice": true
+    },
     "vertex_ai/claude-3-haiku": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -6338,6 +6357,21 @@
         "supports_vision": true,
         "supports_tool_choice": true
     },
+    "anthropic.claude-3-7-sonnet-20250219-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "supports_tool_choice": true
+    },
     "anthropic.claude-3-5-sonnet-20241022-v2:0": {
         "max_tokens": 8192,
         "max_input_tokens": 200000,


@@ -377,6 +377,15 @@ class ChatCompletionImageObject(TypedDict):
     type: Literal["image_url"]
     image_url: Union[str, ChatCompletionImageUrlObject]
 
+
+class ChatCompletionVideoUrlObject(TypedDict, total=False):
+    url: Required[str]
+    detail: str
+
+
+class ChatCompletionVideoObject(TypedDict):
+    type: Literal["video_url"]
+    video_url: Union[str, ChatCompletionVideoUrlObject]
 
 
 class ChatCompletionAudioObject(ChatCompletionContentPartInputAudioParam):
     pass
 
@@ -405,6 +414,7 @@ OpenAIMessageContentListBlock = Union[
     ChatCompletionImageObject,
     ChatCompletionAudioObject,
     ChatCompletionDocumentObject,
+    ChatCompletionVideoObject,
 ]
 
 OpenAIMessageContent = Union[
@@ -480,6 +490,7 @@ ValidUserMessageContentTypes = [
     "image_url",
     "input_audio",
     "document",
+    "video_url",
 ]  # used for validating user messages. Prevent users from accidentally sending anthropic messages.
 
 AllMessageValues = Union[
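With these types in place, a caller can build a typed video content part that passes the ValidUserMessageContentTypes check. A minimal sketch; the import path follows the commit message ("llms openai.py") and the URL is a placeholder:

    from litellm.types.llms.openai import (
        ChatCompletionVideoObject,
        ChatCompletionVideoUrlObject,
    )

    video_part: ChatCompletionVideoObject = {
        "type": "video_url",
        "video_url": ChatCompletionVideoUrlObject(url="https://example.com/clip.mp4"),
    }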


@@ -4074,6 +4074,25 @@
         "supports_assistant_prefill": true,
         "supports_tool_choice": true
     },
+    "vertex_ai/claude-3-7-sonnet-20250219": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "cache_creation_input_token_cost": 0.00000375,
+        "cache_read_input_token_cost": 0.0000003,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "deprecation_date": "2025-06-01",
+        "supports_tool_choice": true
+    },
     "vertex_ai/claude-3-haiku": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -6344,7 +6363,7 @@
         "max_output_tokens": 8192,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000015,
-        "litellm_provider": "bedrock",
+        "litellm_provider": "bedrock_converse",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_vision": true,


@@ -36,8 +36,22 @@ async def test_async_otel_callback():
            {"PHOENIX_API_KEY": "test_api_key"},
            "api_key=test_api_key",
            "https://app.phoenix.arize.com/v1/traces",
-            "otlp_grpc",
-            id="default to grpc protocol and Arize hosted Phoenix endpoint",
+            "otlp_http",
+            id="default to http protocol and Arize hosted Phoenix endpoint",
         ),
+        pytest.param(
+            {"PHOENIX_COLLECTOR_HTTP_ENDPOINT": "", "PHOENIX_API_KEY": "test_api_key"},
+            "api_key=test_api_key",
+            "https://app.phoenix.arize.com/v1/traces",
+            "otlp_http",
+            id="empty string/unset endpoint will default to http protocol and Arize hosted Phoenix endpoint",
+        ),
+        pytest.param(
+            {"PHOENIX_COLLECTOR_HTTP_ENDPOINT": "http://localhost:4318", "PHOENIX_COLLECTOR_ENDPOINT": "http://localhost:4317", "PHOENIX_API_KEY": "test_api_key"},
+            "Authorization=Bearer test_api_key",
+            "http://localhost:4318",
+            "otlp_http",
+            id="prioritize http if both endpoints are set",
+        ),
         pytest.param(
             {"PHOENIX_COLLECTOR_ENDPOINT": "https://localhost:6006", "PHOENIX_API_KEY": "test_api_key"},


@@ -1756,6 +1756,50 @@ async def test_openai_compatible_custom_api_base(provider):
     assert "hello" in mock_call.call_args.kwargs["extra_body"]
 
 
+@pytest.mark.parametrize(
+    "provider", ["openai", "hosted_vllm",]
+)  # "vertex_ai",
+@pytest.mark.asyncio
+async def test_openai_compatible_custom_api_video(provider):
+    litellm.set_verbose = True
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "What do you see in this video?",
+                },
+                {
+                    "type": "video_url",
+                    "video_url": {"url": "https://www.youtube.com/watch?v=29_ipKNI8I0"},
+                }
+            ],
+        }
+    ]
+    from openai import OpenAI
+
+    openai_client = OpenAI(api_key="fake-key")
+
+    with patch.object(
+        openai_client.chat.completions, "create", new=MagicMock()
+    ) as mock_call:
+        try:
+            completion(
+                model="{provider}/my-vllm-model".format(provider=provider),
+                messages=messages,
+                response_format={"type": "json_object"},
+                client=openai_client,
+                api_base="my-custom-api-base",
+            )
+        except Exception as e:
+            print(e)
+
+        mock_call.assert_called_once()
+
+
 def test_lm_studio_completion(monkeypatch):
     monkeypatch.delenv("LM_STUDIO_API_KEY", raising=False)
     monkeypatch.delenv("OPENAI_API_KEY", raising=False)