Merge pull request #5391 from BerriAI/litellm_add_ai21_support
[Feat] Add Vertex AI21 support
commit 6ab8cbc105
10 changed files with 390 additions and 64 deletions

@@ -983,6 +983,85 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
</Tabs>

## AI21 Models

| Model Name | Function Call |
|------------------|--------------------------------------|
| jamba-1.5-mini@001 | `completion(model='vertex_ai/jamba-1.5-mini@001', messages)` |
| jamba-1.5-large@001 | `completion(model='vertex_ai/jamba-1.5-large@001', messages)` |

### Usage

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import completion
import os

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""

model = "jamba-1.5-mini@001"

vertex_ai_project = "your-vertex-project"  # can also set this as os.environ["VERTEXAI_PROJECT"]
vertex_ai_location = "your-vertex-location"  # can also set this as os.environ["VERTEXAI_LOCATION"]

response = completion(
    model="vertex_ai/" + model,
    messages=[{"role": "user", "content": "hi"}],
    vertex_ai_project=vertex_ai_project,
    vertex_ai_location=vertex_ai_location,
)
print("\nModel Response", response)
```
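
Streaming is expected to work through the usual LiteLLM `stream=True` flag (the AI21 handler added in this PR builds a `:streamRawPredict` endpoint when streaming). A minimal sketch, assuming the same credentials, project, and location setup as the example above:

```python
# Hedged sketch: stream the same Jamba model via LiteLLM's standard stream flag.
from litellm import completion

response = completion(
    model="vertex_ai/jamba-1.5-mini@001",
    messages=[{"role": "user", "content": "hi"}],
    vertex_ai_project="your-vertex-project",
    vertex_ai_location="your-vertex-location",
    stream=True,
)

# With stream=True the response is an iterator of chunks
for chunk in response:
    print(chunk)
```
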
</TabItem>
<TabItem value="proxy" label="Proxy">

**1. Add to config**

```yaml
model_list:
  - model_name: jamba-1.5-mini
    litellm_params:
      model: vertex_ai/jamba-1.5-mini@001
      vertex_ai_project: "my-test-project"
      vertex_ai_location: "us-east-1"
  - model_name: jamba-1.5-large
    litellm_params:
      model: vertex_ai/jamba-1.5-large@001
      vertex_ai_project: "my-test-project"
      vertex_ai_location: "us-west-1"
```

**2. Start proxy**

```bash
litellm --config /path/to/config.yaml

# RUNNING at http://0.0.0.0:4000
```

**3. Test it!**

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
    "model": "jamba-1.5-large",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ]
}'
```
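
If you prefer a client library over raw curl, here is a minimal sketch using the OpenAI Python SDK pointed at the proxy. The base URL and `sk-1234` key mirror the example above; adjust them for your deployment:

```python
# Hedged sketch: call the LiteLLM proxy (step 2) with the OpenAI Python SDK.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="jamba-1.5-large",  # model_name from the proxy config above
    messages=[{"role": "user", "content": "what llm are you"}],
)
print(response)
```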

</TabItem>
</Tabs>


### Usage - Codestral FIM

@@ -859,9 +859,12 @@ from .llms.vertex_ai_and_google_ai_studio.vertex_ai_non_gemini import (
from .llms.vertex_ai_and_google_ai_studio.vertex_ai_anthropic import (
    VertexAIAnthropicConfig,
)
from .llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models import (
from .llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.llama3.transformation import (
    VertexAILlama3Config,
)
from .llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.ai21.transformation import (
    VertexAIAi21Config,
)
from .llms.sagemaker.sagemaker import SagemakerConfig
from .llms.ollama import OllamaConfig
from .llms.ollama_chat import OllamaChatConfig

@@ -48,6 +48,7 @@ def cost_router(
        "claude" in model
        or "llama" in model
        or "mistral" in model
        or "jamba" in model
        or "codestral" in model
    ):
        return "cost_per_token"

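Since jamba (and codestral) now route through `cost_per_token`, response costs should be computable the same way as for other Vertex AI partner models. A hedged sketch, assuming per-token prices for these models are registered in LiteLLM's model cost map:

```python
import litellm

# Hedged sketch: jamba models fall into the "cost_per_token" branch of cost_router,
# so completion_cost prices them per token (raises if the model has no registered price).
cost = litellm.completion_cost(
    model="vertex_ai/jamba-1.5-mini@001",
    prompt="hi",
    completion="hello there",
)
print(cost)
```
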
@@ -363,6 +363,12 @@ class DatabricksChatCompletion(BaseLLM):
        except Exception as e:
            raise DatabricksError(status_code=500, message=str(e))

        logging_obj.post_call(
            input=messages,
            api_key="",
            original_response=response_json,
            additional_args={"complete_input_dict": data},
        )
        response = ModelResponse(**response_json)

        if base_model is not None:

@@ -0,0 +1,53 @@
import types
from typing import Callable, Literal, Optional, Union

import litellm


class VertexAIAi21Config:
    """
    Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/ai21

    The class `VertexAIAi21Config` provides configuration for VertexAI's AI21 API interface.

    -> Supports all OpenAI parameters
    """

    def __init__(
        self,
        max_tokens: Optional[int] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params(self):
        return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo")

    def map_openai_params(
        self, non_default_params: dict, optional_params: dict, model: str
    ):
        return litellm.OpenAIConfig().map_openai_params(
            non_default_params=non_default_params,
            optional_params=optional_params,
            model=model,
        )
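
For orientation, a hedged sketch of how this config behaves: `get_supported_openai_params` and `map_openai_params` delegate straight to `litellm.OpenAIConfig()`, so supported OpenAI parameters pass through to the request unchanged. The example below assumes `VertexAIAi21Config` is exported at the package root, as the top-level import hunk above and the `get_optional_params` hunk below indicate; the parameter values are illustrative only:

```python
import litellm

config = litellm.VertexAIAi21Config()

# Delegates to OpenAIConfig, so this returns the usual OpenAI parameter names.
print(config.get_supported_openai_params())

# Map user-supplied OpenAI params onto the request's optional params.
optional_params = config.map_openai_params(
    non_default_params={"max_tokens": 256, "top_p": 0.5},
    optional_params={},
    model="jamba-1.5-mini@001",
)
print(optional_params)  # likely {"max_tokens": 256, "top_p": 0.5}, since OpenAI params pass through
```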

@@ -0,0 +1,59 @@
import types
from typing import Callable, Literal, Optional, Union

import litellm


class VertexAILlama3Config:
    """
    Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/llama#streaming

    The class `VertexAILlama3Config` provides configuration for VertexAI's Llama API interface. Below are the parameters:

    - `max_tokens` Required (integer) max tokens,

    Note: Please make sure to modify the default parameters as required for your use case.
    """

    max_tokens: Optional[int] = None

    def __init__(
        self,
        max_tokens: Optional[int] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key == "max_tokens" and value is None:
                value = self.max_tokens
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params(self):
        return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo")

    def map_openai_params(
        self, non_default_params: dict, optional_params: dict, model: str
    ):
        return litellm.OpenAIConfig().map_openai_params(
            non_default_params=non_default_params,
            optional_params=optional_params,
            model=model,
        )

@@ -1,6 +1,7 @@
# What is this?
## Handler for calling llama 3.1 API on Vertex AI
## API Handler for calling Vertex AI Partner Models
import types
from enum import Enum
from typing import Callable, Literal, Optional, Union

import httpx  # type: ignore

@@ -8,7 +9,13 @@ import httpx  # type: ignore
import litellm
from litellm.utils import ModelResponse

from ..base import BaseLLM
from ...base import BaseLLM


class VertexPartnerProvider(str, Enum):
    mistralai = "mistralai"
    llama = "llama"
    ai21 = "ai21"


class VertexAIError(Exception):

@@ -24,61 +31,6 @@ class VertexAIError(Exception):
        )  # Call the base class constructor with the parameters it needs


class VertexAILlama3Config:
    """
    Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/llama#streaming

    The class `VertexAILlama3Config` provides configuration for VertexAI's Llama API interface. Below are the parameters:

    - `max_tokens` Required (integer) max tokens,

    Note: Please make sure to modify the default parameters as required for your use case.
    """

    max_tokens: Optional[int] = None

    def __init__(
        self,
        max_tokens: Optional[int] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key == "max_tokens" and value is None:
                value = self.max_tokens
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params(self):
        return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo")

    def map_openai_params(
        self, non_default_params: dict, optional_params: dict, model: str
    ):
        return litellm.OpenAIConfig().map_openai_params(
            non_default_params=non_default_params,
            optional_params=optional_params,
            model=model,
        )


class VertexAIPartnerModels(BaseLLM):
    def __init__(self) -> None:
        pass

@@ -87,17 +39,22 @@ class VertexAIPartnerModels(BaseLLM):
        self,
        vertex_location: str,
        vertex_project: str,
        partner: Literal["llama", "mistralai"],
        partner: VertexPartnerProvider,
        stream: Optional[bool],
        model: str,
    ) -> str:
        if partner == "llama":
        if partner == VertexPartnerProvider.llama:
            return f"https://{vertex_location}-aiplatform.googleapis.com/v1beta1/projects/{vertex_project}/locations/{vertex_location}/endpoints/openapi"
        elif partner == "mistralai":
        elif partner == VertexPartnerProvider.mistralai:
            if stream:
                return f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/mistralai/models/{model}:streamRawPredict"
            else:
                return f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/mistralai/models/{model}:rawPredict"
        elif partner == VertexPartnerProvider.ai21:
            if stream:
                return f"https://{vertex_location}-aiplatform.googleapis.com/v1beta1/projects/{vertex_project}/locations/{vertex_location}/publishers/ai21/models/{model}:streamRawPredict"
            else:
                return f"https://{vertex_location}-aiplatform.googleapis.com/v1beta1/projects/{vertex_project}/locations/{vertex_location}/publishers/ai21/models/{model}:rawPredict"

    def completion(
        self,

@@ -160,9 +117,12 @@ class VertexAIPartnerModels(BaseLLM):
            optional_params["stream"] = stream

            if "llama" in model:
                partner = "llama"
                partner = VertexPartnerProvider.llama
            elif "mistral" in model or "codestral" in model:
                partner = "mistralai"
                partner = VertexPartnerProvider.mistralai
                optional_params["custom_endpoint"] = True
            elif "jamba" in model:
                partner = VertexPartnerProvider.ai21
                optional_params["custom_endpoint"] = True

            api_base = self.create_vertex_url(
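
To make the routing above concrete, here is a hedged illustration of the endpoint `create_vertex_url` builds for a Jamba model. The project and location values are placeholders, not values from this PR:

```python
from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.main import (
    VertexAIPartnerModels,
    VertexPartnerProvider,
)

# Hedged sketch: URL produced for an AI21 (Jamba) model without streaming.
url = VertexAIPartnerModels().create_vertex_url(
    vertex_location="us-central1",
    vertex_project="my-project",
    partner=VertexPartnerProvider.ai21,
    stream=False,
    model="jamba-1.5-mini@001",
)
# -> https://us-central1-aiplatform.googleapis.com/v1beta1/projects/my-project/locations/us-central1/publishers/ai21/models/jamba-1.5-mini@001:rawPredict
print(url)
```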

@@ -126,7 +126,7 @@ from .llms.vertex_ai_and_google_ai_studio import (
    vertex_ai_anthropic,
    vertex_ai_non_gemini,
)
from .llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models import (
from .llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.main import (
    VertexAIPartnerModels,
)
from .llms.vertex_ai_and_google_ai_studio.vertex_and_google_ai_studio_gemini import (

@@ -2080,6 +2080,7 @@ def completion(
            model.startswith("meta/")
            or model.startswith("mistral")
            or model.startswith("codestral")
            or model.startswith("jamba")
        ):
            model_response = vertex_partner_models_chat_completion.completion(
                model=model,

@@ -2356,3 +2356,157 @@ async def test_gemini_context_caching_anthropic_format():

    check_cache_mock.assert_called_once()
    assert mock_client.call_count == 3


@pytest.mark.asyncio
async def test_partner_models_httpx_ai21():
    litellm.set_verbose = True
    model = "vertex_ai/jamba-1.5-mini@001"

    messages = [
        {
            "role": "system",
            "content": "Your name is Litellm Bot, you are a helpful assistant",
        },
        {
            "role": "user",
            "content": "Hello, can you tell me the weather in San Francisco?",
        },
    ]

    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        }
                    },
                    "required": ["location"],
                },
            },
        }
    ]

    data = {
        "model": model,
        "messages": messages,
        "tools": tools,
        "top_p": 0.5,
    }

    mock_response = AsyncMock()

    def return_val():
        return {
            "id": "chat-3d11cf95eb224966937b216d9494fe73",
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": " Sure, let me check that for you.",
                        "tool_calls": [
                            {
                                "id": "b5cef16b-5946-4937-b9d5-beeaea871e77",
                                "type": "function",
                                "function": {
                                    "name": "get_weather",
                                    "arguments": '{"location": "San Francisco"}',
                                },
                            }
                        ],
                    },
                    "finish_reason": "stop",
                }
            ],
            "usage": {
                "prompt_tokens": 158,
                "completion_tokens": 36,
                "total_tokens": 194,
            },
            "meta": {"requestDurationMillis": 501},
            "model": "jamba-1.5",
        }

    mock_response.json = return_val
    mock_response.status_code = 200

    with patch(
        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
        return_value=mock_response,
    ) as mock_post:
        response = await litellm.acompletion(**data)

        # Assert
        mock_post.assert_called_once()
        url, kwargs = mock_post.call_args
        print("url = ", url)
        print("call args = ", kwargs)

        print(kwargs["data"])

        assert (
            url[0]
            == "https://us-central1-aiplatform.googleapis.com/v1beta1/projects/adroit-crow-413218/locations/us-central1/publishers/ai21/models/jamba-1.5-mini@001:rawPredict"
        )

        # json loads kwargs
        kwargs["data"] = json.loads(kwargs["data"])

        assert kwargs["data"] == {
            "model": "jamba-1.5-mini",
            "messages": [
                {
                    "role": "system",
                    "content": "Your name is Litellm Bot, you are a helpful assistant",
                },
                {
                    "role": "user",
                    "content": "Hello, can you tell me the weather in San Francisco?",
                },
            ],
            "top_p": 0.5,
            "tools": [
                {
                    "type": "function",
                    "function": {
                        "name": "get_weather",
                        "description": "Get the current weather in a given location",
                        "parameters": {
                            "type": "object",
                            "properties": {
                                "location": {
                                    "type": "string",
                                    "description": "The city and state, e.g. San Francisco, CA",
                                }
                            },
                            "required": ["location"],
                        },
                    },
                }
            ],
            "stream": False,
        }

        assert response.id == "chat-3d11cf95eb224966937b216d9494fe73"
        assert len(response.choices) == 1
        assert (
            response.choices[0].message.content == " Sure, let me check that for you."
        )
        assert response.choices[0].message.tool_calls[0].function.name == "get_weather"
        assert (
            response.choices[0].message.tool_calls[0].function.arguments
            == '{"location": "San Francisco"}'
        )
        assert response.usage.prompt_tokens == 158
        assert response.usage.completion_tokens == 36
        assert response.usage.total_tokens == 194

        print(f"response: {response}")

@@ -3267,6 +3267,16 @@ def get_optional_params(
            non_default_params=non_default_params,
            optional_params=optional_params,
        )
    elif custom_llm_provider == "vertex_ai" and model in litellm.ai21_models:
        supported_params = get_supported_openai_params(
            model=model, custom_llm_provider=custom_llm_provider
        )
        _check_valid_arg(supported_params=supported_params)
        optional_params = litellm.VertexAIAi21Config().map_openai_params(
            non_default_params=non_default_params,
            optional_params=optional_params,
            model=model,
        )
    elif custom_llm_provider == "sagemaker":
        ## check if unsupported param passed in
        supported_params = get_supported_openai_params(