diff --git a/litellm/__init__.py b/litellm/__init__.py
index 9bb9a81cd..5eea6346c 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -357,6 +357,7 @@ vertex_text_models: List = []
 vertex_code_text_models: List = []
 vertex_embedding_models: List = []
 vertex_anthropic_models: List = []
+vertex_llama3_models: List = []
 ai21_models: List = []
 nlp_cloud_models: List = []
 aleph_alpha_models: List = []
@@ -828,6 +829,7 @@ from .llms.petals import PetalsConfig
 from .llms.vertex_httpx import VertexGeminiConfig, GoogleAIStudioGeminiConfig
 from .llms.vertex_ai import VertexAIConfig, VertexAITextEmbeddingConfig
 from .llms.vertex_ai_anthropic import VertexAIAnthropicConfig
+from .llms.vertex_ai_llama import VertexAILlama3Config
 from .llms.sagemaker import SagemakerConfig
 from .llms.ollama import OllamaConfig
 from .llms.ollama_chat import OllamaChatConfig
diff --git a/litellm/llms/vertex_ai_llama.py b/litellm/llms/vertex_ai_llama.py
index 4b5407faa..f33c127f7 100644
--- a/litellm/llms/vertex_ai_llama.py
+++ b/litellm/llms/vertex_ai_llama.py
@@ -53,39 +53,20 @@ class VertexAIError(Exception):
 
 class VertexAILlama3Config:
     """
-    Reference:https://docs.anthropic.com/claude/reference/messages_post
+    Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/llama#streaming
 
-    Note that the API for Claude on Vertex differs from the Anthropic API documentation in the following ways:
-
-    - `model` is not a valid parameter. The model is instead specified in the Google Cloud endpoint URL.
-    - `anthropic_version` is a required parameter and must be set to "vertex-2023-10-16".
-
-    The class `VertexAIAnthropicConfig` provides configuration for the VertexAI's Anthropic API interface. Below are the parameters:
+    The class `VertexAILlama3Config` provides configuration for Vertex AI's Llama API interface. Below are the parameters:
 
     - `max_tokens` Required (integer) max tokens,
-    - `anthropic_version` Required (string) version of anthropic for bedrock - e.g. "bedrock-2023-05-31"
-    - `system` Optional (string) the system prompt, conversion from openai format to this is handled in factory.py
-    - `temperature` Optional (float) The amount of randomness injected into the response
-    - `top_p` Optional (float) Use nucleus sampling.
-    - `top_k` Optional (int) Only sample from the top K options for each subsequent token
-    - `stop_sequences` Optional (List[str]) Custom text sequences that cause the model to stop generating
 
     Note: Please make sure to modify the default parameters as required for your use case.
     """
 
-    max_tokens: Optional[int] = (
-        4096  # anthropic max - setting this doesn't impact response, but is required by anthropic.
-    )
-    system: Optional[str] = None
-    temperature: Optional[float] = None
-    top_p: Optional[float] = None
-    top_k: Optional[int] = None
-    stop_sequences: Optional[List[str]] = None
+    max_tokens: Optional[int] = None
 
     def __init__(
         self,
         max_tokens: Optional[int] = None,
-        anthropic_version: Optional[str] = None,
     ) -> None:
         locals_ = locals()
         for key, value in locals_.items():
@@ -115,61 +96,13 @@ class VertexAILlama3Config:
     def get_supported_openai_params(self):
         return [
             "max_tokens",
-            "tools",
-            "tool_choice",
             "stream",
-            "stop",
-            "temperature",
-            "top_p",
-            "response_format",
         ]
 
     def map_openai_params(self, non_default_params: dict, optional_params: dict):
         for param, value in non_default_params.items():
             if param == "max_tokens":
                 optional_params["max_tokens"] = value
-            if param == "tools":
-                optional_params["tools"] = value
-            if param == "tool_choice":
-                _tool_choice: Optional[AnthropicMessagesToolChoice] = None
-                if value == "auto":
-                    _tool_choice = {"type": "auto"}
-                elif value == "required":
-                    _tool_choice = {"type": "any"}
-                elif isinstance(value, dict):
-                    _tool_choice = {"type": "tool", "name": value["function"]["name"]}
-
-                if _tool_choice is not None:
-                    optional_params["tool_choice"] = _tool_choice
-            if param == "stream":
-                optional_params["stream"] = value
-            if param == "stop":
-                optional_params["stop_sequences"] = value
-            if param == "temperature":
-                optional_params["temperature"] = value
-            if param == "top_p":
-                optional_params["top_p"] = value
-            if param == "response_format" and "response_schema" in value:
-                """
-                When using tools in this way:
-                https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode
-                - You usually want to provide a single tool
-                - You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool
-                - Remember that the model will pass the input to the tool, so the name of the tool and description should be from the model’s perspective.
-                """
-                _tool_choice = None
-                _tool_choice = {"name": "json_tool_call", "type": "tool"}
-
-                _tool = AnthropicMessagesTool(
-                    name="json_tool_call",
-                    input_schema={
-                        "type": "object",
-                        "properties": {"values": value["response_schema"]},  # type: ignore
-                    },
-                )
-
-                optional_params["tools"] = [_tool]
-                optional_params["tool_choice"] = _tool_choice
-                optional_params["json_mode"] = True
 
         return optional_params
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index f86ea8bd7..e9e599945 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1948,6 +1948,16 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "vertex_ai/meta/llama3-405b-instruct-maas": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "vertex_ai-llama_models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
+    },
     "vertex_ai/imagegeneration@006": {
         "cost_per_image": 0.020,
         "litellm_provider": "vertex_ai-image-models",
diff --git a/litellm/tests/test_optional_params.py b/litellm/tests/test_optional_params.py
index bbfc88710..b8011960e 100644
--- a/litellm/tests/test_optional_params.py
+++ b/litellm/tests/test_optional_params.py
@@ -128,6 +128,19 @@ def test_azure_ai_mistral_optional_params():
     assert "user" not in optional_params
 
 
+def test_vertex_ai_llama_3_optional_params():
+    litellm.vertex_llama3_models = ["meta/llama3-405b-instruct-maas"]
+    litellm.drop_params = True
+    optional_params = get_optional_params(
+        model="meta/llama3-405b-instruct-maas",
+        user="John",
+        custom_llm_provider="vertex_ai",
+        max_tokens=10,
+        temperature=0.2,
+    )
+    assert "user" not in optional_params
+
+
 def test_azure_gpt_optional_params_gpt_vision():
     # for OpenAI, Azure all extra params need to get passed as extra_body to OpenAI python. We assert we actually set extra_body here
     optional_params = litellm.utils.get_optional_params(
diff --git a/litellm/utils.py b/litellm/utils.py
index 8baced4c5..035c1c72f 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3088,6 +3088,15 @@ def get_optional_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
         )
+    elif custom_llm_provider == "vertex_ai" and model in litellm.vertex_llama3_models:
+        supported_params = get_supported_openai_params(
+            model=model, custom_llm_provider=custom_llm_provider
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.VertexAILlama3Config().map_openai_params(
+            non_default_params=non_default_params,
+            optional_params=optional_params,
+        )
     elif custom_llm_provider == "sagemaker":
         ## check if unsupported param passed in
         supported_params = get_supported_openai_params(
@@ -4189,6 +4198,9 @@ def get_supported_openai_params(
         return litellm.GoogleAIStudioGeminiConfig().get_supported_openai_params()
     elif custom_llm_provider == "vertex_ai":
         if request_type == "chat_completion":
+            if model.startswith("meta/"):
+                return litellm.VertexAILlama3Config().get_supported_openai_params()
+
             return litellm.VertexAIConfig().get_supported_openai_params()
         elif request_type == "embeddings":
             return litellm.VertexAITextEmbeddingConfig().get_supported_openai_params()
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index f86ea8bd7..e9e599945 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -1948,6 +1948,16 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "vertex_ai/meta/llama3-405b-instruct-maas": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "vertex_ai-llama_models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
+    },
     "vertex_ai/imagegeneration@006": {
         "cost_per_image": 0.020,
         "litellm_provider": "vertex_ai-image-models",
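
Review note, not part of the patch: a minimal usage sketch of what this diff enables. It assumes Vertex AI credentials are already configured (e.g. via GOOGLE_APPLICATION_CREDENTIALS); the project id and location below are placeholders. Because VertexAILlama3Config only maps max_tokens and stream, setting litellm.drop_params = True causes other OpenAI params (e.g. user, temperature) to be dropped rather than raising, which is exactly what the new test asserts.

import litellm

litellm.drop_params = True  # drop unsupported OpenAI params instead of raising

response = litellm.completion(
    model="vertex_ai/meta/llama3-405b-instruct-maas",  # matches the new pricing entry
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=10,  # supported: mapped by VertexAILlama3Config.map_openai_params
    temperature=0.2,  # not in get_supported_openai_params; dropped via drop_params
    vertex_project="my-gcp-project",  # placeholder GCP project id
    vertex_location="us-central1",  # placeholder region for the Llama 3 MaaS endpoint
)
print(response.choices[0].message.content)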