From 7c2ddba6c6e0e6dc01b525868ad792413a80a975 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 14 Sep 2024 17:23:04 -0700
Subject: [PATCH] sambanova support (#5547) (#5703)

* add sambanova support

* sambanova support

* updated api endpoint for sambanova

---------

Co-authored-by: Venu Anuganti
Co-authored-by: Venu Anuganti
---
 docs/my-website/docs/providers/sambanova.md   | 143 ++++++++++++++++++
 docs/my-website/sidebars.js                   |   1 +
 litellm/__init__.py                           |   4 +
 .../get_llm_provider_logic.py                 |  10 ++
 litellm/llms/sambanova/chat.py                |  91 +++++++++++
 litellm/main.py                               |   4 +
 ...odel_prices_and_context_window_backup.json |  30 ++++
 7 files changed, 283 insertions(+)
 create mode 100644 docs/my-website/docs/providers/sambanova.md
 create mode 100644 litellm/llms/sambanova/chat.py

diff --git a/docs/my-website/docs/providers/sambanova.md b/docs/my-website/docs/providers/sambanova.md
new file mode 100644
index 000000000..9fa6ce8b6
--- /dev/null
+++ b/docs/my-website/docs/providers/sambanova.md
@@ -0,0 +1,143 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# SambaNova
+https://community.sambanova.ai/t/create-chat-completion-api/
+
+:::tip
+
+**We support ALL SambaNova models; just set `model=sambanova/` as a prefix when sending litellm requests. For the complete supported model list, visit https://sambanova.ai/technology/models**
+
+:::
+
+## API Key
+```python
+# env variable
+os.environ['SAMBANOVA_API_KEY']
+```
+
+## Sample Usage
+The example below passes the full set of OpenAI params SambaNova supports:
+```python
+from litellm import completion
+import os
+
+os.environ['SAMBANOVA_API_KEY'] = ""
+response = completion(
+    model="sambanova/Meta-Llama-3.1-8B-Instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": "What do you know about sambanova.ai",
+        }
+    ],
+    max_tokens=10,
+    response_format={ "type": "json_object" },
+    seed=123,
+    stop=["\n\n"],
+    temperature=0.2,
+    top_p=0.9,
+    tool_choice="auto",
+    tools=[],
+    user="user",
+)
+print(response)
+```
+
+## Sample Usage - Streaming
+```python
+from litellm import completion
+import os
+
+os.environ['SAMBANOVA_API_KEY'] = ""
+response = completion(
+    model="sambanova/Meta-Llama-3.1-8B-Instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": "What do you know about sambanova.ai",
+        }
+    ],
+    stream=True,
+    max_tokens=10,
+    response_format={ "type": "json_object" },
+    seed=123,
+    stop=["\n\n"],
+    temperature=0.2,
+    top_p=0.9,
+    tool_choice="auto",
+    tools=[],
+    user="user",
+)
+
+for chunk in response:
+    print(chunk)
+```
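+
+Each streamed chunk carries an OpenAI-style delta. As a minimal sketch (assuming the chunk objects expose `choices[0].delta.content`, as litellm's OpenAI-compatible providers do), the deltas can be assembled into the full reply:
+
+```python
+from litellm import completion
+import os
+
+os.environ['SAMBANOVA_API_KEY'] = ""
+
+response = completion(
+    model="sambanova/Meta-Llama-3.1-8B-Instruct",
+    messages=[{"role": "user", "content": "What do you know about sambanova.ai"}],
+    stream=True,
+)
+
+# content may be None on role-only or final chunks, so guard before appending
+full_reply = ""
+for chunk in response:
+    delta = chunk.choices[0].delta.content
+    if delta is not None:
+        full_reply += delta
+print(full_reply)
+```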
+
+## Usage with LiteLLM Proxy Server
+
+Here's how to call a SambaNova model with the LiteLLM Proxy Server
+
+1. Modify the config.yaml
+
+  ```yaml
+  model_list:
+    - model_name: my-model
+      litellm_params:
+        model: sambanova/ # add sambanova/ prefix to route as SambaNova provider
+        api_key: api-key # your SambaNova API key
+  ```
+
+2. Start the proxy
+
+  ```bash
+  $ litellm --config /path/to/config.yaml
+  ```
+
+3. Send Request to LiteLLM Proxy Server
+
+  <Tabs>
+
+  <TabItem value="openai" label="OpenAI python v1.0.0+">
+
+  ```python
+  import openai
+  client = openai.OpenAI(
+      api_key="sk-1234",             # pass litellm proxy key, if you're using virtual keys
+      base_url="http://0.0.0.0:4000" # litellm-proxy-base url
+  )
+
+  response = client.chat.completions.create(
+      model="my-model",
+      messages=[
+          {
+              "role": "user",
+              "content": "what llm are you"
+          }
+      ],
+  )
+
+  print(response)
+  ```
+
+  </TabItem>
+
+  <TabItem value="curl" label="curl">
+
+  ```shell
+  curl --location 'http://0.0.0.0:4000/chat/completions' \
+  --header 'Authorization: Bearer sk-1234' \
+  --header 'Content-Type: application/json' \
+  --data '{
+    "model": "my-model",
+    "messages": [
+      {
+        "role": "user",
+        "content": "what llm are you"
+      }
+    ]
+  }'
+  ```
+
+  </TabItem>
+
+  </Tabs>
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 52a380147..7e2a2050b 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -174,6 +174,7 @@ const sidebars = {
       "providers/baseten",
       "providers/openrouter",
       "providers/palm",
+      "providers/sambanova",
       // "providers/custom_openai_proxy",
       "providers/custom_llm_server",
       "providers/petals",
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 047927dd9..6439af29e 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -463,6 +463,7 @@ openai_compatible_endpoints: List = [
     "api.together.xyz/v1",
     "app.empower.dev/api/v1",
     "inference.friendli.ai/v1",
+    "api.sambanova.ai/v1",
 ]

 # this is maintained for Exception Mapping
@@ -472,6 +473,7 @@ openai_compatible_providers: List = [
     "groq",
     "nvidia_nim",
     "cerebras",
+    "sambanova",
     "ai21_chat",
     "volcengine",
     "codestral",
@@ -724,6 +726,7 @@ class LlmProviders(str, Enum):
     CODESTRAL = "codestral"
     TEXT_COMPLETION_CODESTRAL = "text-completion-codestral"
     DEEPSEEK = "deepseek"
+    SAMBANOVA = "sambanova"
     MARITALK = "maritalk"
     VOYAGE = "voyage"
     CLOUDFLARE = "cloudflare"
@@ -953,6 +956,7 @@ from .llms.OpenAI.gpt_transformation import (
 )
 from .llms.nvidia_nim import NvidiaNimConfig
 from .llms.cerebras.chat import CerebrasConfig
+from .llms.sambanova.chat import SambanovaConfig
 from .llms.AI21.chat import AI21ChatConfig
 from .llms.fireworks_ai import FireworksAIConfig
 from .llms.volcengine import VolcEngineConfig
diff --git a/litellm/litellm_core_utils/get_llm_provider_logic.py b/litellm/litellm_core_utils/get_llm_provider_logic.py
index 5afa96649..eaa8f730d 100644
--- a/litellm/litellm_core_utils/get_llm_provider_logic.py
+++ b/litellm/litellm_core_utils/get_llm_provider_logic.py
@@ -173,6 +173,13 @@ def get_llm_provider(
             or "https://api.cerebras.ai/v1"
         )  # type: ignore
         dynamic_api_key = api_key or get_secret("CEREBRAS_API_KEY")
+    elif custom_llm_provider == "sambanova":
+        api_base = (
+            api_base
+            or get_secret("SAMBANOVA_API_BASE")
+            or "https://api.sambanova.ai/v1"
+        )  # type: ignore
+        dynamic_api_key = api_key or get_secret("SAMBANOVA_API_KEY")
     elif (custom_llm_provider == "ai21_chat") or (
         custom_llm_provider == "ai21" and model in litellm.ai21_chat_models
     ):
@@ -336,6 +343,9 @@ def get_llm_provider(
         elif endpoint == "https://api.cerebras.ai/v1":
             custom_llm_provider = "cerebras"
             dynamic_api_key = get_secret("CEREBRAS_API_KEY")
+        elif endpoint == "https://api.sambanova.ai/v1":
+            custom_llm_provider = "sambanova"
+            dynamic_api_key = get_secret("SAMBANOVA_API_KEY")
         elif endpoint == "https://api.ai21.com/studio/v1":
             custom_llm_provider = "ai21_chat"
             dynamic_api_key = get_secret("AI21_API_KEY")
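With the wiring above, a request can reach the SambaNova route two ways: via the `sambanova/` model prefix, or via an explicit `api_base` of `https://api.sambanova.ai/v1`. A minimal sketch of the prefix path, assuming litellm's public `get_llm_provider` helper and its `(model, provider, api_key, api_base)` return tuple:

```python
import os
from litellm import get_llm_provider

os.environ["SAMBANOVA_API_KEY"] = "my-sambanova-key"  # placeholder key

# The "sambanova/" prefix selects the provider; api_base and key then
# fall back to the defaults wired up in get_llm_provider_logic.py above.
model, provider, api_key, api_base = get_llm_provider(
    model="sambanova/Meta-Llama-3.1-8B-Instruct"
)
print(provider)  # sambanova
print(api_base)  # https://api.sambanova.ai/v1
```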
diff --git a/litellm/llms/sambanova/chat.py b/litellm/llms/sambanova/chat.py
new file mode 100644
index 000000000..a194a1e0f
--- /dev/null
+++ b/litellm/llms/sambanova/chat.py
@@ -0,0 +1,91 @@
+"""
+Sambanova Chat Completions API
+
+This API is OpenAI-compatible, so no request/response translation is needed or occurs.
+"""
+
+import types
+from typing import Optional
+
+
+class SambanovaConfig:
+    """
+    Reference: https://community.sambanova.ai/t/create-chat-completion-api/
+
+    Below are the parameters:
+    """
+
+    max_tokens: Optional[int] = None
+    response_format: Optional[dict] = None
+    seed: Optional[int] = None
+    stop: Optional[str] = None
+    stream: Optional[bool] = None
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    tool_choice: Optional[str] = None
+    tools: Optional[list] = None
+    user: Optional[str] = None
+
+    def __init__(
+        self,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[dict] = None,
+        seed: Optional[int] = None,
+        stop: Optional[str] = None,
+        stream: Optional[bool] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        tool_choice: Optional[str] = None,
+        tools: Optional[list] = None,
+        user: Optional[str] = None,
+    ) -> None:
+        # store any explicitly passed values as class-level config,
+        # following the convention used by other litellm provider configs
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        Get the supported OpenAI params for the given model
+        """
+        return [
+            "max_tokens",
+            "response_format",
+            "seed",
+            "stop",
+            "stream",
+            "temperature",
+            "top_p",
+            "tool_choice",
+            "tools",
+            "user",
+        ]
+
+    def map_openai_params(
+        self, model: str, non_default_params: dict, optional_params: dict
+    ) -> dict:
+        # copy only the supported OpenAI params into optional_params;
+        # anything unsupported is dropped
+        supported_openai_params = self.get_supported_openai_params(model=model)
+        for param, value in non_default_params.items():
+            if param in supported_openai_params:
+                optional_params[param] = value
+        return optional_params
diff --git a/litellm/main.py b/litellm/main.py
index 80136e997..eb66bd5c1 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -388,6 +388,7 @@ async def acompletion(
             or custom_llm_provider == "groq"
             or custom_llm_provider == "nvidia_nim"
             or custom_llm_provider == "cerebras"
+            or custom_llm_provider == "sambanova"
             or custom_llm_provider == "ai21_chat"
             or custom_llm_provider == "volcengine"
             or custom_llm_provider == "codestral"
@@ -1310,6 +1311,7 @@ def completion(
             or custom_llm_provider == "groq"
             or custom_llm_provider == "nvidia_nim"
             or custom_llm_provider == "cerebras"
+            or custom_llm_provider == "sambanova"
             or custom_llm_provider == "ai21_chat"
             or custom_llm_provider == "volcengine"
             or custom_llm_provider == "codestral"
@@ -3164,6 +3166,7 @@ async def aembedding(*args, **kwargs) -> EmbeddingResponse:
             or custom_llm_provider == "groq"
             or custom_llm_provider == "nvidia_nim"
             or custom_llm_provider == "cerebras"
+            or custom_llm_provider == "sambanova"
             or custom_llm_provider == "ai21_chat"
             or custom_llm_provider == "volcengine"
             or custom_llm_provider == "deepseek"
@@ -3829,6 +3832,7 @@ async def atext_completion(
             or custom_llm_provider == "groq"
             or custom_llm_provider == "nvidia_nim"
             or custom_llm_provider == "cerebras"
+            or custom_llm_provider == "sambanova"
             or custom_llm_provider == "ai21_chat"
             or custom_llm_provider == "volcengine"
             or custom_llm_provider == "text-completion-codestral"
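Since `SambanovaConfig` is exported from `litellm/__init__.py` above, the param-mapping behavior can be exercised directly. A small sketch with hypothetical request values, mirroring how litellm filters kwargs before dispatch:

```python
from litellm import SambanovaConfig

config = SambanovaConfig()

# "logit_bias" is not in get_supported_openai_params(), so it is dropped;
# the supported params pass through unchanged
optional_params = config.map_openai_params(
    model="Meta-Llama-3.1-8B-Instruct",
    non_default_params={
        "max_tokens": 256,
        "temperature": 0.2,
        "logit_bias": {1234: -100},  # hypothetical unsupported param
    },
    optional_params={},
)
print(optional_params)  # {'max_tokens': 256, 'temperature': 0.2}
```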
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index d501d8572..85b0a7677 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1392,6 +1392,36 @@
         "mode": "chat",
         "supports_function_calling": true
     },
+    "sambanova/Meta-Llama-3.1-8B-Instruct": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000001,
+        "litellm_provider": "sambanova",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "sambanova/Meta-Llama-3.1-70B-Instruct": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 0.0000003,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "sambanova",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "sambanova/Meta-Llama-3.1-405B-Instruct": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 0.0000006,
+        "output_cost_per_token": 0.0000006,
+        "litellm_provider": "sambanova",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
     "friendliai/mixtral-8x7b-instruct-v0-1": {
         "max_tokens": 32768,
         "max_input_tokens": 32768,
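The per-token prices registered above are what litellm's cost tracking reads. A quick sketch of the arithmetic, assuming litellm's `cost_per_token` helper and its `(prompt_cost, completion_cost)` return shape:

```python
from litellm import cost_per_token

# 1,000 prompt tokens + 500 completion tokens on the 8B model,
# priced above at $0.0000001 per token in each direction
prompt_cost, completion_cost = cost_per_token(
    model="sambanova/Meta-Llama-3.1-8B-Instruct",
    prompt_tokens=1000,
    completion_tokens=500,
)
print(prompt_cost + completion_cost)  # expected: 0.00015 (USD)
```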