diff --git a/docs/my-website/docs/providers/volcano.md b/docs/my-website/docs/providers/volcano.md new file mode 100644 index 0000000000..1742a43d81 --- /dev/null +++ b/docs/my-website/docs/providers/volcano.md @@ -0,0 +1,98 @@ +# Volcano Engine (Volcengine) +https://www.volcengine.com/docs/82379/1263482 + +:::tip + +**We support ALL Volcengine NIM models, just set `model=volcengine/` as a prefix when sending litellm requests** + +::: + +## API Key +```python +# env variable +os.environ['VOLCENGINE_API_KEY'] +``` + +## Sample Usage +```python +from litellm import completion +import os + +os.environ['VOLCENGINE_API_KEY'] = "" +response = completion( + model="volcengine/", + messages=[ + { + "role": "user", + "content": "What's the weather like in Boston today in Fahrenheit?", + } + ], + temperature=0.2, # optional + top_p=0.9, # optional + frequency_penalty=0.1, # optional + presence_penalty=0.1, # optional + max_tokens=10, # optional + stop=["\n\n"], # optional +) +print(response) +``` + +## Sample Usage - Streaming +```python +from litellm import completion +import os + +os.environ['VOLCENGINE_API_KEY'] = "" +response = completion( + model="volcengine/", + messages=[ + { + "role": "user", + "content": "What's the weather like in Boston today in Fahrenheit?", + } + ], + stream=True, + temperature=0.2, # optional + top_p=0.9, # optional + frequency_penalty=0.1, # optional + presence_penalty=0.1, # optional + max_tokens=10, # optional + stop=["\n\n"], # optional +) + +for chunk in response: + print(chunk) +``` + + +## Supported Models - 💥 ALL Volcengine NIM Models Supported! +We support ALL `volcengine` models, just set `volcengine/` as a prefix when sending completion requests + +## Sample Usage - LiteLLM Proxy + +### Config.yaml setting + +```yaml +model_list: + - model_name: volcengine-model + litellm_params: + model: volcengine/ + api_key: os.environ/VOLCENGINE_API_KEY +``` + +### Send Request + +```shell +curl --location 'http://localhost:4000/chat/completions' \ + --header 'Authorization: Bearer sk-1234' \ + --header 'Content-Type: application/json' \ + --data '{ + "model": "volcengine-model", + "messages": [ + { + "role": "user", + "content": "here is my api key. openai_api_key=sk-1234" + } + ] +}' +``` \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 9835a260b3..31bc6abcb7 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -147,6 +147,7 @@ const sidebars = { "providers/watsonx", "providers/predibase", "providers/nvidia_nim", + "providers/volcano", "providers/triton-inference-server", "providers/ollama", "providers/perplexity", diff --git a/litellm/__init__.py b/litellm/__init__.py index cee80a32df..f1cc32cd16 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -413,6 +413,7 @@ openai_compatible_providers: List = [ "mistral", "groq", "nvidia_nim", + "volcengine", "codestral", "deepseek", "deepinfra", @@ -643,6 +644,7 @@ provider_list: List = [ "mistral", "groq", "nvidia_nim", + "volcengine", "codestral", "text-completion-codestral", "deepseek", @@ -818,6 +820,7 @@ from .llms.openai import ( ) from .llms.nvidia_nim import NvidiaNimConfig from .llms.fireworks_ai import FireworksAIConfig +from .llms.volcengine import VolcEngineConfig from .llms.text_completion_codestral import MistralTextCompletionConfig from .llms.azure import ( AzureOpenAIConfig, diff --git a/litellm/llms/volcengine.py b/litellm/llms/volcengine.py new file mode 100644 index 0000000000..eb289d1c49 --- /dev/null +++ b/litellm/llms/volcengine.py @@ -0,0 +1,87 @@ +import types +from typing import Literal, Optional, Union + +import litellm + + +class VolcEngineConfig: + frequency_penalty: Optional[int] = None + function_call: Optional[Union[str, dict]] = None + functions: Optional[list] = None + logit_bias: Optional[dict] = None + max_tokens: Optional[int] = None + n: Optional[int] = None + presence_penalty: Optional[int] = None + stop: Optional[Union[str, list]] = None + temperature: Optional[int] = None + top_p: Optional[int] = None + response_format: Optional[dict] = None + + def __init__( + self, + frequency_penalty: Optional[int] = None, + function_call: Optional[Union[str, dict]] = None, + functions: Optional[list] = None, + logit_bias: Optional[dict] = None, + max_tokens: Optional[int] = None, + n: Optional[int] = None, + presence_penalty: Optional[int] = None, + stop: Optional[Union[str, list]] = None, + temperature: Optional[int] = None, + top_p: Optional[int] = None, + response_format: Optional[dict] = None, + ) -> None: + locals_ = locals().copy() + for key, value in locals_.items(): + if key != "self" and value is not None: + setattr(self.__class__, key, value) + + @classmethod + def get_config(cls): + return { + k: v + for k, v in cls.__dict__.items() + if not k.startswith("__") + and not isinstance( + v, + ( + types.FunctionType, + types.BuiltinFunctionType, + classmethod, + staticmethod, + ), + ) + and v is not None + } + + def get_supported_openai_params(self, model: str) -> list: + return [ + "frequency_penalty", + "logit_bias", + "logprobs", + "top_logprobs", + "max_tokens", + "n", + "presence_penalty", + "seed", + "stop", + "stream", + "stream_options", + "temperature", + "top_p", + "tools", + "tool_choice", + "function_call", + "functions", + "max_retries", + "extra_headers", + ] # works across all models + + def map_openai_params( + self, non_default_params: dict, optional_params: dict, model: str + ) -> dict: + supported_openai_params = self.get_supported_openai_params(model) + for param, value in non_default_params.items(): + if param in supported_openai_params: + optional_params[param] = value + return optional_params diff --git a/litellm/main.py b/litellm/main.py index b7aa47ab74..6495819363 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -349,6 +349,7 @@ async def acompletion( or custom_llm_provider == "perplexity" or custom_llm_provider == "groq" or custom_llm_provider == "nvidia_nim" + or custom_llm_provider == "volcengine" or custom_llm_provider == "codestral" or custom_llm_provider == "text-completion-codestral" or custom_llm_provider == "deepseek" @@ -1192,6 +1193,7 @@ def completion( or custom_llm_provider == "perplexity" or custom_llm_provider == "groq" or custom_llm_provider == "nvidia_nim" + or custom_llm_provider == "volcengine" or custom_llm_provider == "codestral" or custom_llm_provider == "deepseek" or custom_llm_provider == "anyscale" @@ -2954,6 +2956,7 @@ async def aembedding(*args, **kwargs) -> EmbeddingResponse: or custom_llm_provider == "perplexity" or custom_llm_provider == "groq" or custom_llm_provider == "nvidia_nim" + or custom_llm_provider == "volcengine" or custom_llm_provider == "deepseek" or custom_llm_provider == "fireworks_ai" or custom_llm_provider == "ollama" @@ -3533,6 +3536,7 @@ async def atext_completion( or custom_llm_provider == "perplexity" or custom_llm_provider == "groq" or custom_llm_provider == "nvidia_nim" + or custom_llm_provider == "volcengine" or custom_llm_provider == "text-completion-codestral" or custom_llm_provider == "deepseek" or custom_llm_provider == "fireworks_ai" diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index a3b0e6ea26..2ceb11a79b 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -1222,44 +1222,6 @@ def test_completion_fireworks_ai(): pytest.fail(f"Error occurred: {e}") -def test_fireworks_ai_tool_calling(): - litellm.set_verbose = True - model_name = "fireworks_ai/accounts/fireworks/models/firefunction-v2" - tools = [ - { - "type": "function", - "function": { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", - }, - "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, - }, - "required": ["location"], - }, - }, - } - ] - messages = [ - { - "role": "user", - "content": "What's the weather like in Boston today in Fahrenheit?", - } - ] - response = completion( - model=model_name, - messages=messages, - tools=tools, - tool_choice="required", - ) - print(response) - - @pytest.mark.skip(reason="this test is flaky") def test_completion_perplexity_api(): try: @@ -3508,6 +3470,30 @@ def test_completion_deep_infra_mistral(): # test_completion_deep_infra_mistral() +@pytest.mark.skip(reason="Local test - don't have a volcengine account as yet") +def test_completion_volcengine(): + litellm.set_verbose = True + model_name = "volcengine/" + try: + response = completion( + model=model_name, + messages=[ + { + "role": "user", + "content": "What's the weather like in Boston today in Fahrenheit?", + } + ], + api_key="", + ) + # Add any assertions here to check the response + print(response) + + except litellm.exceptions.Timeout as e: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + def test_completion_nvidia_nim(): model_name = "nvidia_nim/databricks/dbrx-instruct" try: diff --git a/litellm/utils.py b/litellm/utils.py index 76c93d5898..42e8cba30b 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2413,6 +2413,7 @@ def get_optional_params( and custom_llm_provider != "together_ai" and custom_llm_provider != "groq" and custom_llm_provider != "nvidia_nim" + and custom_llm_provider != "volcengine" and custom_llm_provider != "deepseek" and custom_llm_provider != "codestral" and custom_llm_provider != "mistral" @@ -3089,6 +3090,17 @@ def get_optional_params( optional_params=optional_params, model=model, ) + elif custom_llm_provider == "volcengine": + supported_params = get_supported_openai_params( + model=model, custom_llm_provider=custom_llm_provider + ) + _check_valid_arg(supported_params=supported_params) + optional_params = litellm.VolcEngineConfig().map_openai_params( + non_default_params=non_default_params, + optional_params=optional_params, + model=model, + ) + elif custom_llm_provider == "groq": supported_params = get_supported_openai_params( model=model, custom_llm_provider=custom_llm_provider @@ -3659,6 +3671,8 @@ def get_supported_openai_params( return litellm.FireworksAIConfig().get_supported_openai_params() elif custom_llm_provider == "nvidia_nim": return litellm.NvidiaNimConfig().get_supported_openai_params() + elif custom_llm_provider == "volcengine": + return litellm.VolcEngineConfig().get_supported_openai_params(model=model) elif custom_llm_provider == "groq": return [ "temperature", @@ -4023,6 +4037,10 @@ def get_llm_provider( # nvidia_nim is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1 api_base = "https://integrate.api.nvidia.com/v1" dynamic_api_key = get_secret("NVIDIA_NIM_API_KEY") + elif custom_llm_provider == "volcengine": + # volcengine is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1 + api_base = "https://ark.cn-beijing.volces.com/api/v3" + dynamic_api_key = get_secret("VOLCENGINE_API_KEY") elif custom_llm_provider == "codestral": # codestral is openai compatible, we just need to set this to custom_openai and have the api_base be https://codestral.mistral.ai/v1 api_base = "https://codestral.mistral.ai/v1" @@ -4945,6 +4963,11 @@ def validate_environment(model: Optional[str] = None) -> dict: keys_in_environment = True else: missing_keys.append("NVIDIA_NIM_API_KEY") + elif custom_llm_provider == "volcengine": + if "VOLCENGINE_API_KEY" in os.environ: + keys_in_environment = True + else: + missing_keys.append("VOLCENGINE_API_KEY") elif ( custom_llm_provider == "codestral" or custom_llm_provider == "text-completion-codestral"