From 160aa2e578e797024946042bbf068c3955113670 Mon Sep 17 00:00:00 2001
From: jhpiedrahitao
Date: Tue, 11 Mar 2025 17:17:04 -0500
Subject: [PATCH 1/4] add sambanova to completion input params table

---
 docs/my-website/docs/completion/input.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/my-website/docs/completion/input.md b/docs/my-website/docs/completion/input.md
index a8aa79b8cb..db49315e89 100644
--- a/docs/my-website/docs/completion/input.md
+++ b/docs/my-website/docs/completion/input.md
@@ -55,6 +55,7 @@ Use `litellm.get_supported_openai_params()` for an updated list of params for ea
 |Bedrock| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | | | | | ✅ (model dependent) | |
 |Sagemaker| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | |
 |TogetherAI| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | ✅ | | | ✅ | | ✅ | ✅ | | | |
+|Sambanova| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | | | ✅ | | ✅ | ✅ | | | |
 |AlephAlpha| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | |
 |NLP Cloud| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | |
 |Petals| ✅ | ✅ | | ✅ | ✅ | | | | | |

From 167f3ebbc8d11e3601e554ae1b5b0836a3d86629 Mon Sep 17 00:00:00 2001
From: jhpiedrahitao
Date: Tue, 11 Mar 2025 17:57:23 -0500
Subject: [PATCH 2/4] update sambanova supported args

---
 litellm/llms/sambanova/chat.py | 40 ++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/litellm/llms/sambanova/chat.py b/litellm/llms/sambanova/chat.py
index abf55d44fb..53e6032f2e 100644
--- a/litellm/llms/sambanova/chat.py
+++ b/litellm/llms/sambanova/chat.py
@@ -4,7 +4,7 @@ Sambanova Chat Completions API
 this is OpenAI compatible - no translation needed / occurs
 """
 
-from typing import Optional
+from typing import Optional, Union
 
 from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
 
@@ -17,26 +17,28 @@ class SambanovaConfig(OpenAIGPTConfig):
     """
 
     max_tokens: Optional[int] = None
-    response_format: Optional[dict] = None
-    seed: Optional[int] = None
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    top_k: Optional[int] = None
+    stop: Optional[Union[str, list]] = None
     stream: Optional[bool] = None
-    top_p: Optional[int] = None
+    stream_options: Optional[dict] = None
     tool_choice: Optional[str] = None
+    response_format: Optional[dict] = None
     tools: Optional[list] = None
-    user: Optional[str] = None
-
+
     def __init__(
         self,
         max_tokens: Optional[int] = None,
         response_format: Optional[dict] = None,
-        seed: Optional[int] = None,
         stop: Optional[str] = None,
         stream: Optional[bool] = None,
+        stream_options: Optional[dict] = None,
         temperature: Optional[float] = None,
-        top_p: Optional[int] = None,
+        top_p: Optional[float] = None,
+        top_k: Optional[int] = None,
         tool_choice: Optional[str] = None,
         tools: Optional[list] = None,
-        user: Optional[str] = None,
     ) -> None:
         locals_ = locals().copy()
         for key, value in locals_.items():
@@ -56,12 +58,28 @@ class SambanovaConfig(OpenAIGPTConfig):
         return [
             "max_tokens",
             "response_format",
-            "seed",
             "stop",
             "stream",
+            "stream_options",
             "temperature",
             "top_p",
+            "top_k",
             "tool_choice",
             "tools",
-            "user",
+            "parallel_tool_calls"
         ]
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        supported_openai_params = self.get_supported_openai_params(model=model)
+        for param, value in non_default_params.items():
+            if param == "max_completion_tokens":
+                optional_params["max_tokens"] = value
+            elif param in supported_openai_params:
+                optional_params[param] = value
+        return optional_params
\ No newline at end of file
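A minimal usage sketch for the behavior added in PATCH 2/4 (not part of the patch itself; it assumes the patched litellm build is installed and simply exercises the two methods defined above):

# Hypothetical sketch (not part of the patch): exercise the updated SambanovaConfig
# to confirm the new param list and the max_completion_tokens -> max_tokens mapping.
from litellm.llms.sambanova.chat import SambanovaConfig

config = SambanovaConfig()

# Expected to now include "top_k", "stream_options" and "parallel_tool_calls",
# and no longer "seed" or "user".
print(config.get_supported_openai_params(model="sambanova/Meta-Llama-3.3-70B-Instruct"))

# map_openai_params renames max_completion_tokens and passes supported params through.
mapped = config.map_openai_params(
    non_default_params={"max_completion_tokens": 256, "top_k": 40},
    optional_params={},
    model="sambanova/Meta-Llama-3.3-70B-Instruct",
    drop_params=False,
)
print(mapped)  # expected: {"max_tokens": 256, "top_k": 40}
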
From 488dccd82410968b8f580697298d3442f0dc1cb1 Mon Sep 17 00:00:00 2001
From: jhpiedrahitao
Date: Tue, 11 Mar 2025 18:26:10 -0500
Subject: [PATCH 3/4] update sambanova supported models

---
 ...odel_prices_and_context_window_backup.json | 189 +++++++++++++++---
 model_prices_and_context_window.json          | 189 +++++++++++++++---
 2 files changed, 332 insertions(+), 46 deletions(-)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 36eaa2f642..306914252a 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -9535,7 +9535,8 @@
         "litellm_provider": "sambanova",
         "supports_function_calling": true,
         "mode": "chat",
-        "supports_tool_choice": true
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
     },
     "sambanova/Meta-Llama-3.1-70B-Instruct": {
         "max_tokens": 128000,
@@ -9546,7 +9547,8 @@
         "litellm_provider": "sambanova",
         "supports_function_calling": true,
         "mode": "chat",
-        "supports_tool_choice": true
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
     },
     "sambanova/Meta-Llama-3.1-405B-Instruct": {
         "max_tokens": 16000,
@@ -9557,51 +9559,192 @@
         "litellm_provider": "sambanova",
         "supports_function_calling": true,
         "mode": "chat",
-        "supports_tool_choice": true
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
     },
     "sambanova/Meta-Llama-3.2-1B-Instruct": {
         "max_tokens": 16000,
         "max_input_tokens": 16000,
         "max_output_tokens": 16000,
-        "input_cost_per_token": 0.0000004,
-        "output_cost_per_token": 0.0000008,
+        "input_cost_per_token": 0.00000004,
+        "output_cost_per_token": 0.00000008,
         "litellm_provider": "sambanova",
-        "supports_function_calling": true,
+        "supports_function_calling": false,
         "mode": "chat",
-        "supports_tool_choice": true
+        "supports_tool_choice": false,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
     },
     "sambanova/Meta-Llama-3.2-3B-Instruct": {
-        "max_tokens": 4000,
-        "max_input_tokens": 4000,
-        "max_output_tokens": 4000,
-        "input_cost_per_token": 0.0000008,
-        "output_cost_per_token": 0.0000016,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat",
-        "supports_tool_choice": true
-    },
-    "sambanova/Qwen2.5-Coder-32B-Instruct": {
         "max_tokens": 8000,
         "max_input_tokens": 8000,
         "max_output_tokens": 8000,
+        "input_cost_per_token": 0.00000008,
+        "output_cost_per_token": 0.00000016,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": false,
+        "mode": "chat",
+        "supports_tool_choice": false,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/Llama-3.2-11B-Vision-Instruct": {
+        "max_tokens": 4000,
+        "max_input_tokens": 4000,
+        "max_output_tokens": 4000,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000030,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": false,
+        "mode": "chat",
+        "supports_tool_choice": false,
+        "supports_vision": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing",
+        "metadata": {"notes": "For vision models, images are converted to 6432 input tokens and are billed at that amount"}
+    },
+    "sambanova/Llama-3.2-90B-Vision-Instruct": {
+        "max_tokens": 4000,
+        "max_input_tokens": 4000,
+        "max_output_tokens": 4000,
+        "input_cost_per_token": 0.00000080,
+        "output_cost_per_token": 0.0000016,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": false,
+        "mode": "chat",
+        "supports_tool_choice": false,
+        "supports_vision": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing",
+        "metadata": {"notes": "For vision models, images are converted to 6432 input tokens and are billed at that amount"}
+    },
+    "sambanova/Meta-Llama-3.3-70B-Instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.0000006,
+        "output_cost_per_token": 0.0000012,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/Llama-3.1-Swallow-8B-Instruct-v0.3": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000002,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/Llama-3.1-Swallow-70B-Instruct-v0.3": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.0000006,
+        "output_cost_per_token": 0.0000012,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/Llama-3.1-Tulu-3-405B": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.000005,
+        "output_cost_per_token": 0.000010,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/Meta-Llama-Guard-3-8B": {
+        "max_tokens": 8000,
+        "max_input_tokens": 8000,
+        "max_output_tokens": 8000,
+        "input_cost_per_token": 0.0000003,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": false,
+        "mode": "chat",
+        "supports_tool_choice": false,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/Qwen2.5-Coder-32B-Instruct": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
         "input_cost_per_token": 0.0000015,
         "output_cost_per_token": 0.000003,
         "litellm_provider": "sambanova",
         "supports_function_calling": true,
         "mode": "chat",
-        "supports_tool_choice": true
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
     },
     "sambanova/Qwen2.5-72B-Instruct": {
-        "max_tokens": 8000,
-        "max_input_tokens": 8000,
-        "max_output_tokens": 8000,
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
         "input_cost_per_token": 0.000002,
         "output_cost_per_token": 0.000004,
         "litellm_provider": "sambanova",
         "supports_function_calling": true,
         "mode": "chat",
-        "supports_tool_choice": true
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/QwQ-32B-Preview": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.0000015,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/QwQ-32B": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.0000015,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/DeepSeek-R1-Distill-Llama-70B": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 0.0000007,
+        "output_cost_per_token": 0.0000014,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/DeepSeek-R1": {
+        "max_tokens": 8000,
+        "max_input_tokens": 8000,
+        "max_output_tokens": 8000,
+        "input_cost_per_token": 0.000005,
+        "output_cost_per_token": 0.000007,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
     },
     "assemblyai/nano": {
         "mode": "audio_transcription",
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 36eaa2f642..306914252a 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -9535,7 +9535,8 @@
         "litellm_provider": "sambanova",
         "supports_function_calling": true,
         "mode": "chat",
-        "supports_tool_choice": true
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
     },
     "sambanova/Meta-Llama-3.1-70B-Instruct": {
         "max_tokens": 128000,
@@ -9546,7 +9547,8 @@
         "litellm_provider": "sambanova",
         "supports_function_calling": true,
         "mode": "chat",
-        "supports_tool_choice": true
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
     },
     "sambanova/Meta-Llama-3.1-405B-Instruct": {
         "max_tokens": 16000,
@@ -9557,51 +9559,192 @@
         "litellm_provider": "sambanova",
         "supports_function_calling": true,
         "mode": "chat",
-        "supports_tool_choice": true
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
     },
     "sambanova/Meta-Llama-3.2-1B-Instruct": {
         "max_tokens": 16000,
         "max_input_tokens": 16000,
         "max_output_tokens": 16000,
-        "input_cost_per_token": 0.0000004,
-        "output_cost_per_token": 0.0000008,
+        "input_cost_per_token": 0.00000004,
+        "output_cost_per_token": 0.00000008,
         "litellm_provider": "sambanova",
-        "supports_function_calling": true,
+        "supports_function_calling": false,
         "mode": "chat",
-        "supports_tool_choice": true
+        "supports_tool_choice": false,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
     },
     "sambanova/Meta-Llama-3.2-3B-Instruct": {
-        "max_tokens": 4000,
-        "max_input_tokens": 4000,
-        "max_output_tokens": 4000,
-        "input_cost_per_token": 0.0000008,
-        "output_cost_per_token": 0.0000016,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat",
-        "supports_tool_choice": true
-    },
-    "sambanova/Qwen2.5-Coder-32B-Instruct": {
         "max_tokens": 8000,
         "max_input_tokens": 8000,
         "max_output_tokens": 8000,
+        "input_cost_per_token": 0.00000008,
+        "output_cost_per_token": 0.00000016,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": false,
+        "mode": "chat",
+        "supports_tool_choice": false,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/Llama-3.2-11B-Vision-Instruct": {
+        "max_tokens": 4000,
+        "max_input_tokens": 4000,
+        "max_output_tokens": 4000,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000030,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": false,
+        "mode": "chat",
+        "supports_tool_choice": false,
+        "supports_vision": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing",
+        "metadata": {"notes": "For vision models, images are converted to 6432 input tokens and are billed at that amount"}
+    },
+    "sambanova/Llama-3.2-90B-Vision-Instruct": {
+        "max_tokens": 4000,
+        "max_input_tokens": 4000,
+        "max_output_tokens": 4000,
+        "input_cost_per_token": 0.00000080,
+        "output_cost_per_token": 0.0000016,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": false,
+        "mode": "chat",
+        "supports_tool_choice": false,
+        "supports_vision": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing",
+        "metadata": {"notes": "For vision models, images are converted to 6432 input tokens and are billed at that amount"}
+    },
+    "sambanova/Meta-Llama-3.3-70B-Instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.0000006,
+        "output_cost_per_token": 0.0000012,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/Llama-3.1-Swallow-8B-Instruct-v0.3": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000002,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/Llama-3.1-Swallow-70B-Instruct-v0.3": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.0000006,
+        "output_cost_per_token": 0.0000012,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/Llama-3.1-Tulu-3-405B": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.000005,
+        "output_cost_per_token": 0.000010,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/Meta-Llama-Guard-3-8B": {
+        "max_tokens": 8000,
+        "max_input_tokens": 8000,
+        "max_output_tokens": 8000,
+        "input_cost_per_token": 0.0000003,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": false,
+        "mode": "chat",
+        "supports_tool_choice": false,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/Qwen2.5-Coder-32B-Instruct": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
         "input_cost_per_token": 0.0000015,
         "output_cost_per_token": 0.000003,
         "litellm_provider": "sambanova",
         "supports_function_calling": true,
         "mode": "chat",
-        "supports_tool_choice": true
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
     },
     "sambanova/Qwen2.5-72B-Instruct": {
-        "max_tokens": 8000,
-        "max_input_tokens": 8000,
-        "max_output_tokens": 8000,
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
         "input_cost_per_token": 0.000002,
         "output_cost_per_token": 0.000004,
         "litellm_provider": "sambanova",
         "supports_function_calling": true,
         "mode": "chat",
-        "supports_tool_choice": true
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/QwQ-32B-Preview": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.0000015,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/QwQ-32B": {
+        "max_tokens": 16000,
+        "max_input_tokens": 16000,
+        "max_output_tokens": 16000,
+        "input_cost_per_token": 0.0000015,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/DeepSeek-R1-Distill-Llama-70B": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 0.0000007,
+        "output_cost_per_token": 0.0000014,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
+    },
+    "sambanova/DeepSeek-R1": {
+        "max_tokens": 8000,
+        "max_input_tokens": 8000,
+        "max_output_tokens": 8000,
+        "input_cost_per_token": 0.000005,
+        "output_cost_per_token": 0.000007,
+        "litellm_provider": "sambanova",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true,
+        "source": "https://cloud.sambanova.ai/plans/pricing"
     },
     "assemblyai/nano": {
         "mode": "audio_transcription",

From a4906a1afd6d357146fc01530022a2f94757669e Mon Sep 17 00:00:00 2001
From: jhpiedrahitao
Date: Wed, 12 Mar 2025 09:50:49 -0500
Subject: [PATCH 4/4] minor changes

---
 litellm/llms/sambanova/chat.py                       |  3 +++
 litellm/model_prices_and_context_window_backup.json  | 12 ------------
 model_prices_and_context_window.json                 | 12 ------------
 3 files changed, 3 insertions(+), 24 deletions(-)

diff --git a/litellm/llms/sambanova/chat.py b/litellm/llms/sambanova/chat.py
index 53e6032f2e..0810785c67 100644
--- a/litellm/llms/sambanova/chat.py
+++ b/litellm/llms/sambanova/chat.py
@@ -76,6 +76,9 @@ class SambanovaConfig(OpenAIGPTConfig):
         model: str,
         drop_params: bool,
     ) -> dict:
+        """
+        map max_completion_tokens param to max_tokens
+        """
         supported_openai_params = self.get_supported_openai_params(model=model)
         for param, value in non_default_params.items():
             if param == "max_completion_tokens":
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 306914252a..5a8e938140 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -9698,18 +9698,6 @@
         "supports_tool_choice": true,
         "source": "https://cloud.sambanova.ai/plans/pricing"
     },
-    "sambanova/QwQ-32B-Preview": {
-        "max_tokens": 16000,
-        "max_input_tokens": 16000,
-        "max_output_tokens": 16000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000003,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat",
-        "supports_tool_choice": true,
-        "source": "https://cloud.sambanova.ai/plans/pricing"
-    },
     "sambanova/QwQ-32B": {
         "max_tokens": 16000,
         "max_input_tokens": 16000,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 306914252a..5a8e938140 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -9698,18 +9698,6 @@
         "supports_tool_choice": true,
         "source": "https://cloud.sambanova.ai/plans/pricing"
     },
-    "sambanova/QwQ-32B-Preview": {
-        "max_tokens": 16000,
-        "max_input_tokens": 16000,
-        "max_output_tokens": 16000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000003,
-        "litellm_provider": "sambanova",
-        "supports_function_calling": true,
-        "mode": "chat",
-        "supports_tool_choice": true,
-        "source": "https://cloud.sambanova.ai/plans/pricing"
-    },
     "sambanova/QwQ-32B": {
         "max_tokens": 16000,
         "max_input_tokens": 16000,