diff --git a/docs/my-website/docs/providers/nvidia_nim.md b/docs/my-website/docs/providers/nvidia_nim.md
new file mode 100644
index 000000000..f90450768
--- /dev/null
+++ b/docs/my-website/docs/providers/nvidia_nim.md
@@ -0,0 +1,103 @@
+# Nvidia NIM
+https://docs.api.nvidia.com/nim/reference/
+
+:::tip
+
+**We support ALL Nvidia NIM models. Just set `nvidia_nim/` as a prefix on the model name when sending litellm requests.**
+
+:::
+
+## API Key
+```python
+# env variable
+os.environ['NVIDIA_NIM_API_KEY']
+```
+
+## Sample Usage
+```python
+from litellm import completion
+import os
+
+os.environ['NVIDIA_NIM_API_KEY'] = ""
+response = completion(
+    model="nvidia_nim/meta/llama3-70b", # any model from the supported models table below
+    messages=[
+        {
+            "role": "user",
+            "content": "What's the weather like in Boston today in Fahrenheit?",
+        }
+    ],
+    temperature=0.2, # optional
+    top_p=0.9, # optional
+    frequency_penalty=0.1, # optional
+    presence_penalty=0.1, # optional
+    max_tokens=10, # optional
+    stop=["\n\n"], # optional
+)
+print(response)
+```
+
+## Sample Usage - Streaming
+```python
+from litellm import completion
+import os
+
+os.environ['NVIDIA_NIM_API_KEY'] = ""
+response = completion(
+    model="nvidia_nim/meta/llama3-70b", # any model from the supported models table below
+    messages=[
+        {
+            "role": "user",
+            "content": "What's the weather like in Boston today in Fahrenheit?",
+        }
+    ],
+    stream=True,
+    temperature=0.2, # optional
+    top_p=0.9, # optional
+    frequency_penalty=0.1, # optional
+    presence_penalty=0.1, # optional
+    max_tokens=10, # optional
+    stop=["\n\n"], # optional
+)
+
+for chunk in response:
+    print(chunk)
+```
+
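+## Sample Usage - Async
+litellm also provides `acompletion` for async code; it mirrors the `completion` signature used above. A minimal sketch, assuming `NVIDIA_NIM_API_KEY` is set as in the other examples:
+
+```python
+import asyncio
+import os
+
+from litellm import acompletion
+
+os.environ['NVIDIA_NIM_API_KEY'] = ""
+
+async def main():
+    # same call shape as the synchronous example above
+    response = await acompletion(
+        model="nvidia_nim/meta/llama3-70b",
+        messages=[
+            {
+                "role": "user",
+                "content": "What's the weather like in Boston today in Fahrenheit?",
+            }
+        ],
+        max_tokens=10, # optional
+    )
+    print(response)
+
+asyncio.run(main())
+```
+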
`completion(model="nvidia_nim/microsoft/phi-3-mini-4k-instruct", messages)` | +| microsoft/phi-3-small-128k-instruct | `completion(model="nvidia_nim/microsoft/phi-3-small-128k-instruct", messages)` | +| microsoft/phi-3-small-8k-instruct | `completion(model="nvidia_nim/microsoft/phi-3-small-8k-instruct", messages)` | +| mistralai/codestral-22b-instruct-v0.1 | `completion(model="nvidia_nim/mistralai/codestral-22b-instruct-v0.1", messages)` | +| mistralai/mistral-7b-instruct | `completion(model="nvidia_nim/mistralai/mistral-7b-instruct", messages)` | +| mistralai/mistral-7b-instruct-v0.3 | `completion(model="nvidia_nim/mistralai/mistral-7b-instruct-v0.3", messages)` | +| mistralai/mixtral-8x7b-instruct | `completion(model="nvidia_nim/mistralai/mixtral-8x7b-instruct", messages)` | +| mistralai/mixtral-8x22b-instruct | `completion(model="nvidia_nim/mistralai/mixtral-8x22b-instruct", messages)` | +| mistralai/mistral-large | `completion(model="nvidia_nim/mistralai/mistral-large", messages)` | +| nvidia/nemotron-4-340b-instruct | `completion(model="nvidia_nim/nvidia/nemotron-4-340b-instruct", messages)` | +| seallms/seallm-7b-v2.5 | `completion(model="nvidia_nim/seallms/seallm-7b-v2.5", messages)` | +| snowflake/arctic | `completion(model="nvidia_nim/snowflake/arctic", messages)` | +| upstage/solar-10.7b-instruct | `completion(model="nvidia_nim/upstage/solar-10.7b-instruct", messages)` | \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 2673933f4..9835a260b 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -146,13 +146,14 @@ const sidebars = { "providers/databricks", "providers/watsonx", "providers/predibase", - "providers/clarifai", + "providers/nvidia_nim", "providers/triton-inference-server", "providers/ollama", "providers/perplexity", "providers/groq", "providers/deepseek", - "providers/fireworks_ai", + "providers/fireworks_ai", + "providers/clarifai", "providers/vllm", "providers/xinference", "providers/cloudflare_workers",