forked from phoenix/litellm-mirror
Merge pull request #2861 from BerriAI/litellm_add_azure_command_r_plust
[FEAT] add azure command-r-plus
This commit is contained in:
commit
2174b240d8
7 changed files with 76 additions and 35 deletions
|
@ -1,46 +1,24 @@
|
||||||
# Azure AI Studio
|
# Azure AI Studio
|
||||||
|
|
||||||
## Using Mistral models deployed on Azure AI Studio
|
## Sample Usage
|
||||||
|
The `azure/` prefix sends this to Azure
|
||||||
|
|
||||||
### Sample Usage - setting env vars
|
Ensure you add `/v1` to your api_base. Your Azure AI studio `api_base` passed to litellm should look something like this
|
||||||
|
```python
|
||||||
Set `MISTRAL_AZURE_API_KEY` and `MISTRAL_AZURE_API_BASE` in your env
|
api_base = "https://Mistral-large-dfgfj-serverless.eastus2.inference.ai.azure.com/v1/"
|
||||||
|
|
||||||
```shell
|
|
||||||
MISTRAL_AZURE_API_KEY = "zE************""
|
|
||||||
MISTRAL_AZURE_API_BASE = "https://Mistral-large-nmefg-serverless.eastus2.inference.ai.azure.com/v1"
|
|
||||||
```
|
```
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from litellm import completion
|
import litellm
|
||||||
import os
|
response = litellm.completion(
|
||||||
|
model="azure/command-r-plus",
|
||||||
response = completion(
|
api_base="<your-deployment-base>/v1/"
|
||||||
model="mistral/Mistral-large-dfgfj",
|
api_key="eskk******"
|
||||||
messages=[
|
messages=[{"role": "user", "content": "What is the meaning of life?"}],
|
||||||
{"role": "user", "content": "hello from litellm"}
|
|
||||||
],
|
|
||||||
)
|
)
|
||||||
print(response)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Sample Usage - passing `api_base` and `api_key` to `litellm.completion`
|
## Sample Usage - LiteLLM Proxy
|
||||||
```python
|
|
||||||
from litellm import completion
|
|
||||||
import os
|
|
||||||
|
|
||||||
response = completion(
|
|
||||||
model="mistral/Mistral-large-dfgfj",
|
|
||||||
api_base="https://Mistral-large-dfgfj-serverless.eastus2.inference.ai.azure.com",
|
|
||||||
api_key = "JGbKodRcTp****"
|
|
||||||
messages=[
|
|
||||||
{"role": "user", "content": "hello from litellm"}
|
|
||||||
],
|
|
||||||
)
|
|
||||||
print(response)
|
|
||||||
```
|
|
||||||
|
|
||||||
### [LiteLLM Proxy] Using Mistral Models
|
|
||||||
|
|
||||||
Set this on your litellm proxy config.yaml
|
Set this on your litellm proxy config.yaml
|
||||||
```yaml
|
```yaml
|
||||||
|
@ -48,8 +26,17 @@ model_list:
|
||||||
- model_name: mistral
|
- model_name: mistral
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: mistral/Mistral-large-dfgfj
|
model: mistral/Mistral-large-dfgfj
|
||||||
api_base: https://Mistral-large-dfgfj-serverless.eastus2.inference.ai.azure.com
|
api_base: https://Mistral-large-dfgfj-serverless.eastus2.inference.ai.azure.com/v1/
|
||||||
api_key: JGbKodRcTp****
|
api_key: JGbKodRcTp****
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Supported Models
|
||||||
|
|
||||||
|
| Model Name | Function Call |
|
||||||
|
|--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||||
|
| command-r-plus | `completion(model="azure/command-r-plus", messages)` |
|
||||||
|
| command-r | `completion(model="azure/command-r", messages)` |
|
||||||
|
| mistral-large-latest | `completion(model="azure/mistral-large-latest", messages)` |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -260,6 +260,7 @@ open_ai_chat_completion_models: List = []
|
||||||
open_ai_text_completion_models: List = []
|
open_ai_text_completion_models: List = []
|
||||||
cohere_models: List = []
|
cohere_models: List = []
|
||||||
cohere_chat_models: List = []
|
cohere_chat_models: List = []
|
||||||
|
mistral_chat_models: List = []
|
||||||
anthropic_models: List = []
|
anthropic_models: List = []
|
||||||
openrouter_models: List = []
|
openrouter_models: List = []
|
||||||
vertex_language_models: List = []
|
vertex_language_models: List = []
|
||||||
|
@ -285,6 +286,8 @@ for key, value in model_cost.items():
|
||||||
cohere_models.append(key)
|
cohere_models.append(key)
|
||||||
elif value.get("litellm_provider") == "cohere_chat":
|
elif value.get("litellm_provider") == "cohere_chat":
|
||||||
cohere_chat_models.append(key)
|
cohere_chat_models.append(key)
|
||||||
|
elif value.get("litellm_provider") == "mistral":
|
||||||
|
mistral_chat_models.append(key)
|
||||||
elif value.get("litellm_provider") == "anthropic":
|
elif value.get("litellm_provider") == "anthropic":
|
||||||
anthropic_models.append(key)
|
anthropic_models.append(key)
|
||||||
elif value.get("litellm_provider") == "openrouter":
|
elif value.get("litellm_provider") == "openrouter":
|
||||||
|
|
|
@ -474,6 +474,16 @@
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_function_calling": true
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
|
"azure/command-r-plus": {
|
||||||
|
"max_tokens": 4096,
|
||||||
|
"max_input_tokens": 128000,
|
||||||
|
"max_output_tokens": 4096,
|
||||||
|
"input_cost_per_token": 0.000003,
|
||||||
|
"output_cost_per_token": 0.000015,
|
||||||
|
"litellm_provider": "azure",
|
||||||
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
|
},
|
||||||
"azure/ada": {
|
"azure/ada": {
|
||||||
"max_tokens": 8191,
|
"max_tokens": 8191,
|
||||||
"max_input_tokens": 8191,
|
"max_input_tokens": 8191,
|
||||||
|
|
|
@ -53,6 +53,24 @@ def test_completion_custom_provider_model_name():
|
||||||
# test_completion_custom_provider_model_name()
|
# test_completion_custom_provider_model_name()
|
||||||
|
|
||||||
|
|
||||||
|
def test_completion_azure_command_r():
|
||||||
|
try:
|
||||||
|
litellm.set_verbose = True
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="azure/command-r-plus",
|
||||||
|
api_base=os.getenv("AZURE_COHERE_API_BASE"),
|
||||||
|
api_key=os.getenv("AZURE_COHERE_API_KEY"),
|
||||||
|
messages=[{"role": "user", "content": "What is the meaning of life?"}],
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
except litellm.Timeout as e:
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
def test_completion_claude():
|
def test_completion_claude():
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
litellm.cache = None
|
litellm.cache = None
|
||||||
|
|
|
@ -652,6 +652,7 @@ def load_vertex_ai_credentials():
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
@pytest.mark.skip(reason="Skipping on this PR to test other stuff")
|
||||||
async def test_async_chat_vertex_ai_stream():
|
async def test_async_chat_vertex_ai_stream():
|
||||||
try:
|
try:
|
||||||
load_vertex_ai_credentials()
|
load_vertex_ai_credentials()
|
||||||
|
|
|
@ -5588,6 +5588,18 @@ def get_llm_provider(
|
||||||
dynamic_api_key = None
|
dynamic_api_key = None
|
||||||
# check if llm provider provided
|
# check if llm provider provided
|
||||||
|
|
||||||
|
# AZURE AI-Studio Logic - Azure AI Studio supports AZURE/Cohere
|
||||||
|
# If User passes azure/command-r-plus -> we should send it to cohere_chat/command-r-plus
|
||||||
|
if model.split("/", 1)[0] == "azure":
|
||||||
|
model_name = model.split("/", 1)[1]
|
||||||
|
if (
|
||||||
|
model_name in litellm.cohere_chat_models
|
||||||
|
or f"mistral/{model_name}" in litellm.mistral_chat_models
|
||||||
|
):
|
||||||
|
custom_llm_provider = "openai"
|
||||||
|
model = model_name
|
||||||
|
return model, custom_llm_provider, dynamic_api_key, api_base
|
||||||
|
|
||||||
if custom_llm_provider:
|
if custom_llm_provider:
|
||||||
return model, custom_llm_provider, dynamic_api_key, api_base
|
return model, custom_llm_provider, dynamic_api_key, api_base
|
||||||
|
|
||||||
|
|
|
@ -474,6 +474,16 @@
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"supports_function_calling": true
|
"supports_function_calling": true
|
||||||
},
|
},
|
||||||
|
"azure/command-r-plus": {
|
||||||
|
"max_tokens": 4096,
|
||||||
|
"max_input_tokens": 128000,
|
||||||
|
"max_output_tokens": 4096,
|
||||||
|
"input_cost_per_token": 0.000003,
|
||||||
|
"output_cost_per_token": 0.000015,
|
||||||
|
"litellm_provider": "azure",
|
||||||
|
"mode": "chat",
|
||||||
|
"supports_function_calling": true
|
||||||
|
},
|
||||||
"azure/ada": {
|
"azure/ada": {
|
||||||
"max_tokens": 8191,
|
"max_tokens": 8191,
|
||||||
"max_input_tokens": 8191,
|
"max_input_tokens": 8191,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue