Merge pull request #4808 from BerriAI/litellm_Add_mistral_models

feat - add mistral `open-codestral-mamba` `open-mistral-nemo`
This commit is contained in:
Ishaan Jaff 2024-07-20 17:23:28 -07:00 committed by GitHub
commit 40ee954e8b
3 changed files with 184 additions and 0 deletions
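The new chat models can be exercised directly through the LiteLLM SDK. A minimal sketch (assuming `MISTRAL_API_KEY` is exported; the model name is one of the two added in this PR):

```python
from litellm import completion  # pip install litellm

# Assumes MISTRAL_API_KEY is set in the environment.
response = completion(
    model="mistral/open-mistral-nemo",  # newly added in this PR
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    stream=True,
)
for chunk in response:
    # Streamed chunks follow the OpenAI-compatible delta format.
    print(chunk.choices[0].delta.content or "", end="")
```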

View file

@@ -1,3 +1,6 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Mistral AI API
https://docs.mistral.ai/api/
@@ -41,9 +44,106 @@ for chunk in response:
```
## Usage with LiteLLM Proxy
### 1. Set Mistral Models on config.yaml
```yaml
model_list:
- model_name: mistral-small-latest
litellm_params:
model: mistral/mistral-small-latest
api_key: "os.environ/MISTRAL_API_KEY" # ensure you have `MISTRAL_API_KEY` in your .env
```
### 2. Start Proxy
```shell
litellm --config config.yaml
```
### 3. Test it
<Tabs>
<TabItem value="Curl" label="Curl Request">
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data ' {
"model": "mistral-small-latest",
"messages": [
{
"role": "user",
"content": "what llm are you"
}
]
}
'
```
</TabItem>
<TabItem value="openai" label="OpenAI v1.0.0+">
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
response = client.chat.completions.create(model="mistral-small-latest", messages=[
{
"role": "user",
"content": "this is a test request, write a short poem"
}
])
print(response)
```
</TabItem>
<TabItem value="langchain" label="Langchain">
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:4000", # set openai_api_base to the LiteLLM Proxy
    model="mistral-small-latest",
temperature=0.1
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
```
</TabItem>
</Tabs>
## Supported Models
:::info
All models listed at https://docs.mistral.ai/platform/endpoints are supported. We actively maintain the list of models, pricing, token window, etc. [here](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json).
:::
| Model Name | Function Call |
|----------------|--------------------------------------------------------------|
| Mistral Small | `completion(model="mistral/mistral-small-latest", messages)` |
@@ -53,6 +153,10 @@ All models listed here https://docs.mistral.ai/platform/endpoints are supported.
| Mixtral 8x7B | `completion(model="mistral/open-mixtral-8x7b", messages)` |
| Mixtral 8x22B | `completion(model="mistral/open-mixtral-8x22b", messages)` |
| Codestral | `completion(model="mistral/codestral-latest", messages)` |
| Mistral NeMo | `completion(model="mistral/open-mistral-nemo", messages)` |
| Mistral NeMo 2407 | `completion(model="mistral/open-mistral-nemo-2407", messages)` |
| Codestral Mamba | `completion(model="mistral/open-codestral-mamba", messages)` |
| Codestral Mamba | `completion(model="mistral/codestral-mamba-latest", messages)` |
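The new table rows map onto an ordinary `completion()` call; a minimal sketch for the Codestral Mamba entry, assuming `MISTRAL_API_KEY` is set:

```python
import litellm

# Assumes MISTRAL_API_KEY is set in the environment.
response = litellm.completion(
    model="mistral/open-codestral-mamba",
    messages=[{"role": "user", "content": "Write a Python one-liner that reverses a string."}],
)
print(response.choices[0].message.content)
```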
## Function Calling

View file

@@ -929,6 +929,46 @@
"litellm_provider": "mistral",
"mode": "chat"
},
"mistral/open-mistral-nemo": {
"max_tokens": 128000,
"max_input_tokens": 128000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.0000003,
"output_cost_per_token": 0.0000003,
"litellm_provider": "mistral",
"mode": "chat",
"source": "https://mistral.ai/technology/"
},
"mistral/open-mistral-nemo-2407": {
"max_tokens": 128000,
"max_input_tokens": 128000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.0000003,
"output_cost_per_token": 0.0000003,
"litellm_provider": "mistral",
"mode": "chat",
"source": "https://mistral.ai/technology/"
},
"mistral/open-codestral-mamba": {
"max_tokens": 256000,
"max_input_tokens": 256000,
"max_output_tokens": 256000,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.00000025,
"litellm_provider": "mistral",
"mode": "chat",
"source": "https://mistral.ai/technology/"
},
"mistral/codestral-mamba-latest": {
"max_tokens": 256000,
"max_input_tokens": 256000,
"max_output_tokens": 256000,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.00000025,
"litellm_provider": "mistral",
"mode": "chat",
"source": "https://mistral.ai/technology/"
},
"mistral/mistral-embed": {
"max_tokens": 8192,
"max_input_tokens": 8192,

View file

@@ -929,6 +929,46 @@
"litellm_provider": "mistral",
"mode": "chat"
},
"mistral/open-mistral-nemo": {
"max_tokens": 128000,
"max_input_tokens": 128000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.0000003,
"output_cost_per_token": 0.0000003,
"litellm_provider": "mistral",
"mode": "chat",
"source": "https://mistral.ai/technology/"
},
"mistral/open-mistral-nemo-2407": {
"max_tokens": 128000,
"max_input_tokens": 128000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.0000003,
"output_cost_per_token": 0.0000003,
"litellm_provider": "mistral",
"mode": "chat",
"source": "https://mistral.ai/technology/"
},
"mistral/open-codestral-mamba": {
"max_tokens": 256000,
"max_input_tokens": 256000,
"max_output_tokens": 256000,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.00000025,
"litellm_provider": "mistral",
"mode": "chat",
"source": "https://mistral.ai/technology/"
},
"mistral/codestral-mamba-latest": {
"max_tokens": 256000,
"max_input_tokens": 256000,
"max_output_tokens": 256000,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.00000025,
"litellm_provider": "mistral",
"mode": "chat",
"source": "https://mistral.ai/technology/"
},
"mistral/mistral-embed": {
"max_tokens": 8192,
"max_input_tokens": 8192,