Merge pull request #3543 from kmheckel/main

Updated Ollama cost models to include LLaMa3 and Mistral/Mixtral Instruct series
2024-05-15 20:50:50 -07:00 · 2024-05-15 20:50:50 -07:00 · 881812d5de
commit 881812d5de
parent 370049c549 a4920200b0
3 changed files with 63 additions and 3 deletions
--- a/docs/my-website/docs/providers/ollama.md
+++ b/docs/my-website/docs/providers/ollama.md
@ -101,13 +101,19 @@ Ollama supported models: https://github.com/ollama/ollama
 | Model Name           | Function Call                                                                     |
 |----------------------|-----------------------------------------------------------------------------------
-| Mistral    | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` | 
+| Mistral    | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` |
 | Mistral-7B-Instruct-v0.1 | `completion(model='ollama/mistral-7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
 | Mistral-7B-Instruct-v0.2 | `completion(model='ollama/mistral-7B-Instruct-v0.2', messages, api_base="http://localhost:11434", stream=False)` |
 | Mixtral-8x7B-Instruct-v0.1 | `completion(model='ollama/mistral-8x7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
 | Mixtral-8x22B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x22B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
 | Llama2 7B            | `completion(model='ollama/llama2', messages, api_base="http://localhost:11434", stream=True)` | 
 | Llama2 13B           | `completion(model='ollama/llama2:13b', messages, api_base="http://localhost:11434", stream=True)` | 
 | Llama2 70B           | `completion(model='ollama/llama2:70b', messages, api_base="http://localhost:11434", stream=True)` | 
 | Llama2 Uncensored    | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` | 
 | Code Llama    | `completion(model='ollama/codellama', messages, api_base="http://localhost:11434", stream=True)` | 
-| Llama2 Uncensored    | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` | 
+| Llama2 Uncensored    | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
 |Meta LLaMa3 8B | `completion(model='ollama/llama3', messages, api_base="http://localhost:11434", stream=False)` |
 | Meta LLaMa3 70B | `completion(model='ollama/llama3:70b', messages, api_base="http://localhost:11434", stream=False)` |
 | Orca Mini            | `completion(model='ollama/orca-mini', messages, api_base="http://localhost:11434", stream=True)` |
 | Vicuna               | `completion(model='ollama/vicuna', messages, api_base="http://localhost:11434", stream=True)` |
 | Nous-Hermes          | `completion(model='ollama/nous-hermes', messages, api_base="http://localhost:11434", stream=True)` |
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -6764,7 +6764,7 @@ def get_max_tokens(model: str):
            raise Exception()
    except:
        raise Exception(
-            "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+            f"Model {model} isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
        )
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -2967,6 +2967,24 @@
        "litellm_provider": "ollama",
        "mode": "completion"
    },
    "ollama/llama3": {
        "max_tokens": 8192,
        "max_input_tokens": 8192,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.0,
        "output_cost_per_token": 0.0,
        "litellm_provider": "ollama",
        "mode": "chat"
    },
    "ollama/llama3:70b": {
        "max_tokens": 8192,
        "max_input_tokens": 8192,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.0,
        "output_cost_per_token": 0.0,
        "litellm_provider": "ollama",
        "mode": "chat"
    },
    "ollama/mistral": {
        "max_tokens": 8192,
        "max_input_tokens": 8192,
@ -2976,6 +2994,42 @@
        "litellm_provider": "ollama",
        "mode": "completion"
    },
    "ollama/mistral-7B-Instruct-v0.1": {
        "max_tokens": 8192,
        "max_input_tokens": 8192,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.0,
        "output_cost_per_token": 0.0,
        "litellm_provider": "ollama",
        "mode": "chat"
    },
    "ollama/mistral-7B-Instruct-v0.2": {
        "max_tokens": 32768,
        "max_input_tokens": 32768,
        "max_output_tokens": 32768,
        "input_cost_per_token": 0.0,
        "output_cost_per_token": 0.0,
        "litellm_provider": "ollama",
        "mode": "chat"
    },
    "ollama/mixtral-8x7B-Instruct-v0.1": {
        "max_tokens": 32768,
        "max_input_tokens": 32768,
        "max_output_tokens": 32768,
        "input_cost_per_token": 0.0,
        "output_cost_per_token": 0.0,
        "litellm_provider": "ollama",
        "mode": "chat"
    },
    "ollama/mixtral-8x22B-Instruct-v0.1": {
        "max_tokens": 65536,
        "max_input_tokens": 65536,
        "max_output_tokens": 65536,
        "input_cost_per_token": 0.0,
        "output_cost_per_token": 0.0,
        "litellm_provider": "ollama",
        "mode": "chat"
    },
    "ollama/codellama": {
        "max_tokens": 4096, 
        "max_input_tokens": 4096,