diff --git a/docs/my-website/docs/providers/ollama.md b/docs/my-website/docs/providers/ollama.md
index 1c913c08c8..c1c8fc57c8 100644
--- a/docs/my-website/docs/providers/ollama.md
+++ b/docs/my-website/docs/providers/ollama.md
@@ -101,13 +101,19 @@ Ollama supported models: https://github.com/ollama/ollama
 
 | Model Name | Function Call |
 |----------------------|-----------------------------------------------------------------------------------
-| Mistral | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` |
+| Mistral | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` |
+| Mistral-7B-Instruct-v0.1 | `completion(model='ollama/mistral-7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
+| Mistral-7B-Instruct-v0.2 | `completion(model='ollama/mistral-7B-Instruct-v0.2', messages, api_base="http://localhost:11434", stream=False)` |
+| Mixtral-8x7B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
+| Mixtral-8x22B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x22B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
 | Llama2 7B | `completion(model='ollama/llama2', messages, api_base="http://localhost:11434", stream=True)` |
 | Llama2 13B | `completion(model='ollama/llama2:13b', messages, api_base="http://localhost:11434", stream=True)` |
 | Llama2 70B | `completion(model='ollama/llama2:70b', messages, api_base="http://localhost:11434", stream=True)` |
 | Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
 | Code Llama | `completion(model='ollama/codellama', messages, api_base="http://localhost:11434", stream=True)` |
-| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
+| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
+| Meta LLaMa3 8B | `completion(model='ollama/llama3', messages, api_base="http://localhost:11434", stream=False)` |
+| Meta LLaMa3 70B | `completion(model='ollama/llama3:70b', messages, api_base="http://localhost:11434", stream=False)` |
 | Orca Mini | `completion(model='ollama/orca-mini', messages, api_base="http://localhost:11434", stream=True)` |
 | Vicuna | `completion(model='ollama/vicuna', messages, api_base="http://localhost:11434", stream=True)` |
 | Nous-Hermes | `completion(model='ollama/nous-hermes', messages, api_base="http://localhost:11434", stream=True)` |
diff --git a/litellm/utils.py b/litellm/utils.py
index 99b66498f8..36f4ad481f 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6764,7 +6764,7 @@ def get_max_tokens(model: str):
         raise Exception()
     except:
         raise Exception(
-            "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+            f"Model {model} isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
         )
 
 
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index e181e0ef32..ff9194578c 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -2967,6 +2967,24 @@
         "litellm_provider": "ollama",
         "mode": "completion"
     },
+    "ollama/llama3": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/llama3:70b": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
     "ollama/mistral": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
@@ -2976,6 +2994,42 @@
         "litellm_provider": "ollama",
         "mode": "completion"
     },
+    "ollama/mistral-7B-Instruct-v0.1": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mistral-7B-Instruct-v0.2": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mixtral-8x7B-Instruct-v0.1": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mixtral-8x22B-Instruct-v0.1": {
+        "max_tokens": 65536,
+        "max_input_tokens": 65536,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
     "ollama/codellama": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,