Merge pull request #3543 from kmheckel/main

Updated Ollama cost models to include LLaMa3 and Mistral/Mixtral Instruct series
This commit is contained in:
Ishaan Jaff 2024-05-15 20:50:50 -07:00 committed by GitHub
commit 881812d5de
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 63 additions and 3 deletions

View file

@ -102,12 +102,18 @@ Ollama supported models: https://github.com/ollama/ollama
| Model Name | Function Call |
|----------------------|-----------------------------------------------------------------------------------
| Mistral | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` |
| Mistral-7B-Instruct-v0.1 | `completion(model='ollama/mistral-7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
| Mistral-7B-Instruct-v0.2 | `completion(model='ollama/mistral-7B-Instruct-v0.2', messages, api_base="http://localhost:11434", stream=False)` |
| Mixtral-8x7B-Instruct-v0.1 | `completion(model='ollama/mistral-8x7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
| Mixtral-8x22B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x22B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
| Llama2 7B | `completion(model='ollama/llama2', messages, api_base="http://localhost:11434", stream=True)` |
| Llama2 13B | `completion(model='ollama/llama2:13b', messages, api_base="http://localhost:11434", stream=True)` |
| Llama2 70B | `completion(model='ollama/llama2:70b', messages, api_base="http://localhost:11434", stream=True)` |
| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
| Code Llama | `completion(model='ollama/codellama', messages, api_base="http://localhost:11434", stream=True)` |
| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
|Meta LLaMa3 8B | `completion(model='ollama/llama3', messages, api_base="http://localhost:11434", stream=False)` |
| Meta LLaMa3 70B | `completion(model='ollama/llama3:70b', messages, api_base="http://localhost:11434", stream=False)` |
| Orca Mini | `completion(model='ollama/orca-mini', messages, api_base="http://localhost:11434", stream=True)` |
| Vicuna | `completion(model='ollama/vicuna', messages, api_base="http://localhost:11434", stream=True)` |
| Nous-Hermes | `completion(model='ollama/nous-hermes', messages, api_base="http://localhost:11434", stream=True)` |

View file

@ -6764,7 +6764,7 @@ def get_max_tokens(model: str):
raise Exception()
except:
raise Exception(
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
f"Model {model} isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
)

View file

@ -2967,6 +2967,24 @@
"litellm_provider": "ollama",
"mode": "completion"
},
"ollama/llama3": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/llama3:70b": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/mistral": {
"max_tokens": 8192,
"max_input_tokens": 8192,
@ -2976,6 +2994,42 @@
"litellm_provider": "ollama",
"mode": "completion"
},
"ollama/mistral-7B-Instruct-v0.1": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/mistral-7B-Instruct-v0.2": {
"max_tokens": 32768,
"max_input_tokens": 32768,
"max_output_tokens": 32768,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/mixtral-8x7B-Instruct-v0.1": {
"max_tokens": 32768,
"max_input_tokens": 32768,
"max_output_tokens": 32768,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/mixtral-8x22B-Instruct-v0.1": {
"max_tokens": 65536,
"max_input_tokens": 65536,
"max_output_tokens": 65536,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/codellama": {
"max_tokens": 4096,
"max_input_tokens": 4096,