forked from phoenix/litellm-mirror
Merge pull request #3543 from kmheckel/main
Updated Ollama cost models to include LLaMa3 and Mistral/Mixtral Instruct series
This commit is contained in:
commit
881812d5de
3 changed files with 63 additions and 3 deletions
|
@ -101,13 +101,19 @@ Ollama supported models: https://github.com/ollama/ollama
|
||||||
|
|
||||||
| Model Name | Function Call |
|
| Model Name | Function Call |
|
||||||
|----------------------|-----------------------------------------------------------------------------------
|
|----------------------|-----------------------------------------------------------------------------------
|
||||||
| Mistral | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` |
|
| Mistral | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` |
|
||||||
|
| Mistral-7B-Instruct-v0.1 | `completion(model='ollama/mistral-7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
|
||||||
|
| Mistral-7B-Instruct-v0.2 | `completion(model='ollama/mistral-7B-Instruct-v0.2', messages, api_base="http://localhost:11434", stream=False)` |
|
||||||
|
| Mixtral-8x7B-Instruct-v0.1 | `completion(model='ollama/mistral-8x7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
|
||||||
|
| Mixtral-8x22B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x22B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
|
||||||
| Llama2 7B | `completion(model='ollama/llama2', messages, api_base="http://localhost:11434", stream=True)` |
|
| Llama2 7B | `completion(model='ollama/llama2', messages, api_base="http://localhost:11434", stream=True)` |
|
||||||
| Llama2 13B | `completion(model='ollama/llama2:13b', messages, api_base="http://localhost:11434", stream=True)` |
|
| Llama2 13B | `completion(model='ollama/llama2:13b', messages, api_base="http://localhost:11434", stream=True)` |
|
||||||
| Llama2 70B | `completion(model='ollama/llama2:70b', messages, api_base="http://localhost:11434", stream=True)` |
|
| Llama2 70B | `completion(model='ollama/llama2:70b', messages, api_base="http://localhost:11434", stream=True)` |
|
||||||
| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
|
| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
|
||||||
| Code Llama | `completion(model='ollama/codellama', messages, api_base="http://localhost:11434", stream=True)` |
|
| Code Llama | `completion(model='ollama/codellama', messages, api_base="http://localhost:11434", stream=True)` |
|
||||||
| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
|
| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
|
||||||
|
|Meta LLaMa3 8B | `completion(model='ollama/llama3', messages, api_base="http://localhost:11434", stream=False)` |
|
||||||
|
| Meta LLaMa3 70B | `completion(model='ollama/llama3:70b', messages, api_base="http://localhost:11434", stream=False)` |
|
||||||
| Orca Mini | `completion(model='ollama/orca-mini', messages, api_base="http://localhost:11434", stream=True)` |
|
| Orca Mini | `completion(model='ollama/orca-mini', messages, api_base="http://localhost:11434", stream=True)` |
|
||||||
| Vicuna | `completion(model='ollama/vicuna', messages, api_base="http://localhost:11434", stream=True)` |
|
| Vicuna | `completion(model='ollama/vicuna', messages, api_base="http://localhost:11434", stream=True)` |
|
||||||
| Nous-Hermes | `completion(model='ollama/nous-hermes', messages, api_base="http://localhost:11434", stream=True)` |
|
| Nous-Hermes | `completion(model='ollama/nous-hermes', messages, api_base="http://localhost:11434", stream=True)` |
|
||||||
|
|
|
@ -6764,7 +6764,7 @@ def get_max_tokens(model: str):
|
||||||
raise Exception()
|
raise Exception()
|
||||||
except:
|
except:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
|
f"Model {model} isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2967,6 +2967,24 @@
|
||||||
"litellm_provider": "ollama",
|
"litellm_provider": "ollama",
|
||||||
"mode": "completion"
|
"mode": "completion"
|
||||||
},
|
},
|
||||||
|
"ollama/llama3": {
|
||||||
|
"max_tokens": 8192,
|
||||||
|
"max_input_tokens": 8192,
|
||||||
|
"max_output_tokens": 8192,
|
||||||
|
"input_cost_per_token": 0.0,
|
||||||
|
"output_cost_per_token": 0.0,
|
||||||
|
"litellm_provider": "ollama",
|
||||||
|
"mode": "chat"
|
||||||
|
},
|
||||||
|
"ollama/llama3:70b": {
|
||||||
|
"max_tokens": 8192,
|
||||||
|
"max_input_tokens": 8192,
|
||||||
|
"max_output_tokens": 8192,
|
||||||
|
"input_cost_per_token": 0.0,
|
||||||
|
"output_cost_per_token": 0.0,
|
||||||
|
"litellm_provider": "ollama",
|
||||||
|
"mode": "chat"
|
||||||
|
},
|
||||||
"ollama/mistral": {
|
"ollama/mistral": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 8192,
|
||||||
"max_input_tokens": 8192,
|
"max_input_tokens": 8192,
|
||||||
|
@ -2976,6 +2994,42 @@
|
||||||
"litellm_provider": "ollama",
|
"litellm_provider": "ollama",
|
||||||
"mode": "completion"
|
"mode": "completion"
|
||||||
},
|
},
|
||||||
|
"ollama/mistral-7B-Instruct-v0.1": {
|
||||||
|
"max_tokens": 8192,
|
||||||
|
"max_input_tokens": 8192,
|
||||||
|
"max_output_tokens": 8192,
|
||||||
|
"input_cost_per_token": 0.0,
|
||||||
|
"output_cost_per_token": 0.0,
|
||||||
|
"litellm_provider": "ollama",
|
||||||
|
"mode": "chat"
|
||||||
|
},
|
||||||
|
"ollama/mistral-7B-Instruct-v0.2": {
|
||||||
|
"max_tokens": 32768,
|
||||||
|
"max_input_tokens": 32768,
|
||||||
|
"max_output_tokens": 32768,
|
||||||
|
"input_cost_per_token": 0.0,
|
||||||
|
"output_cost_per_token": 0.0,
|
||||||
|
"litellm_provider": "ollama",
|
||||||
|
"mode": "chat"
|
||||||
|
},
|
||||||
|
"ollama/mixtral-8x7B-Instruct-v0.1": {
|
||||||
|
"max_tokens": 32768,
|
||||||
|
"max_input_tokens": 32768,
|
||||||
|
"max_output_tokens": 32768,
|
||||||
|
"input_cost_per_token": 0.0,
|
||||||
|
"output_cost_per_token": 0.0,
|
||||||
|
"litellm_provider": "ollama",
|
||||||
|
"mode": "chat"
|
||||||
|
},
|
||||||
|
"ollama/mixtral-8x22B-Instruct-v0.1": {
|
||||||
|
"max_tokens": 65536,
|
||||||
|
"max_input_tokens": 65536,
|
||||||
|
"max_output_tokens": 65536,
|
||||||
|
"input_cost_per_token": 0.0,
|
||||||
|
"output_cost_per_token": 0.0,
|
||||||
|
"litellm_provider": "ollama",
|
||||||
|
"mode": "chat"
|
||||||
|
},
|
||||||
"ollama/codellama": {
|
"ollama/codellama": {
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
"max_input_tokens": 4096,
|
"max_input_tokens": 4096,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue