From dde2e3249afff2d8c0579c93662a08210cde62ec Mon Sep 17 00:00:00 2001
From: Kade Heckel <51248168+kmheckel@users.noreply.github.com>
Date: Thu, 9 May 2024 11:01:25 +0100
Subject: [PATCH 1/5] Update model_prices_and_context_window.json

Added Mistral and Mixtral chat entries for Ollama.
---
 model_prices_and_context_window.json | 36 ++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 10c70a858..cb0f54aa6 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -2758,6 +2758,42 @@
         "litellm_provider": "ollama",
         "mode": "completion"
     },
+    "ollama/mistral-7B-Instruct-v0.1": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mistral-7B-Instruct-v0.2": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mixtral-8x7B-Instruct-v0.1": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mixtral-8x22B-Instruct-v0.1": {
+        "max_tokens": 65536,
+        "max_input_tokens": 65536,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
     "ollama/codellama": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,

From 256107c9990d5e8a66141c2110c1d12e813e913b Mon Sep 17 00:00:00 2001
From: Kade Heckel
Date: Thu, 9 May 2024 12:06:51 +0100
Subject: [PATCH 2/5] Added Ollama LLMs for LLaMa and Mistral

---
 litellm/utils.py                     |  2 +-
 model_prices_and_context_window.json | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/litellm/utils.py b/litellm/utils.py
index d1af1b44a..fab8beb14 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6620,7 +6620,7 @@ def get_max_tokens(model: str):
             raise Exception()
     except:
         raise Exception(
-            "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+            f"Model {model} from provider {custom_llm_provider} isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
         )

diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index cb0f54aa6..d9dc82d54 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -2749,6 +2749,24 @@
         "litellm_provider": "ollama",
         "mode": "completion"
     },
+    "ollama/llama3": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.00000010,
+        "output_cost_per_token": 0.00000010,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/llama3:70b": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
     "ollama/mistral": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,

From 615a4760afc83388a0a165921003cdd6624fcd81 Mon Sep 17 00:00:00 2001
From: Kade Heckel
Date: Thu, 9 May 2024 12:29:01 +0100
Subject: [PATCH 3/5] Updated docs for Ollama.

---
 docs/my-website/docs/providers/ollama.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/docs/my-website/docs/providers/ollama.md b/docs/my-website/docs/providers/ollama.md
index 1c913c08c..c1c8fc57c 100644
--- a/docs/my-website/docs/providers/ollama.md
+++ b/docs/my-website/docs/providers/ollama.md
@@ -101,13 +101,19 @@ Ollama supported models: https://github.com/ollama/ollama

 | Model Name | Function Call |
 |----------------------|-----------------------------------------------------------------------------------
-| Mistral | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` |
+| Mistral | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` |
+| Mistral-7B-Instruct-v0.1 | `completion(model='ollama/mistral-7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
+| Mistral-7B-Instruct-v0.2 | `completion(model='ollama/mistral-7B-Instruct-v0.2', messages, api_base="http://localhost:11434", stream=False)` |
+| Mixtral-8x7B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
+| Mixtral-8x22B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x22B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
 | Llama2 7B | `completion(model='ollama/llama2', messages, api_base="http://localhost:11434", stream=True)` |
 | Llama2 13B | `completion(model='ollama/llama2:13b', messages, api_base="http://localhost:11434", stream=True)` |
 | Llama2 70B | `completion(model='ollama/llama2:70b', messages, api_base="http://localhost:11434", stream=True)` |
 | Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
 | Code Llama | `completion(model='ollama/codellama', messages, api_base="http://localhost:11434", stream=True)` |
-| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
+| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
+| Meta LLaMa3 8B | `completion(model='ollama/llama3', messages, api_base="http://localhost:11434", stream=False)` |
+| Meta LLaMa3 70B | `completion(model='ollama/llama3:70b', messages, api_base="http://localhost:11434", stream=False)` |
 | Orca Mini | `completion(model='ollama/orca-mini', messages, api_base="http://localhost:11434", stream=True)` |
 | Vicuna | `completion(model='ollama/vicuna', messages, api_base="http://localhost:11434", stream=True)` |
 | Nous-Hermes | `completion(model='ollama/nous-hermes', messages, api_base="http://localhost:11434", stream=True)` |

From 54c57450ab67c482fa1609fe33cf92ecfbe4f516 Mon Sep 17 00:00:00 2001
From: Kade Heckel <51248168+kmheckel@users.noreply.github.com>
Date: Thu, 9 May 2024 16:03:22 +0100
Subject: [PATCH 4/5] Update model_prices_and_context_window.json

Fixed a typo in the ollama/llama3 token costs (now set to 0).
---
 model_prices_and_context_window.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index d9dc82d54..a3a13ed81 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -2753,8 +2753,8 @@
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.00000010,
-        "output_cost_per_token": 0.00000010,
"input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, "litellm_provider": "ollama", "mode": "chat" }, From 68b3b6f4695bf75b50ed7e58ec1dca269d2f221c Mon Sep 17 00:00:00 2001 From: Kade Heckel <51248168+kmheckel@users.noreply.github.com> Date: Sat, 11 May 2024 18:05:10 +0100 Subject: [PATCH 5/5] changed error message --- litellm/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/utils.py b/litellm/utils.py index fab8beb14..f97694d8f 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -6620,7 +6620,7 @@ def get_max_tokens(model: str): raise Exception() except: raise Exception( - f"Model {model} from provider {custom_llm_provider} isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json" + f"Model {model} isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json" )