From 3df7231fa5aa4cb6637853361925899a6eb3c431 Mon Sep 17 00:00:00 2001
From: frob <rick+github@frob.com.au>
Date: Fri, 19 Apr 2024 21:38:42 +0200
Subject: [PATCH 1/4] Disable special tokens in ollama completion when counting
 tokens

At least some models (e.g., codegemma) don't return a prompt_eval_count field, so ollama.py falls back to computing the value by encoding the prompt. Unfortunately, FIM symbols used in the prompt (e.g., "<|fim_prefix|>") cause the encoder to throw an exception, so we pass disallowed_special=() to the encoder, which makes it treat such symbols as ordinary text when counting tokens.
---
 litellm/llms/ollama.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py
index a14c3cb50..96c75232d 100644
--- a/litellm/llms/ollama.py
+++ b/litellm/llms/ollama.py
@@ -228,7 +228,7 @@ def get_ollama_response(
         model_response["choices"][0]["message"]["content"] = response_json["response"]
     model_response["created"] = int(time.time())
     model_response["model"] = "ollama/" + model
-    prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt)))  # type: ignore
+    prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt, disallowed_special=())))  # type: ignore
     completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", "")))
     model_response["usage"] = litellm.Usage(
         prompt_tokens=prompt_tokens,
@@ -330,7 +330,7 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
                 ]
             model_response["created"] = int(time.time())
             model_response["model"] = "ollama/" + data["model"]
-            prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"])))  # type: ignore
+            prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"], disallowed_special=())))  # type: ignore
             completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", "")))
             model_response["usage"] = litellm.Usage(
                 prompt_tokens=prompt_tokens,

From 50a917a09664bd01a39e1749e84c0ad8d7317347 Mon Sep 17 00:00:00 2001
From: merefield <merefield@gmail.com>
Date: Sat, 20 Apr 2024 09:25:35 +0100
Subject: [PATCH 2/4] FIX: use value not param name when mapping
 frequency_penalty

---
 litellm/llms/ollama_chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py
index aea00a303..917336d05 100644
--- a/litellm/llms/ollama_chat.py
+++ b/litellm/llms/ollama_chat.py
@@ -148,7 +148,7 @@ class OllamaChatConfig:
             if param == "top_p":
                 optional_params["top_p"] = value
             if param == "frequency_penalty":
-                optional_params["repeat_penalty"] = param
+                optional_params["repeat_penalty"] = value
             if param == "stop":
                 optional_params["stop"] = value
             if param == "response_format" and value["type"] == "json_object":

From d1217b955a8898d88be3d2ddbb1cf76f614adc89 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 20 Apr 2024 10:19:44 -0700
Subject: [PATCH 3/4] fix(_redis.py): support redis ssl as a kwarg `REDIS_SSL`

---
 litellm/_redis.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/litellm/_redis.py b/litellm/_redis.py
index e2688bf41..d7789472c 100644
--- a/litellm/_redis.py
+++ b/litellm/_redis.py
@@ -110,7 +110,7 @@ def _get_redis_client_logic(**env_overrides):
         redis_kwargs.pop("password", None)
     elif "host" not in redis_kwargs or redis_kwargs["host"] is None:
         raise ValueError("Either 'host' or 'url' must be specified for redis.")
-    litellm.print_verbose(f"redis_kwargs: {redis_kwargs}")
+    # litellm.print_verbose(f"redis_kwargs: {redis_kwargs}")
     return redis_kwargs
 
 
@@ -142,6 +142,7 @@ def get_redis_async_client(**env_overrides):
                     )
                 )
         return async_redis.Redis.from_url(**url_kwargs)
+
     return async_redis.Redis(
         socket_timeout=5,
         **redis_kwargs,
@@ -154,4 +155,9 @@ def get_redis_connection_pool(**env_overrides):
         return async_redis.BlockingConnectionPool.from_url(
             timeout=5, url=redis_kwargs["url"]
         )
+    connection_class = async_redis.Connection
+    if "ssl" in redis_kwargs and redis_kwargs["ssl"] is not None:
+        connection_class = async_redis.SSLConnection
+        redis_kwargs.pop("ssl", None)
+        redis_kwargs["connection_class"] = connection_class
     return async_redis.BlockingConnectionPool(timeout=5, **redis_kwargs)

From 33d828a0edd1d3a6ebdca102bddfc6b53853e9ef Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 20 Apr 2024 11:12:05 -0700
Subject: [PATCH 4/4] fix(utils.py): map vertex ai exceptions - rate limit
 error

---
 litellm/utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/litellm/utils.py b/litellm/utils.py
index e230675e6..e793357d2 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -7884,6 +7884,8 @@ def exception_type(
                 elif (
                     "429 Quota exceeded" in error_str
                     or "IndexError: list index out of range" in error_str
+                    or "429 Unable to submit request because the service is temporarily out of capacity."
+                    in error_str
                 ):
                     exception_mapping_worked = True
                     raise RateLimitError(