diff --git a/litellm/_redis.py b/litellm/_redis.py
index e2688bf41..d7789472c 100644
--- a/litellm/_redis.py
+++ b/litellm/_redis.py
@@ -110,7 +110,7 @@ def _get_redis_client_logic(**env_overrides):
         redis_kwargs.pop("password", None)
     elif "host" not in redis_kwargs or redis_kwargs["host"] is None:
         raise ValueError("Either 'host' or 'url' must be specified for redis.")
-    litellm.print_verbose(f"redis_kwargs: {redis_kwargs}")
+    # litellm.print_verbose(f"redis_kwargs: {redis_kwargs}")
     return redis_kwargs


@@ -142,6 +142,7 @@ def get_redis_async_client(**env_overrides):
                 )
             )
         return async_redis.Redis.from_url(**url_kwargs)
+
     return async_redis.Redis(
         socket_timeout=5,
         **redis_kwargs,
@@ -154,4 +155,9 @@ def get_redis_connection_pool(**env_overrides):
         return async_redis.BlockingConnectionPool.from_url(
             timeout=5, url=redis_kwargs["url"]
         )
+    connection_class = async_redis.Connection
+    if "ssl" in redis_kwargs and redis_kwargs["ssl"] is not None:
+        connection_class = async_redis.SSLConnection
+        redis_kwargs.pop("ssl", None)
+        redis_kwargs["connection_class"] = connection_class
     return async_redis.BlockingConnectionPool(timeout=5, **redis_kwargs)
diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py
index a14c3cb50..96c75232d 100644
--- a/litellm/llms/ollama.py
+++ b/litellm/llms/ollama.py
@@ -228,7 +228,7 @@ def get_ollama_response(
         model_response["choices"][0]["message"]["content"] = response_json["response"]
     model_response["created"] = int(time.time())
     model_response["model"] = "ollama/" + model
-    prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt))) # type: ignore
+    prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt, disallowed_special=()))) # type: ignore
     completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", "")))
     model_response["usage"] = litellm.Usage(
         prompt_tokens=prompt_tokens,
@@ -330,7 +330,7 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
            ]
        model_response["created"] = int(time.time())
        model_response["model"] = "ollama/" + data["model"]
-        prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"]))) # type: ignore
+        prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"], disallowed_special=()))) # type: ignore
        completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", "")))
        model_response["usage"] = litellm.Usage(
            prompt_tokens=prompt_tokens,
diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py
index aea00a303..917336d05 100644
--- a/litellm/llms/ollama_chat.py
+++ b/litellm/llms/ollama_chat.py
@@ -148,7 +148,7 @@ class OllamaChatConfig:
             if param == "top_p":
                 optional_params["top_p"] = value
             if param == "frequency_penalty":
-                optional_params["repeat_penalty"] = param
+                optional_params["repeat_penalty"] = value
             if param == "stop":
                 optional_params["stop"] = value
             if param == "response_format" and value["type"] == "json_object":
diff --git a/litellm/utils.py b/litellm/utils.py
index 19118acbe..824957d3e 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -7927,6 +7927,8 @@ def exception_type(
                 elif (
                     "429 Quota exceeded" in error_str
                     or "IndexError: list index out of range" in error_str
+                    or "429 Unable to submit request because the service is temporarily out of capacity."
+                    in error_str
                 ):
                     exception_mapping_worked = True
                     raise RateLimitError(