From 01e2e26bd10578d35448ddee720268e6ffb17e32 Mon Sep 17 00:00:00 2001
From: Krish Dholakia
Date: Sun, 12 Jan 2025 18:15:35 -0800
Subject: [PATCH] add azure o1 pricing (#7715)

* build(model_prices_and_context_window.json): add azure o1 pricing

Closes https://github.com/BerriAI/litellm/issues/7712

* refactor: replace regex with string method for whitespace check in stop-sequences handling (#7713)

* Allows overriding keep_alive time in ollama (#7079)

* Allows overriding keep_alive time in ollama

* Also adds to ollama_chat

* Adds some info on the docs about this parameter

* fix: together ai warning (#7688)

Co-authored-by: Carl Senze

* fix(proxy_server.py): handle config containing thread locked objects when using get_config_state

* fix(proxy_server.py): add exception to debug

* build(model_prices_and_context_window.json): update 'supports_vision' for azure o1

---------

Co-authored-by: Wolfram Ravenwolf <52386626+WolframRavenwolf@users.noreply.github.com>
Co-authored-by: Regis David Souza Mesquita
Co-authored-by: Carl <45709281+capsenz@users.noreply.github.com>
Co-authored-by: Carl Senze
---
 docs/my-website/docs/providers/ollama.md      |  1 +
 litellm/llms/anthropic/chat/transformation.py |  5 ++--
 litellm/llms/ollama_chat.py                   |  3 +++
 litellm/llms/together_ai/chat.py              |  2 +-
 ...odel_prices_and_context_window_backup.json | 14 +++++++++++
 litellm/proxy/proxy_server.py                 | 10 +++++++-
 model_prices_and_context_window.json          | 14 +++++++++++
 tests/proxy_unit_tests/test_proxy_utils.py    | 23 +++++++++++++++++++
 8 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/docs/my-website/docs/providers/ollama.md b/docs/my-website/docs/providers/ollama.md
index 63b79fe3aa..3de21474fa 100644
--- a/docs/my-website/docs/providers/ollama.md
+++ b/docs/my-website/docs/providers/ollama.md
@@ -147,6 +147,7 @@ model_list:
   - model_name: "llama3.1"
     litellm_params:
       model: "ollama_chat/llama3.1"
+      keep_alive: "8m" # Optional: Overrides default keep_alive, use -1 for Forever
     model_info:
       supports_function_calling: true
 ```
diff --git a/litellm/llms/anthropic/chat/transformation.py b/litellm/llms/anthropic/chat/transformation.py
index e517744b03..55d59ef015 100644
--- a/litellm/llms/anthropic/chat/transformation.py
+++ b/litellm/llms/anthropic/chat/transformation.py
@@ -1,5 +1,4 @@
 import json
-import re
 import time
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast
 
@@ -258,13 +257,13 @@ class AnthropicConfig(BaseConfig):
     ) -> Optional[List[str]]:
         new_stop: Optional[List[str]] = None
         if isinstance(stop, str):
-            if re.match(r'^\s+$', stop) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
+            if stop.isspace() and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
                 return new_stop
             new_stop = [stop]
         elif isinstance(stop, list):
             new_v = []
             for v in stop:
-                if re.match(r'^\s+$', v) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
+                if v.isspace() and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
                     continue
                 new_v.append(v)
             if len(new_v) > 0:
diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py
index 5aa26ced46..76a0604c21 100644
--- a/litellm/llms/ollama_chat.py
+++ b/litellm/llms/ollama_chat.py
@@ -219,6 +219,7 @@ def get_ollama_response(  # noqa: PLR0915
 
     stream = optional_params.pop("stream", False)
     format = optional_params.pop("format", None)
+    keep_alive = optional_params.pop("keep_alive", None)
     function_name = optional_params.pop("function_name", None)
     tools = optional_params.pop("tools", None)
 
@@ -256,6 +257,8 @@
         data["format"] = format
     if tools is not None:
         data["tools"] = tools
+    if keep_alive is not None:
+        data["keep_alive"] = keep_alive
     ## LOGGING
     logging_obj.pre_call(
         input=None,
diff --git a/litellm/llms/together_ai/chat.py b/litellm/llms/together_ai/chat.py
index 51933196ed..06d33f6975 100644
--- a/litellm/llms/together_ai/chat.py
+++ b/litellm/llms/together_ai/chat.py
@@ -32,7 +32,7 @@ class TogetherAIConfig(OpenAIGPTConfig):
 
         optional_params = super().get_supported_openai_params(model)
         if supports_function_calling is not True:
-            verbose_logger.warning(
+            verbose_logger.debug(
                 "Only some together models support function calling/response_format. Docs - https://docs.together.ai/docs/function-calling"
             )
             optional_params.remove("tools")
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index b4f3952dca..c924fa4cea 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -945,6 +945,20 @@
         "supports_vision": false,
         "supports_prompt_caching": true
     },
+    "azure/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
     "azure/o1-preview": {
         "max_tokens": 32768,
         "max_input_tokens": 128000,
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 17b17687ae..8002863217 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1653,7 +1653,15 @@ class ProxyConfig:
 
         Do this, to avoid mutating the config state outside of allowed methods
         """
-        return copy.deepcopy(self.config)
+        try:
+            return copy.deepcopy(self.config)
+        except Exception as e:
+            verbose_proxy_logger.debug(
+                "ProxyConfig:get_config_state(): Error returning copy of config state. self.config={}\nError: {}".format(
+                    self.config, e
+                )
+            )
+            return {}
 
     async def load_config(  # noqa: PLR0915
         self, router: Optional[litellm.Router], config_file_path: str
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index b4f3952dca..c924fa4cea 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -945,6 +945,20 @@
         "supports_vision": false,
         "supports_prompt_caching": true
     },
+    "azure/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
     "azure/o1-preview": {
         "max_tokens": 32768,
         "max_input_tokens": 128000,
diff --git a/tests/proxy_unit_tests/test_proxy_utils.py b/tests/proxy_unit_tests/test_proxy_utils.py
index dd018f674f..2b63ba5a8b 100644
--- a/tests/proxy_unit_tests/test_proxy_utils.py
+++ b/tests/proxy_unit_tests/test_proxy_utils.py
@@ -1107,6 +1107,29 @@ def test_proxy_config_state_post_init_callback_call():
     assert config["litellm_settings"]["default_team_settings"][0]["team_id"] == "test"
 
 
+def test_proxy_config_state_get_config_state_error():
+    """
+    Ensures that get_config_state does not raise an error when the config is not a valid dictionary
+    """
+    from litellm.proxy.proxy_server import ProxyConfig
+    import threading
+
+    test_config = {
+        "callback_list": [
+            {
+                "lock": threading.RLock(),  # This will cause the deep copy to fail
+                "name": "test_callback",
+            }
+        ],
+        "model_list": ["gpt-4", "claude-3"],
+    }
+
+    pc = ProxyConfig()
+    pc.config = test_config
+    config = pc.get_config_state()
+    assert config == {}
+
+
 @pytest.mark.parametrize(
     "associated_budget_table, expected_user_api_key_auth_key, expected_user_api_key_auth_value",
     [