Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-27 11:43:54 +00:00
add azure o1 pricing (#7715)
* build(model_prices_and_context_window.json): add azure o1 pricing

  Closes https://github.com/BerriAI/litellm/issues/7712

* refactor: replace regex with string method for whitespace check in stop-sequences handling (#7713)

* Allows overriding keep_alive time in ollama (#7079)

  * Allows overriding keep_alive time in ollama
  * Also adds to ollama_chat
  * Adds some info on the docs about this parameter

* fix: together ai warning (#7688)

  Co-authored-by: Carl Senze <carl.senze@aleph-alpha.com>

* fix(proxy_server.py): handle config containing thread locked objects when using get_config_state

* fix(proxy_server.py): add exception to debug

* build(model_prices_and_context_window.json): update 'supports_vision' for azure o1

---------

Co-authored-by: Wolfram Ravenwolf <52386626+WolframRavenwolf@users.noreply.github.com>
Co-authored-by: Regis David Souza Mesquita <github@rdsm.dev>
Co-authored-by: Carl <45709281+capsenz@users.noreply.github.com>
Co-authored-by: Carl Senze <carl.senze@aleph-alpha.com>
Parent: f778865836
Commit: 01e2e26bd1
8 changed files with 67 additions and 5 deletions
````diff
@@ -147,6 +147,7 @@ model_list:
   - model_name: "llama3.1"
     litellm_params:
       model: "ollama_chat/llama3.1"
+      keep_alive: "8m" # Optional: Overrides default keep_alive, use -1 for Forever
     model_info:
       supports_function_calling: true
 ```
````
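For context, a minimal sketch of calling the model entry above through a running LiteLLM proxy; the base_url, API key, and running server are assumptions for illustration, not part of this commit:

```python
# Sketch (not from this commit): call the "llama3.1" entry defined in the proxy
# config above. base_url and api_key are assumptions; adjust to your deployment.
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

response = client.chat.completions.create(
    model="llama3.1",  # the model_name from the config entry above
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)
```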
```diff
@@ -1,5 +1,4 @@
 import json
-import re
 import time
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast
 
@@ -258,13 +257,13 @@ class AnthropicConfig(BaseConfig):
     ) -> Optional[List[str]]:
         new_stop: Optional[List[str]] = None
         if isinstance(stop, str):
-            if re.match(r'^\s+$', stop) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
+            if stop.isspace() and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
                 return new_stop
             new_stop = [stop]
         elif isinstance(stop, list):
             new_v = []
             for v in stop:
-                if re.match(r'^\s+$', v) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
+                if v.isspace() and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
                     continue
                 new_v.append(v)
             if len(new_v) > 0:
```
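A quick sanity check on the refactor (illustrative, not from the commit): for typical stop-sequence values, `str.isspace()` behaves like the old whitespace-only regex, while letting the `re` import be dropped.

```python
# Illustrative check: str.isspace() agrees with the old whitespace-only regex
# on typical stop-sequence inputs.
import re

for s in ["   ", "\t\n", "Human:", ""]:
    old_check = bool(re.match(r"^\s+$", s))  # one or more whitespace chars, nothing else
    new_check = s.isspace()                  # non-empty and all whitespace
    assert old_check == new_check, s
print("both checks agree on these inputs")
```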
```diff
@@ -219,6 +219,7 @@ def get_ollama_response(  # noqa: PLR0915
 
     stream = optional_params.pop("stream", False)
     format = optional_params.pop("format", None)
+    keep_alive = optional_params.pop("keep_alive", None)
    function_name = optional_params.pop("function_name", None)
    tools = optional_params.pop("tools", None)
 
@@ -256,6 +257,8 @@ def get_ollama_response(  # noqa: PLR0915
         data["format"] = format
     if tools is not None:
         data["tools"] = tools
+    if keep_alive is not None:
+        data["keep_alive"] = keep_alive
     ## LOGGING
     logging_obj.pre_call(
         input=None,
```
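A sketch of how the new parameter would be used from the SDK side, assuming a local Ollama server with the model pulled (the commit message notes the same handling is added to ollama_chat):

```python
# Sketch (assumes a local Ollama server). Extra kwargs such as keep_alive are
# forwarded as provider params and end up in data["keep_alive"] above.
import litellm

response = litellm.completion(
    model="ollama/llama3.1",
    messages=[{"role": "user", "content": "Hello"}],
    keep_alive="8m",  # keep the model loaded for 8 minutes; -1 keeps it loaded forever
)
print(response.choices[0].message.content)
```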
```diff
@@ -32,7 +32,7 @@ class TogetherAIConfig(OpenAIGPTConfig):
 
         optional_params = super().get_supported_openai_params(model)
         if supports_function_calling is not True:
-            verbose_logger.warning(
+            verbose_logger.debug(
                 "Only some together models support function calling/response_format. Docs - https://docs.together.ai/docs/function-calling"
             )
             optional_params.remove("tools")
```
```diff
@@ -945,6 +945,20 @@
         "supports_vision": false,
         "supports_prompt_caching": true
     },
+    "azure/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
     "azure/o1-preview": {
         "max_tokens": 32768,
         "max_input_tokens": 128000,
```
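To show what these per-token prices mean in practice, a small sketch of a cost estimate using the numbers from the azure/o1 entry above; the helper function is illustrative only and is not litellm's cost API.

```python
# Illustrative only: translate the azure/o1 per-token prices above into a request cost.
input_cost_per_token = 0.000015          # $15 per 1M input tokens
output_cost_per_token = 0.000060         # $60 per 1M output tokens
cache_read_input_token_cost = 0.0000075  # $7.50 per 1M cached input tokens

def estimate_cost(prompt_tokens: int, completion_tokens: int, cached_tokens: int = 0) -> float:
    """Rough cost estimate for a single azure/o1 call (hypothetical helper)."""
    uncached = prompt_tokens - cached_tokens
    return (
        uncached * input_cost_per_token
        + cached_tokens * cache_read_input_token_cost
        + completion_tokens * output_cost_per_token
    )

# e.g. 10k prompt tokens (half served from cache) and 2k completion tokens
print(f"${estimate_cost(10_000, 2_000, cached_tokens=5_000):.4f}")  # ≈ $0.2325
```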
```diff
@@ -1653,7 +1653,15 @@ class ProxyConfig:
 
         Do this, to avoid mutating the config state outside of allowed methods
         """
-        return copy.deepcopy(self.config)
+        try:
+            return copy.deepcopy(self.config)
+        except Exception as e:
+            verbose_proxy_logger.debug(
+                "ProxyConfig:get_config_state(): Error returning copy of config state. self.config={}\nError: {}".format(
+                    self.config, e
+                )
+            )
+            return {}
 
     async def load_config(  # noqa: PLR0915
         self, router: Optional[litellm.Router], config_file_path: str
```
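Why the try/except is needed: `copy.deepcopy` raises when the config holds non-picklable objects such as thread locks, which is exactly the case the new test below exercises. A standalone sketch of the failure mode:

```python
# Sketch of the failure the fix guards against: deepcopy cannot handle thread locks.
import copy
import threading

config = {"callback_list": [{"lock": threading.RLock(), "name": "test_callback"}]}

try:
    copy.deepcopy(config)
except Exception as e:
    print(f"deepcopy failed: {e}")  # e.g. "cannot pickle '_thread.RLock' object"
```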
```diff
@@ -945,6 +945,20 @@
         "supports_vision": false,
         "supports_prompt_caching": true
     },
+    "azure/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
     "azure/o1-preview": {
         "max_tokens": 32768,
         "max_input_tokens": 128000,
```
```diff
@@ -1107,6 +1107,29 @@ def test_proxy_config_state_post_init_callback_call():
     assert config["litellm_settings"]["default_team_settings"][0]["team_id"] == "test"
 
 
+def test_proxy_config_state_get_config_state_error():
+    """
+    Ensures that get_config_state does not raise an error when the config is not a valid dictionary
+    """
+    from litellm.proxy.proxy_server import ProxyConfig
+    import threading
+
+    test_config = {
+        "callback_list": [
+            {
+                "lock": threading.RLock(),  # This will cause the deep copy to fail
+                "name": "test_callback",
+            }
+        ],
+        "model_list": ["gpt-4", "claude-3"],
+    }
+
+    pc = ProxyConfig()
+    pc.config = test_config
+    config = pc.get_config_state()
+    assert config == {}
+
+
 @pytest.mark.parametrize(
     "associated_budget_table, expected_user_api_key_auth_key, expected_user_api_key_auth_value",
     [
```