Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-27 19:54:13 +00:00
add azure o1 pricing (#7715)
* build(model_prices_and_context_window.json): add azure o1 pricing

  Closes https://github.com/BerriAI/litellm/issues/7712

* refactor: replace regex with string method for whitespace check in stop-sequences handling (#7713)

* Allows overriding keep_alive time in ollama (#7079)

  * Allows overriding keep_alive time in ollama
  * Also adds to ollama_chat
  * Adds some info on the docs about this parameter

* fix: together ai warning (#7688)

  Co-authored-by: Carl Senze <carl.senze@aleph-alpha.com>

* fix(proxy_server.py): handle config containing thread-locked objects when using get_config_state

* fix(proxy_server.py): add exception to debug

* build(model_prices_and_context_window.json): update 'supports_vision' for azure o1

---------

Co-authored-by: Wolfram Ravenwolf <52386626+WolframRavenwolf@users.noreply.github.com>
Co-authored-by: Regis David Souza Mesquita <github@rdsm.dev>
Co-authored-by: Carl <45709281+capsenz@users.noreply.github.com>
Co-authored-by: Carl Senze <carl.senze@aleph-alpha.com>
Parent: f778865836
Commit: 01e2e26bd1
8 changed files with 67 additions and 5 deletions
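The keep_alive change called out in the message above can be exercised straight from litellm once this commit is applied. A minimal sketch, assuming keep_alive is forwarded like any other provider-specific kwarg into optional_params (the model name matches the docs example below; the api_base is a placeholder for a local Ollama server):

```python
import litellm

# Hypothetical local setup: Ollama listening on its default port.
# keep_alive tells Ollama how long to keep the model loaded after the call;
# "-1" keeps it resident forever, "0" unloads it immediately.
response = litellm.completion(
    model="ollama_chat/llama3.1",
    messages=[{"role": "user", "content": "Hello!"}],
    api_base="http://localhost:11434",
    keep_alive="8m",  # assumed pass-through to Ollama's keep_alive parameter
)
print(response.choices[0].message.content)
```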
@@ -147,6 +147,7 @@ model_list:
   - model_name: "llama3.1"
     litellm_params:
       model: "ollama_chat/llama3.1"
+      keep_alive: "8m" # Optional: Overrides default keep_alive, use -1 for Forever
     model_info:
       supports_function_calling: true
 ```
@@ -1,5 +1,4 @@
 import json
-import re
 import time
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast

@@ -258,13 +257,13 @@ class AnthropicConfig(BaseConfig):
     ) -> Optional[List[str]]:
         new_stop: Optional[List[str]] = None
         if isinstance(stop, str):
-            if re.match(r'^\s+$', stop) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
+            if stop.isspace() and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
                 return new_stop
             new_stop = [stop]
         elif isinstance(stop, list):
             new_v = []
             for v in stop:
-                if re.match(r'^\s+$', v) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
+                if v.isspace() and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
                     continue
                 new_v.append(v)
             if len(new_v) > 0:
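The regex and the string method agree on the inputs this code cares about; a tiny standalone check (not part of the patch) showing the two behave the same on ordinary ASCII stop-sequences:

```python
import re

# Both checks flag non-empty, all-whitespace strings and nothing else.
for s in [" ", "\n\t", "", "stop", " stop "]:
    regex_hit = bool(re.match(r"^\s+$", s))
    isspace_hit = s.isspace()
    print(f"{s!r:>10}  regex={regex_hit}  isspace={isspace_hit}")
    assert regex_hit == isspace_hit  # holds for these ASCII examples
```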
@@ -219,6 +219,7 @@ def get_ollama_response( # noqa: PLR0915

     stream = optional_params.pop("stream", False)
     format = optional_params.pop("format", None)
+    keep_alive = optional_params.pop("keep_alive", None)
     function_name = optional_params.pop("function_name", None)
     tools = optional_params.pop("tools", None)

@@ -256,6 +257,8 @@ def get_ollama_response( # noqa: PLR0915
         data["format"] = format
     if tools is not None:
         data["tools"] = tools
+    if keep_alive is not None:
+        data["keep_alive"] = keep_alive
     ## LOGGING
     logging_obj.pre_call(
         input=None,
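Taken together, the two hunks above thread a caller-supplied keep_alive straight into the JSON body posted to Ollama. A rough sketch of the resulting payload shape, assuming the remaining optional params land in the usual options dict (exact fields depend on the rest of get_ollama_response):

```python
# Illustrative only: approximate request body once the new branch runs.
data = {
    "model": "llama3.1",
    "prompt": "Hello!",
    "options": {"temperature": 0.7},  # remaining optional_params (assumed)
    "stream": False,
    "keep_alive": "8m",  # only present when the caller supplied keep_alive
}
```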
@@ -32,7 +32,7 @@ class TogetherAIConfig(OpenAIGPTConfig):

         optional_params = super().get_supported_openai_params(model)
         if supports_function_calling is not True:
-            verbose_logger.warning(
+            verbose_logger.debug(
                 "Only some together models support function calling/response_format. Docs - https://docs.together.ai/docs/function-calling"
             )
             optional_params.remove("tools")
@@ -945,6 +945,20 @@
         "supports_vision": false,
         "supports_prompt_caching": true
     },
+    "azure/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
     "azure/o1-preview": {
         "max_tokens": 32768,
         "max_input_tokens": 128000,
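The rates in the new entry translate to $15 per million input tokens, $60 per million output tokens, and $7.50 per million cache-read input tokens. A back-of-the-envelope check using only those numbers (the token counts are invented for illustration, and cached tokens are assumed to be billed at the cache-read rate instead of the normal input rate):

```python
# Pricing taken from the "azure/o1" entry added above.
input_cost_per_token = 0.000015          # $15 per 1M input tokens
output_cost_per_token = 0.000060         # $60 per 1M output tokens
cache_read_input_token_cost = 0.0000075  # $7.50 per 1M cached input tokens

# Hypothetical request: 120k prompt tokens (half served from the prompt cache),
# 4k completion tokens.
prompt_tokens, cached_tokens, completion_tokens = 120_000, 60_000, 4_000

cost = (
    (prompt_tokens - cached_tokens) * input_cost_per_token
    + cached_tokens * cache_read_input_token_cost
    + completion_tokens * output_cost_per_token
)
print(f"${cost:.2f}")  # $1.59
```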
@@ -1653,7 +1653,15 @@ class ProxyConfig:

         Do this, to avoid mutating the config state outside of allowed methods
         """
-        return copy.deepcopy(self.config)
+        try:
+            return copy.deepcopy(self.config)
+        except Exception as e:
+            verbose_proxy_logger.debug(
+                "ProxyConfig:get_config_state(): Error returning copy of config state. self.config={}\nError: {}".format(
+                    self.config, e
+                )
+            )
+            return {}

     async def load_config( # noqa: PLR0915
         self, router: Optional[litellm.Router], config_file_path: str
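The try/except above exists because deepcopy blows up on config values that hold a threading lock, which some callback objects do; the new test further down reproduces exactly this. A standalone sketch of the failure mode:

```python
import copy
import threading

config = {"callback_list": [{"lock": threading.RLock(), "name": "test_callback"}]}

try:
    copy.deepcopy(config)
except TypeError as e:
    print(f"deepcopy failed: {e}")  # e.g. "cannot pickle '_thread.RLock' object"
```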
@@ -945,6 +945,20 @@
         "supports_vision": false,
         "supports_prompt_caching": true
     },
+    "azure/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
     "azure/o1-preview": {
         "max_tokens": 32768,
         "max_input_tokens": 128000,
@@ -1107,6 +1107,29 @@ def test_proxy_config_state_post_init_callback_call():
     assert config["litellm_settings"]["default_team_settings"][0]["team_id"] == "test"


+def test_proxy_config_state_get_config_state_error():
+    """
+    Ensures that get_config_state does not raise an error when the config is not a valid dictionary
+    """
+    from litellm.proxy.proxy_server import ProxyConfig
+    import threading
+
+    test_config = {
+        "callback_list": [
+            {
+                "lock": threading.RLock(), # This will cause the deep copy to fail
+                "name": "test_callback",
+            }
+        ],
+        "model_list": ["gpt-4", "claude-3"],
+    }
+
+    pc = ProxyConfig()
+    pc.config = test_config
+    config = pc.get_config_state()
+    assert config == {}
+
+
 @pytest.mark.parametrize(
     "associated_budget_table, expected_user_api_key_auth_key, expected_user_api_key_auth_value",
     [