From 01e2e26bd10578d35448ddee720268e6ffb17e32 Mon Sep 17 00:00:00 2001
From: Krish Dholakia
Date: Sun, 12 Jan 2025 18:15:35 -0800
Subject: [PATCH] add azure o1 pricing (#7715)

* build(model_prices_and_context_window.json): add azure o1 pricing

Closes https://github.com/BerriAI/litellm/issues/7712

* refactor: replace regex with string method for whitespace check in stop-sequences handling (#7713)

* Allows overriding keep_alive time in ollama (#7079)

* Allows overriding keep_alive time in ollama

* Also adds to ollama_chat

* Adds some info on the docs about this parameter

* fix: together ai warning (#7688)

Co-authored-by: Carl Senze

* fix(proxy_server.py): handle config containing thread locked objects when using get_config_state

* fix(proxy_server.py): add exception to debug

* build(model_prices_and_context_window.json): update 'supports_vision' for azure o1

---------

Co-authored-by: Wolfram Ravenwolf <52386626+WolframRavenwolf@users.noreply.github.com>
Co-authored-by: Regis David Souza Mesquita
Co-authored-by: Carl <45709281+capsenz@users.noreply.github.com>
Co-authored-by: Carl Senze
---
 docs/my-website/docs/providers/ollama.md      |  1 +
 litellm/llms/anthropic/chat/transformation.py |  5 ++--
 litellm/llms/ollama_chat.py                   |  3 +++
 litellm/llms/together_ai/chat.py              |  2 +-
 ...odel_prices_and_context_window_backup.json | 14 +++++++++++
 litellm/proxy/proxy_server.py                 | 10 +++++++-
 model_prices_and_context_window.json          | 14 +++++++++++
 tests/proxy_unit_tests/test_proxy_utils.py    | 23 +++++++++++++++++++
 8 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/docs/my-website/docs/providers/ollama.md b/docs/my-website/docs/providers/ollama.md
index 63b79fe3aa..3de21474fa 100644
--- a/docs/my-website/docs/providers/ollama.md
+++ b/docs/my-website/docs/providers/ollama.md
@@ -147,6 +147,7 @@ model_list:
   - model_name: "llama3.1"
     litellm_params:
       model: "ollama_chat/llama3.1"
+      keep_alive: "8m" # Optional: Overrides default keep_alive, use -1 for Forever
     model_info:
       supports_function_calling: true
 ```
diff --git a/litellm/llms/anthropic/chat/transformation.py b/litellm/llms/anthropic/chat/transformation.py
index e517744b03..55d59ef015 100644
--- a/litellm/llms/anthropic/chat/transformation.py
+++ b/litellm/llms/anthropic/chat/transformation.py
@@ -1,5 +1,4 @@
 import json
-import re
 import time
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast
 
@@ -258,13 +257,13 @@ class AnthropicConfig(BaseConfig):
     ) -> Optional[List[str]]:
         new_stop: Optional[List[str]] = None
         if isinstance(stop, str):
-            if re.match(r'^\s+$', stop) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
+            if stop.isspace() and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
                 return new_stop
             new_stop = [stop]
         elif isinstance(stop, list):
             new_v = []
             for v in stop:
-                if re.match(r'^\s+$', v) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
+                if v.isspace() and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
                     continue
                 new_v.append(v)
             if len(new_v) > 0:
diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py
index 5aa26ced46..76a0604c21 100644
--- a/litellm/llms/ollama_chat.py
+++ b/litellm/llms/ollama_chat.py
@@ -219,6 +219,7 @@ def get_ollama_response(  # noqa: PLR0915
 
     stream = optional_params.pop("stream", False)
     format = optional_params.pop("format", None)
+    keep_alive = optional_params.pop("keep_alive", None)
     function_name = optional_params.pop("function_name", None)
     tools = optional_params.pop("tools", None)
 
@@ -256,6 +257,8 @@
         data["format"] = format
     if tools is not None:
         data["tools"] = tools
+    if keep_alive is not None:
+        data["keep_alive"] = keep_alive
     ## LOGGING
     logging_obj.pre_call(
         input=None,
diff --git a/litellm/llms/together_ai/chat.py b/litellm/llms/together_ai/chat.py
index 51933196ed..06d33f6975 100644
--- a/litellm/llms/together_ai/chat.py
+++ b/litellm/llms/together_ai/chat.py
@@ -32,7 +32,7 @@ class TogetherAIConfig(OpenAIGPTConfig):
 
         optional_params = super().get_supported_openai_params(model)
         if supports_function_calling is not True:
-            verbose_logger.warning(
+            verbose_logger.debug(
                 "Only some together models support function calling/response_format. Docs - https://docs.together.ai/docs/function-calling"
             )
             optional_params.remove("tools")
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index b4f3952dca..c924fa4cea 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -945,6 +945,20 @@
         "supports_vision": false,
         "supports_prompt_caching": true
     },
+    "azure/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
     "azure/o1-preview": {
         "max_tokens": 32768,
         "max_input_tokens": 128000,
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 17b17687ae..8002863217 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1653,7 +1653,15 @@ class ProxyConfig:
 
         Do this, to avoid mutating the config state outside of allowed methods
         """
-        return copy.deepcopy(self.config)
+        try:
+            return copy.deepcopy(self.config)
+        except Exception as e:
+            verbose_proxy_logger.debug(
+                "ProxyConfig:get_config_state(): Error returning copy of config state. self.config={}\nError: {}".format(
+                    self.config, e
+                )
+            )
+            return {}
 
     async def load_config(  # noqa: PLR0915
         self, router: Optional[litellm.Router], config_file_path: str
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index b4f3952dca..c924fa4cea 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -945,6 +945,20 @@
         "supports_vision": false,
         "supports_prompt_caching": true
     },
+    "azure/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
     "azure/o1-preview": {
         "max_tokens": 32768,
         "max_input_tokens": 128000,
diff --git a/tests/proxy_unit_tests/test_proxy_utils.py b/tests/proxy_unit_tests/test_proxy_utils.py
index dd018f674f..2b63ba5a8b 100644
--- a/tests/proxy_unit_tests/test_proxy_utils.py
+++ b/tests/proxy_unit_tests/test_proxy_utils.py
@@ -1107,6 +1107,29 @@ def test_proxy_config_state_post_init_callback_call():
     assert config["litellm_settings"]["default_team_settings"][0]["team_id"] == "test"
 
 
+def test_proxy_config_state_get_config_state_error():
+    """
+    Ensures that get_config_state does not raise an error when the config is not a valid dictionary
+    """
+    from litellm.proxy.proxy_server import ProxyConfig
+    import threading
+
+    test_config = {
+        "callback_list": [
+            {
+                "lock": threading.RLock(),  # This will cause the deep copy to fail
+                "name": "test_callback",
+            }
+        ],
+        "model_list": ["gpt-4", "claude-3"],
+    }
+
+    pc = ProxyConfig()
+    pc.config = test_config
+    config = pc.get_config_state()
+    assert config == {}
+
+
 @pytest.mark.parametrize(
     "associated_budget_table, expected_user_api_key_auth_key, expected_user_api_key_auth_value",
     [