add azure o1 pricing (#7715)

* build(model_prices_and_context_window.json): add azure o1 pricing

Closes https://github.com/BerriAI/litellm/issues/7712

* refactor: replace regex with string method for whitespace check in stop-sequences handling (#7713)

* Allows overriding keep_alive time in ollama (#7079)

* Allows overriding keep_alive time in ollama

* Also adds to ollama_chat

* Adds info to the docs about this parameter

* fix: together ai warning (#7688)

Co-authored-by: Carl Senze <carl.senze@aleph-alpha.com>

* fix(proxy_server.py): handle config containing thread-locked objects when using get_config_state

* fix(proxy_server.py): log the exception at debug level

* build(model_prices_and_context_window.json): update 'supports_vision' for azure o1

---------

Co-authored-by: Wolfram Ravenwolf <52386626+WolframRavenwolf@users.noreply.github.com>
Co-authored-by: Regis David Souza Mesquita <github@rdsm.dev>
Co-authored-by: Carl <45709281+capsenz@users.noreply.github.com>
Co-authored-by: Carl Senze <carl.senze@aleph-alpha.com>
Krish Dholakia, 2025-01-12 18:15:35 -08:00, committed by GitHub
commit ec5a354eac (parent d4779deb0b)
8 changed files with 67 additions and 5 deletions


@@ -147,6 +147,7 @@ model_list:
   - model_name: "llama3.1"
     litellm_params:
       model: "ollama_chat/llama3.1"
+      keep_alive: "8m" # Optional: Overrides default keep_alive, use -1 for Forever
     model_info:
       supports_function_calling: true
 ```
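
The same override can be exercised from the SDK side. A minimal sketch, assuming a local Ollama server on its default endpoint and that `keep_alive` is forwarded through `litellm.completion` kwargs the same way the proxy config above forwards it:

```python
import litellm

# Keep the model resident in Ollama's memory indefinitely (-1 = forever);
# an Ollama-style duration string such as "8m" also works.
response = litellm.completion(
    model="ollama_chat/llama3.1",
    messages=[{"role": "user", "content": "Hello!"}],
    keep_alive=-1,  # assumption: passed through to the Ollama request body
)
print(response.choices[0].message.content)
```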


@@ -1,5 +1,4 @@
 import json
-import re
 import time
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast
@@ -258,13 +257,13 @@ class AnthropicConfig(BaseConfig):
     ) -> Optional[List[str]]:
         new_stop: Optional[List[str]] = None
         if isinstance(stop, str):
-            if re.match(r'^\s+$', stop) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
+            if stop.isspace() and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
                 return new_stop
             new_stop = [stop]
         elif isinstance(stop, list):
             new_v = []
             for v in stop:
-                if re.match(r'^\s+$', v) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
+                if v.isspace() and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
                     continue
                 new_v.append(v)
             if len(new_v) > 0:
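
The regex-to-`str.isspace()` swap is behavior-preserving for the ASCII stop strings this code sees: both forms are truthy only for non-empty, all-whitespace strings. A small standalone sanity check (plain Python, not litellm code):

```python
import re

def is_all_whitespace_regex(s: str) -> bool:
    # the old check: one or more whitespace characters and nothing else
    return re.match(r"^\s+$", s) is not None

for s in [" ", "\t\n", "", "stop", " stop "]:
    # str.isspace() is True only for non-empty, all-whitespace strings,
    # which is exactly what ^\s+$ matched for these cases
    assert is_all_whitespace_regex(s) == s.isspace(), s
```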


@@ -219,6 +219,7 @@ def get_ollama_response( # noqa: PLR0915
     stream = optional_params.pop("stream", False)
     format = optional_params.pop("format", None)
+    keep_alive = optional_params.pop("keep_alive", None)
     function_name = optional_params.pop("function_name", None)
     tools = optional_params.pop("tools", None)
@@ -256,6 +257,8 @@
         data["format"] = format
     if tools is not None:
         data["tools"] = tools
+    if keep_alive is not None:
+        data["keep_alive"] = keep_alive
     ## LOGGING
     logging_obj.pre_call(
         input=None,
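
For context, this is roughly what the resulting request body looks like. The `keep_alive` field name comes from Ollama's /api/generate API; the surrounding values are illustrative only:

```python
# Illustrative sketch: the optional keep_alive field only appears in the
# JSON payload sent to Ollama when the caller actually set it.
data = {
    "model": "llama3.1",
    "prompt": "Hello!",
    "options": {},
}

keep_alive = "8m"  # could also be -1 (keep loaded forever) or 0 (unload immediately)
if keep_alive is not None:
    data["keep_alive"] = keep_alive  # omitted entirely when not set

# data is then POSTed to the Ollama server, e.g. http://localhost:11434/api/generate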


@@ -32,7 +32,7 @@ class TogetherAIConfig(OpenAIGPTConfig):
         optional_params = super().get_supported_openai_params(model)
         if supports_function_calling is not True:
-            verbose_logger.warning(
+            verbose_logger.debug(
                 "Only some together models support function calling/response_format. Docs - https://docs.together.ai/docs/function-calling"
             )
             optional_params.remove("tools")
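
The warning-to-debug change only affects log verbosity, not behavior. A stand-in example using the standard `logging` module (not litellm's `verbose_logger`) of why the message disappears from default output:

```python
import logging

logging.basicConfig(level=logging.WARNING)  # a typical default log level
logger = logging.getLogger("together_demo")

# Old behavior: emitted on every call for Together models without function calling support.
logger.warning("Only some together models support function calling/response_format.")

# New behavior: only visible when debug logging is explicitly enabled.
logger.debug("Only some together models support function calling/response_format.")
```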


@@ -945,6 +945,20 @@
         "supports_vision": false,
         "supports_prompt_caching": true
     },
+    "azure/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
     "azure/o1-preview": {
         "max_tokens": 32768,
         "max_input_tokens": 128000,


@@ -1653,7 +1653,15 @@ class ProxyConfig:
         Do this, to avoid mutating the config state outside of allowed methods
         """
-        return copy.deepcopy(self.config)
+        try:
+            return copy.deepcopy(self.config)
+        except Exception as e:
+            verbose_proxy_logger.debug(
+                "ProxyConfig:get_config_state(): Error returning copy of config state. self.config={}\nError: {}".format(
+                    self.config, e
+                )
+            )
+            return {}
 
     async def load_config(  # noqa: PLR0915
         self, router: Optional[litellm.Router], config_file_path: str
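
The failure mode being guarded against reproduces standalone: `copy.deepcopy` cannot pickle thread locks, so a config dict holding one raises. A minimal sketch of the same try/except pattern outside the proxy:

```python
import copy
import threading

config = {"callback_list": [{"lock": threading.RLock(), "name": "test_callback"}]}

try:
    state = copy.deepcopy(config)
except Exception as e:
    # deepcopy raises TypeError: cannot pickle '_thread.RLock' object
    print(f"falling back to empty config state: {e}")
    state = {}

print(state)  # {}
```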


@@ -945,6 +945,20 @@
         "supports_vision": false,
         "supports_prompt_caching": true
     },
+    "azure/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
     "azure/o1-preview": {
         "max_tokens": 32768,
         "max_input_tokens": 128000,


@@ -1107,6 +1107,29 @@ def test_proxy_config_state_post_init_callback_call():
     assert config["litellm_settings"]["default_team_settings"][0]["team_id"] == "test"
 
 
+def test_proxy_config_state_get_config_state_error():
+    """
+    Ensures that get_config_state does not raise an error when the config is not a valid dictionary
+    """
+    from litellm.proxy.proxy_server import ProxyConfig
+    import threading
+
+    test_config = {
+        "callback_list": [
+            {
+                "lock": threading.RLock(),  # This will cause the deep copy to fail
+                "name": "test_callback",
+            }
+        ],
+        "model_list": ["gpt-4", "claude-3"],
+    }
+
+    pc = ProxyConfig()
+    pc.config = test_config
+
+    config = pc.get_config_state()
+    assert config == {}
+
+
 @pytest.mark.parametrize(
     "associated_budget_table, expected_user_api_key_auth_key, expected_user_api_key_auth_value",
     [