Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-27 19:54:13 +00:00
add azure o1 pricing (#7715)
* build(model_prices_and_context_window.json): add azure o1 pricing

  Closes https://github.com/BerriAI/litellm/issues/7712

* refactor: replace regex with string method for whitespace check in stop-sequences handling (#7713)

* Allows overriding keep_alive time in ollama (#7079)

  * Allows overriding keep_alive time in ollama
  * Also adds to ollama_chat
  * Adds some info on the docs about this parameter

* fix: together ai warning (#7688)

  Co-authored-by: Carl Senze <carl.senze@aleph-alpha.com>

* fix(proxy_server.py): handle config containing thread-locked objects when using get_config_state

* fix(proxy_server.py): add exception to debug

* build(model_prices_and_context_window.json): update 'supports_vision' for azure o1

---------

Co-authored-by: Wolfram Ravenwolf <52386626+WolframRavenwolf@users.noreply.github.com>
Co-authored-by: Regis David Souza Mesquita <github@rdsm.dev>
Co-authored-by: Carl <45709281+capsenz@users.noreply.github.com>
Co-authored-by: Carl Senze <carl.senze@aleph-alpha.com>
Parent: f778865836
Commit: 01e2e26bd1
8 changed files with 67 additions and 5 deletions
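The keep_alive change called out in the message above can be exercised straight from litellm once this commit is applied. A minimal sketch, assuming keep_alive is forwarded like any other provider-specific kwarg into optional_params (the model name matches the docs example below; the api_base is a placeholder for a local Ollama server):

```python
import litellm

# Hypothetical local setup: Ollama listening on its default port.
# keep_alive tells Ollama how long to keep the model loaded after the call;
# "-1" keeps it resident forever, "0" unloads it immediately.
response = litellm.completion(
    model="ollama_chat/llama3.1",
    messages=[{"role": "user", "content": "Hello!"}],
    api_base="http://localhost:11434",
    keep_alive="8m",  # assumed pass-through to Ollama's keep_alive parameter
)
print(response.choices[0].message.content)
```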
@@ -147,6 +147,7 @@ model_list:
   - model_name: "llama3.1"
     litellm_params:
       model: "ollama_chat/llama3.1"
+      keep_alive: "8m" # Optional: Overrides default keep_alive, use -1 for Forever
     model_info:
       supports_function_calling: true
 ```
@@ -1,5 +1,4 @@
 import json
-import re
 import time
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast

@@ -258,13 +257,13 @@ class AnthropicConfig(BaseConfig):
     ) -> Optional[List[str]]:
         new_stop: Optional[List[str]] = None
         if isinstance(stop, str):
-            if re.match(r'^\s+$', stop) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
+            if stop.isspace() and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
                 return new_stop
             new_stop = [stop]
         elif isinstance(stop, list):
             new_v = []
             for v in stop:
-                if re.match(r'^\s+$', v) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
+                if v.isspace() and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
                     continue
                 new_v.append(v)
             if len(new_v) > 0:
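The regex and the string method agree on the inputs this code cares about; a tiny standalone check (not part of the patch) showing the two behave the same on ordinary ASCII stop-sequences:

```python
import re

# Both checks flag non-empty, all-whitespace strings and nothing else.
for s in [" ", "\n\t", "", "stop", " stop "]:
    regex_hit = bool(re.match(r"^\s+$", s))
    isspace_hit = s.isspace()
    print(f"{s!r:>10}  regex={regex_hit}  isspace={isspace_hit}")
    assert regex_hit == isspace_hit  # holds for these ASCII examples
```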
@@ -219,6 +219,7 @@ def get_ollama_response( # noqa: PLR0915

     stream = optional_params.pop("stream", False)
     format = optional_params.pop("format", None)
+    keep_alive = optional_params.pop("keep_alive", None)
     function_name = optional_params.pop("function_name", None)
     tools = optional_params.pop("tools", None)

@@ -256,6 +257,8 @@ def get_ollama_response( # noqa: PLR0915
         data["format"] = format
     if tools is not None:
         data["tools"] = tools
+    if keep_alive is not None:
+        data["keep_alive"] = keep_alive
     ## LOGGING
     logging_obj.pre_call(
         input=None,
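Taken together, the two hunks above thread a caller-supplied keep_alive straight into the JSON body posted to Ollama. A rough sketch of the resulting payload shape, assuming the remaining optional params land in the usual options dict (exact fields depend on the rest of get_ollama_response):

```python
# Illustrative only: approximate request body once the new branch runs.
data = {
    "model": "llama3.1",
    "prompt": "Hello!",
    "options": {"temperature": 0.7},  # remaining optional_params (assumed)
    "stream": False,
    "keep_alive": "8m",  # only present when the caller supplied keep_alive
}
```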
@@ -32,7 +32,7 @@ class TogetherAIConfig(OpenAIGPTConfig):

         optional_params = super().get_supported_openai_params(model)
         if supports_function_calling is not True:
-            verbose_logger.warning(
+            verbose_logger.debug(
                 "Only some together models support function calling/response_format. Docs - https://docs.together.ai/docs/function-calling"
             )
             optional_params.remove("tools")
@@ -945,6 +945,20 @@
         "supports_vision": false,
         "supports_prompt_caching": true
     },
+    "azure/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
     "azure/o1-preview": {
         "max_tokens": 32768,
         "max_input_tokens": 128000,
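The rates in the new entry translate to $15 per million input tokens, $60 per million output tokens, and $7.50 per million cache-read input tokens. A back-of-the-envelope check using only those numbers (the token counts are invented for illustration, and cached tokens are assumed to be billed at the cache-read rate instead of the normal input rate):

```python
# Pricing taken from the "azure/o1" entry added above.
input_cost_per_token = 0.000015          # $15 per 1M input tokens
output_cost_per_token = 0.000060         # $60 per 1M output tokens
cache_read_input_token_cost = 0.0000075  # $7.50 per 1M cached input tokens

# Hypothetical request: 120k prompt tokens (half served from the prompt cache),
# 4k completion tokens.
prompt_tokens, cached_tokens, completion_tokens = 120_000, 60_000, 4_000

cost = (
    (prompt_tokens - cached_tokens) * input_cost_per_token
    + cached_tokens * cache_read_input_token_cost
    + completion_tokens * output_cost_per_token
)
print(f"${cost:.2f}")  # $1.59
```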
@@ -1653,7 +1653,15 @@ class ProxyConfig:

         Do this, to avoid mutating the config state outside of allowed methods
         """
-        return copy.deepcopy(self.config)
+        try:
+            return copy.deepcopy(self.config)
+        except Exception as e:
+            verbose_proxy_logger.debug(
+                "ProxyConfig:get_config_state(): Error returning copy of config state. self.config={}\nError: {}".format(
+                    self.config, e
+                )
+            )
+            return {}

     async def load_config( # noqa: PLR0915
         self, router: Optional[litellm.Router], config_file_path: str
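The try/except above exists because deepcopy blows up on config values that hold a threading lock, which some callback objects do; the new test further down reproduces exactly this. A standalone sketch of the failure mode:

```python
import copy
import threading

config = {"callback_list": [{"lock": threading.RLock(), "name": "test_callback"}]}

try:
    copy.deepcopy(config)
except TypeError as e:
    print(f"deepcopy failed: {e}")  # e.g. "cannot pickle '_thread.RLock' object"
```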
@@ -945,6 +945,20 @@
         "supports_vision": false,
         "supports_prompt_caching": true
     },
+    "azure/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
     "azure/o1-preview": {
         "max_tokens": 32768,
         "max_input_tokens": 128000,
@@ -1107,6 +1107,29 @@ def test_proxy_config_state_post_init_callback_call():
     assert config["litellm_settings"]["default_team_settings"][0]["team_id"] == "test"


+def test_proxy_config_state_get_config_state_error():
+    """
+    Ensures that get_config_state does not raise an error when the config is not a valid dictionary
+    """
+    from litellm.proxy.proxy_server import ProxyConfig
+    import threading
+
+    test_config = {
+        "callback_list": [
+            {
+                "lock": threading.RLock(), # This will cause the deep copy to fail
+                "name": "test_callback",
+            }
+        ],
+        "model_list": ["gpt-4", "claude-3"],
+    }
+
+    pc = ProxyConfig()
+    pc.config = test_config
+    config = pc.get_config_state()
+    assert config == {}
+
+
 @pytest.mark.parametrize(
     "associated_budget_table, expected_user_api_key_auth_key, expected_user_api_key_auth_value",
     [