Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-27 11:43:54 +00:00
add azure o1 pricing (#7715)
* build(model_prices_and_context_window.json): add azure o1 pricing

  Closes https://github.com/BerriAI/litellm/issues/7712

* refactor: replace regex with string method for whitespace check in stop-sequences handling (#7713)

* Allows overriding keep_alive time in ollama (#7079)

  * Allows overriding keep_alive time in ollama
  * Also adds to ollama_chat
  * Adds some info on the docs about this parameter

* fix: together ai warning (#7688)

  Co-authored-by: Carl Senze <carl.senze@aleph-alpha.com>

* fix(proxy_server.py): handle config containing thread locked objects when using get_config_state

* fix(proxy_server.py): add exception to debug

* build(model_prices_and_context_window.json): update 'supports_vision' for azure o1

---------

Co-authored-by: Wolfram Ravenwolf <52386626+WolframRavenwolf@users.noreply.github.com>
Co-authored-by: Regis David Souza Mesquita <github@rdsm.dev>
Co-authored-by: Carl <45709281+capsenz@users.noreply.github.com>
Co-authored-by: Carl Senze <carl.senze@aleph-alpha.com>
Parent: f778865836
Commit: 01e2e26bd1
8 changed files with 67 additions and 5 deletions
````diff
@@ -147,6 +147,7 @@ model_list:
   - model_name: "llama3.1"
     litellm_params:
       model: "ollama_chat/llama3.1"
+      keep_alive: "8m" # Optional: Overrides default keep_alive, use -1 for Forever
     model_info:
       supports_function_calling: true
 ```
````
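For context, a minimal sketch of calling the model entry above through a running LiteLLM proxy; the base_url, API key, and running server are assumptions for illustration, not part of this commit:

```python
# Sketch (not from this commit): call the "llama3.1" entry defined in the proxy
# config above. base_url and api_key are assumptions; adjust to your deployment.
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

response = client.chat.completions.create(
    model="llama3.1",  # the model_name from the config entry above
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)
```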
```diff
@@ -1,5 +1,4 @@
 import json
-import re
 import time
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast
 
@@ -258,13 +257,13 @@ class AnthropicConfig(BaseConfig):
     ) -> Optional[List[str]]:
         new_stop: Optional[List[str]] = None
         if isinstance(stop, str):
-            if re.match(r'^\s+$', stop) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
+            if stop.isspace() and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
                 return new_stop
             new_stop = [stop]
         elif isinstance(stop, list):
             new_v = []
             for v in stop:
-                if re.match(r'^\s+$', v) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
+                if v.isspace() and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences
                     continue
                 new_v.append(v)
             if len(new_v) > 0:
```
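A quick sanity check on the refactor (illustrative, not from the commit): for typical stop-sequence values, `str.isspace()` behaves like the old whitespace-only regex, while letting the `re` import be dropped.

```python
# Illustrative check: str.isspace() agrees with the old whitespace-only regex
# on typical stop-sequence inputs.
import re

for s in ["   ", "\t\n", "Human:", ""]:
    old_check = bool(re.match(r"^\s+$", s))  # one or more whitespace chars, nothing else
    new_check = s.isspace()                  # non-empty and all whitespace
    assert old_check == new_check, s
print("both checks agree on these inputs")
```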
```diff
@@ -219,6 +219,7 @@ def get_ollama_response(  # noqa: PLR0915
 
     stream = optional_params.pop("stream", False)
     format = optional_params.pop("format", None)
+    keep_alive = optional_params.pop("keep_alive", None)
    function_name = optional_params.pop("function_name", None)
    tools = optional_params.pop("tools", None)
 
@@ -256,6 +257,8 @@ def get_ollama_response(  # noqa: PLR0915
         data["format"] = format
     if tools is not None:
         data["tools"] = tools
+    if keep_alive is not None:
+        data["keep_alive"] = keep_alive
     ## LOGGING
     logging_obj.pre_call(
         input=None,
```
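A sketch of how the new parameter would be used from the SDK side, assuming a local Ollama server with the model pulled (the commit message notes the same handling is added to ollama_chat):

```python
# Sketch (assumes a local Ollama server). Extra kwargs such as keep_alive are
# forwarded as provider params and end up in data["keep_alive"] above.
import litellm

response = litellm.completion(
    model="ollama/llama3.1",
    messages=[{"role": "user", "content": "Hello"}],
    keep_alive="8m",  # keep the model loaded for 8 minutes; -1 keeps it loaded forever
)
print(response.choices[0].message.content)
```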
```diff
@@ -32,7 +32,7 @@ class TogetherAIConfig(OpenAIGPTConfig):
 
         optional_params = super().get_supported_openai_params(model)
         if supports_function_calling is not True:
-            verbose_logger.warning(
+            verbose_logger.debug(
                 "Only some together models support function calling/response_format. Docs - https://docs.together.ai/docs/function-calling"
             )
             optional_params.remove("tools")
```
```diff
@@ -945,6 +945,20 @@
         "supports_vision": false,
         "supports_prompt_caching": true
     },
+    "azure/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
     "azure/o1-preview": {
         "max_tokens": 32768,
         "max_input_tokens": 128000,
```
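To show what these per-token prices mean in practice, a small sketch of a cost estimate using the numbers from the azure/o1 entry above; the helper function is illustrative only and is not litellm's cost API.

```python
# Illustrative only: translate the azure/o1 per-token prices above into a request cost.
input_cost_per_token = 0.000015          # $15 per 1M input tokens
output_cost_per_token = 0.000060         # $60 per 1M output tokens
cache_read_input_token_cost = 0.0000075  # $7.50 per 1M cached input tokens

def estimate_cost(prompt_tokens: int, completion_tokens: int, cached_tokens: int = 0) -> float:
    """Rough cost estimate for a single azure/o1 call (hypothetical helper)."""
    uncached = prompt_tokens - cached_tokens
    return (
        uncached * input_cost_per_token
        + cached_tokens * cache_read_input_token_cost
        + completion_tokens * output_cost_per_token
    )

# e.g. 10k prompt tokens (half served from cache) and 2k completion tokens
print(f"${estimate_cost(10_000, 2_000, cached_tokens=5_000):.4f}")  # ≈ $0.2325
```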
```diff
@@ -1653,7 +1653,15 @@ class ProxyConfig:
 
         Do this, to avoid mutating the config state outside of allowed methods
         """
-        return copy.deepcopy(self.config)
+        try:
+            return copy.deepcopy(self.config)
+        except Exception as e:
+            verbose_proxy_logger.debug(
+                "ProxyConfig:get_config_state(): Error returning copy of config state. self.config={}\nError: {}".format(
+                    self.config, e
+                )
+            )
+            return {}
 
     async def load_config(  # noqa: PLR0915
         self, router: Optional[litellm.Router], config_file_path: str
```
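Why the try/except is needed: `copy.deepcopy` raises when the config holds non-picklable objects such as thread locks, which is exactly the case the new test below exercises. A standalone sketch of the failure mode:

```python
# Sketch of the failure the fix guards against: deepcopy cannot handle thread locks.
import copy
import threading

config = {"callback_list": [{"lock": threading.RLock(), "name": "test_callback"}]}

try:
    copy.deepcopy(config)
except Exception as e:
    print(f"deepcopy failed: {e}")  # e.g. "cannot pickle '_thread.RLock' object"
```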
```diff
@@ -945,6 +945,20 @@
         "supports_vision": false,
         "supports_prompt_caching": true
     },
+    "azure/o1": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true
+    },
     "azure/o1-preview": {
         "max_tokens": 32768,
         "max_input_tokens": 128000,
```
```diff
@@ -1107,6 +1107,29 @@ def test_proxy_config_state_post_init_callback_call():
     assert config["litellm_settings"]["default_team_settings"][0]["team_id"] == "test"
 
 
+def test_proxy_config_state_get_config_state_error():
+    """
+    Ensures that get_config_state does not raise an error when the config is not a valid dictionary
+    """
+    from litellm.proxy.proxy_server import ProxyConfig
+    import threading
+
+    test_config = {
+        "callback_list": [
+            {
+                "lock": threading.RLock(),  # This will cause the deep copy to fail
+                "name": "test_callback",
+            }
+        ],
+        "model_list": ["gpt-4", "claude-3"],
+    }
+
+    pc = ProxyConfig()
+    pc.config = test_config
+    config = pc.get_config_state()
+    assert config == {}
+
+
 @pytest.mark.parametrize(
     "associated_budget_table, expected_user_api_key_auth_key, expected_user_api_key_auth_value",
     [
```