Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)

Commit b6e0bf27b8: Merge branch 'main' into litellm_aws_kms_support

9 changed files with 104 additions and 7 deletions
@@ -319,22 +319,35 @@ workflows:
   version: 2
   build_and_test:
     jobs:
-      - local_testing:
+      - hold:
+          type: approval
           filters:
             branches:
-              only:
+              ignore:
+                - main
+                - /litellm_.*/
+      - local_testing:
+          requires:
+            - hold
+          filters:
+            branches:
+              ignore:
                 - main
                 - /litellm_.*/
       - build_and_test:
+          requires:
+            - hold
           filters:
             branches:
-              only:
+              ignore:
                 - main
                 - /litellm_.*/
       - installing_litellm_on_python:
+          requires:
+            - hold
           filters:
             branches:
-              only:
+              ignore:
                 - main
                 - /litellm_.*/
       - publish_to_pypi:
@@ -38,7 +38,7 @@ class MyCustomHandler(CustomLogger):
         print(f"On Async Success")
 
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
-        print(f"On Async Success")
+        print(f"On Async Failure")
 
 customHandler = MyCustomHandler()
 
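As a side note, a minimal runnable sketch of how such a handler is wired in, assuming litellm's documented CustomLogger interface and callback list (the registration and the mocked call are illustrative, not part of this diff):

import asyncio
import litellm
from litellm.integrations.custom_logger import CustomLogger

class MyCustomHandler(CustomLogger):
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Async Success")

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Async Failure")  # the line fixed above

# register the handler so litellm invokes the async hooks
litellm.callbacks = [MyCustomHandler()]

async def main():
    # a mocked call is enough to fire the async success hook
    await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        mock_response="Hello world",
    )

asyncio.run(main())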
@@ -0,0 +1,3 @@
+llmcord.py lets you and your friends chat with LLMs directly in your Discord server. It works with practically any LLM, remote or locally hosted.
+
+Github: https://github.com/jakobdylanc/discord-llm-chatbot
@@ -4050,6 +4050,12 @@ class Router:
         for idx in reversed(invalid_model_indices):
             _returned_deployments.pop(idx)
 
+        ## ORDER FILTERING ## -> if user set 'order' in deployments, return deployments with lowest order (e.g. order=1 > order=2)
+        if len(_returned_deployments) > 0:
+            _returned_deployments = litellm.utils._get_order_filtered_deployments(
+                _returned_deployments
+            )
+
         return _returned_deployments
 
     def _common_checks_available_deployment(
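For illustration, a minimal sketch of the router configuration this enables, mirroring the model_list shape used by the new test further down (the parameter values here are hypothetical):

from litellm import Router

# two deployments for one model group; pre-call checks keep only the
# lowest "order" among healthy deployments
router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-4o", "order": 1},  # preferred
        },
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-4o", "order": 2},  # fallback
        },
    ],
    enable_pre_call_checks=True,  # order filtering runs inside _pre_call_checks
)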
@@ -2539,6 +2539,7 @@ def test_replicate_custom_prompt_dict():
             }
         ],
         mock_response="Hello world",
+        mock_response="hello world",
         repetition_penalty=0.1,
         num_retries=3,
     )
@@ -38,6 +38,48 @@ def test_router_sensitive_keys():
     assert "special-key" not in str(e)
 
 
+def test_router_order():
+    """
+    Asserts for 2 models in a model group, model with order=1 always called first
+    """
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_key": os.getenv("OPENAI_API_KEY"),
+                    "mock_response": "Hello world",
+                    "order": 1,
+                },
+                "model_info": {"id": "1"},
+            },
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_key": "bad-key",
+                    "mock_response": Exception("this is a bad key"),
+                    "order": 2,
+                },
+                "model_info": {"id": "2"},
+            },
+        ],
+        num_retries=0,
+        allowed_fails=0,
+        enable_pre_call_checks=True,
+    )
+
+    for _ in range(100):
+        response = router.completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+
+        assert isinstance(response, litellm.ModelResponse)
+        assert response._hidden_params["model_id"] == "1"
+
+
 @pytest.mark.parametrize("num_retries", [None, 2])
 @pytest.mark.parametrize("max_retries", [None, 4])
 def test_router_num_retries_init(num_retries, max_retries):
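One behavior the test above leans on: mock_response can be an Exception instance, in which case the mocked call raises instead of returning. A standalone sketch of that, assumed from the test's usage rather than an authoritative API reference:

import litellm

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        mock_response=Exception("this is a bad key"),  # simulate a failing deployment
    )
except Exception as e:
    print(e)  # the router's retry/fallback logic sees this as a real failure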
@@ -186,3 +186,13 @@ def test_load_test_token_counter(model):
     total_time = end_time - start_time
     print("model={}, total test time={}".format(model, total_time))
     assert total_time < 10, f"Total encoding time > 10s, {total_time}"
+
+def test_openai_token_with_image_and_text():
+    model = "gpt-4o"
+    full_request = {'model': 'gpt-4o', 'tools': [{'type': 'function', 'function': {'name': 'json', 'parameters': {'type': 'object', 'required': ['clause'], 'properties': {'clause': {'type': 'string'}}}, 'description': 'Respond with a JSON object.'}}], 'logprobs': False, 'messages': [{'role': 'user', 'content': [{'text': '\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.', 'type': 'text'}]}], 'tool_choice': {'type': 'function', 'function': {'name': 'json'}}, 'exclude_models': [], 'disable_fallback': False, 'exclude_providers': []}
+    messages = full_request.get("messages", [])
+
+    token_count = token_counter(model=model, messages=messages)
+    print(token_count)
+
+test_openai_token_with_image_and_text()
@@ -4060,6 +4060,7 @@ def openai_token_counter(
             for c in value:
                 if c["type"] == "text":
                     text += c["text"]
+                    num_tokens += len(encoding.encode(c["text"], disallowed_special=()))
                 elif c["type"] == "image_url":
                     if isinstance(c["image_url"], dict):
                         image_url_dict = c["image_url"]
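The added line means text parts of list-form message content are now counted, not just accumulated. A standalone sketch of the same counting rule using tiktoken directly (treating o200k_base as the gpt-4o encoding is an assumption for illustration):

import tiktoken

encoding = tiktoken.get_encoding("o200k_base")  # assumed encoding for gpt-4o
content = [{"type": "text", "text": "Just some long text, long long text"}]

num_tokens = 0
for c in content:
    if c["type"] == "text":
        # disallowed_special=() encodes special-token strings as plain text
        num_tokens += len(encoding.encode(c["text"], disallowed_special=()))
print(num_tokens)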
@@ -6196,6 +6197,27 @@ def calculate_max_parallel_requests(
     return None
 
 
+def _get_order_filtered_deployments(healthy_deployments: List[Dict]) -> List:
+    min_order = min(
+        (
+            deployment["litellm_params"]["order"]
+            for deployment in healthy_deployments
+            if "order" in deployment["litellm_params"]
+        ),
+        default=None,
+    )
+
+    if min_order is not None:
+        filtered_deployments = [
+            deployment
+            for deployment in healthy_deployments
+            if deployment["litellm_params"].get("order") == min_order
+        ]
+
+        return filtered_deployments
+    return healthy_deployments
+
+
 def _get_model_region(
     custom_llm_provider: str, litellm_params: LiteLLM_Params
 ) -> Optional[str]:
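A quick sketch of the helper's behavior on hypothetical deployment data, assuming the function above is in scope:

deployments = [
    {"litellm_params": {"model": "gpt-4o", "order": 2}},
    {"litellm_params": {"model": "gpt-4o", "order": 1}},
    {"litellm_params": {"model": "gpt-4o"}},  # no "order" set
]

# only the lowest-order deployment survives
print(_get_order_filtered_deployments(deployments))
# -> [{'litellm_params': {'model': 'gpt-4o', 'order': 1}}]

# if no deployment sets "order", min() falls back to default=None and the
# list is returned unchanged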
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.40.4"
+version = "1.40.5"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.40.4"
+version = "1.40.5"
 version_files = [
     "pyproject.toml:^version"
 ]