diff --git a/.circleci/config.yml b/.circleci/config.yml
index 18bfeedb52..f806a4546a 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -319,22 +319,35 @@ workflows:
   version: 2
   build_and_test:
     jobs:
-      - local_testing:
+      - hold:
+          type: approval
           filters:
             branches:
-              only:
+              ignore:
+                - main
+                - /litellm_.*/
+      - local_testing:
+          requires:
+            - hold
+          filters:
+            branches:
+              ignore:
                 - main
                 - /litellm_.*/
       - build_and_test:
+          requires:
+            - hold
           filters:
             branches:
-              only:
+              ignore:
                 - main
                 - /litellm_.*/
       - installing_litellm_on_python:
+          requires:
+            - hold
           filters:
             branches:
-              only:
+              ignore:
                 - main
                 - /litellm_.*/
       - publish_to_pypi:
diff --git a/docs/my-website/docs/observability/custom_callback.md b/docs/my-website/docs/observability/custom_callback.md
index 3168222273..373b4a96c0 100644
--- a/docs/my-website/docs/observability/custom_callback.md
+++ b/docs/my-website/docs/observability/custom_callback.md
@@ -38,7 +38,7 @@ class MyCustomHandler(CustomLogger):
         print(f"On Async Success")

     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
-        print(f"On Async Success")
+        print(f"On Async Failure")


 customHandler = MyCustomHandler()
diff --git a/docs/my-website/docs/projects/llmcord.py (Discord LLM Chatbot).md b/docs/my-website/docs/projects/llmcord.py (Discord LLM Chatbot).md
new file mode 100644
index 0000000000..f8acb9383c
--- /dev/null
+++ b/docs/my-website/docs/projects/llmcord.py (Discord LLM Chatbot).md
@@ -0,0 +1,3 @@
+llmcord.py lets you and your friends chat with LLMs directly in your Discord server. It works with practically any LLM, remote or locally hosted.
+
+GitHub: https://github.com/jakobdylanc/discord-llm-chatbot
diff --git a/litellm/router.py b/litellm/router.py
index 0015af4dbd..6ef75b76ad 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -4050,6 +4050,12 @@ class Router:
         for idx in reversed(invalid_model_indices):
             _returned_deployments.pop(idx)

+        ## ORDER FILTERING ## -> if user set 'order' in deployments, return deployments with the lowest order (e.g. order=1 takes precedence over order=2)
+        if len(_returned_deployments) > 0:
+            _returned_deployments = litellm.utils._get_order_filtered_deployments(
+                _returned_deployments
+            )
+
         return _returned_deployments

     def _common_checks_available_deployment(
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 9d1fa33b1e..81c838a1c4 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -2539,6 +2539,6 @@ def test_replicate_custom_prompt_dict():
                 }
             ],
-            mock_response="Hello world",
+            mock_response="hello world",
             repetition_penalty=0.1,
             num_retries=3,
         )
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index d76dec25c7..02bf9a16b8 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -38,6 +38,48 @@ def test_router_sensitive_keys():
     assert "special-key" not in str(e)


+def test_router_order():
+    """
+    Asserts that, for 2 models in a model group, the model with order=1 is always called first.
+    """
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_key": os.getenv("OPENAI_API_KEY"),
+                    "mock_response": "Hello world",
+                    "order": 1,
+                },
+                "model_info": {"id": "1"},
+            },
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_key": "bad-key",
+                    "mock_response": Exception("this is a bad key"),
+                    "order": 2,
+                },
+                "model_info": {"id": "2"},
+            },
+        ],
+        num_retries=0,
+        allowed_fails=0,
+        enable_pre_call_checks=True,
+    )
+
+    for _ in range(100):
+        response = router.completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+
+        assert isinstance(response, litellm.ModelResponse)
+        assert response._hidden_params["model_id"] == "1"
+
+
 @pytest.mark.parametrize("num_retries", [None, 2])
 @pytest.mark.parametrize("max_retries", [None, 4])
 def test_router_num_retries_init(num_retries, max_retries):
diff --git a/litellm/tests/test_token_counter.py b/litellm/tests/test_token_counter.py
index 194dfb8af3..a6f7cd7616 100644
--- a/litellm/tests/test_token_counter.py
+++ b/litellm/tests/test_token_counter.py
@@ -186,3 +186,13 @@ def test_load_test_token_counter(model):
     total_time = end_time - start_time
     print("model={}, total test time={}".format(model, total_time))
     assert total_time < 10, f"Total encoding time > 10s, {total_time}"
+
+def test_openai_token_with_image_and_text():
+    model = "gpt-4o"
+    full_request = {'model': 'gpt-4o', 'tools': [{'type': 'function', 'function': {'name': 'json', 'parameters': {'type': 'object', 'required': ['clause'], 'properties': {'clause': {'type': 'string'}}}, 'description': 'Respond with a JSON object.'}}], 'logprobs': False, 'messages': [{'role': 'user', 'content': [{'text': '\n Just some long text, long long text, and you know it will be longer than 7 tokens definitely.', 'type': 'text'}]}], 'tool_choice': {'type': 'function', 'function': {'name': 'json'}}, 'exclude_models': [], 'disable_fallback': False, 'exclude_providers': []}
+    messages = full_request.get("messages", [])
+
+    token_count = token_counter(model=model, messages=messages)
+    print(token_count)
+
+test_openai_token_with_image_and_text()
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 6ce41ad84a..1229ee8240 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4060,6 +4060,7 @@ def openai_token_counter(
                 for c in value:
                     if c["type"] == "text":
                         text += c["text"]
+                        num_tokens += len(encoding.encode(c["text"], disallowed_special=()))
                     elif c["type"] == "image_url":
if isinstance(c["image_url"], dict): image_url_dict = c["image_url"] @@ -6196,6 +6197,27 @@ def calculate_max_parallel_requests( return None +def _get_order_filtered_deployments(healthy_deployments: List[Dict]) -> List: + min_order = min( + ( + deployment["litellm_params"]["order"] + for deployment in healthy_deployments + if "order" in deployment["litellm_params"] + ), + default=None, + ) + + if min_order is not None: + filtered_deployments = [ + deployment + for deployment in healthy_deployments + if deployment["litellm_params"].get("order") == min_order + ] + + return filtered_deployments + return healthy_deployments + + def _get_model_region( custom_llm_provider: str, litellm_params: LiteLLM_Params ) -> Optional[str]: diff --git a/pyproject.toml b/pyproject.toml index 648a8b41ac..c346640396 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.40.4" +version = "1.40.5" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.40.4" +version = "1.40.5" version_files = [ "pyproject.toml:^version" ]