Merge branch 'main' into litellm_aws_kms_support

Krish Dholakia · 2024-06-07 07:58:56 -07:00 · committed by GitHub
commit b6e0bf27b8
9 changed files with 104 additions and 7 deletions

View file

@@ -319,22 +319,35 @@ workflows:
   version: 2
   build_and_test:
     jobs:
-      - local_testing:
+      - hold:
+          type: approval
           filters:
             branches:
-              only:
+              ignore:
                 - main
                 - /litellm_.*/
+      - local_testing:
+          requires:
+            - hold
+          filters:
+            branches:
+              ignore:
+                - main
+                - /litellm_.*/
       - build_and_test:
+          requires:
+            - hold
           filters:
             branches:
-              only:
+              ignore:
                 - main
                 - /litellm_.*/
       - installing_litellm_on_python:
+          requires:
+            - hold
           filters:
             branches:
-              only:
+              ignore:
                 - main
                 - /litellm_.*/
       - publish_to_pypi:

View file

@@ -38,7 +38,7 @@ class MyCustomHandler(CustomLogger):
         print(f"On Async Success")

     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
-        print(f"On Async Success")
+        print(f"On Async Failure")

 customHandler = MyCustomHandler()
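For reference, here is a minimal sketch of how the corrected hook is typically wired up, assuming the CustomLogger import path and the litellm.callbacks registration used elsewhere in these docs:

import litellm
from litellm.integrations.custom_logger import CustomLogger


class MyCustomHandler(CustomLogger):
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # fires after a successful async completion call
        print("On Async Success")

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        # fires after a failed async completion call (the line fixed above)
        print("On Async Failure")


# register the handler so litellm invokes it for every request
litellm.callbacks = [MyCustomHandler()]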

View file

@@ -0,0 +1,3 @@
+llmcord.py lets you and your friends chat with LLMs directly in your Discord server. It works with practically any LLM, remote or locally hosted.
+
+Github: https://github.com/jakobdylanc/discord-llm-chatbot

View file

@@ -4050,6 +4050,12 @@ class Router:
         for idx in reversed(invalid_model_indices):
             _returned_deployments.pop(idx)

+        ## ORDER FILTERING ## -> if user set 'order' in deployments, return deployments with lowest order (e.g. order=1 > order=2)
+        if len(_returned_deployments) > 0:
+            _returned_deployments = litellm.utils._get_order_filtered_deployments(
+                _returned_deployments
+            )
+
         return _returned_deployments

     def _common_checks_available_deployment(
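Illustrative only: the helper called here is defined later in this diff (see the utils hunk below), and the deployment dict shape is assumed from the router tests. Its effect on a candidate list looks roughly like this:

import litellm

deployments = [
    {"model_info": {"id": "1"}, "litellm_params": {"model": "gpt-4o", "order": 1}},
    {"model_info": {"id": "2"}, "litellm_params": {"model": "gpt-4o", "order": 2}},
    {"model_info": {"id": "3"}, "litellm_params": {"model": "gpt-4o"}},  # no order set
]

# keep only the deployments that share the lowest 'order' value (order=1 here)
filtered = litellm.utils._get_order_filtered_deployments(deployments)
assert [d["model_info"]["id"] for d in filtered] == ["1"]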

View file

@@ -2539,6 +2539,7 @@ def test_replicate_custom_prompt_dict():
             }
         ],
         mock_response="Hello world",
+        mock_response="hello world",
         repetition_penalty=0.1,
         num_retries=3,
     )
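As background on the mock_response parameter exercised here: litellm short-circuits the provider call and returns a canned ModelResponse, which is why this test can run without hitting Replicate. A minimal sketch (the model name is hypothetical, used only for illustration):

import litellm

response = litellm.completion(
    model="replicate/meta/llama-2-7b-chat",  # hypothetical model name for illustration
    messages=[{"role": "user", "content": "what is yc? write 1 paragraph"}],
    mock_response="hello world",  # no network call; this string comes back as the content
)
print(response.choices[0].message.content)  # -> "hello world"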

View file

@@ -38,6 +38,48 @@ def test_router_sensitive_keys():
         assert "special-key" not in str(e)


+def test_router_order():
+    """
+    Asserts for 2 models in a model group, model with order=1 always called first
+    """
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_key": os.getenv("OPENAI_API_KEY"),
+                    "mock_response": "Hello world",
+                    "order": 1,
+                },
+                "model_info": {"id": "1"},
+            },
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_key": "bad-key",
+                    "mock_response": Exception("this is a bad key"),
+                    "order": 2,
+                },
+                "model_info": {"id": "2"},
+            },
+        ],
+        num_retries=0,
+        allowed_fails=0,
+        enable_pre_call_checks=True,
+    )
+
+    for _ in range(100):
+        response = router.completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+
+        assert isinstance(response, litellm.ModelResponse)
+        assert response._hidden_params["model_id"] == "1"
+
+
 @pytest.mark.parametrize("num_retries", [None, 2])
 @pytest.mark.parametrize("max_retries", [None, 4])
 def test_router_num_retries_init(num_retries, max_retries):

View file

@@ -186,3 +186,13 @@ def test_load_test_token_counter(model):
     total_time = end_time - start_time
     print("model={}, total test time={}".format(model, total_time))
     assert total_time < 10, f"Total encoding time > 10s, {total_time}"
+
+
+def test_openai_token_with_image_and_text():
+    model = "gpt-4o"
+    full_request = {'model': 'gpt-4o', 'tools': [{'type': 'function', 'function': {'name': 'json', 'parameters': {'type': 'object', 'required': ['clause'], 'properties': {'clause': {'type': 'string'}}}, 'description': 'Respond with a JSON object.'}}], 'logprobs': False, 'messages': [{'role': 'user', 'content': [{'text': '\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.', 'type': 'text'}]}], 'tool_choice': {'type': 'function', 'function': {'name': 'json'}}, 'exclude_models': [], 'disable_fallback': False, 'exclude_providers': []}
+    messages = full_request.get("messages", [])
+    token_count = token_counter(model=model, messages=messages)
+    print(token_count)
+
+
+test_openai_token_with_image_and_text()
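For context, token_counter is the public helper under test. A small sketch of calling it directly with mixed text plus image_url content (the content shapes mirror the request above; the URL is a placeholder and exact token totals depend on the model's tokenizer):

from litellm import token_counter

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this picture in one sentence."},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png", "detail": "low"}},
        ],
    }
]

# counts the text tokens and adds a per-image cost based on the detail level
print(token_counter(model="gpt-4o", messages=messages))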

View file

@@ -4060,6 +4060,7 @@ def openai_token_counter(
                 for c in value:
                     if c["type"] == "text":
                         text += c["text"]
+                        num_tokens += len(encoding.encode(c["text"], disallowed_special=()))
                     elif c["type"] == "image_url":
                         if isinstance(c["image_url"], dict):
                             image_url_dict = c["image_url"]
@@ -6196,6 +6197,27 @@ def calculate_max_parallel_requests(
     return None


+def _get_order_filtered_deployments(healthy_deployments: List[Dict]) -> List:
+    min_order = min(
+        (
+            deployment["litellm_params"]["order"]
+            for deployment in healthy_deployments
+            if "order" in deployment["litellm_params"]
+        ),
+        default=None,
+    )
+
+    if min_order is not None:
+        filtered_deployments = [
+            deployment
+            for deployment in healthy_deployments
+            if deployment["litellm_params"].get("order") == min_order
+        ]
+
+        return filtered_deployments
+    return healthy_deployments
+
+
 def _get_model_region(
     custom_llm_provider: str, litellm_params: LiteLLM_Params
 ) -> Optional[str]:
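The disallowed_special=() argument added in the first hunk lets user text that happens to contain special-token markers be counted instead of raising. A tiktoken-only sketch of that behavior, assuming a recent tiktoken with the cl100k_base encoding available:

import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")
text = "user supplied text containing <|endoftext|> markers"

# default behavior raises ValueError because <|endoftext|> is a special token
# encoding.encode(text)

# passing disallowed_special=() treats it as plain text, matching the change above
num_tokens = len(encoding.encode(text, disallowed_special=()))
print(num_tokens)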

View file

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.40.4"
+version = "1.40.5"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.40.4"
+version = "1.40.5"
 version_files = [
     "pyproject.toml:^version"
 ]