diff --git a/.circleci/config.yml b/.circleci/config.yml
index a23192b49..edbe59113 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -3,6 +3,9 @@ jobs:
   local_testing:
     docker:
       - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
     working_directory: ~/project

     steps:
@@ -114,6 +117,9 @@ jobs:
   ui_endpoint_testing:
     docker:
       - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
     working_directory: ~/project

     steps:
@@ -152,6 +158,9 @@ jobs:
   litellm_router_testing: # Runs all tests with the "router" keyword
     docker:
       - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
     working_directory: ~/project

     steps:
@@ -179,6 +188,9 @@ jobs:
   litellm_assistants_api_testing: # Runs all tests with the "assistants" keyword
     docker:
       - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
     working_directory: ~/project

     steps:
@@ -206,6 +218,9 @@ jobs:
   load_testing:
     docker:
       - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
     working_directory: ~/project

     steps:
@@ -233,6 +248,9 @@ jobs:
   llm_translation_testing:
     docker:
       - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
     working_directory: ~/project

     steps:
diff --git a/litellm/caching.py b/litellm/caching.py
index 9bb03b99a..b2632be67 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -123,7 +123,7 @@ class InMemoryCache(BaseCache):
     async def async_set_cache(self, key, value, **kwargs):
         self.set_cache(key=key, value=value, **kwargs)

-    async def async_set_cache_pipeline(self, cache_list, ttl=None):
+    async def async_set_cache_pipeline(self, cache_list, ttl=None, **kwargs):
         for cache_key, cache_value in cache_list:
             if ttl is not None:
                 self.set_cache(key=cache_key, value=cache_value, ttl=ttl)
@@ -2038,7 +2038,7 @@ class DualCache(BaseCache):

             if self.redis_cache is not None and local_only == False:
                 await self.redis_cache.async_set_cache_pipeline(
-                    cache_list=cache_list, ttl=kwargs.get("ttl", None), **kwargs
+                    cache_list=cache_list, ttl=kwargs.pop("ttl", None), **kwargs
                 )
         except Exception as e:
             verbose_logger.exception(
diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py
index 7eaf515f2..d75440337 100644
--- a/litellm/proxy/hooks/parallel_request_limiter.py
+++ b/litellm/proxy/hooks/parallel_request_limiter.py
@@ -327,8 +327,13 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 user_api_key_dict=user_api_key_dict,
             )
             # get user tpm/rpm limits
-            if _user_id_rate_limits is not None and isinstance(
-                _user_id_rate_limits, dict
+            if (
+                _user_id_rate_limits is not None
+                and isinstance(_user_id_rate_limits, dict)
+                and (
+                    _user_id_rate_limits.get("tpm_limit", None) is not None
+                    or _user_id_rate_limits.get("rpm_limit", None) is not None
+                )
             ):
                 user_tpm_limit = _user_id_rate_limits.get("tpm_limit", None)
                 user_rpm_limit = _user_id_rate_limits.get("rpm_limit", None)
@@ -472,6 +477,8 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
             # Update usage - API Key
             # ------------
+            values_to_update_in_cache = []
+
             if user_api_key is not None:
                 request_count_api_key = (
                     f"{user_api_key}::{precise_minute}::request_count"
                 )
@@ -495,12 +502,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )  # store in cache for 1 min.
+                values_to_update_in_cache.append((request_count_api_key, new_val))

             # ------------
             # Update usage - model group + API Key
@@ -536,12 +538,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )
+                values_to_update_in_cache.append((request_count_api_key, new_val))

             # ------------
             # Update usage - User
@@ -574,12 +571,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )  # store in cache for 1 min.
+                values_to_update_in_cache.append((request_count_api_key, new_val))

             # ------------
             # Update usage - Team
@@ -612,12 +604,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )  # store in cache for 1 min.
+                values_to_update_in_cache.append((request_count_api_key, new_val))

             # ------------
             # Update usage - End User
@@ -650,13 +637,13 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )  # store in cache for 1 min.
+                values_to_update_in_cache.append((request_count_api_key, new_val))
+            await self.internal_usage_cache.async_batch_set_cache(
+                cache_list=values_to_update_in_cache,
+                ttl=60,
+                litellm_parent_otel_span=litellm_parent_otel_span,
+            )

         except Exception as e:
             self.print_verbose(e)  # noqa

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index dda6f8274..c8ca606a9 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,5 +1,5 @@
 model_list:
-  - model_name: gpt-3.5-turbo
+  - model_name: db-openai-endpoint
     litellm_params:
       model: openai/gpt-3.5-turbo
       api_key: fake-key
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 18361bca1..8c61783a2 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -242,6 +242,20 @@ class InternalUsageCache:
             **kwargs,
         )

+    async def async_batch_set_cache(
+        self,
+        cache_list: List,
+        litellm_parent_otel_span: Union[Span, None],
+        local_only: bool = False,
+        **kwargs,
+    ) -> None:
+        return await self.dual_cache.async_batch_set_cache(
+            cache_list=cache_list,
+            local_only=local_only,
+            litellm_parent_otel_span=litellm_parent_otel_span,
+            **kwargs,
+        )
+
     async def async_increment_cache(
         self,
         key,
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index f49fb6254..a51dcc693 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -24,7 +24,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedding
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.prompt_templates.factory import anthropic_messages_pt

-# litellm.num_retries = 3
+# litellm.num_retries=3
 litellm.cache = None
 litellm.success_callback = []

diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py
index 732772e76..143784e88 100644
--- a/litellm/tests/test_embedding.py
+++ b/litellm/tests/test_embedding.py
@@ -316,6 +316,7 @@ def test_openai_azure_embedding():
     os.environ.get("CIRCLE_OIDC_TOKEN") is None,
     reason="Cannot run without being in CircleCI Runner",
 )
+@pytest.mark.skip(reason="Azure east us 2 has a temp outage")
 def test_openai_azure_embedding_with_oidc_and_cf():
     # TODO: Switch to our own Azure account, currently using ai.moda's account
     os.environ["AZURE_TENANT_ID"] = "17c0a27a-1246-4aa1-a3b6-d294e80e783c"