From e3ca2789df2d7cccc3e97a474d8927c12c25b5e6 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Thu, 18 Jul 2024 17:41:31 -0700
Subject: [PATCH] fix(bedrock_httpx.py): cache response stream shape

Fixes https://github.com/BerriAI/litellm/issues/4774
---
 litellm/llms/bedrock_httpx.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py
index 874373d87e..b41dd542b9 100644
--- a/litellm/llms/bedrock_httpx.py
+++ b/litellm/llms/bedrock_httpx.py
@@ -77,7 +77,9 @@ BEDROCK_CONVERSE_MODELS = [
     "anthropic.claude-instant-v1",
 ]
 
+
 iam_cache = DualCache()
+_response_stream_shape_cache = None
 
 
 class AmazonCohereChatConfig:
@@ -1991,13 +1993,18 @@ class BedrockConverseLLM(BaseLLM):
 
 
 def get_response_stream_shape():
-    from botocore.loaders import Loader
-    from botocore.model import ServiceModel
+    global _response_stream_shape_cache
+    if _response_stream_shape_cache is None:
 
-    loader = Loader()
-    bedrock_service_dict = loader.load_service_model("bedrock-runtime", "service-2")
-    bedrock_service_model = ServiceModel(bedrock_service_dict)
-    return bedrock_service_model.shape_for("ResponseStream")
+        from botocore.loaders import Loader
+        from botocore.model import ServiceModel
+
+        loader = Loader()
+        bedrock_service_dict = loader.load_service_model("bedrock-runtime", "service-2")
+        bedrock_service_model = ServiceModel(bedrock_service_dict)
+        _response_stream_shape_cache = bedrock_service_model.shape_for("ResponseStream")
+
+    return _response_stream_shape_cache
 
 
 class AWSEventStreamDecoder: