fix(bedrock_httpx.py): cache response stream shape

Fixes https://github.com/BerriAI/litellm/issues/4774
This commit is contained in:
Krrish Dholakia 2024-07-18 17:41:31 -07:00
parent 30d6c1d171
commit e3ca2789df

View file

@@ -77,7 +77,9 @@ BEDROCK_CONVERSE_MODELS = [
     "anthropic.claude-instant-v1",
 ]

 iam_cache = DualCache()

+_response_stream_shape_cache = None
+
 class AmazonCohereChatConfig:
@@ -1991,13 +1993,18 @@ class BedrockConverseLLM(BaseLLM):
 def get_response_stream_shape():
-    from botocore.loaders import Loader
-    from botocore.model import ServiceModel
-
-    loader = Loader()
-    bedrock_service_dict = loader.load_service_model("bedrock-runtime", "service-2")
-    bedrock_service_model = ServiceModel(bedrock_service_dict)
-    return bedrock_service_model.shape_for("ResponseStream")
+    global _response_stream_shape_cache
+    if _response_stream_shape_cache is None:
+        from botocore.loaders import Loader
+        from botocore.model import ServiceModel
+
+        loader = Loader()
+        bedrock_service_dict = loader.load_service_model("bedrock-runtime", "service-2")
+        bedrock_service_model = ServiceModel(bedrock_service_dict)
+        _response_stream_shape_cache = bedrock_service_model.shape_for("ResponseStream")
+    return _response_stream_shape_cache


 class AWSEventStreamDecoder: