fix - support text completion caching

Ishaan Jaff 2024-04-12 12:34:28 -07:00
parent 11cd1ec6cf
commit 0cef782ffa


@@ -2996,7 +2996,7 @@ def client(original_function):
                     )
                 ):  # allow users to control returning cached responses from the completion function
                     # checking cache
-                    print_verbose(f"INSIDE CHECKING CACHE")
+                    print_verbose("INSIDE CHECKING CACHE")
                     if (
                         litellm.cache is not None
                         and str(original_function.__name__)
@@ -3103,6 +3103,22 @@ def client(original_function):
                                 response_object=cached_result,
                                 model_response_object=ModelResponse(),
                             )
+                        if (
+                            call_type == CallTypes.atext_completion.value
+                            and isinstance(cached_result, dict)
+                        ):
+                            if kwargs.get("stream", False) == True:
+                                cached_result = convert_to_streaming_response_async(
+                                    response_object=cached_result,
+                                )
+                                cached_result = CustomStreamWrapper(
+                                    completion_stream=cached_result,
+                                    model=model,
+                                    custom_llm_provider="cached_response",
+                                    logging_obj=logging_obj,
+                                )
+                            else:
+                                cached_result = TextCompletionResponse(**cached_result)
                         elif call_type == CallTypes.aembedding.value and isinstance(
                             cached_result, dict
                         ):
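For context, the change above makes a cache hit for an `atext_completion` call come back as a `TextCompletionResponse` (or wrapped in a `CustomStreamWrapper` when streaming) instead of a raw dict. Below is a minimal sketch of how this path could be exercised; it assumes litellm's `Cache` class, the `atext_completion` coroutine, and the per-call `caching=True` flag as they existed around this commit, and the model name and prompt are purely illustrative.

import asyncio

import litellm
from litellm.caching import Cache  # assumed import path at the time of this commit

# Enable a local in-memory cache so a repeated call can hit the cached-response branch.
litellm.cache = Cache()


async def main():
    # First call goes to the provider and stores the response in the cache.
    await litellm.atext_completion(
        model="gpt-3.5-turbo-instruct",  # illustrative text-completion model
        prompt="Say hello",
        caching=True,
    )

    # An identical second call should be served from the cache; with this fix it
    # is returned as a TextCompletionResponse rather than a plain dict.
    second = await litellm.atext_completion(
        model="gpt-3.5-turbo-instruct",
        prompt="Say hello",
        caching=True,
    )
    print(type(second).__name__)  # expected: TextCompletionResponse


asyncio.run(main())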