Merge pull request #2991 from BerriAI/litellm_fix_text_completion_caching
[Feat] Support + Test caching for TextCompletion
commit 0540ca4918
2 changed files with 51 additions and 1 deletion
@@ -2996,7 +2996,7 @@ def client(original_function):
                 )
             ):  # allow users to control returning cached responses from the completion function
                 # checking cache
-                print_verbose(f"INSIDE CHECKING CACHE")
+                print_verbose("INSIDE CHECKING CACHE")
                 if (
                     litellm.cache is not None
                     and str(original_function.__name__)
@@ -3103,6 +3103,22 @@ def client(original_function):
                             response_object=cached_result,
                             model_response_object=ModelResponse(),
                         )
+                if (
+                    call_type == CallTypes.atext_completion.value
+                    and isinstance(cached_result, dict)
+                ):
+                    if kwargs.get("stream", False) == True:
+                        cached_result = convert_to_streaming_response_async(
+                            response_object=cached_result,
+                        )
+                        cached_result = CustomStreamWrapper(
+                            completion_stream=cached_result,
+                            model=model,
+                            custom_llm_provider="cached_response",
+                            logging_obj=logging_obj,
+                        )
+                    else:
+                        cached_result = TextCompletionResponse(**cached_result)
                 elif call_type == CallTypes.aembedding.value and isinstance(
                     cached_result, dict
                 ):
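For context, the added branch rehydrates an async text-completion cache hit back into a TextCompletionResponse (or wraps it in a CustomStreamWrapper when stream=True). Below is a minimal sketch, not the test added by this PR, of how the non-streaming path might be exercised; the Cache arguments, model name, and caching kwarg usage are illustrative assumptions.

import asyncio

import litellm
from litellm.caching import Cache

# In-memory cache; call types listed explicitly because the gate above checks
# original_function.__name__ against litellm.cache.supported_call_types.
litellm.cache = Cache(supported_call_types=["text_completion", "atext_completion"])


async def main():
    # Assumes a valid OPENAI_API_KEY is set; the model name is only illustrative.
    first = await litellm.atext_completion(
        model="gpt-3.5-turbo-instruct",
        prompt="Hello, who are you?",
        caching=True,
    )
    second = await litellm.atext_completion(
        model="gpt-3.5-turbo-instruct",
        prompt="Hello, who are you?",
        caching=True,
    )
    # On the second call the cached dict is rebuilt via
    # TextCompletionResponse(**cached_result), so the completion text should match.
    print(first.choices[0].text == second.choices[0].text)


asyncio.run(main())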