Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 03:04:13 +00:00
[Feat] Support for all litellm providers on Responses API (works with Codex) - Anthropic, Bedrock API, VertexAI, Ollama (#10132)
* transform request
* basic handler for LiteLLMCompletionTransformationHandler
* complete transform litellm to responses api
* fixes to test
* fix stream=True
* fix streaming iterator
* fixes for transformation
* fixes for anthropic codex support
* fix pass response_api_optional_params
* test anthropic responses api tools
* update responses types
* working codex with litellm
* add session handler
* fixes streaming iterator
* fix handler
* add litellm codex example
* fix code quality
* test fix
* docs litellm codex
* litellm codexdoc
* docs openai codex with litellm
* docs litellm openai codex
* litellm codex
* linting fixes for transforming responses API
* fix import error
* fix responses api test
* add sync iterator support for responses api
This commit is contained in:
parent 3e87ec4f16
commit 3d5022bd79
14 changed files with 1282 additions and 53 deletions
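For context, a minimal sketch of what this feature enables: calling the Responses API through litellm against a non-OpenAI provider. The model name, prompt, and field accesses below are illustrative assumptions, not taken from this commit.

import litellm

# Illustrative sketch (assumed model name and inputs): the Responses API call
# goes through litellm, which transforms it for the provider's native API.
response = litellm.responses(
    model="anthropic/claude-3-5-sonnet-20240620",
    input="Write a one-line haiku about caching.",
)

# A follow-up turn can reference the prior turn via previous_response_id;
# the session handler added in this commit reconstructs the conversation chain.
follow_up = litellm.responses(
    model="anthropic/claude-3-5-sonnet-20240620",
    input="Now translate it to French.",
    previous_response_id=response.id,
)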
@@ -0,0 +1,59 @@
"""
Responses API has previous_response_id, which is the id of the previous response.

LiteLLM needs to maintain a cache of the previous response input, output, previous_response_id, and model.

This class handles that cache.
"""

from typing import List, Optional, Tuple, Union

from typing_extensions import TypedDict

from litellm.caching import InMemoryCache
from litellm.types.llms.openai import ResponseInputParam, ResponsesAPIResponse

RESPONSES_API_PREVIOUS_RESPONSES_CACHE = InMemoryCache()
MAX_PREV_SESSION_INPUTS = 50


class ResponsesAPISessionElement(TypedDict, total=False):
    input: Union[str, ResponseInputParam]
    output: ResponsesAPIResponse
    response_id: str
    previous_response_id: Optional[str]


class SessionHandler:
    def add_completed_response_to_cache(
        self, response_id: str, session_element: ResponsesAPISessionElement
    ):
        RESPONSES_API_PREVIOUS_RESPONSES_CACHE.set_cache(
            key=response_id, value=session_element
        )

    def get_chain_of_previous_input_output_pairs(
        self, previous_response_id: str
    ) -> List[Tuple[ResponseInputParam, ResponsesAPIResponse]]:
        response_api_inputs: List[Tuple[ResponseInputParam, ResponsesAPIResponse]] = []
        current_previous_response_id = previous_response_id

        count_session_elements = 0
        while current_previous_response_id:
            if count_session_elements > MAX_PREV_SESSION_INPUTS:
                break
            session_element = RESPONSES_API_PREVIOUS_RESPONSES_CACHE.get_cache(
                key=current_previous_response_id
            )
            if session_element:
                response_api_inputs.append(
                    (session_element.get("input"), session_element.get("output"))
                )
                current_previous_response_id = session_element.get(
                    "previous_response_id"
                )
            else:
                break
            count_session_elements += 1
        return response_api_inputs
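A short usage sketch of the session handler above. The ids and payloads are assumptions for illustration; completed_response stands in for a ResponsesAPIResponse returned by a provider call.

handler = SessionHandler()

# Record a completed turn so later turns can reconstruct the chain.
# "resp_1"/"resp_2" and completed_response are placeholder values.
handler.add_completed_response_to_cache(
    response_id="resp_2",
    session_element=ResponsesAPISessionElement(
        input="Now translate it to French.",
        output=completed_response,
        response_id="resp_2",
        previous_response_id="resp_1",
    ),
)

# Starting from "resp_2", walk backwards through previous_response_id links,
# collecting (input, output) pairs until the chain ends or
# MAX_PREV_SESSION_INPUTS elements have been visited.
pairs = handler.get_chain_of_previous_input_output_pairs(previous_response_id="resp_2")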