Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-24 18:24:20 +00:00)
Support openrouter reasoning_content on streaming (#9094)
* feat(convert_dict_to_response.py): support openrouter format of reasoning content
* fix(transformation.py): fix openrouter streaming with reasoning content
  Fixes https://github.com/BerriAI/litellm/issues/8193#issuecomment-270892962
* fix: fix type error
This commit is contained in:
parent 42b7921ca1
commit f899b828cf

6 changed files with 99 additions and 18 deletions
convert_dict_to_response.py:

@@ -239,6 +239,24 @@ def _parse_content_for_reasoning(
     return None, message_text


+def _extract_reasoning_content(message: dict) -> Tuple[Optional[str], Optional[str]]:
+    """
+    Extract reasoning content and main content from a message.
+
+    Args:
+        message (dict): The message dictionary that may contain reasoning_content
+
+    Returns:
+        tuple[Optional[str], Optional[str]]: A tuple of (reasoning_content, content)
+    """
+    if "reasoning_content" in message:
+        return message["reasoning_content"], message["content"]
+    elif "reasoning" in message:
+        return message["reasoning"], message["content"]
+    else:
+        return _parse_content_for_reasoning(message.get("content"))
+
+
 class LiteLLMResponseObjectHandler:

     @staticmethod
@@ -452,13 +470,9 @@ def convert_to_model_response_object(  # noqa: PLR0915
                     provider_specific_fields[field] = choice["message"][field]

                 # Handle reasoning models that display `reasoning_content` within `content`
-                if "reasoning_content" in choice["message"]:
-                    reasoning_content = choice["message"]["reasoning_content"]
-                    content = choice["message"]["content"]
-                else:
-                    reasoning_content, content = _parse_content_for_reasoning(
-                        choice["message"].get("content")
-                    )
+                reasoning_content, content = _extract_reasoning_content(
+                    choice["message"]
+                )

                 # Handle thinking models that display `thinking_blocks` within `content`
                 thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
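Effect on the non-streaming path, sketched with a hypothetical raw OpenRouter choice (field values are made up):

# Hypothetical raw choice dict, as received before conversion:
raw_choice = {
    "index": 0,
    "finish_reason": "stop",
    "message": {
        "role": "assistant",
        "reasoning": "Let me think this through...",  # OpenRouter's field name
        "content": "Final answer.",
    },
}
# convert_to_model_response_object() now routes the message through
# _extract_reasoning_content, so the returned ModelResponse exposes
# choices[0].message.reasoning_content == "Let me think this through..."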
transformation.py:

@@ -6,7 +6,16 @@ Calls done in OpenAI/openai.py as OpenRouter is openai-compatible.
 Docs: https://openrouter.ai/docs/parameters
 """

+from typing import Any, AsyncIterator, Iterator, Optional, Union
+
+import httpx
+
+from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.types.utils import ModelResponse, ModelResponseStream
+
 from ...openai.chat.gpt_transformation import OpenAIGPTConfig
+from ..common_utils import OpenRouterException


 class OpenrouterConfig(OpenAIGPTConfig):
@@ -37,3 +46,43 @@ class OpenrouterConfig(OpenAIGPTConfig):
             extra_body  # openai client supports `extra_body` param
         )
         return mapped_openai_params
+
+    def get_error_class(
+        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+    ) -> BaseLLMException:
+        return OpenRouterException(
+            message=error_message,
+            status_code=status_code,
+            headers=headers,
+        )
+
+    def get_model_response_iterator(
+        self,
+        streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
+        sync_stream: bool,
+        json_mode: Optional[bool] = False,
+    ) -> Any:
+        return OpenRouterChatCompletionStreamingHandler(
+            streaming_response=streaming_response,
+            sync_stream=sync_stream,
+            json_mode=json_mode,
+        )
+
+
+class OpenRouterChatCompletionStreamingHandler(BaseModelResponseIterator):
+
+    def chunk_parser(self, chunk: dict) -> ModelResponseStream:
+        try:
+            new_choices = []
+            for choice in chunk["choices"]:
+                choice["delta"]["reasoning_content"] = choice["delta"].get("reasoning")
+                new_choices.append(choice)
+            return ModelResponseStream(
+                id=chunk["id"],
+                object="chat.completion.chunk",
+                created=chunk["created"],
+                model=chunk["model"],
+                choices=new_choices,
+            )
+        except Exception as e:
+            raise e
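What chunk_parser does to each decoded streaming chunk, with an illustrative chunk dict (id, timestamp, and text are made up):

# Hypothetical decoded chunk from OpenRouter's stream:
chunk = {
    "id": "gen-abc123",
    "object": "chat.completion.chunk",
    "created": 1741300000,
    "model": "anthropic/claude-3.7-sonnet",
    "choices": [{"index": 0, "delta": {"reasoning": "First, consider..."}}],
}
# chunk_parser copies delta.get("reasoning") into delta["reasoning_content"],
# so the resulting ModelResponseStream exposes the OpenAI-style field:
# parsed.choices[0].delta.reasoning_content == "First, consider..."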
litellm/llms/openrouter/common_utils.py (new file, 5 lines):

@@ -0,0 +1,5 @@
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+
+
+class OpenRouterException(BaseLLMException):
+    pass
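Illustrative use of the added exception class via OpenrouterConfig.get_error_class from the diff above (error values are made up):

config = OpenrouterConfig()
err = config.get_error_class(
    error_message="Rate limit exceeded",
    status_code=429,
    headers={},
)
assert isinstance(err, OpenRouterException)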
@@ -2274,23 +2274,22 @@ def completion(  # type: ignore # noqa: PLR0915
         data = {"model": model, "messages": messages, **optional_params}

         ## COMPLETION CALL
-        response = openai_like_chat_completion.completion(
+        response = base_llm_http_handler.completion(
             model=model,
+            stream=stream,
             messages=messages,
-            headers=headers,
-            api_key=api_key,
+            acompletion=acompletion,
             api_base=api_base,
             model_response=model_response,
-            print_verbose=print_verbose,
             optional_params=optional_params,
             litellm_params=litellm_params,
-            logger_fn=logger_fn,
-            logging_obj=logging,
-            acompletion=acompletion,
-            timeout=timeout,  # type: ignore
             custom_llm_provider="openrouter",
-            custom_prompt_dict=custom_prompt_dict,
+            timeout=timeout,
+            headers=headers,
             encoding=encoding,
+            api_key=api_key,
+            logging_obj=logging,  # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
             client=client,
         )
         ## LOGGING
         logging.post_call(
@@ -4830,3 +4830,14 @@ def test_completion_gpt_4o_empty_str():
         messages=[{"role": "user", "content": ""}],
     )
     assert resp.choices[0].message.content is not None
+
+
+def test_completion_openrouter_reasoning_content():
+    litellm._turn_on_debug()
+    resp = litellm.completion(
+        model="openrouter/anthropic/claude-3.7-sonnet",
+        messages=[{"role": "user", "content": "Hello world"}],
+        reasoning={"effort": "high"},
+    )
+    print(resp)
+    assert resp.choices[0].message.reasoning_content is not None
@@ -4069,7 +4069,8 @@ def test_mock_response_iterator_tool_use():
     "model",
     [
         # "deepseek/deepseek-reasoner",
-        "anthropic/claude-3-7-sonnet-20250219",
+        # "anthropic/claude-3-7-sonnet-20250219",
+        "openrouter/anthropic/claude-3.7-sonnet",
     ],
 )
 def test_reasoning_content_completion(model):

@@ -4080,7 +4081,9 @@ def test_reasoning_content_completion(model):
         model=model,
         messages=[{"role": "user", "content": "Tell me a joke."}],
         stream=True,
-        thinking={"type": "enabled", "budget_tokens": 1024},
+        # thinking={"type": "enabled", "budget_tokens": 1024},
+        reasoning={"effort": "high"},
+        drop_params=True,
     )

     reasoning_content_exists = False
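Putting it together, a hedged usage sketch of the streaming behavior these tests exercise; after this commit, OpenRouter's `reasoning` deltas surface as `reasoning_content` on each chunk:

import litellm

stream = litellm.completion(
    model="openrouter/anthropic/claude-3.7-sonnet",
    messages=[{"role": "user", "content": "Tell me a joke."}],
    reasoning={"effort": "high"},
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta
    # reasoning_content is populated from OpenRouter's `reasoning` field
    if getattr(delta, "reasoning_content", None):
        print("reasoning:", delta.reasoning_content)
    if getattr(delta, "content", None):
        print("content:", delta.content)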