Support openrouter reasoning_content on streaming (#9094)

* feat(convert_dict_to_response.py): support openrouter format of reasoning content

* fix(transformation.py): fix openrouter streaming with reasoning content

Fixes https://github.com/BerriAI/litellm/issues/8193#issuecomment-270892962

* fix: fix type error
Krish Dholakia 2025-03-09 20:03:59 -07:00 committed by GitHub
parent 42b7921ca1
commit f899b828cf
6 changed files with 99 additions and 18 deletions

View file

@@ -239,6 +239,24 @@ def _parse_content_for_reasoning(
    return None, message_text


def _extract_reasoning_content(message: dict) -> Tuple[Optional[str], Optional[str]]:
    """
    Extract reasoning content and main content from a message.

    Args:
        message (dict): The message dictionary that may contain reasoning_content

    Returns:
        tuple[Optional[str], Optional[str]]: A tuple of (reasoning_content, content)
    """
    if "reasoning_content" in message:
        return message["reasoning_content"], message["content"]
    elif "reasoning" in message:
        return message["reasoning"], message["content"]
    else:
        return _parse_content_for_reasoning(message.get("content"))


class LiteLLMResponseObjectHandler:
    @staticmethod
@@ -452,13 +470,9 @@ def convert_to_model_response_object( # noqa: PLR0915
                provider_specific_fields[field] = choice["message"][field]

            # Handle reasoning models that display `reasoning_content` within `content`
            if "reasoning_content" in choice["message"]:
                reasoning_content = choice["message"]["reasoning_content"]
                content = choice["message"]["content"]
            else:
                reasoning_content, content = _parse_content_for_reasoning(
                    choice["message"].get("content")
                )
            reasoning_content, content = _extract_reasoning_content(
                choice["message"]
            )

            # Handle thinking models that display `thinking_blocks` within `content`
            thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
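
For context, here is a minimal standalone sketch (not the library code) of the lookup order the new _extract_reasoning_content helper follows: a dedicated reasoning_content field wins, then OpenRouter's reasoning field, then a fallback that parses reasoning out of the content itself. The <think>-tag fallback and the sample messages are illustrative assumptions, not the exact _parse_content_for_reasoning implementation.

import re
from typing import Optional, Tuple


def parse_content_for_reasoning(content: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    # Simplified stand-in for the real fallback: pull reasoning out of a
    # leading <think>...</think> block embedded in the content string.
    if content is None:
        return None, content
    match = re.match(r"^\s*<think>(.*?)</think>(.*)$", content, re.DOTALL)
    if match:
        return match.group(1), match.group(2)
    return None, content


def extract_reasoning_content(message: dict) -> Tuple[Optional[str], Optional[str]]:
    # Mirrors the lookup order of the new helper above.
    if "reasoning_content" in message:          # e.g. DeepSeek-style responses
        return message["reasoning_content"], message["content"]
    elif "reasoning" in message:                # OpenRouter puts it under `reasoning`
        return message["reasoning"], message["content"]
    return parse_content_for_reasoning(message.get("content"))


# OpenRouter-shaped message -> reasoning is lifted into reasoning_content
print(extract_reasoning_content({"reasoning": "step by step...", "content": "Hi!"}))
# ('step by step...', 'Hi!')

# No dedicated field -> fall back to tag parsing inside content
print(extract_reasoning_content({"content": "<think>hmm</think>Hello"}))
# ('hmm', 'Hello')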

View file

@@ -6,7 +6,16 @@ Calls done in OpenAI/openai.py as OpenRouter is openai-compatible.
Docs: https://openrouter.ai/docs/parameters
"""

from typing import Any, AsyncIterator, Iterator, Optional, Union

import httpx

from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.types.utils import ModelResponse, ModelResponseStream

from ...openai.chat.gpt_transformation import OpenAIGPTConfig
from ..common_utils import OpenRouterException


class OpenrouterConfig(OpenAIGPTConfig):
@@ -37,3 +46,43 @@ class OpenrouterConfig(OpenAIGPTConfig):
            extra_body  # openai client supports `extra_body` param
        )
        return mapped_openai_params

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        return OpenRouterException(
            message=error_message,
            status_code=status_code,
            headers=headers,
        )

    def get_model_response_iterator(
        self,
        streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
        sync_stream: bool,
        json_mode: Optional[bool] = False,
    ) -> Any:
        return OpenRouterChatCompletionStreamingHandler(
            streaming_response=streaming_response,
            sync_stream=sync_stream,
            json_mode=json_mode,
        )


class OpenRouterChatCompletionStreamingHandler(BaseModelResponseIterator):
    def chunk_parser(self, chunk: dict) -> ModelResponseStream:
        try:
            new_choices = []
            for choice in chunk["choices"]:
                choice["delta"]["reasoning_content"] = choice["delta"].get("reasoning")
                new_choices.append(choice)
            return ModelResponseStream(
                id=chunk["id"],
                object="chat.completion.chunk",
                created=chunk["created"],
                model=chunk["model"],
                choices=new_choices,
            )
        except Exception as e:
            raise e
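
A rough sketch of what the new chunk_parser does with an OpenRouter streaming chunk: it mirrors the provider's `reasoning` delta field into `reasoning_content` so the rest of litellm's streaming pipeline can pick it up. The chunk payload below is an assumed example shape from OpenRouter's OpenAI-compatible stream, not captured output.

# Illustrative OpenRouter streaming chunk (shape assumed) and the field copy
# performed before the handler builds a ModelResponseStream from it.
chunk = {
    "id": "gen-123",
    "object": "chat.completion.chunk",
    "created": 1741500000,
    "model": "anthropic/claude-3.7-sonnet",
    "choices": [
        {"index": 0, "delta": {"role": "assistant", "content": "", "reasoning": "Let me think..."}}
    ],
}

new_choices = []
for choice in chunk["choices"]:
    # OpenRouter streams the thinking text under `reasoning`; downstream litellm
    # code expects it under `reasoning_content`, so the handler mirrors it across.
    choice["delta"]["reasoning_content"] = choice["delta"].get("reasoning")
    new_choices.append(choice)

print(new_choices[0]["delta"]["reasoning_content"])  # "Let me think..."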

View file

@@ -0,0 +1,5 @@
from litellm.llms.base_llm.chat.transformation import BaseLLMException


class OpenRouterException(BaseLLMException):
    pass

View file

@@ -2274,23 +2274,22 @@ def completion( # type: ignore # noqa: PLR0915
        data = {"model": model, "messages": messages, **optional_params}

        ## COMPLETION CALL
        response = openai_like_chat_completion.completion(
        response = base_llm_http_handler.completion(
            model=model,
            stream=stream,
            messages=messages,
            headers=headers,
            api_key=api_key,
            acompletion=acompletion,
            api_base=api_base,
            model_response=model_response,
            print_verbose=print_verbose,
            optional_params=optional_params,
            litellm_params=litellm_params,
            logger_fn=logger_fn,
            logging_obj=logging,
            acompletion=acompletion,
            timeout=timeout,  # type: ignore
            custom_llm_provider="openrouter",
            custom_prompt_dict=custom_prompt_dict,
            timeout=timeout,
            headers=headers,
            encoding=encoding,
            api_key=api_key,
            logging_obj=logging,  # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
            client=client,
        )
        ## LOGGING
        logging.post_call(

View file

@@ -4830,3 +4830,14 @@ def test_completion_gpt_4o_empty_str():
        messages=[{"role": "user", "content": ""}],
    )
    assert resp.choices[0].message.content is not None


def test_completion_openrouter_reasoning_content():
    litellm._turn_on_debug()
    resp = litellm.completion(
        model="openrouter/anthropic/claude-3.7-sonnet",
        messages=[{"role": "user", "content": "Hello world"}],
        reasoning={"effort": "high"},
    )
    print(resp)
    assert resp.choices[0].message.reasoning_content is not None

View file

@@ -4069,7 +4069,8 @@ def test_mock_response_iterator_tool_use():
    "model",
    [
        # "deepseek/deepseek-reasoner",
        "anthropic/claude-3-7-sonnet-20250219",
        # "anthropic/claude-3-7-sonnet-20250219",
        "openrouter/anthropic/claude-3.7-sonnet",
    ],
)
def test_reasoning_content_completion(model):
@@ -4080,7 +4081,9 @@ def test_reasoning_content_completion(model):
        model=model,
        messages=[{"role": "user", "content": "Tell me a joke."}],
        stream=True,
        thinking={"type": "enabled", "budget_tokens": 1024},
        # thinking={"type": "enabled", "budget_tokens": 1024},
        reasoning={"effort": "high"},
        drop_params=True,
    )
    reasoning_content_exists = False
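
A usage sketch mirroring the streaming test above: iterate the stream and collect reasoning_content from each delta. The reasoning_content attribute access and the OPENROUTER_API_KEY environment requirement are assumptions for illustration, not part of this diff.

import litellm

# Assumes OPENROUTER_API_KEY is configured in the environment.
stream = litellm.completion(
    model="openrouter/anthropic/claude-3.7-sonnet",
    messages=[{"role": "user", "content": "Tell me a joke."}],
    stream=True,
    reasoning={"effort": "high"},
)

reasoning_chunks = []
for chunk in stream:
    delta = chunk.choices[0].delta
    # With this change, OpenRouter's `reasoning` field surfaces as `reasoning_content`.
    if getattr(delta, "reasoning_content", None):
        reasoning_chunks.append(delta.reasoning_content)

print("".join(reasoning_chunks))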