mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00

Support openrouter reasoning_content on streaming (#9094)

* feat(convert_dict_to_response.py): support openrouter format of reasoning content
* fix(transformation.py): fix openrouter streaming with reasoning content

  Fixes https://github.com/BerriAI/litellm/issues/8193#issuecomment-270892962

* fix: fix type error
This commit is contained in:
parent 91b9142378
commit b401f2c06f

6 changed files with 99 additions and 18 deletions
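In short: OpenRouter emits reasoning tokens under a `reasoning` key, while litellm exposes them as `reasoning_content`; this commit normalizes the two, on both the non-streaming and streaming paths. A minimal usage sketch, mirroring the new test added further down in this diff (assumes an OpenRouter key is configured):

    import litellm

    resp = litellm.completion(
        model="openrouter/anthropic/claude-3.7-sonnet",
        messages=[{"role": "user", "content": "Hello world"}],
        reasoning={"effort": "high"},
    )
    # After this commit, OpenRouter's `reasoning` field is surfaced here:
    print(resp.choices[0].message.reasoning_content)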
convert_dict_to_response.py

@@ -239,6 +239,24 @@ def _parse_content_for_reasoning(
     return None, message_text
 
 
+def _extract_reasoning_content(message: dict) -> Tuple[Optional[str], Optional[str]]:
+    """
+    Extract reasoning content and main content from a message.
+
+    Args:
+        message (dict): The message dictionary that may contain reasoning_content
+
+    Returns:
+        tuple[Optional[str], Optional[str]]: A tuple of (reasoning_content, content)
+    """
+    if "reasoning_content" in message:
+        return message["reasoning_content"], message["content"]
+    elif "reasoning" in message:
+        return message["reasoning"], message["content"]
+    else:
+        return _parse_content_for_reasoning(message.get("content"))
+
+
 class LiteLLMResponseObjectHandler:
 
     @staticmethod
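The helper's precedence, illustrated (a sketch; the message dicts are made up):

    # 1. An explicit `reasoning_content` key wins:
    _extract_reasoning_content({"reasoning_content": "chain", "content": "hi"})
    # -> ("chain", "hi")

    # 2. Otherwise OpenRouter's `reasoning` key is used:
    _extract_reasoning_content({"reasoning": "chain", "content": "hi"})
    # -> ("chain", "hi")

    # 3. Otherwise reasoning is parsed out of the content string itself:
    _extract_reasoning_content({"content": "plain answer"})
    # delegates to _parse_content_for_reasoning("plain answer")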
@@ -452,13 +470,9 @@ def convert_to_model_response_object(  # noqa: PLR0915
                     provider_specific_fields[field] = choice["message"][field]
 
             # Handle reasoning models that display `reasoning_content` within `content`
-            if "reasoning_content" in choice["message"]:
-                reasoning_content = choice["message"]["reasoning_content"]
-                content = choice["message"]["content"]
-            else:
-                reasoning_content, content = _parse_content_for_reasoning(
-                    choice["message"].get("content")
-                )
+            reasoning_content, content = _extract_reasoning_content(
+                choice["message"]
+            )
 
             # Handle thinking models that display `thinking_blocks` within `content`
             thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
transformation.py

@@ -6,7 +6,16 @@ Calls done in OpenAI/openai.py as OpenRouter is openai-compatible.
 Docs: https://openrouter.ai/docs/parameters
 """
 
+from typing import Any, AsyncIterator, Iterator, Optional, Union
+
+import httpx
+
+from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.types.utils import ModelResponse, ModelResponseStream
+
 from ...openai.chat.gpt_transformation import OpenAIGPTConfig
+from ..common_utils import OpenRouterException
 
 
 class OpenrouterConfig(OpenAIGPTConfig):
@@ -37,3 +46,43 @@ class OpenrouterConfig(OpenAIGPTConfig):
             extra_body  # openai client supports `extra_body` param
         )
         return mapped_openai_params
+
+    def get_error_class(
+        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+    ) -> BaseLLMException:
+        return OpenRouterException(
+            message=error_message,
+            status_code=status_code,
+            headers=headers,
+        )
+
+    def get_model_response_iterator(
+        self,
+        streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
+        sync_stream: bool,
+        json_mode: Optional[bool] = False,
+    ) -> Any:
+        return OpenRouterChatCompletionStreamingHandler(
+            streaming_response=streaming_response,
+            sync_stream=sync_stream,
+            json_mode=json_mode,
+        )
+
+
+class OpenRouterChatCompletionStreamingHandler(BaseModelResponseIterator):
+
+    def chunk_parser(self, chunk: dict) -> ModelResponseStream:
+        try:
+            new_choices = []
+            for choice in chunk["choices"]:
+                choice["delta"]["reasoning_content"] = choice["delta"].get("reasoning")
+                new_choices.append(choice)
+            return ModelResponseStream(
+                id=chunk["id"],
+                object="chat.completion.chunk",
+                created=chunk["created"],
+                model=chunk["model"],
+                choices=new_choices,
+            )
+        except Exception as e:
+            raise e
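To see what the streaming handler does, here is a hedged sketch that feeds one OpenRouter-style chunk through `chunk_parser` (the chunk shape is assumed from the parser above; the id/model/timestamp values are made up):

    handler = OpenRouterChatCompletionStreamingHandler(
        streaming_response=iter(()), sync_stream=True
    )
    parsed = handler.chunk_parser(
        {
            "id": "gen-123",  # illustrative values, not from a real response
            "created": 1741000000,
            "model": "anthropic/claude-3.7-sonnet",
            "choices": [{"index": 0, "delta": {"reasoning": "Thinking..."}}],
        }
    )
    # The OpenRouter `reasoning` field is re-keyed to litellm's `reasoning_content`:
    print(parsed.choices[0].delta.reasoning_content)  # "Thinking..."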
litellm/llms/openrouter/common_utils.py (new file, 5 additions)

@@ -0,0 +1,5 @@
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+
+
+class OpenRouterException(BaseLLMException):
+    pass
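How the new exception class flows through `get_error_class`, sketched (assumes `OpenrouterConfig` can be instantiated with no arguments, as with other litellm config classes):

    cfg = OpenrouterConfig()
    err = cfg.get_error_class(
        error_message="invalid api key", status_code=401, headers={}
    )
    assert isinstance(err, OpenRouterException)  # a BaseLLMException subclass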
@@ -2274,23 +2274,22 @@ def completion(  # type: ignore # noqa: PLR0915
             data = {"model": model, "messages": messages, **optional_params}
 
             ## COMPLETION CALL
-            response = openai_like_chat_completion.completion(
+            response = base_llm_http_handler.completion(
                 model=model,
+                stream=stream,
                 messages=messages,
-                headers=headers,
-                api_key=api_key,
+                acompletion=acompletion,
                 api_base=api_base,
                 model_response=model_response,
-                print_verbose=print_verbose,
                 optional_params=optional_params,
                 litellm_params=litellm_params,
-                logger_fn=logger_fn,
-                logging_obj=logging,
-                acompletion=acompletion,
-                timeout=timeout,  # type: ignore
                 custom_llm_provider="openrouter",
-                custom_prompt_dict=custom_prompt_dict,
+                timeout=timeout,
+                headers=headers,
                 encoding=encoding,
+                api_key=api_key,
+                logging_obj=logging,  # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
+                client=client,
             )
             ## LOGGING
             logging.post_call(
@@ -4830,3 +4830,14 @@ def test_completion_gpt_4o_empty_str():
         messages=[{"role": "user", "content": ""}],
     )
     assert resp.choices[0].message.content is not None
+
+
+def test_completion_openrouter_reasoning_content():
+    litellm._turn_on_debug()
+    resp = litellm.completion(
+        model="openrouter/anthropic/claude-3.7-sonnet",
+        messages=[{"role": "user", "content": "Hello world"}],
+        reasoning={"effort": "high"},
+    )
+    print(resp)
+    assert resp.choices[0].message.reasoning_content is not None
@@ -4069,7 +4069,8 @@ def test_mock_response_iterator_tool_use():
     "model",
     [
         # "deepseek/deepseek-reasoner",
-        "anthropic/claude-3-7-sonnet-20250219",
+        # "anthropic/claude-3-7-sonnet-20250219",
+        "openrouter/anthropic/claude-3.7-sonnet",
     ],
 )
 def test_reasoning_content_completion(model):
@@ -4080,7 +4081,9 @@ def test_reasoning_content_completion(model):
         model=model,
         messages=[{"role": "user", "content": "Tell me a joke."}],
         stream=True,
-        thinking={"type": "enabled", "budget_tokens": 1024},
+        # thinking={"type": "enabled", "budget_tokens": 1024},
+        reasoning={"effort": "high"},
+        drop_params=True,
     )
 
     reasoning_content_exists = False
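For reference, a hedged sketch of the streaming behavior this test exercises: accumulate `reasoning_content` from the deltas (assumes OpenRouter credentials are configured):

    import litellm

    resp = litellm.completion(
        model="openrouter/anthropic/claude-3.7-sonnet",
        messages=[{"role": "user", "content": "Tell me a joke."}],
        stream=True,
        reasoning={"effort": "high"},
        drop_params=True,
    )

    reasoning = ""
    for chunk in resp:
        delta = chunk.choices[0].delta
        # Before this commit, OpenRouter's `reasoning` deltas were dropped on
        # the streaming path; now they arrive as `reasoning_content`.
        if getattr(delta, "reasoning_content", None):
            reasoning += delta.reasoning_content

    assert len(reasoning) > 0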