Refactor deep_research to support SSE-based streaming.
Updated the `deep_research` function to use asynchronous generators and send Server-Sent Events (SSE) for real-time streaming updates. Added a new utility for formatting SSE events and improved research lifecycle visibility with intermediate progress steps and error handling.
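
For context, a minimal sketch of the event stream the refactored tool now emits (event names taken from the diff below; payload values are illustrative):

    event: tool_update
    data: {"research_id": "1b2c...", "query": "...", "status": "Research initiated"}

    event: tool_update
    data: {"research_id": "1b2c...", "status": "Starting web crawling and source analysis...", "progress": 10}

    event: tool_result
    data: {"research_id": "1b2c...", "query": "...", "status": "Research completed successfully", ...}

On failure, a `tool_error` frame is sent instead of `tool_result`.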
parent 11bfff7ff7
commit cda44df1a0
1 changed file with 72 additions and 22 deletions
@@ -6,10 +6,11 @@ to conduct web research and generate reports via the MCP protocol.
 """
 
 import os
-import sys
+import json
 import uuid
 import logging
-from typing import Dict, Any, Optional, List
+from typing import Dict, Any, Optional, AsyncGenerator
+import asyncio
 from dotenv import load_dotenv
 from mcp.server.fastmcp import FastMCP
 from gpt_researcher import GPTResearcher
@@ -35,6 +36,11 @@ logging.basicConfig(
 
 logger = logging.getLogger(__name__)
 
+def format_sse_event(event_name: str, data: Dict[str, Any]) -> str:
+    """Format data as an SSE event string."""
+    json_data = json.dumps(data)
+    return f"event: {event_name}\ndata: {json_data}\n\n"
+
 # Initialize FastMCP server
 mcp = FastMCP("GPT Researcher", host="0.0.0.0", port=8000, timeout_keep_alive=720)
 
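For reference, the helper added above produces a standard SSE frame; a quick sketch of its output (values illustrative):

    >>> format_sse_event("tool_update", {"research_id": "abc-123", "status": "Research initiated"})
    'event: tool_update\ndata: {"research_id": "abc-123", "status": "Research initiated"}\n\n'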
@@ -88,7 +94,7 @@ async def research_resource(topic: str) -> str:
 
 
 @mcp.tool()
-async def deep_research(query: str) -> Dict[str, Any]:
+async def deep_research(query: str) -> AsyncGenerator[str, None]:
     """
     Conduct a web deep research on a given query using GPT Researcher.
     Use this tool when you need time-sensitive, real-time information like stock prices, news, people, specific knowledge, etc.
@@ -100,39 +106,83 @@ async def deep_research(query: str) -> Dict[str, Any]:
     Dict containing research status, ID, and the actual research context and sources
     that can be used directly by LLMs for context enrichment
     """
-    logger.info(f"Conducting research on query: {query}...")
+    logger.info(f"Starting streaming deep research on query: {query}...")
 
-    # Generate a unique ID for this research session
     research_id = str(uuid.uuid4())
 
-    # Initialize GPT Researcher
-    researcher = GPTResearcher(query)
-
-    # Start research
     try:
-        await researcher.conduct_research()
-        mcp.researchers[research_id] = researcher
-        logger.info(f"Research completed for ID: {research_id}")
+        yield format_sse_event(
+            "tool_update",
+            {"research_id": research_id, "query": query, "status": "Research initiated"}
+        )
+
+        # Initialize GPT Researcher
+        # In newer GPTResearcher versions the query is passed at initialization
+        researcher = GPTResearcher(query=query, report_type="research_report", config_path=None)
+        # Alternatively, if your version handles it differently: researcher = GPTResearcher(query)
+
+        mcp.researchers[research_id] = researcher  # Store early, in case it is needed
+
+        yield format_sse_event(
+            "tool_update",
+            {"research_id": research_id, "status": "GPT Researcher initialized. Starting information gathering."}
+        )
+        await asyncio.sleep(0.1)  # Brief pause so the event is reliably sent
+
+        # Simulated update: research is starting
+        yield format_sse_event(
+            "tool_update",
+            {"research_id": research_id, "status": "Starting web crawling and source analysis...", "progress": 10}
+        )
+
+        # The actual long-running process
+        await researcher.conduct_research()  # This call blocks for the duration of the research
+
+        # Simulated update: research finished, processing begins
+        yield format_sse_event(
+            "tool_update",
+            {"research_id": research_id, "status": "Web crawling finished. Processing and consolidating information...", "progress": 70}
+        )
+        await asyncio.sleep(0.1)
+
+        logger.info(f"Core research completed for ID: {research_id}. Fetching results...")
 
         # Get the research context and sources
         context = researcher.get_research_context()
-        sources = researcher.get_research_sources()
-        source_urls = researcher.get_source_urls()
+        sources = researcher.get_research_sources()  # These are structured source objects
+        source_urls = researcher.get_source_urls()  # These are just the URLs
 
-        # Store in the research store for the resource API
-        store_research_results(query, context, sources, source_urls)
+        # Store in the research store for the resource API (optional, depending on your logic)
+        # Consider whether formatted_context is needed here if the client receives the raw data
+        store_research_results(query, context, sources, source_urls, context)  # Simplified for the store
 
-        return create_success_response({
+        # Prepare the final data
+        final_data_payload = {
             "research_id": research_id,
             "query": query,
+            "status": "Research completed successfully",
             "source_count": len(sources),
             "context": context,
-            "sources": format_sources_for_response(sources),
+            "sources": format_sources_for_response(sources),  # Use your formatting function
             "source_urls": source_urls
-        })
-    except Exception as e:
-        return handle_exception(e, "Research")
+        }
+
+        # Send the final result as a 'tool_result' event
+        yield format_sse_event("tool_result", final_data_payload)
+        logger.info(f"Sent final research result for ID: {research_id}")
+
+    except Exception as e:
+        logger.error(f"Error during deep_research for query '{query}': {e}", exc_info=True)
+        # Send an error event to the client
+        error_payload = {
+            "research_id": research_id,  # May be None if the error occurs very early
+            "query": query,
+            "status": "Error occurred",
+            "error_message": str(e),
+            "error_details": "Check server logs for more information."
+        }
+        yield format_sse_event("tool_error", error_payload)  # Dedicated event type for errors
+        # You could also use handle_exception here if it returns an SSE event,
+        # or re-raise the exception if FastMCP handles that better.
 
 
 @mcp.tool()
 async def quick_search(query: str) -> Dict[str, Any]:
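
As a usage illustration, a minimal consumer sketch; calling the tool coroutine directly like this is hypothetical and bypasses the MCP transport, which would normally deliver the frames to the client:

    import asyncio

    async def demo() -> None:
        # Hypothetical direct call to the deep_research tool defined above.
        async for frame in deep_research("current NVIDIA stock price"):
            # Each yielded item is one complete SSE frame:
            # "event: <name>\ndata: <json>\n\n"
            print(frame, end="")

    asyncio.run(demo())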