Introduce research resource API and improve research caching

Add a `research://{topic}` resource endpoint for direct access to research context, reducing redundant API calls. Introduce a `research_store` for caching research results, and factor out helpers such as `store_research_results` and `format_context_with_sources` for reuse and clarity. Simplify researcher initialization and improve comments to clarify intended usage.
ThomasTaroni 2025-05-31 23:54:51 +02:00
parent ba48f44321
commit b1ad64cd75
2 changed files with 98 additions and 17 deletions
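
For orientation, the heart of the change is a FastMCP resource template: the `{topic}` parameter in the URI is bound to the handler argument, and a module-level dict acts as a cache so repeated reads of the same topic do no extra work. Below is a minimal, self-contained sketch of that pattern; the `notes://` scheme, `demo_store`, and the toy payload are illustrative only and do not appear in this commit.

from mcp.server.fastmcp import FastMCP

mcp = FastMCP("Demo")

# Toy in-memory cache, analogous to research_store in this commit
demo_store: dict[str, str] = {}

@mcp.resource("notes://{topic}")
async def notes_resource(topic: str) -> str:
    """Return cached notes for a topic, computing them only on first access."""
    if topic in demo_store:          # cache hit: skip recomputation
        return demo_store[topic]
    text = f"Notes about {topic} (computed once)"
    demo_store[topic] = text         # cache for subsequent reads
    return text

if __name__ == "__main__":
    mcp.run()  # stdio transport by default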

First changed file:

@@ -18,10 +18,13 @@ from gpt_researcher import GPTResearcher
 load_dotenv()
 
 from utils import (
+    research_store,
     create_success_response,
     handle_exception,
     get_researcher_by_id,
     format_sources_for_response,
+    format_context_with_sources,
+    store_research_results,
     create_research_prompt
 )
@@ -33,30 +36,62 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 # Initialize FastMCP server
-mcp = FastMCP("GPT Researcher", host="0.0.0.0", port=8000, timeout_keep_alive=720)
-research_type = os.getenv("RESEARCH_TYPE", "deep")
+mcp = FastMCP("GPT Researcher")
 
 # Initialize researchers dictionary
 if not hasattr(mcp, "researchers"):
     mcp.researchers = {}
 
-class CustomLogsHandler:
-    """A custom Logs handler class to handle JSON data."""
-    def __init__(self):
-        self.logs = []  # Initialize logs to store data
-    async def send_json(self, data: Dict[str, Any]) -> None:
-        """Send JSON data and log it."""
-        self.logs.append(data)  # Append data to logs
-        print(f"MCP Log: {data}")  # For demonstration, print the log
+@mcp.resource("research://{topic}")
+async def research_resource(topic: str) -> str:
+    """
+    Provide research context for a given topic directly as a resource.
+
+    This allows LLMs to access web-sourced information without explicit function calls.
+
+    Args:
+        topic: The research topic or query
+
+    Returns:
+        String containing the research context with source information
+    """
+    # Check if we've already researched this topic
+    if topic in research_store:
+        logger.info(f"Returning cached research for topic: {topic}")
+        return research_store[topic]["context"]
+
+    # If not, conduct the research
+    logger.info(f"Conducting new research for resource on topic: {topic}")
+
+    # Initialize GPT Researcher
+    researcher = GPTResearcher(topic)
+
+    try:
+        # Conduct the research
+        await researcher.conduct_research()
+
+        # Get the context and sources
+        context = researcher.get_research_context()
+        sources = researcher.get_research_sources()
+        source_urls = researcher.get_source_urls()
+
+        # Format with sources included
+        formatted_context = format_context_with_sources(topic, context, sources)
+
+        # Store for future use
+        store_research_results(topic, context, sources, source_urls, formatted_context)
+
+        return formatted_context
+    except Exception as e:
+        return f"Error conducting research on '{topic}': {str(e)}"
 
 @mcp.tool()
 async def deep_research(query: str) -> Dict[str, Any]:
     """
     Conduct a web deep research on a given query using GPT Researcher.
-    Use this tool when you need a deep research on a topic.
+    Use this tool when you need time-sensitive, real-time information like stock prices, news, people, specific knowledge, etc.
 
     Args:
         query: The research query or topic
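
Reading the new resource from an MCP client could look like the sketch below. The SSE endpoint URL is an assumption (it depends on how the server is launched, which is outside this diff), and the topic value is illustrative.

import asyncio

from pydantic import AnyUrl
from mcp import ClientSession
from mcp.client.sse import sse_client

async def main() -> None:
    # URL is assumed; adjust to wherever the GPT Researcher MCP server is served
    async with sse_client("http://localhost:8000/sse") as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.read_resource(AnyUrl("research://quantum-error-correction"))
            # A cached topic returns immediately; a new one triggers a full research run
            print(result.contents[0].text)

asyncio.run(main())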
@@ -69,10 +104,9 @@ async def deep_research(query: str) -> Dict[str, Any]:
     # Generate a unique ID for this research session
     research_id = str(uuid.uuid4())
-    custom_logs_handler = CustomLogsHandler()
 
     # Initialize GPT Researcher
-    researcher = GPTResearcher(query=query, report_type=research_type, websocket=custom_logs_handler)
+    researcher = GPTResearcher(query)
 
     # Start research
     try:
@@ -85,6 +119,9 @@ async def deep_research(query: str) -> Dict[str, Any]:
         sources = researcher.get_research_sources()
         source_urls = researcher.get_source_urls()
 
+        # Store in the research store for the resource API
+        store_research_results(query, context, sources, source_urls)
+
         return create_success_response({
             "research_id": research_id,
             "query": query,
@@ -101,7 +138,8 @@ async def deep_research(query: str) -> Dict[str, Any]:
 async def quick_search(query: str) -> Dict[str, Any]:
     """
     Perform a quick web search on a given query and return search results with snippets.
-    Use this tool when you need a quick research on a topic.
+    This optimizes for speed over quality and is useful when an LLM doesn't need in-depth
+    information on a topic.
 
     Args:
         query: The search query
@@ -113,9 +151,9 @@ async def quick_search(query: str) -> Dict[str, Any]:
     # Generate a unique ID for this search session
     search_id = str(uuid.uuid4())
-    custom_logs_handler = CustomLogsHandler()
 
     # Initialize GPT Researcher
-    researcher = GPTResearcher(query=query, report_type=research_type, websocket=custom_logs_handler)
+    researcher = GPTResearcher(query)
 
     try:
         # Perform quick search

Second changed file:

@@ -11,6 +11,8 @@ from loguru import logger
 # Configure logging for console only (no file logging)
 logger.configure(handlers=[{"sink": sys.stderr, "level": "INFO"}])
 
+# Research store to track ongoing research topics and contexts
+research_store = {}
 
 # API Response Utilities
 def create_error_response(message: str) -> Dict[str, Any]:
@@ -66,6 +68,44 @@ def format_sources_for_response(sources: List[Dict[str, Any]]) -> List[Dict[str,
     ]
 
+
+def format_context_with_sources(topic: str, context: str, sources: List[Dict[str, Any]]) -> str:
+    """
+    Format research context with sources for display.
+
+    Args:
+        topic: Research topic
+        context: Research context
+        sources: List of sources
+
+    Returns:
+        Formatted context string with sources
+    """
+    formatted_context = f"## Research: {topic}\n\n{context}\n\n"
+    formatted_context += "## Sources:\n"
+    for i, source in enumerate(sources):
+        formatted_context += f"{i+1}. {source.get('title', 'Unknown')}: {source.get('url', '')}\n"
+    return formatted_context
+
+
+def store_research_results(topic: str, context: str, sources: List[Dict[str, Any]],
+                           source_urls: List[str], formatted_context: Optional[str] = None):
+    """
+    Store research results in the research store.
+
+    Args:
+        topic: Research topic
+        context: Research context
+        sources: List of sources
+        source_urls: List of source URLs
+        formatted_context: Optional pre-formatted context
+    """
+    research_store[topic] = {
+        "context": formatted_context or context,
+        "sources": sources,
+        "source_urls": source_urls
+    }
+
+
 def create_research_prompt(topic: str, goal: str, report_format: str = "research_report") -> str:
     """
     Create a research query prompt for GPT Researcher.
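
A quick illustration of the new formatting helper; the topic, context, and sources below are made up:

from utils import format_context_with_sources

sources = [
    {"title": "Example Paper", "url": "https://example.org/paper"},
    {"title": "Example Blog", "url": "https://example.org/blog"},
]
print(format_context_with_sources("Quantum Error Correction", "Key findings go here.", sources))
# ## Research: Quantum Error Correction
#
# Key findings go here.
#
# ## Sources:
# 1. Example Paper: https://example.org/paper
# 2. Example Blog: https://example.org/blog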
@@ -85,7 +125,10 @@ def create_research_prompt(topic: str, goal: str, report_format: str = "research
     You have two methods to access web-sourced information:
 
-    Use the conduct_research tool to perform new research and get a research_id for later use.
+    1. Use the "research://{topic}" resource to directly access context about this topic if it exists
+       or if you want to get straight to the information without tracking a research ID.
+
+    2. Use the conduct_research tool to perform new research and get a research_id for later use.
        This tool also returns the context directly in its response, which you can use immediately.
 
     After getting context, you can:
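
Calling the research tool rather than the resource, per the second method in the prompt above, could look like the sketch below; the endpoint URL and query are again assumptions.

import asyncio

from mcp import ClientSession
from mcp.client.sse import sse_client

async def main() -> None:
    async with sse_client("http://localhost:8000/sse") as (read, write):  # URL is assumed
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool("deep_research", {"query": "state of solid-state batteries"})
            # Per the prompt text above, the tool's response carries the research context directly
            print(result.content[0].text)

asyncio.run(main())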