From b1ad64cd75b8b90ac0ab58ce47f3d4b807208568 Mon Sep 17 00:00:00 2001 From: ThomasTaroni Date: Sat, 31 May 2025 23:54:51 +0200 Subject: [PATCH] Introduce research resource API and improve research caching Add a `research://{topic}` resource endpoint for direct access to research context, reducing redundant API calls. Introduced `research_store` for caching research results and modularized helper methods like `store_research_results` and `format_context_with_sources` for better reusability and clarity. Refactored existing researcher initialization for simplicity and improved comments to clarify intended usage. --- .../gpt_researcher/server.py | 70 ++++++++++++++----- .../gpt_researcher/utils.py | 45 +++++++++++- 2 files changed, 98 insertions(+), 17 deletions(-) diff --git a/src/phoenix_technologies/gpt_researcher/server.py b/src/phoenix_technologies/gpt_researcher/server.py index a96ee6e..2fdcd25 100644 --- a/src/phoenix_technologies/gpt_researcher/server.py +++ b/src/phoenix_technologies/gpt_researcher/server.py @@ -18,10 +18,13 @@ from gpt_researcher import GPTResearcher load_dotenv() from utils import ( + research_store, create_success_response, handle_exception, get_researcher_by_id, format_sources_for_response, + format_context_with_sources, + store_research_results, create_research_prompt ) @@ -33,30 +36,62 @@ logging.basicConfig( logger = logging.getLogger(__name__) # Initialize FastMCP server -mcp = FastMCP("GPT Researcher", host="0.0.0.0", port=8000, timeout_keep_alive=720) -research_type = os.getenv("RESEARCH_TYPE", "deep") +mcp = FastMCP("GPT Researcher") # Initialize researchers dictionary if not hasattr(mcp, "researchers"): mcp.researchers = {} -class CustomLogsHandler: - """A custom Logs handler class to handle JSON data.""" - def __init__(self): - self.logs = [] # Initialize logs to store data +@mcp.resource("research://{topic}") +async def research_resource(topic: str) -> str: + """ + Provide research context for a given topic directly as a resource. - async def send_json(self, data: Dict[str, Any]) -> None: - """Send JSON data and log it.""" - self.logs.append(data) # Append data to logs - print(f"MCP Log: {data}") # For demonstration, print the log + This allows LLMs to access web-sourced information without explicit function calls. + + Args: + topic: The research topic or query + + Returns: + String containing the research context with source information + """ + # Check if we've already researched this topic + if topic in research_store: + logger.info(f"Returning cached research for topic: {topic}") + return research_store[topic]["context"] + + # If not, conduct the research + logger.info(f"Conducting new research for resource on topic: {topic}") + + # Initialize GPT Researcher + researcher = GPTResearcher(topic) + + try: + # Conduct the research + await researcher.conduct_research() + + # Get the context and sources + context = researcher.get_research_context() + sources = researcher.get_research_sources() + source_urls = researcher.get_source_urls() + + # Format with sources included + formatted_context = format_context_with_sources(topic, context, sources) + + # Store for future use + store_research_results(topic, context, sources, source_urls, formatted_context) + + return formatted_context + except Exception as e: + return f"Error conducting research on '{topic}': {str(e)}" @mcp.tool() async def deep_research(query: str) -> Dict[str, Any]: """ Conduct a web deep research on a given query using GPT Researcher. - Use this tool when you need a deep research on a topic. + Use this tool when you need time-sensitive, real-time information like stock prices, news, people, specific knowledge, etc. Args: query: The research query or topic @@ -69,10 +104,9 @@ async def deep_research(query: str) -> Dict[str, Any]: # Generate a unique ID for this research session research_id = str(uuid.uuid4()) - custom_logs_handler = CustomLogsHandler() # Initialize GPT Researcher - researcher = GPTResearcher(query=query, report_type=research_type, websocket=custom_logs_handler) + researcher = GPTResearcher(query) # Start research try: @@ -85,6 +119,9 @@ async def deep_research(query: str) -> Dict[str, Any]: sources = researcher.get_research_sources() source_urls = researcher.get_source_urls() + # Store in the research store for the resource API + store_research_results(query, context, sources, source_urls) + return create_success_response({ "research_id": research_id, "query": query, @@ -101,7 +138,8 @@ async def deep_research(query: str) -> Dict[str, Any]: async def quick_search(query: str) -> Dict[str, Any]: """ Perform a quick web search on a given query and return search results with snippets. - Use this tool when you need a quick research on a topic. + This optimizes for speed over quality and is useful when an LLM doesn't need in-depth + information on a topic. Args: query: The search query @@ -113,9 +151,9 @@ async def quick_search(query: str) -> Dict[str, Any]: # Generate a unique ID for this search session search_id = str(uuid.uuid4()) - custom_logs_handler = CustomLogsHandler() + # Initialize GPT Researcher - researcher = GPTResearcher(query=query, report_type=research_type, websocket=custom_logs_handler) + researcher = GPTResearcher(query) try: # Perform quick search diff --git a/src/phoenix_technologies/gpt_researcher/utils.py b/src/phoenix_technologies/gpt_researcher/utils.py index 283d22d..8ad7d12 100644 --- a/src/phoenix_technologies/gpt_researcher/utils.py +++ b/src/phoenix_technologies/gpt_researcher/utils.py @@ -11,6 +11,8 @@ from loguru import logger # Configure logging for console only (no file logging) logger.configure(handlers=[{"sink": sys.stderr, "level": "INFO"}]) +# Research store to track ongoing research topics and contexts +research_store = {} # API Response Utilities def create_error_response(message: str) -> Dict[str, Any]: @@ -66,6 +68,44 @@ def format_sources_for_response(sources: List[Dict[str, Any]]) -> List[Dict[str, ] +def format_context_with_sources(topic: str, context: str, sources: List[Dict[str, Any]]) -> str: + """ + Format research context with sources for display. + + Args: + topic: Research topic + context: Research context + sources: List of sources + + Returns: + Formatted context string with sources + """ + formatted_context = f"## Research: {topic}\n\n{context}\n\n" + formatted_context += "## Sources:\n" + for i, source in enumerate(sources): + formatted_context += f"{i+1}. {source.get('title', 'Unknown')}: {source.get('url', '')}\n" + return formatted_context + + +def store_research_results(topic: str, context: str, sources: List[Dict[str, Any]], + source_urls: List[str], formatted_context: Optional[str] = None): + """ + Store research results in the research store. + + Args: + topic: Research topic + context: Research context + sources: List of sources + source_urls: List of source URLs + formatted_context: Optional pre-formatted context + """ + research_store[topic] = { + "context": formatted_context or context, + "sources": sources, + "source_urls": source_urls + } + + def create_research_prompt(topic: str, goal: str, report_format: str = "research_report") -> str: """ Create a research query prompt for GPT Researcher. @@ -85,7 +125,10 @@ def create_research_prompt(topic: str, goal: str, report_format: str = "research You have two methods to access web-sourced information: - Use the conduct_research tool to perform new research and get a research_id for later use. + 1. Use the "research://{topic}" resource to directly access context about this topic if it exists + or if you want to get straight to the information without tracking a research ID. + + 2. Use the conduct_research tool to perform new research and get a research_id for later use. This tool also returns the context directly in its response, which you can use immediately. After getting context, you can: