From b1ad64cd75b8b90ac0ab58ce47f3d4b807208568 Mon Sep 17 00:00:00 2001
From: ThomasTaroni <thomas.taroni@phoenix-technologies.ch>
Date: Sat, 31 May 2025 23:54:51 +0200
Subject: [PATCH] Introduce research resource API and improve research caching

Add a `research://{topic}` resource endpoint for direct access to research context, reducing redundant API calls. Introduce `research_store` for caching research results, and add the helper functions `store_research_results` and `format_context_with_sources` for better reusability and clarity. Simplify the existing researcher initialization (dropping the custom logs handler and the `RESEARCH_TYPE` setting) and improve docstrings to clarify intended usage.
---
 .../gpt_researcher/server.py                  | 70 ++++++++++++++-----
 .../gpt_researcher/utils.py                   | 45 +++++++++++-
 2 files changed, 98 insertions(+), 17 deletions(-)

diff --git a/src/phoenix_technologies/gpt_researcher/server.py b/src/phoenix_technologies/gpt_researcher/server.py
index a96ee6e..2fdcd25 100644
--- a/src/phoenix_technologies/gpt_researcher/server.py
+++ b/src/phoenix_technologies/gpt_researcher/server.py
@@ -18,10 +18,13 @@ from gpt_researcher import GPTResearcher
 load_dotenv()
 
 from utils import (
+    research_store,
     create_success_response,
     handle_exception,
     get_researcher_by_id,
     format_sources_for_response,
+    format_context_with_sources,
+    store_research_results,
     create_research_prompt
 )
 
@@ -33,30 +36,62 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 # Initialize FastMCP server
-mcp = FastMCP("GPT Researcher", host="0.0.0.0", port=8000, timeout_keep_alive=720)
-research_type = os.getenv("RESEARCH_TYPE", "deep")
+mcp = FastMCP("GPT Researcher")
 
 # Initialize researchers dictionary
 if not hasattr(mcp, "researchers"):
     mcp.researchers = {}
 
 
-class CustomLogsHandler:
-    """A custom Logs handler class to handle JSON data."""
-    def __init__(self):
-        self.logs = []  # Initialize logs to store data
+@mcp.resource("research://{topic}")
+async def research_resource(topic: str) -> str:
+    """
+    Provide research context for a given topic directly as a resource.
 
-    async def send_json(self, data: Dict[str, Any]) -> None:
-        """Send JSON data and log it."""
-        self.logs.append(data)  # Append data to logs
-        print(f"MCP Log: {data}")  # For demonstration, print the log
+    This allows LLMs to access web-sourced information without explicit function calls.
+
+    Args:
+        topic: The research topic or query
+
+    Returns:
+        String containing the research context with source information
+    """
+    # Check if we've already researched this topic
+    if topic in research_store:
+        logger.info(f"Returning cached research for topic: {topic}")
+        return research_store[topic]["context"]
+
+    # If not, conduct the research
+    logger.info(f"Conducting new research for resource on topic: {topic}")
+
+    # Initialize GPT Researcher
+    researcher = GPTResearcher(topic)
+
+    try:
+        # Conduct the research
+        await researcher.conduct_research()
+
+        # Get the context and sources
+        context = researcher.get_research_context()
+        sources = researcher.get_research_sources()
+        source_urls = researcher.get_source_urls()
+
+        # Format with sources included
+        formatted_context = format_context_with_sources(topic, context, sources)
+
+        # Store for future use
+        store_research_results(topic, context, sources, source_urls, formatted_context)
+
+        return formatted_context
+    except Exception as e:
+        return f"Error conducting research on '{topic}': {str(e)}"
 
 
 @mcp.tool()
 async def deep_research(query: str) -> Dict[str, Any]:
     """
     Conduct a web deep research on a given query using GPT Researcher.
-    Use this tool when you need a deep research on a topic.
+    Use this tool when you need time-sensitive, real-time information like stock prices, news, people, specific knowledge, etc.
 
     Args:
         query: The research query or topic
@@ -69,10 +104,9 @@ async def deep_research(query: str) -> Dict[str, Any]:
 
     # Generate a unique ID for this research session
     research_id = str(uuid.uuid4())
-    custom_logs_handler = CustomLogsHandler()
 
     # Initialize GPT Researcher
-    researcher = GPTResearcher(query=query, report_type=research_type, websocket=custom_logs_handler)
+    researcher = GPTResearcher(query)
 
     # Start research
     try:
@@ -85,6 +119,9 @@ async def deep_research(query: str) -> Dict[str, Any]:
         sources = researcher.get_research_sources()
         source_urls = researcher.get_source_urls()
 
+        # Store in the research store for the resource API
+        store_research_results(query, context, sources, source_urls)
+
         return create_success_response({
             "research_id": research_id,
             "query": query,
@@ -101,7 +138,8 @@ async def deep_research(query: str) -> Dict[str, Any]:
 async def quick_search(query: str) -> Dict[str, Any]:
     """
     Perform a quick web search on a given query and return search results with snippets.
-    Use this tool when you need a quick research on a topic.
+    This optimizes for speed over quality and is useful when an LLM doesn't need in-depth
+    information on a topic.
 
     Args:
         query: The search query
@@ -113,9 +151,9 @@ async def quick_search(query: str) -> Dict[str, Any]:
 
     # Generate a unique ID for this search session
     search_id = str(uuid.uuid4())
-    custom_logs_handler = CustomLogsHandler()
+
     # Initialize GPT Researcher
-    researcher = GPTResearcher(query=query, report_type=research_type, websocket=custom_logs_handler)
+    researcher = GPTResearcher(query)
 
     try:
         # Perform quick search
diff --git a/src/phoenix_technologies/gpt_researcher/utils.py b/src/phoenix_technologies/gpt_researcher/utils.py
index 283d22d..8ad7d12 100644
--- a/src/phoenix_technologies/gpt_researcher/utils.py
+++ b/src/phoenix_technologies/gpt_researcher/utils.py
@@ -11,6 +11,8 @@ from loguru import logger
 # Configure logging for console only (no file logging)
 logger.configure(handlers=[{"sink": sys.stderr, "level": "INFO"}])
 
+# In-memory cache of completed research results, keyed by topic/query string
+research_store = {}
 
 # API Response Utilities
 def create_error_response(message: str) -> Dict[str, Any]:
@@ -66,6 +68,44 @@ def format_sources_for_response(sources: List[Dict[str, Any]]) -> List[Dict[str,
     ]
 
 
+def format_context_with_sources(topic: str, context: str, sources: List[Dict[str, Any]]) -> str:
+    """
+    Format research context with sources for display.
+
+    Args:
+        topic: Research topic
+        context: Research context
+        sources: List of sources
+
+    Returns:
+        Formatted context string with sources
+    """
+    formatted_context = f"## Research: {topic}\n\n{context}\n\n"
+    formatted_context += "## Sources:\n"
+    for i, source in enumerate(sources):
+        formatted_context += f"{i+1}. {source.get('title', 'Unknown')}: {source.get('url', '')}\n"
+    return formatted_context
+
+
+def store_research_results(topic: str, context: str, sources: List[Dict[str, Any]],
+                           source_urls: List[str], formatted_context: Optional[str] = None):
+    """
+    Store research results in the research store.
+
+    Args:
+        topic: Research topic
+        context: Research context
+        sources: List of sources
+        source_urls: List of source URLs
+        formatted_context: Optional pre-formatted context
+    """
+    research_store[topic] = {
+        "context": formatted_context or context,
+        "sources": sources,
+        "source_urls": source_urls
+    }
+
+
 def create_research_prompt(topic: str, goal: str, report_format: str = "research_report") -> str:
     """
     Create a research query prompt for GPT Researcher.
@@ -85,7 +125,10 @@ def create_research_prompt(topic: str, goal: str, report_format: str = "research
     
     You have two methods to access web-sourced information:
     
-    Use the conduct_research tool to perform new research and get a research_id for later use.
+    1. Use the "research://{topic}" resource to directly access context about this topic if it exists
+       or if you want to get straight to the information without tracking a research ID.
+       
+    2. Use the conduct_research tool to perform new research and get a research_id for later use.
        This tool also returns the context directly in its response, which you can use immediately.
     
     After getting context, you can: