Introduce research resource API and improve research caching
Add a `research://{topic}` resource endpoint for direct access to research context, reducing redundant API calls. Introduce a `research_store` for caching research results, and modularize helper methods such as `store_research_results` and `format_context_with_sources` for better reusability and clarity. Refactor the existing researcher initialization for simplicity and improve comments to clarify intended usage.
This commit is contained in:
parent ba48f44321
commit b1ad64cd75

2 changed files with 98 additions and 17 deletions
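
For orientation only, a minimal sketch of how an MCP client might read the new research://{topic} resource. It assumes the standard `mcp` Python SDK client interface, an illustrative stdio launch (`python server.py`), and an illustrative topic; none of these specifics are defined by this commit:

import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


async def main() -> None:
    # Launch the MCP server over stdio; the "server.py" entry point is an assumption.
    params = StdioServerParameters(command="python", args=["server.py"])
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Read the new resource; a repeat read of the same topic is served from research_store.
            result = await session.read_resource("research://ai")
            for content in result.contents:
                print(getattr(content, "text", ""))


asyncio.run(main())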
@@ -18,10 +18,13 @@ from gpt_researcher import GPTResearcher
 load_dotenv()
 
 from utils import (
+    research_store,
     create_success_response,
     handle_exception,
     get_researcher_by_id,
     format_sources_for_response,
+    format_context_with_sources,
+    store_research_results,
     create_research_prompt
 )
 
@@ -33,30 +36,62 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 # Initialize FastMCP server
-mcp = FastMCP("GPT Researcher", host="0.0.0.0", port=8000, timeout_keep_alive=720)
-research_type = os.getenv("RESEARCH_TYPE", "deep")
+mcp = FastMCP("GPT Researcher")
 
 # Initialize researchers dictionary
 if not hasattr(mcp, "researchers"):
     mcp.researchers = {}
 
 
-class CustomLogsHandler:
-    """A custom Logs handler class to handle JSON data."""
-    def __init__(self):
-        self.logs = []  # Initialize logs to store data
-
-    async def send_json(self, data: Dict[str, Any]) -> None:
-        """Send JSON data and log it."""
-        self.logs.append(data)  # Append data to logs
-        print(f"MCP Log: {data}")  # For demonstration, print the log
-
-
+@mcp.resource("research://{topic}")
+async def research_resource(topic: str) -> str:
+    """
+    Provide research context for a given topic directly as a resource.
+
+    This allows LLMs to access web-sourced information without explicit function calls.
+
+    Args:
+        topic: The research topic or query
+
+    Returns:
+        String containing the research context with source information
+    """
+    # Check if we've already researched this topic
+    if topic in research_store:
+        logger.info(f"Returning cached research for topic: {topic}")
+        return research_store[topic]["context"]
+
+    # If not, conduct the research
+    logger.info(f"Conducting new research for resource on topic: {topic}")
+
+    # Initialize GPT Researcher
+    researcher = GPTResearcher(topic)
+
+    try:
+        # Conduct the research
+        await researcher.conduct_research()
+
+        # Get the context and sources
+        context = researcher.get_research_context()
+        sources = researcher.get_research_sources()
+        source_urls = researcher.get_source_urls()
+
+        # Format with sources included
+        formatted_context = format_context_with_sources(topic, context, sources)
+
+        # Store for future use
+        store_research_results(topic, context, sources, source_urls, formatted_context)
+
+        return formatted_context
+    except Exception as e:
+        return f"Error conducting research on '{topic}': {str(e)}"
+
+
 @mcp.tool()
 async def deep_research(query: str) -> Dict[str, Any]:
     """
     Conduct a web deep research on a given query using GPT Researcher.
-    Use this tool when you need a deep research on a topic.
+    Use this tool when you need time-sensitive, real-time information like stock prices, news, people, specific knowledge, etc.
 
     Args:
         query: The research query or topic
@@ -69,10 +104,9 @@ async def deep_research(query: str) -> Dict[str, Any]:
 
     # Generate a unique ID for this research session
     research_id = str(uuid.uuid4())
-    custom_logs_handler = CustomLogsHandler()
 
     # Initialize GPT Researcher
-    researcher = GPTResearcher(query=query, report_type=research_type, websocket=custom_logs_handler)
+    researcher = GPTResearcher(query)
 
     # Start research
     try:
@@ -85,6 +119,9 @@ async def deep_research(query: str) -> Dict[str, Any]:
         sources = researcher.get_research_sources()
         source_urls = researcher.get_source_urls()
 
+        # Store in the research store for the resource API
+        store_research_results(query, context, sources, source_urls)
+
         return create_success_response({
             "research_id": research_id,
             "query": query,
@@ -101,7 +138,8 @@ async def deep_research(query: str) -> Dict[str, Any]:
 async def quick_search(query: str) -> Dict[str, Any]:
     """
     Perform a quick web search on a given query and return search results with snippets.
-    Use this tool when you need a quick research on a topic.
+    This optimizes for speed over quality and is useful when an LLM doesn't need in-depth
+    information on a topic.
 
     Args:
         query: The search query
@@ -113,9 +151,9 @@ async def quick_search(query: str) -> Dict[str, Any]:
 
     # Generate a unique ID for this search session
     search_id = str(uuid.uuid4())
-    custom_logs_handler = CustomLogsHandler()
+
     # Initialize GPT Researcher
-    researcher = GPTResearcher(query=query, report_type=research_type, websocket=custom_logs_handler)
+    researcher = GPTResearcher(query)
 
     try:
         # Perform quick search
@@ -11,6 +11,8 @@ from loguru import logger
 # Configure logging for console only (no file logging)
 logger.configure(handlers=[{"sink": sys.stderr, "level": "INFO"}])
 
+# Research store to track ongoing research topics and contexts
+research_store = {}
 
 # API Response Utilities
 def create_error_response(message: str) -> Dict[str, Any]:
@@ -66,6 +68,44 @@ def format_sources_for_response(sources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
     ]
 
 
+def format_context_with_sources(topic: str, context: str, sources: List[Dict[str, Any]]) -> str:
+    """
+    Format research context with sources for display.
+
+    Args:
+        topic: Research topic
+        context: Research context
+        sources: List of sources
+
+    Returns:
+        Formatted context string with sources
+    """
+    formatted_context = f"## Research: {topic}\n\n{context}\n\n"
+    formatted_context += "## Sources:\n"
+    for i, source in enumerate(sources):
+        formatted_context += f"{i+1}. {source.get('title', 'Unknown')}: {source.get('url', '')}\n"
+    return formatted_context
+
+
+def store_research_results(topic: str, context: str, sources: List[Dict[str, Any]],
+                           source_urls: List[str], formatted_context: Optional[str] = None):
+    """
+    Store research results in the research store.
+
+    Args:
+        topic: Research topic
+        context: Research context
+        sources: List of sources
+        source_urls: List of source URLs
+        formatted_context: Optional pre-formatted context
+    """
+    research_store[topic] = {
+        "context": formatted_context or context,
+        "sources": sources,
+        "source_urls": source_urls
+    }
+
+
 def create_research_prompt(topic: str, goal: str, report_format: str = "research_report") -> str:
     """
     Create a research query prompt for GPT Researcher.
@@ -85,7 +125,10 @@ def create_research_prompt(topic: str, goal: str, report_format: str = "research_report") -> str:
 
 You have two methods to access web-sourced information:
 
-Use the conduct_research tool to perform new research and get a research_id for later use.
+1. Use the "research://{topic}" resource to directly access context about this topic if it exists
+or if you want to get straight to the information without tracking a research ID.
+
+2. Use the conduct_research tool to perform new research and get a research_id for later use.
 This tool also returns the context directly in its response, which you can use immediately.
 
 After getting context, you can:
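
As a usage note, here is a minimal sketch of how the new utils helpers fit together; the topic, context, and sources below are illustrative placeholders rather than values from this commit:

from utils import format_context_with_sources, research_store, store_research_results

topic = "example topic"  # illustrative
context = "Research context gathered by GPT Researcher."  # placeholder
sources = [{"title": "Example Source", "url": "https://example.com"}]  # placeholder
source_urls = [source["url"] for source in sources]

# Format the context with a trailing sources section, then cache it for the resource endpoint.
formatted = format_context_with_sources(topic, context, sources)
store_research_results(topic, context, sources, source_urls, formatted)

# A later read of research://{topic} for this topic returns the cached, formatted context.
assert research_store[topic]["context"] == formatted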