# mcp-gpt-researcher/src/phoenix_technologies/gptresearch/deepresearch.py

from gpt_researcher import GPTResearcher


class ReportGenerator:
    """
    Run a GPTResearcher query and stream the outcome as text chunks.

    The instance is its own asynchronous iterator. Research is started lazily
    on the first ``__anext__`` call, the resulting chunks are cached on the
    instance, and each subsequent call hands out the next cached chunk.

    Iteration is single-pass: ``__aiter__`` returns the instance itself and
    the read position is never reset, so a second ``async for`` over an
    exhausted instance yields nothing.
    """

    def __init__(self, query: str, report_type: str) -> None:
        """
        Store the request parameters and build the underlying researcher.

        :param query: The main topic or question for research.
        :param report_type: The type of report to generate.
        """
        self.query = query
        self.report_type = report_type
        self.researcher = GPTResearcher(query, report_type)
        self._chunks = None  # Filled by the first __anext__ call
        self._index = 0  # Position of the next chunk to hand out

    def __aiter__(self) -> "ReportGenerator":
        """
        Return the instance itself as the asynchronous iterator.

        :return: Self instance for iteration.
        """
        return self

    async def __anext__(self) -> str:
        """
        Return the next report chunk, generating the report on first use.

        :return: The next chunk of the report.
        :raises StopAsyncIteration: Raised when all chunks are yielded.
        :raises RuntimeError: If research or report generation fails.
        """
        if self._chunks is None:
            # Lazy generation: nothing is researched until a consumer asks
            # for the first chunk. If generation raises, _chunks stays None
            # and the next call attempts generation again.
            self._chunks = await self._generate_report_chunks()

        if self._index >= len(self._chunks):
            raise StopAsyncIteration

        chunk = self._chunks[self._index]
        self._index += 1
        return chunk

    async def _generate_report_chunks(self) -> list:
        """
        Conduct the research, write the report and split it into chunks.

        :return: A list of report chunks.
        :raises RuntimeError: If any step fails; the original exception is
            attached as ``__cause__``.
        """
        try:
            # conduct_research() must be awaited before write_report(); its
            # return value is not needed here.
            await self.researcher.conduct_research()
            report = await self.researcher.write_report()

            # Each getter result falls back to an empty container when it is
            # falsy (None, but also 0, "" or an empty collection).
            full_report = {
                "report": report,
                "context": self.researcher.get_research_context() or {},
                "costs": self.researcher.get_costs() or {},
                "images": self.researcher.get_research_images() or [],
                "sources": self.researcher.get_research_sources() or [],
            }

            return self._split_into_chunks(full_report)

        except Exception as e:
            # Chain explicitly so the root cause stays attached to the
            # wrapper that callers see.
            raise RuntimeError(f"Error generating report chunks: {e}") from e

    def _split_into_chunks(self, report: dict) -> list:
        """
        Turn the report dictionary into one text chunk per entry.

        Each chunk has the form ``"<key>: <value>"``, in the dictionary's
        iteration order.

        :param report: A dictionary containing the full report data.
        :return: A list of formatted text chunks.
        :raises ValueError: If ``report`` is empty or None.
        """
        if not report:
            raise ValueError("Cannot split an empty or None report into chunks.")

        return [f"{key}: {value}" for key, value in report.items()]

    def get_query_details(self) -> dict:
        """
        Retrieve the query and report type this instance was created with.

        :return: A dictionary containing the query and report type.
        """
        return {
            "query": self.query,
            "report_type": self.report_type,
        }