Refactor and improve ReportGenerator for clarity and robustness

Enhanced docstrings for better clarity and consistency across methods. Added error handling for report generation and ensured that the research details (context, costs, images, sources) fall back to empty defaults when the researcher returns `None` or an empty value, avoiding errors on missing data. Improved comments and logic flow for readability and maintainability.
2025-04-25 10:03:21 +02:00 · 2025-04-25 10:03:21 +02:00 · 6993a52d47
commit 6993a52d47
parent 9f87253082
1 changed file with 58 additions and 31 deletions
--- a/src/phoenix_technologies/gptresearch/deepresearch.py
+++ b/src/phoenix_technologies/gptresearch/deepresearch.py
@@ -2,54 +2,71 @@ from gpt_researcher import GPTResearcher


 class ReportGenerator:
+    """
+    A class to handle the generation of research-based reports.
+
+    This class integrates with GPTResearcher to conduct research, retrieve results,
+    and format them into consumable chunks for asynchronous streaming.
+    """
+
    def __init__(self, query: str, report_type: str):
        """
-        Initializes the ReportGenerator with a query and report type.
+        Initializes the ReportGenerator instance with a query and report type.
+
+        :param query: The main topic or question for research.
+        :param report_type: The type of report to generate.
        """
        self.query = query
        self.report_type = report_type
        self.researcher = GPTResearcher(query, report_type)
        self._chunks = None  # Placeholder for report chunks
-        self._index = 0  # Index for iteration
+        self._index = 0  # Iterator index for streaming

    def __aiter__(self):
        """
-        Make this class asynchronously iterable.
+        Makes the ReportGenerator instance asynchronously iterable.
+
+        :return: Self instance for iteration.
        """
        return self

    async def __anext__(self):
        """
-        Defines the asynchronous iteration logic.
+        Defines the logic for asynchronous iteration over report chunks.
+
+        :return: The next chunk of the report.
+        :raises StopAsyncIteration: Raised when all chunks are yielded.
        """
        if self._chunks is None:
-            # If chunks are not generated yet, generate the report
+            # Generate report chunks on first access
            self._chunks = await self._generate_report_chunks()

        if self._index >= len(self._chunks):
-            # Stop iteration when all chunks are yielded
+            # End iteration once all chunks are exhausted
            raise StopAsyncIteration

-        # Return the next chunk and increment the index
        chunk = self._chunks[self._index]
        self._index += 1
        return chunk

    async def _generate_report_chunks(self):
        """
-        Conducts research and generates the report in chunks.
+        Conducts research using the researcher and generates the report in chunks.
+
+        :return: A list of report chunks.
        """
-        # Conduct research
+        try:
+            # Asynchronous research and report generation
            research_result = await self.researcher.conduct_research()
            report = await self.researcher.write_report()

-        # Retrieve additional information
-        research_context = self.researcher.get_research_context()
-        research_costs = self.researcher.get_costs()
-        research_images = self.researcher.get_research_images()
-        research_sources = self.researcher.get_research_sources()
+            # Retrieve additional research details
+            research_context = self.researcher.get_research_context() or {}
+            research_costs = self.researcher.get_costs() or {}
+            research_images = self.researcher.get_research_images() or []
+            research_sources = self.researcher.get_research_sources() or []

-        # Construct the full response
+            # Construct the complete research response
            full_report = {
                "report": report,
                "context": research_context,
@@ -58,23 +75,33 @@ class ReportGenerator:
                "sources": research_sources,
            }

-        # Split the report into smaller chunks for streaming
+            # Generate chunks for streaming
            return self._split_into_chunks(full_report)

+        except Exception as e:
+            # Handle potential errors during research and report generation
+            raise RuntimeError(f"Error generating report chunks: {e}")
+
    def _split_into_chunks(self, report):
        """
-        Splits a report dictionary into smaller chunks for streaming.
+        Splits the report dictionary into smaller chunks for easier streaming.
+
+        :param report: A dictionary containing the full report data.
+        :return: A list of formatted text chunks.
        """
-        # Convert the report dictionary into a list of key-value pairs,
-        # where each pair represents a chunk.
+        if not report:
+            raise ValueError("Cannot split an empty or None report into chunks.")
+
        chunks = []
        for key, value in report.items():
            chunks.append(f"{key}: {value}")
-        return chunks  # Return the list of chunks
+        return chunks

    def get_query_details(self):
        """
-        Returns details of the query and report type.
+        Retrieves the details of the query and report type.
+
+        :return: A dictionary containing the query and report type.
        """
        return {
            "query": self.query,