Refactor and improve ReportGenerator for clarity and robustness

Enhanced docstrings for better clarity and consistency across methods. Added error handling for report generation and ensured default values for research details to avoid null errors. Improved comments and logic flow for readability and maintainability.
This commit is contained in:
ThomasTaroni 2025-04-25 10:03:21 +02:00
parent 9f87253082
commit 6993a52d47

View file

@ -2,54 +2,71 @@ from gpt_researcher import GPTResearcher
class ReportGenerator:
"""
A class to handle the generation of research-based reports.
This class integrates with GPTResearcher to conduct research, retrieve results,
and format them into consumable chunks for asynchronous streaming.
"""
def __init__(self, query: str, report_type: str) -> None:
    """
    Initializes the ReportGenerator instance with a query and report type.

    :param query: The main topic or question for research.
    :param report_type: The type of report to generate.
    """
    self.query = query
    self.report_type = report_type
    # The researcher is constructed eagerly from the same two arguments.
    self.researcher = GPTResearcher(query, report_type)
    # Stays None until the first __anext__ call generates the chunks.
    self._chunks = None
    # Position of the next chunk to hand out while streaming.
    self._index = 0
def __aiter__(self):
"""
Make this class asynchronously iterable.
Makes the ReportGenerator instance asynchronously iterable.
:return: Self instance for iteration.
"""
return self
async def __anext__(self):
"""
Defines the asynchronous iteration logic.
Defines the logic for asynchronous iteration over report chunks.
:return: The next chunk of the report.
:raises StopAsyncIteration: Raised when all chunks are yielded.
"""
if self._chunks is None:
# If chunks are not generated yet, generate the report
# Generate report chunks on first access
self._chunks = await self._generate_report_chunks()
if self._index >= len(self._chunks):
# Stop iteration when all chunks are yielded
# End iteration once all chunks are exhausted
raise StopAsyncIteration
# Return the next chunk and increment the index
chunk = self._chunks[self._index]
self._index += 1
return chunk
async def _generate_report_chunks(self):
"""
Conducts research and generates the report in chunks.
Conducts research using the researcher and generates the report in chunks.
:return: A list of report chunks.
"""
# Conduct research
try:
# Asynchronous research and report generation
research_result = await self.researcher.conduct_research()
report = await self.researcher.write_report()
# Retrieve additional information
research_context = self.researcher.get_research_context()
research_costs = self.researcher.get_costs()
research_images = self.researcher.get_research_images()
research_sources = self.researcher.get_research_sources()
# Retrieve additional research details
research_context = self.researcher.get_research_context() or {}
research_costs = self.researcher.get_costs() or {}
research_images = self.researcher.get_research_images() or []
research_sources = self.researcher.get_research_sources() or []
# Construct the full response
# Construct the complete research response
full_report = {
"report": report,
"context": research_context,
@ -58,23 +75,33 @@ class ReportGenerator:
"sources": research_sources,
}
# Split the report into smaller chunks for streaming
# Generate chunks for streaming
return self._split_into_chunks(full_report)
except Exception as e:
# Handle potential errors during research and report generation
raise RuntimeError(f"Error generating report chunks: {e}")
def _split_into_chunks(self, report):
"""
Splits a report dictionary into smaller chunks for streaming.
Splits the report dictionary into smaller chunks for easier streaming.
:param report: A dictionary containing the full report data.
:return: A list of formatted text chunks.
"""
# Convert the report dictionary into a list of key-value pairs,
# where each pair represents a chunk.
if not report:
raise ValueError("Cannot split an empty or None report into chunks.")
chunks = []
for key, value in report.items():
chunks.append(f"{key}: {value}")
return chunks # Return the list of chunks
return chunks
def get_query_details(self):
"""
Returns details of the query and report type.
Retrieves the details of the query and report type.
:return: A dictionary containing the query and report type.
"""
return {
"query": self.query,