From 3d0d2b277020c0490e25a095b5bc0c32d28802a2 Mon Sep 17 00:00:00 2001
From: ThomasTaroni <thomas.taroni@phoenix-technologies.ch>
Date: Fri, 25 Apr 2025 08:34:18 +0200
Subject: [PATCH] Add FastAPI app for report generation with Docker support

Implement a modular FastAPI-based service for generating research reports using `GPTResearcher`. Includes secure API key authentication, a streaming response endpoint, and a Dockerized deployment setup. Also adds documentation, core dependencies, and project structure.
---
 Dockerfile                                    |  25 +++
 README.md                                     | 166 ++++++++++++++++++
 requirements.txt                              |   5 +
 src/__init__.py                               |   0
 src/main.py                                   |  47 +++++
 src/phoenix_technologies/__init__.py          |   4 +
 .../gptresearch/__init__.py                   |   0
 .../gptresearch/deepresearch.py               |  42 +++++
 8 files changed, 289 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 README.md
 create mode 100644 requirements.txt
 create mode 100644 src/__init__.py
 create mode 100644 src/main.py
 create mode 100644 src/phoenix_technologies/__init__.py
 create mode 100644 src/phoenix_technologies/gptresearch/__init__.py
 create mode 100644 src/phoenix_technologies/gptresearch/deepresearch.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..51f00ad
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,25 @@
+# Use the official Python image as a base
+FROM python:3.13-slim
+
+# Do not write .pyc files, and keep stdout/stderr unbuffered so logs show up immediately
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+# Set working directory within the container
+WORKDIR /app
+
+# Copy only the dependency list first so the install layer below can be
+# reused from the build cache while requirements.txt is unchanged
+COPY requirements.txt .
+
+# Install dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the contents of src/ into /app so that main.py sits in the working directory
+COPY src/ /app/
+
+# Document the port that uvicorn is started on below
+EXPOSE 8000
+
+# Set the default command to run the app with `uvicorn`
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e59265c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,166 @@
+# README for the FastAPI-Based GPT Report Generation Service
+
+## Overview
+
+This repository contains the implementation of a **FastAPI**-based service designed to generate research reports. The service processes user-provided queries and report types, performing advanced research powered by `GPTResearcher` and responding with comprehensive results, including details, cost, context, images, and other associated metadata.
+
+## Features
+
+- **RESTful API** to handle user queries and generate reports.
+- **Streaming responses** to deliver research output in chunks.
+- **Secure API access** with API Key authentication.
+- Completely containerized setup with Docker.
+- Built with modular design for easier scalability and maintenance.
+
+---
+
+## System Architecture
+
+### Core Components
+
+1. **FastAPI App (`main.py`)**:
+    - Hosts the API endpoints.
+    - Handles API Key authentication for secure use.
+    - Accepts user inputs (query and report type) and generates a chunked streaming response.
+
+2. **Research Logic (`deepresearch.py`)**:
+    - Encapsulates research and report generation.
+    - Utilizes `GPTResearcher` to conduct research, generate reports, and retrieve extended data like images, contexts, or costs.
+
+3. **Docker Integration**:
+    - The application is containerized with a well-defined `Dockerfile`.
+    - Includes dependency installation, environment setup, and FastAPI server configuration for rapid deployment.
+
+---
+
+## Prerequisites
+
+Before running the application, ensure the following are installed on your system:
+
+- **Docker**: Version 24.0+
+- **Python**: Version 3.13+
+- **pip**: Pre-installed Python package manager.
+
+---
+
+## Running the Application Locally
+
+### Cloning the Repository
+
+Clone the repository to a directory of your choice:
+
+```shell script
+git clone https://git.kvant.cloud/phoenix/gpt-researcher.git
+cd gpt-researcher
+```
+
+### Environment Variable Configuration
+
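+Create a `.env` file in the root of the project and define the API key, replacing `your_api_key` with your desired key. Do not append a comment to the line: `docker run --env-file` treats a `#` that follows the value as part of the value.
+
+```
+API_KEY=your_api_key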
+```
+
+### Installing Dependencies
+
+Install the required Python packages listed in `requirements.txt`.
+
+```shell script
+pip install --no-cache-dir -r requirements.txt
+```
+
+### Running the App
+
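+Run the FastAPI app locally from the repository root (`--app-dir src` is needed because `main.py` lives in `src/`):
+
+```shell script
+uvicorn main:app --app-dir src --host 0.0.0.0 --port 8000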
+```
+
+After running, your app will be available at `http://127.0.0.1:8000`.
+
+---
+
+## Using Docker for Deployment
+
+### Building the Docker Image
+
+Build the Docker image using the **Dockerfile** provided:
+
+```shell script
+docker build -t fastapi-report-service .
+```
+
+### Running the Docker Container
+
+Spin up a container and publish port `8000`, the port the app listens on inside the container:
+
+```shell script
+docker run --env-file .env -p 8000:8000 fastapi-report-service
+```
+
+---
+
+## API Usage
+
+### 1. **`/get_report`**
+
+- **Method**: `POST`
+- **Description**: Generates a report based on user input.
+- **Headers**:
+    - `X-API-KEY`: API Key for authentication.
+- **Request Body** (`JSON`); `report_type` is one of `research_report`, `resource_report`, `outline_report`, `custom_report`, `detailed_report`, `subtopic_report` or `deep`:
+
+```json
+{
+  "query": "Research on AI in healthcare",
+  "report_type": "research_report"
+}
+```
+
+- **Streaming Response**: Research and report are provided in chunks.
+
+---
+
+## Code Structure
+
+```
+├── Dockerfile                   # Configuration for Dockerizing the application
+├── requirements.txt             # Python dependencies list
+└── src/                         # Application source and other project files
+    ├── main.py                  # FastAPI server entry point
+    └── phoenix_technologies/gptresearch/deepresearch.py  # Research-related logic and GPTResearcher integration
+```
+
+---
+
+## Features Under the Hood
+
+1. **Authentication**:
+    - An API key mechanism ensures that only authorized users can access endpoints.
+
+2. **Streaming Response**:
+    - Large research reports are sent incrementally using `StreamingResponse` for better experience and efficiency.
+
+3. **Modular Research Logic**:
+    - Research and generation tasks are handled by a dedicated class (`ReportGenerator`), making the application extensible.
+
+---
+
+## Future Enhancements
+
+- **Asynchronous Enhancements**:
+    - Improve async handling for long-running queries.
+
+- **Database Integration**:
+    - Save request history for auditing and reference purposes.
+
+- **Web Interface**:
+    - A user-friendly web application for interacting with the API.
+
+---
+
+## Contributing
+
+Contributions are welcome! Feel free to fork the repository, make updates, and submit a pull request.
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..06e6b07
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+fastapi
+uvicorn
+pydantic
+gpt-researcher
+# asyncio is part of the standard library; the PyPI package of that name is an obsolete backport and must not be installed
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..95db1a8
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,47 @@
+from fastapi import  FastAPI, HTTPException, Request, Depends
+from pydantic import BaseModel
+from phoenix_technologies import ReportGenerator
+from fastapi.responses import StreamingResponse
+import os
+import asyncio
+
+# FastAPI app instance
+app = FastAPI()
+
+# Request body for POST /get_report, validated by Pydantic
+class ReportRequest(BaseModel):
+    query: str  # the user's query; passed to ReportGenerator as its first argument
+    report_type: str  # passed to ReportGenerator as its second argument
+
+def verify_api_key(request: Request):
+    """FastAPI dependency: 500 if API_KEY is unset, 403 unless the X-API-KEY header matches it."""
+    import secrets  # function-scope import; only needed for the comparison below
+
+    expected_api_key = os.getenv("API_KEY")
+    if not expected_api_key:
+        raise HTTPException(status_code=500, detail="API key is not configured on the server.")
+
+    provided_api_key = request.headers.get("X-API-KEY")
+    # Constant-time comparison, so response timing does not reveal how much of the key matched.
+    # Both sides are encoded because compare_digest() rejects non-ASCII str arguments.
+    if not provided_api_key or not secrets.compare_digest(provided_api_key.encode(), expected_api_key.encode()):
+        raise HTTPException(status_code=403, detail="Invalid or missing API key.")
+
+@app.post("/get_report", dependencies=[Depends(verify_api_key)])
+async def get_report_endpoint(request: ReportRequest):
+    """
+    Run the requested research and stream the result dict (report, context, costs, images, sources) as JSON text.
+    """
+    import json  # function-scope import: this module does not import json at the top
+
+    async def generate_report():
+        try:
+            # ReportGenerator defines no __aiter__, so `async for` over it raises TypeError; await its coroutine.
+            report_generator = ReportGenerator(request.query, request.report_type)
+            result = await report_generator.generate_report()
+            yield json.dumps(result, ensure_ascii=False, default=str)  # default=str: best effort for non-JSON values
+        except Exception as e:
+            yield f"Error: {str(e)}"  # reported in the body because the response has already started
+
+    return StreamingResponse(generate_report(), media_type="text/plain")
+
diff --git a/src/phoenix_technologies/__init__.py b/src/phoenix_technologies/__init__.py
new file mode 100644
index 0000000..142e2fc
--- /dev/null
+++ b/src/phoenix_technologies/__init__.py
@@ -0,0 +1,4 @@
+# src/phoenix_technologies/__init__.py
+from .gptresearch.deepresearch import ReportGenerator
+
+__all__ = ["ReportGenerator"]
\ No newline at end of file
diff --git a/src/phoenix_technologies/gptresearch/__init__.py b/src/phoenix_technologies/gptresearch/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/phoenix_technologies/gptresearch/deepresearch.py b/src/phoenix_technologies/gptresearch/deepresearch.py
new file mode 100644
index 0000000..fcabcf9
--- /dev/null
+++ b/src/phoenix_technologies/gptresearch/deepresearch.py
@@ -0,0 +1,42 @@
+from gpt_researcher import GPTResearcher
+
+
+class ReportGenerator:
+    """Thin wrapper around `GPTResearcher` for a single query / report-type pair."""
+
+    def __init__(self, query: str, report_type: str):
+        """
+        Stores the query and report type and creates the underlying GPTResearcher.
+        """
+        self.query = query
+        self.report_type = report_type
+        self.researcher = GPTResearcher(query, report_type)
+
+    async def generate_report(self) -> dict:
+        """
+        Conducts research, writes the report and returns it with additional information.
+
+        Returns a dict with the keys "report", "context", "costs", "images" and "sources".
+        """
+        # Research runs first; its return value is not used here, so it is
+        # awaited without being bound to a name.
+        await self.researcher.conduct_research()
+        report = await self.researcher.write_report()
+
+        # Collect the additional data exposed by the researcher's getters.
+        return {
+            "report": report,
+            "context": self.researcher.get_research_context(),
+            "costs": self.researcher.get_costs(),
+            "images": self.researcher.get_research_images(),
+            "sources": self.researcher.get_research_sources(),
+        }
+
+    def get_query_details(self) -> dict:
+        """
+        Returns the query and report type this generator was created with.
+        """
+        return {
+            "query": self.query,
+            "report_type": self.report_type,
+        }