From 3d0d2b277020c0490e25a095b5bc0c32d28802a2 Mon Sep 17 00:00:00 2001
From: ThomasTaroni
Date: Fri, 25 Apr 2025 08:34:18 +0200
Subject: [PATCH] Add FastAPI app for report generation with Docker support

Implement a modular FastAPI-based service for generating research
reports using `GPTResearcher`. Includes secure API key authentication,
a streaming response endpoint, and a Dockerized deployment setup. Also
adds documentation, core dependencies, and project structure.
---
 Dockerfile                                    |  25 +++
 README.md                                     | 166 ++++++++++++++++++
 requirements.txt                              |   5 +
 src/__init__.py                               |   0
 src/main.py                                   |  47 +++++
 src/phoenix_technologies/__init__.py          |   4 +
 .../gptresearch/__init__.py                   |   0
 .../gptresearch/deepresearch.py               |  42 +++++
 8 files changed, 289 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 README.md
 create mode 100644 requirements.txt
 create mode 100644 src/__init__.py
 create mode 100644 src/main.py
 create mode 100644 src/phoenix_technologies/__init__.py
 create mode 100644 src/phoenix_technologies/gptresearch/__init__.py
 create mode 100644 src/phoenix_technologies/gptresearch/deepresearch.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..51f00ad
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,25 @@
+# Use the official Python image as a base
+FROM python:3.13-slim
+
+# Do not write .pyc files and keep stdout/stderr unbuffered so logs appear immediately
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+# Set working directory within the container
+WORKDIR /app
+
+# Copy only the requirements file first so the dependency layer below is
+# cached independently of changes to the application source
+COPY requirements.txt .
+
+# Install dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the project files into the container
+COPY src/ /app/
+
+# Expose the port that FastAPI will run on
+EXPOSE 8000
+
+# Set the default command to run the app with `uvicorn`
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e59265c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,166 @@
+# README for the FastAPI-Based GPT Report Generation Service
+
+## Overview
+
+This repository contains the implementation of a **FastAPI**-based service designed to generate research reports. The service processes user-provided queries and report types, performing advanced research powered by `GPTResearcher` and responding with comprehensive results, including details, cost, context, images, and other associated metadata.
+
+## Features
+
+- **RESTful API** to handle user queries and generate reports.
+- **Streaming responses** to deliver research output in chunks.
+- **Secure API access** with API Key authentication.
+- Fully containerized setup with Docker.
+- Built with a modular design for easier scalability and maintenance.
+
+---
+
+## System Architecture
+
+### Core Components
+
+1. **FastAPI App (`main.py`)**:
+   - Hosts the API endpoints.
+   - Handles API Key authentication for secure use.
+   - Accepts user inputs (query and report type) and generates a chunked streaming response.
+
+2. **Research Logic (`deepresearch.py`)**:
+   - Encapsulates research and report generation.
+   - Utilizes `GPTResearcher` to conduct research, generate reports, and retrieve extended data like images, contexts, or costs.
+
+3. **Docker Integration**:
+   - The application is containerized with a well-defined `Dockerfile`.
+   - Includes dependency installation, environment setup, and FastAPI server configuration for rapid deployment.
+
+---
+
+## Prerequisites
+
+Before running the application, ensure the following are installed on your system:
+
+- **Docker**: Version 24.0+
+- **Python**: Version 3.13+
+- **pip**: Pre-installed Python package manager.
+
+---
+
+## Running the Application Locally
+
+### Cloning the Repository
+
+Clone the repository to a directory of your choice:
+
+```shell script
+git clone https://git.kvant.cloud/phoenix/gpt-researcher.git
+cd gpt-researcher
+```
+
+### Environment Variable Configuration
+
+Create a `.env` file in the root of the project and define `API_KEY`, replacing `your_api_key` with your desired key. Do not append a comment to the value line: `docker run --env-file` treats a `#` after the value as part of the value.
+
+```
+API_KEY=your_api_key
+```
+
+### Installing Dependencies
+
+Install the required Python modules listed in `requirements.txt`.
+
+```shell script
+pip install --no-cache-dir -r requirements.txt
+```
+
+### Running the App
+
+Run the FastAPI app locally. The app reads `API_KEY` from the process environment and does not load `.env` by itself, so export the variable first (for example `export API_KEY=your_api_key`). The entry point lives in `src/`, hence `--app-dir src`:
+
+```shell script
+uvicorn main:app --app-dir src --host 0.0.0.0 --port 8000
+```
+
+After running, your app will be available at `http://127.0.0.1:8000`.
+
+---
+
+## Using Docker for Deployment
+
+### Building the Docker Image
+
+Build the Docker image using the **Dockerfile** provided:
+
+```shell script
+docker build -t fastapi-report-service .
+```
+
+### Running the Docker Container
+
+Spin up a container and map the port the app listens on, `8000`:
+
+```shell script
+docker run --env-file .env -p 8000:8000 fastapi-report-service
+```
+
+---
+
+## API Usage
+
+### 1. **`/get_report`**
+
+- **Method**: `POST`
+- **Description**: Generates a report based on user input.
+- **Headers**:
+  - `X-API-KEY`: API Key for authentication.
+- **Request Body** (`JSON`); `report_type` must be exactly one of `research_report`, `resource_report`, `outline_report`, `custom_report`, `detailed_report`, `subtopic_report` or `deep`:
+
+```json
+{
+  "query": "Research on AI in healthcare",
+  "report_type": "research_report"
+}
+```
+
+- **Streaming Response**: Research and report are provided in chunks.
+
+---
+
+## Code Structure
+
+```
+├── Dockerfile           # Configuration for Dockerizing the application
+├── requirements.txt     # Python dependencies list
+└── src/                 # Application source code
+    ├── main.py          # FastAPI server entry point
+    └── phoenix_technologies/gptresearch/deepresearch.py  # Research-related logic and GPTResearcher integration
+```
+
+---
+
+## Features Under the Hood
+
+1. **Authentication**:
+   - An API key mechanism ensures that only authorized users can access endpoints.
+
+2. **Streaming Response**:
+   - Large research reports are sent incrementally using `StreamingResponse` for a better user experience and efficiency.
+
+3. **Modular Research Logic**:
+   - Research and generation tasks are handled by a dedicated class (`ReportGenerator`), making the application extensible.
+
+---
+
+## Future Enhancements
+
+- **Asynchronous Enhancements**:
+  - Improve async handling for long-running queries.
+
+- **Database Integration**:
+  - Save request history for auditing and reference purposes.
+
+- **Web Interface**:
+  - A user-friendly web application for interacting with the API.
+
+---
+
+## Contributing
+
+Contributions are welcome! Feel free to fork the repository, make updates, and submit a pull request.
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..06e6b07
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+fastapi
+uvicorn
+pydantic
+gpt-researcher
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..95db1a8
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,72 @@
+from fastapi import FastAPI, HTTPException, Request, Depends
+from pydantic import BaseModel
+from phoenix_technologies import ReportGenerator
+from fastapi.responses import StreamingResponse
+import os
+import asyncio
+import logging
+import secrets
+
+logger = logging.getLogger(__name__)
+
+# FastAPI app instance
+app = FastAPI()
+
+# Define a request body structure using Pydantic
+class ReportRequest(BaseModel):
+    query: str
+    report_type: str
+
+# Define a dependency to validate the API Key
+def verify_api_key(request: Request):
+    """
+    Reject the request unless its `X-API-KEY` header matches the `API_KEY`
+    environment variable.
+
+    Raises an HTTPException with status 500 when no key is configured on the
+    server, and with status 403 when the header is missing or does not match.
+    """
+    # Define the API key from the environment variables
+    expected_api_key = os.getenv("API_KEY", None)
+    if not expected_api_key:
+        raise HTTPException(status_code=500, detail="API key is not configured on the server.")
+
+    # Get the API key from the request headers
+    provided_api_key = request.headers.get("X-API-KEY", None)
+    if not provided_api_key:
+        raise HTTPException(status_code=403, detail="Invalid or missing API key.")
+
+    # Compare in constant time so that response timing does not reveal how
+    # much of the key matched. Both values are encoded to bytes because
+    # compare_digest() raises TypeError for non-ASCII str arguments.
+    keys_match = secrets.compare_digest(
+        provided_api_key.encode("utf-8"), expected_api_key.encode("utf-8")
+    )
+    if not keys_match:
+        raise HTTPException(status_code=403, detail="Invalid or missing API key.")
+
+@app.post("/get_report", dependencies=[Depends(verify_api_key)])
+async def get_report_endpoint(request: ReportRequest):
+    """
+    Expose the `get_report` function as a POST API endpoint, with a streaming response.
+
+    The response body is plain text: the report, delivered chunk by chunk.
+    """
+
+    async def generate_report():
+        try:
+            # ReportGenerator is async-iterable: iterating it runs the
+            # research and then yields the report text in chunks
+            report_generator = ReportGenerator(request.query, request.report_type)
+            async for chunk in report_generator:
+                yield chunk
+        except Exception as e:
+            # The response status has already been sent by the time the body
+            # is being streamed, so the failure can only be reported in-band;
+            # log it too so it is not lost on the server side.
+            logger.exception("Report generation failed")
+            yield f"Error: {str(e)}"
+
+    # Return streaming response
+    return StreamingResponse(generate_report(), media_type="text/plain")
+
diff --git a/src/phoenix_technologies/__init__.py b/src/phoenix_technologies/__init__.py
new file mode 100644
index 0000000..142e2fc
--- /dev/null
+++ b/src/phoenix_technologies/__init__.py
@@ -0,0 +1,4 @@
+# phoenix-technologies/__init__.py
+from .gptresearch.deepresearch import ReportGenerator
+
+__all__ = ["ReportGenerator"]
\ No newline at end of file
diff --git a/src/phoenix_technologies/gptresearch/__init__.py b/src/phoenix_technologies/gptresearch/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/phoenix_technologies/gptresearch/deepresearch.py b/src/phoenix_technologies/gptresearch/deepresearch.py
new file mode 100644
index 0000000..fcabcf9
--- /dev/null
+++ b/src/phoenix_technologies/gptresearch/deepresearch.py
@@ -0,0 +1,62 @@
+from gpt_researcher import GPTResearcher
+
+
+class ReportGenerator:
+    # Maximum number of characters yielded per chunk when iterating
+    CHUNK_SIZE = 1024
+
+    def __init__(self, query: str, report_type: str):
+        """
+        Initializes the ReportGenerator with a query and report type.
+        """
+        self.query = query
+        self.report_type = report_type
+        self.researcher = GPTResearcher(query, report_type)
+
+    async def __aiter__(self):
+        """
+        Runs the research and yields the report text in chunks.
+
+        Defining this makes an instance usable with `async for`, which is
+        how the `/get_report` endpoint in `main.py` consumes it.
+        """
+        result = await self.generate_report()
+        # Fall back to an empty string so a missing report yields nothing
+        report = result["report"] or ""
+        for start in range(0, len(report), self.CHUNK_SIZE):
+            yield report[start:start + self.CHUNK_SIZE]
+
+    async def generate_report(self):
+        """
+        Conducts research and generates the report along with additional information.
+
+        Returns a dict with the keys `report`, `context`, `costs`, `images`
+        and `sources`.
+        """
+        # Conduct research; the return value is not needed because
+        # write_report() is called on the same researcher afterwards
+        await self.researcher.conduct_research()
+        report = await self.researcher.write_report()
+
+        # Retrieve additional information
+        research_context = self.researcher.get_research_context()
+        research_costs = self.researcher.get_costs()
+        research_images = self.researcher.get_research_images()
+        research_sources = self.researcher.get_research_sources()
+
+        return {
+            "report": report,
+            "context": research_context,
+            "costs": research_costs,
+            "images": research_images,
+            "sources": research_sources
+        }
+
+    def get_query_details(self):
+        """
+        Returns details of the query and report type.
+        """
+        return {
+            "query": self.query,
+            "report_type": self.report_type
+        }