From 849c12b9ac072f8d37645945d2534b7c8d2c0dc6 Mon Sep 17 00:00:00 2001
From: Omar Abdelwahab
Date: Wed, 20 Aug 2025 11:15:31 -0700
Subject: [PATCH] Added llama stack-langChain integration example scripts

---
 docs/notebooks/langChain/README.md            | 306 +++++++++++++
 .../langChain/langchain_llamastack.py         | 290 +++++++++++++
 .../langChain/langchain_llamastack_ray.py     | 403 ++++++++++++++++++
 3 files changed, 999 insertions(+)
 create mode 100644 docs/notebooks/langChain/README.md
 create mode 100644 docs/notebooks/langChain/langchain_llamastack.py
 create mode 100644 docs/notebooks/langChain/langchain_llamastack_ray.py

diff --git a/docs/notebooks/langChain/README.md b/docs/notebooks/langChain/README.md
new file mode 100644
index 000000000..f36a39ce9
--- /dev/null
+++ b/docs/notebooks/langChain/README.md
@@ -0,0 +1,306 @@
+# LangChain + Llama Stack Document Processing
+
+This directory contains two implementations of document processing built with LangChain and Llama Stack:
+
+1. **`langchain_llamastack.py`** - Interactive CLI version
+2. **`langchain_llamastack_ray.py`** - Ray Serve API version
+
+Both versions provide AI-powered document processing, including summarization, fact extraction, and question-answering.
+
+---
+
+## 📋 Prerequisites
+
+### System Requirements
+- Python 3.12+
+- Ray Serve (for the API version)
+- Llama Stack server running on `http://localhost:8321/`
+- Ollama or a compatible model server
+
+### Required Python Packages
+```bash
+pip install llama-stack-client langchain langchain-core langchain-community
+pip install beautifulsoup4 markdownify readability-lxml requests
+pip install "ray[serve]" starlette  # For the Ray Serve version only
+```
+
+### Environment Setup
+```bash
+# Create and activate a virtual environment
+python3.12 -m venv llama-env-py312
+source llama-env-py312/bin/activate
+
+# Install dependencies
+pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests "ray[serve]" starlette
+```
+
+---
+
+## 🚀 Quick Start
+
+### Start Llama Stack Server
+Before running either version, ensure your Llama Stack server is running:
+```bash
+# Start Llama Stack server (example)
+llama stack run your-config --port 8321
+```
+
+---
+
+## 📖 Option 1: Interactive CLI Version (`langchain_llamastack.py`)
+
+### Features
+- ✅ Interactive command-line interface
+- ✅ Document loading from URLs and PDFs
+- ✅ AI-powered summarization and fact extraction
+- ✅ Question-answering based on document content
+- ✅ Session-based document storage
+
+### How to Run
+```bash
+# Activate the environment
+source llama-env-py312/bin/activate
+
+# Run the interactive CLI (from the repository root)
+cd docs/notebooks/langChain
+python langchain_llamastack.py
+```
+
+### Usage Commands
+Once running, you can use these interactive commands:
+
+```
+🎯 Interactive Document Processing Demo
+Commands:
+  load <source>   - Process a document
+  ask <question>  - Ask about the document
+  summary         - Show document summary
+  facts           - Show extracted facts
+  help            - Show commands
+  quit            - Exit demo
+```
+
+### Example Session
+```
+> load https://en.wikipedia.org/wiki/Artificial_intelligence
+📄 Loading document from: https://en.wikipedia.org/wiki/Artificial_intelligence
+✅ Loaded 45,832 characters
+📝 Generating summary...
+🔍 Extracting key facts...
+✅ Processing complete!
+
+> summary
+📝 Summary:
+Artificial intelligence (AI) is the simulation of human intelligence...
+
+> ask What are the main types of AI?
+
+💬 Q: What are the main types of AI?
+📝 A: Based on the document, the main types of AI include...
+
+> facts
+🔍 Key Facts:
+- AI was founded as an academic discipline in 1956
+- Machine learning is a subset of AI...
+
+> quit
+👋 Thanks for exploring LangChain chains!
+```
+
+---
+
+## 🌐 Option 2: Ray Serve API Version (`langchain_llamastack_ray.py`)
+
+### Features
+- ✅ RESTful HTTP API
+- ✅ Persistent service (runs indefinitely)
+- ✅ Multiple endpoints for different operations
+- ✅ JSON request/response format
+- ✅ Concurrent request handling
+
+### How to Run
+```bash
+# Activate the environment
+source llama-env-py312/bin/activate
+
+# Start the Ray Serve API (from the repository root)
+cd docs/notebooks/langChain
+python langchain_llamastack_ray.py
+```
+
+### Service Endpoints
+
+| Method | Endpoint | Description | Parameters |
+|--------|----------|-------------|------------|
+| GET | `/` | Service status | None |
+| POST | `/process` | Process document | `{"source": "url_or_path"}` |
+| POST | `/ask` | Ask question | `{"question": "text", "source": "optional"}` |
+| GET | `/summary` | Get summary | `?source=url` (optional) |
+| GET | `/facts` | Get facts | `?source=url` (optional) |
+| GET | `/docs` | List documents | None |
+
+### API Usage Examples
+
+#### Using curl:
+```bash
+# Check service status
+curl http://localhost:8000/
+
+# Process a document
+curl -X POST http://localhost:8000/process \
+  -H 'Content-Type: application/json' \
+  -d '{"source": "https://en.wikipedia.org/wiki/Machine_learning"}'
+
+# Ask a question
+curl -X POST http://localhost:8000/ask \
+  -H 'Content-Type: application/json' \
+  -d '{"question": "What is machine learning?"}'
+
+# Get summary
+curl http://localhost:8000/summary
+
+# Get facts
+curl http://localhost:8000/facts
+
+# List all processed documents
+curl http://localhost:8000/docs
+```
+
+#### Using Python requests:
+```python
+import requests
+
+# Process a document
+response = requests.post(
+    "http://localhost:8000/process",
+    json={"source": "https://en.wikipedia.org/wiki/Deep_learning"}
+)
+print(response.json())
+
+# Ask a question
+response = requests.post(
+    "http://localhost:8000/ask",
+    json={"question": "What are neural networks?"}
+)
+print(response.json())
+
+# Get facts
+response = requests.get("http://localhost:8000/facts")
+print(response.json())
+```
+
+---
+
+## 🔧 Configuration
+
+### Model Configuration
+The two versions use different defaults:
+- **CLI version** (`langchain_llamastack.py`): `llama3:70b-instruct`
+- **Ray Serve version** (`langchain_llamastack_ray.py`): `llama3.2:3b`
+- **Llama Stack URL** (both): `http://localhost:8321/`
+
+To change the model, edit the `model_id` parameter in the respective files.
+
+### Supported Document Types
+- ✅ **URLs**: Any web page (extracted using readability)
+- ✅ **PDF files**: Local or remote PDF documents
+- ❌ Plain text files (can be added if needed)
+
+---
+
+## 🛠️ Troubleshooting
+
+### Common Issues
+
+#### 1. Connection Refused to Llama Stack
+**Error**: `Connection refused to http://localhost:8321/`
+**Solution**:
+- Ensure the Llama Stack server is running
+- Check that port 8321 is correct
+- Verify network connectivity
+
+#### 2. Model Not Found
+**Error**: `Model not found: llama3.2:3b`
+**Solution**:
+- Check available models: `llama-stack-client models list`
+- Update `model_id` in the code to match an available model
+
+#### 3. Ray Serve Port Already in Use
+**Error**: `Port 8000 already in use`
+**Solution**:
+```bash
+# Kill the process using port 8000
+lsof -ti :8000 | xargs kill -9
+
+# Or use a different port by modifying the code (see the sketch below)
+```
+
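+As an alternative to killing the process, the service can also be started on a different HTTP port. A minimal sketch (assuming a Ray Serve release where `serve.start()` accepts `http_options`; the port number 8001 is an arbitrary choice): call `serve.start()` like this before the existing `serve.run(app, route_prefix="/")` line in `main()` of `langchain_llamastack_ray.py`, then point the example `curl` commands at port 8001.
+
+```python
+from ray import serve
+
+# Start the Serve HTTP proxy on a non-default port before deploying the app
+serve.start(http_options={"host": "127.0.0.1", "port": 8001})
+```
+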
+#### 4. Missing Dependencies
+**Error**: `ModuleNotFoundError: No module named 'ray'`
+**Solution**:
+```bash
+pip install "ray[serve]" starlette
+```
+
+### Debug Mode
+To enable verbose logging, add this to the beginning of either file:
+```python
+import logging
+logging.basicConfig(level=logging.DEBUG)
+```
+
+---
+
+## 📊 Performance Notes
+
+### CLI Version
+- **Pros**: Simple to use, interactive, good for testing
+- **Cons**: Single-threaded, session-based only
+- **Best for**: Development, testing, manual document analysis
+
+### Ray Serve Version
+- **Pros**: Concurrent requests, persistent service, API integration
+- **Cons**: More complex setup, requires Ray
+- **Best for**: Production, integration with other services, high throughput
+
+---
+
+## 🛑 Stopping Services
+
+### CLI Version
+- Press `Ctrl+C` or type `quit` in the interactive prompt
+
+### Ray Serve Version
+- Press `Ctrl+C` in the terminal running the service
+- The service will shut down gracefully and clean up resources
+
+---
+
+## 📝 Examples
+
+### CLI Workflow
+1. Start: `python langchain_llamastack.py`
+2. Load document: `load https://arxiv.org/pdf/2103.00020.pdf`
+3. Get summary: `summary`
+4. Ask questions: `ask What are the main contributions?`
+5. Exit: `quit`
+
+### API Workflow
+1. Start: `python langchain_llamastack_ray.py`
+2. Process: `curl -X POST http://localhost:8000/process -d '{"source": "https://example.com"}'`
+3. Query: `curl -X POST http://localhost:8000/ask -d '{"question": "What is this about?"}'`
+4. Stop: `Ctrl+C`
+
+---
+
+## 🤝 Contributing
+
+To extend functionality:
+1. Add new prompt templates for different analysis types
+2. Support additional document formats
+3. Add caching for processed documents
+4. Implement user authentication for the API version
+
+---
+
+## 📜 License
+
+This project is for educational and research purposes.
diff --git a/docs/notebooks/langChain/langchain_llamastack.py b/docs/notebooks/langChain/langchain_llamastack.py
new file mode 100644
index 000000000..2f8301ea9
--- /dev/null
+++ b/docs/notebooks/langChain/langchain_llamastack.py
@@ -0,0 +1,290 @@
+import os
+import re
+import html
+import requests
+from bs4 import BeautifulSoup
+from readability import Document as ReadabilityDocument
+from markdownify import markdownify
+from langchain_community.document_loaders import PyPDFLoader, TextLoader
+import tempfile
+
+from llama_stack_client import LlamaStackClient
+
+from langchain_core.language_models.llms import LLM
+from typing import Optional, List, Any
+from langchain.chains import LLMChain
+from langchain_core.prompts import PromptTemplate
+
+# Global variables
+client = None
+llm = None
+summary_chain = None
+facts_chain = None
+qa_chain = None
+processed_docs = {}
+
+# Prompt Templates (defined globally)
+summary_template = PromptTemplate(
+    input_variables=["document"],
+    template="""Create a concise summary of this document in 5-10 sentences:
+
+{document}
+
+SUMMARY:"""
+)
+
+facts_template = PromptTemplate(
+    input_variables=["document"],
+    template="""Extract the most important facts from this document. List them as bullet points:
+
+{document}
+
+KEY FACTS:
+-"""
+)
+
+qa_template = PromptTemplate(
+    input_variables=["document", "question"],
+    template="""Based on the following document, answer the question. If the answer isn't in the document, say so.
+
+DOCUMENT:
+{document}
+
+QUESTION: {question}
+
+ANSWER:"""
+)
+
+class LlamaStackLLM(LLM):
+    """Simple LangChain wrapper for Llama Stack"""
+
+    # Pydantic model fields
+    # model_id: str = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
+    # model_id: str = "meta-llama/Llama-3.3-70B-Instruct"
+    client: Any = None
+    model_id: str = "llama3:70b-instruct"
+
+    def __init__(self, client, model_id: str = "llama3:70b-instruct"):
+        # Initialize with field values
+        super().__init__(client=client, model_id=model_id)
+
+    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str:
+        """Make inference call to Llama Stack"""
+        response = self.client.inference.chat_completion(
+            model_id=self.model_id,
+            messages=[{"role": "user", "content": prompt}]
+        )
+        return response.completion_message.content
+
+    @property
+    def _llm_type(self) -> str:
+        return "llama_stack"
+
+
+def load_document(source: str) -> str:
+    is_url = source.startswith(('http://', 'https://'))
+    is_pdf = source.lower().endswith('.pdf')
+    if is_pdf:
+        return load_pdf(source, is_url=is_url)
+    elif is_url:
+        return load_from_url(source)
+    else:
+        raise ValueError("Unsupported format. Use URLs or PDF files.")
+
+
+def load_pdf(source: str, is_url: bool = False) -> str:
+    if is_url:
+        response = requests.get(source)
+        response.raise_for_status()
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
+            temp_file.write(response.content)
+            file_path = temp_file.name
+    else:
+        file_path = source
+    try:
+        loader = PyPDFLoader(file_path)
+        docs = loader.load()
+        # Join pages with blank lines between them
+        return "\n\n".join([doc.page_content for doc in docs])
+    finally:
+        if is_url:
+            os.remove(file_path)
+
+
+def load_from_url(url: str) -> str:
+    headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'}
+    response = requests.get(url, headers=headers, timeout=15)
+    response.raise_for_status()
+    doc = ReadabilityDocument(response.text)
+    html_main = doc.summary(html_partial=True)
+    soup = BeautifulSoup(html_main, "html.parser")
+    for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]):
+        tag.decompose()
+    md_text = markdownify(str(soup), heading_style="ATX")
+    md_text = html.unescape(md_text)
+    md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip()
+    return md_text
+
+def process_document(source: str):
+    global summary_chain, facts_chain, processed_docs
+
+    print(f"📄 Loading document from: {source}")
+    document = load_document(source)
+    print(f"✅ Loaded {len(document):,} characters")
+    print("\n📝 Generating summary...")
+    summary = summary_chain.invoke({"document": document})["text"]
+    print("Summary generated")
+    print("🔍 Extracting key facts...")
+    facts = facts_chain.invoke({"document": document})["text"]
+    processed_docs[source] = {
+        "document": document,
+        "summary": summary,
+        "facts": facts
+    }
+    print("\n✅ Processing complete!")
+    print(f"📊 Document: {len(document):,} chars")
+    print(f"📝 Summary: {summary[:100]}...")
+    print(f"🔍 Facts: {facts[:1000]}...")
+    return processed_docs[source]
+
+def ask_question(question: str, source: str = None):
+    """Answer questions about processed documents"""
+    global qa_chain, processed_docs
+
+    if not processed_docs:
+        return "No documents processed yet. Use process_document() first."
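+    # Prefer an explicitly requested source; otherwise fall back to the most recently processed document.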
+ if source and source in processed_docs: + doc_data = processed_docs[source] + else: + # Use the most recent document + doc_data = list(processed_docs.values())[-1] + answer = qa_chain.invoke({ + "document": doc_data["document"], + "question": question + })["text"] + return answer + + +def interactive_demo(): + print("\n๐ŸŽฏ Interactive Document Processing Demo") + print("Commands:") + print(" load - Process a document") + print(" ask - Ask about the document") + print(" summary - Show document summary") + print(" facts - Show extracted facts") + print(" help - Show commands") + print(" quit - Exit demo") + + while True: + try: + command = input("\n> ").strip() + if command.lower() in ['quit', 'exit']: + print("๐Ÿ‘‹ Thanks for exploring LangChain chains!") + break + elif command.lower() == 'help': + print("\nCommands:") + print(" load - Process a document") + print(" ask - Ask about the document") + print(" summary - Show document summary") + print(" facts - Show extracted facts") + elif command.startswith('load '): + source = command[5:].strip() + if source: + try: + process_document(source) + except Exception as e: + print(f"โŒ Error processing document: {e}") + else: + print("โ“ Please provide a URL or file path") + elif command.startswith('ask '): + question = command[4:].strip() + if question: + try: + answer = ask_question(question) + print(f"\n๐Ÿ’ฌ Q: {question}") + print(f"๐Ÿ“ A: {answer}") + except Exception as e: + print(f"โŒ Error: {e}") + else: + print("โ“ Please provide a question") + elif command.lower() == 'summary': + if processed_docs: + latest_doc = list(processed_docs.values())[-1] + print(f"\n๐Ÿ“ Summary:\n{latest_doc['summary']}") + else: + print("โ“ No documents processed yet") + elif command.lower() == 'facts': + if processed_docs: + latest_doc = list(processed_docs.values())[-1] + print(f"\n๐Ÿ” Key Facts:\n{latest_doc['facts']}") + else: + print("โ“ No documents processed yet") + else: + print("โ“ Unknown command. 
Type 'help' for options") + except (EOFError, KeyboardInterrupt): + print("\n๐Ÿ‘‹ Goodbye!") + break + + +def main(): + global client, llm, summary_chain, facts_chain, qa_chain, processed_docs + + print("๐Ÿš€ Starting LangChain + Llama Stack Document Processing Demo") + + client = LlamaStackClient( + base_url="http://localhost:8321/", + ) + + # Initialize the LangChain-compatible LLM + llm = LlamaStackLLM(client) + + # Test the wrapper + test_response = llm.invoke("Can you help me with the document processing?") + print(f"โœ… LangChain wrapper working!") + print(f"Response: {test_response[:100]}...") + + print("Available models:") + for m in client.models.list(): + print(f"- {m.identifier}") + + print("----") + print("Available shields (safety models):") + for s in client.shields.list(): + print(s.identifier) + print("----") + + # model_id = "llama3.2:3b" + model_id = "ollama/llama3:70b-instruct" + + response = client.inference.chat_completion( + model_id=model_id, + messages=[ + {"role": "system", "content": "You are a friendly assistant."}, + {"role": "user", "content": "Write a two-sentence poem about llama."}, + ], + ) + + print(response.completion_message.content) + + # Create chains by combining our LLM with prompt templates + summary_chain = LLMChain(llm=llm, prompt=summary_template) + facts_chain = LLMChain(llm=llm, prompt=facts_template) + qa_chain = LLMChain(llm=llm, prompt=qa_template) + + # Initialize storage for processed documents + processed_docs = {} + + print("โœ… Created 3 prompt templates:") + print(" โ€ข Summary: Condenses documents into key points") + print(" โ€ข Facts: Extracts important information as bullets") + print(" โ€ข Q&A: Answers questions based on document content") + + # Test template formatting + test_prompt = summary_template.format(document="This is a sample document about AI...") + print(f"\n๐Ÿ“ Example prompt: {len(test_prompt)} characters") + + # Start the interactive demo + interactive_demo() + +if __name__ == "__main__": + main() diff --git a/docs/notebooks/langChain/langchain_llamastack_ray.py b/docs/notebooks/langChain/langchain_llamastack_ray.py new file mode 100644 index 000000000..7ef42dfd7 --- /dev/null +++ b/docs/notebooks/langChain/langchain_llamastack_ray.py @@ -0,0 +1,403 @@ +import os +import re +import html +import json +import time +import requests +from bs4 import BeautifulSoup +from readability import Document as ReadabilityDocument +from markdownify import markdownify +from langchain_community.document_loaders import PyPDFLoader, TextLoader +import tempfile + +from llama_stack_client import LlamaStackClient +from langchain_core.language_models.llms import LLM +from typing import Optional, List, Any, Dict +from langchain.chains import LLMChain +from langchain_core.prompts import PromptTemplate + +from starlette.requests import Request +from ray import serve + +# Prompt Templates (defined globally) +summary_template = PromptTemplate( + input_variables=["document"], + template="""Create a concise summary of this document in 5-10 sentences: + +{document} + +SUMMARY:""" +) + +facts_template = PromptTemplate( + input_variables=["document"], + template="""Extract the most important facts from this document. List them as bullet points: + +{document} + +KEY FACTS: +-""" +) + +qa_template = PromptTemplate( + input_variables=["document", "question"], + template="""Based on the following document, answer the question. If the answer isn't in the document, say so. 
+ +DOCUMENT: +{document} + +QUESTION: {question} + +ANSWER:""" +) + +class LlamaStackLLM(LLM): + """Simple LangChain wrapper for Llama Stack""" + + # Pydantic model fields + client: Any = None + model_id: str = "llama3.2:3b" + + def __init__(self, client, model_id: str = "llama3.2:3b"): + # Initialize with field values + super().__init__(client=client, model_id=model_id) + + def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str: + """Make inference call to Llama Stack""" + response = self.client.inference.chat_completion( + model_id=self.model_id, + messages=[{"role": "user", "content": prompt}] + ) + return response.completion_message.content + + @property + def _llm_type(self) -> str: + return "llama_stack" + + +def load_document(source: str) -> str: + is_url = source.startswith(('http://', 'https://')) + is_pdf = source.lower().endswith('.pdf') + if is_pdf: + return load_pdf(source, is_url=is_url) + elif is_url: + return load_from_url(source) + else: + raise ValueError(f"Unsupported format. Use URLs or PDF files.") + + +def load_pdf(source: str, is_url: bool = False) -> str: + if is_url: + response = requests.get(source) + response.raise_for_status() + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: + temp_file.write(response.content) + file_path = temp_file.name + else: + file_path = source + try: + loader = PyPDFLoader(file_path) + docs = loader.load() + return "\\n\\n".join([doc.page_content for doc in docs]) + finally: + if is_url: + os.remove(file_path) + + +def load_from_url(url: str) -> str: + headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'} + response = requests.get(url, headers=headers, timeout=15) + response.raise_for_status() + doc = ReadabilityDocument(response.text) + html_main = doc.summary(html_partial=True) + soup = BeautifulSoup(html_main, "html.parser") + for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]): + tag.decompose() + md_text = markdownify(str(soup), heading_style="ATX") + md_text = html.unescape(md_text) + md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip() + return md_text + + +@serve.deployment +class LangChainLlamaStackService: + """Ray Serve deployment for LangChain + Llama Stack document processing""" + + def __init__(self): + print("๐Ÿš€ Initializing LangChain + Llama Stack Service...") + + # Initialize Llama Stack client + self.client = LlamaStackClient(base_url="http://localhost:8321/") + + # Initialize LangChain-compatible LLM + self.llm = LlamaStackLLM(self.client) + + # Create processing chains + self.summary_chain = LLMChain(llm=self.llm, prompt=summary_template) + self.facts_chain = LLMChain(llm=self.llm, prompt=facts_template) + self.qa_chain = LLMChain(llm=self.llm, prompt=qa_template) + + # Storage for processed documents + self.processed_docs = {} + + print("โœ… Service initialized successfully!") + + async def __call__(self, request: Request) -> Dict: + """Handle HTTP requests to different endpoints""" + path = request.url.path + method = request.method + + try: + if path == "/" and method == "GET": + return await self._handle_status() + elif path == "/process" and method == "POST": + return await self._handle_process(request) + elif path == "/ask" and method == "POST": + return await self._handle_ask(request) + elif path == "/summary" and method == "GET": + return await self._handle_summary(request) + elif path == "/facts" and method == "GET": + return await self._handle_facts(request) + elif path == "/docs" and method == "GET": + 
return await self._handle_list_docs() + else: + return { + "error": "Not found", + "available_endpoints": { + "GET /": "Service status", + "POST /process": "Process document (body: {\"source\": \"url_or_path\"})", + "POST /ask": "Ask question (body: {\"question\": \"your_question\", \"source\": \"optional_doc_id\"})", + "GET /summary?source=doc_id": "Get document summary", + "GET /facts?source=doc_id": "Get document facts", + "GET /docs": "List processed documents" + } + } + except Exception as e: + return {"error": str(e)} + + async def _handle_status(self) -> Dict: + """Return service status""" + return { + "status": "healthy", + "service": "LangChain + Llama Stack Document Processing", + "documents_processed": len(self.processed_docs), + "available_models": [m.identifier for m in self.client.models.list()], + "endpoints": ["/", "/process", "/ask", "/summary", "/facts", "/docs"] + } + + async def _handle_process(self, request: Request) -> Dict: + """Process a document from URL or file path""" + body = await request.json() + source = body.get("source") + + if not source: + return {"error": "Missing 'source' in request body"} + + try: + # Load document + document = load_document(source) + + # Generate summary and facts + summary = self.summary_chain.invoke({"document": document})["text"] + facts = self.facts_chain.invoke({"document": document})["text"] + + # Store processed document + self.processed_docs[source] = { + "document": document, + "summary": summary, + "facts": facts, + "processed_at": time.time() + } + + return { + "success": True, + "source": source, + "document_length": len(document), + "summary_preview": summary[:200] + "..." if len(summary) > 200 else summary, + "facts_preview": facts[:300] + "..." if len(facts) > 300 else facts + } + + except Exception as e: + return {"error": f"Failed to process document: {str(e)}"} + + async def _handle_ask(self, request: Request) -> Dict: + """Answer questions about processed documents""" + body = await request.json() + question = body.get("question") + source = body.get("source") + + if not question: + return {"error": "Missing 'question' in request body"} + + if not self.processed_docs: + return {"error": "No documents processed yet. 
Use /process endpoint first."} + + try: + # Select document + if source and source in self.processed_docs: + doc_data = self.processed_docs[source] + else: + # Use the most recent document + doc_data = list(self.processed_docs.values())[-1] + source = list(self.processed_docs.keys())[-1] + + # Generate answer + answer = self.qa_chain.invoke({ + "document": doc_data["document"], + "question": question + })["text"] + + return { + "question": question, + "answer": answer, + "source": source + } + + except Exception as e: + return {"error": f"Failed to answer question: {str(e)}"} + + async def _handle_summary(self, request: Request) -> Dict: + """Get summary of a processed document""" + source = request.query_params.get("source") + + if not self.processed_docs: + return {"error": "No documents processed yet"} + + if source and source in self.processed_docs: + doc_data = self.processed_docs[source] + else: + # Use the most recent document + doc_data = list(self.processed_docs.values())[-1] + source = list(self.processed_docs.keys())[-1] + + return { + "source": source, + "summary": doc_data["summary"] + } + + async def _handle_facts(self, request: Request) -> Dict: + """Get facts from a processed document""" + source = request.query_params.get("source") + + if not self.processed_docs: + return {"error": "No documents processed yet"} + + if source and source in self.processed_docs: + doc_data = self.processed_docs[source] + else: + # Use the most recent document + doc_data = list(self.processed_docs.values())[-1] + source = list(self.processed_docs.keys())[-1] + + return { + "source": source, + "facts": doc_data["facts"] + } + + async def _handle_list_docs(self) -> Dict: + """List all processed documents""" + docs_info = [] + for source, data in self.processed_docs.items(): + docs_info.append({ + "source": source, + "document_length": len(data["document"]), + "processed_at": data["processed_at"], + "summary_preview": data["summary"][:100] + "..." 
if len(data["summary"]) > 100 else data["summary"] + }) + + return { + "processed_documents": docs_info, + "total_count": len(self.processed_docs) + } + + +def main(): + """Main function to start the Ray Serve application""" + + # Create the application + app = LangChainLlamaStackService.bind() + + # Deploy the application locally + print("๐Ÿš€ Starting LangChain + Llama Stack Ray Serve application...") + serve.run(app, route_prefix="/") + + # Wait for service to initialize + print("โณ Waiting for service to initialize...") + time.sleep(5) + + # Test the service + try: + response = requests.get("http://localhost:8000/") + print(f"โœ… Service response: {response.json()}") + print("๐ŸŽ‰ Service is running successfully!") + except Exception as e: + print(f"โš ๏ธ Could not test service: {e}") + print(" Service might still be starting up...") + + # Show service information + print("\n" + "="*60) + print("๐ŸŒ LangChain + Llama Stack Service is running on:") + print(" http://localhost:8000/") + print("="*60) + print("๐Ÿ“‹ Available endpoints:") + print(" GET / - Service status") + print(" POST /process - Process document") + print(" POST /ask - Ask questions") + print(" GET /summary - Get document summary") + print(" GET /facts - Get document facts") + print(" GET /docs - List processed documents") + print("="*60) + print("๐Ÿงช Example requests:") + print(" # Process a document:") + print(" curl -X POST http://localhost:8000/process \\") + print(" -H 'Content-Type: application/json' \\") + print(" -d '{\"source\": \"https://example.com/article\"}'") + print("") + print(" # Ask a question:") + print(" curl -X POST http://localhost:8000/ask \\") + print(" -H 'Content-Type: application/json' \\") + print(" -d '{\"question\": \"What is the main topic?\"}'") + print("") + print(" # Get summary:") + print(" curl http://localhost:8000/summary") + print("="*60) + print("๐Ÿ›‘ Press Ctrl+C to stop the service...") + + try: + # Keep the service alive + while True: + time.sleep(1) + except KeyboardInterrupt: + print("\n๐Ÿ›‘ Stopping service...") + serve.shutdown() + print("๐Ÿ‘‹ Service stopped successfully!") + +if __name__ == "__main__": + main() + + + + + + + + +# import requests + +# # Step 1: First, process/load the document +# process_response = requests.post( +# "http://localhost:8000/process", +# json={"source": "https://en.wikipedia.org/wiki/What%27s_Happening!!"} +# ) +# print("Processing result:", process_response.json()) + +# # Step 2: Then get the facts +# facts_response = requests.get("http://localhost:8000/facts") +# print("Facts:", facts_response.json()) + +# # Or get facts for specific document +# facts_response = requests.get( +# "http://localhost:8000/facts", +# params={"source": "https://en.wikipedia.org/wiki/What%27s_Happening!!"} +# ) +# print("Facts for specific doc:", facts_response.json())
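+
+# # Step 3: Ask a question about a specific processed document
+# # (a sketch of the /ask endpoint; "source" is optional and defaults to the
+# #  most recently processed document when omitted)
+# ask_response = requests.post(
+#     "http://localhost:8000/ask",
+#     json={
+#         "question": "When did the show first air?",
+#         "source": "https://en.wikipedia.org/wiki/What%27s_Happening!!"
+#     }
+# )
+# print("Answer:", ask_response.json())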