From 849c12b9ac072f8d37645945d2534b7c8d2c0dc6 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Wed, 20 Aug 2025 11:15:31 -0700 Subject: [PATCH 01/14] Added llama stack-langChain integration example scripts --- docs/notebooks/langChain/README.md | 306 +++++++++++++ .../langChain/langchain_llamastack.py | 290 +++++++++++++ .../langChain/langchain_llamastack_ray.py | 403 ++++++++++++++++++ 3 files changed, 999 insertions(+) create mode 100644 docs/notebooks/langChain/README.md create mode 100644 docs/notebooks/langChain/langchain_llamastack.py create mode 100644 docs/notebooks/langChain/langchain_llamastack_ray.py diff --git a/docs/notebooks/langChain/README.md b/docs/notebooks/langChain/README.md new file mode 100644 index 000000000..f36a39ce9 --- /dev/null +++ b/docs/notebooks/langChain/README.md @@ -0,0 +1,306 @@ +# LangChain + Llama Stack Document Processing + +This repository contains two different implementations of document processing using LangChain and Llama Stack: + +1. **`langchain_llamastack.py`** - Interactive CLI version +2. **`langchain_llamastack_ray.py`** - Ray Serve API version + +Both versions provide AI-powered document processing capabilities including summarization, fact extraction, and question-answering. + +--- + +## ๐Ÿ“‹ Prerequisites + +### System Requirements +- Python 3.12+ +- Ray Serve (for API version) +- Llama Stack server running on `http://localhost:8321/` +- Ollama or compatible model server + +### Required Python Packages +```bash +pip install llama-stack-client langchain langchain-core langchain-community +pip install beautifulsoup4 markdownify readability-lxml requests +pip install ray[serve] starlette # For Ray Serve version only +``` + +### Environment Setup +```bash +# Create and activate virtual environment +python3.12 -m venv llama-env-py312 +source llama-env-py312/bin/activate + +# Install dependencies +pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests ray[serve] starlette +``` + +--- + +## ๐Ÿš€ Quick Start + +### Start Llama Stack Server +Before running either version, ensure your Llama Stack server is running: +```bash +# Start Llama Stack server (example) +llama stack run your-config --port 8321 +``` + +--- + +## ๐Ÿ“– Option 1: Interactive CLI Version (`langchain_llamastack_updated.py`) + +### Features +- โœ… Interactive command-line interface +- โœ… Document loading from URLs and PDFs +- โœ… AI-powered summarization and fact extraction +- โœ… Question-answering based on document content +- โœ… Session-based document storage + +### How to Run +```bash +# Activate environment +source llama-env-py312/bin/activate + +# Run the interactive CLI +cd /home/omara/langchain_llamastack +python langchain_llamastack_updated.py +``` + +### Usage Commands +Once running, you can use these interactive commands: + +``` +๐ŸŽฏ Interactive Document Processing Demo +Commands: + load - Process a document + ask - Ask about the document + summary - Show document summary + facts - Show extracted facts + help - Show commands + quit - Exit demo +``` + +### Example Session +``` +> load https://en.wikipedia.org/wiki/Artificial_intelligence +๐Ÿ“„ Loading document from: https://en.wikipedia.org/wiki/Artificial_intelligence +โœ… Loaded 45,832 characters +๐Ÿ“ Generating summary... +๐Ÿ” Extracting key facts... +โœ… Processing complete! + +> summary +๐Ÿ“ Summary: +Artificial intelligence (AI) is the simulation of human intelligence... + +> ask What are the main types of AI? 
+๐Ÿ’ฌ Q: What are the main types of AI? +๐Ÿ“ A: Based on the document, the main types of AI include... + +> facts +๐Ÿ” Key Facts: +- AI was founded as an academic discipline in 1956 +- Machine learning is a subset of AI... + +> quit +๐Ÿ‘‹ Thanks for exploring LangChain chains! +``` + +--- + +## ๐ŸŒ Option 2: Ray Serve API Version (`langchain_llamastack_ray.py`) + +### Features +- โœ… RESTful HTTP API +- โœ… Persistent service (runs indefinitely) +- โœ… Multiple endpoints for different operations +- โœ… JSON request/response format +- โœ… Concurrent request handling + +### How to Run +```bash +# Activate environment +source llama-env-py312/bin/activate + +# Start the Ray Serve API +cd /home/omara/langchain_llamastack +python langchain_llamastack_ray.py +``` + +### Service Endpoints + +| Method | Endpoint | Description | Parameters | +|--------|----------|-------------|------------| +| GET | `/` | Service status | None | +| POST | `/process` | Process document | `{"source": "url_or_path"}` | +| POST | `/ask` | Ask question | `{"question": "text", "source": "optional"}` | +| GET | `/summary` | Get summary | `?source=url` (optional) | +| GET | `/facts` | Get facts | `?source=url` (optional) | +| GET | `/docs` | List documents | None | + +### API Usage Examples + +#### Using curl: +```bash +# Check service status +curl http://localhost:8000/ + +# Process a document +curl -X POST http://localhost:8000/process \ + -H 'Content-Type: application/json' \ + -d '{"source": "https://en.wikipedia.org/wiki/Machine_learning"}' + +# Ask a question +curl -X POST http://localhost:8000/ask \ + -H 'Content-Type: application/json' \ + -d '{"question": "What is machine learning?"}' + +# Get summary +curl http://localhost:8000/summary + +# Get facts +curl http://localhost:8000/facts + +# List all processed documents +curl http://localhost:8000/docs +``` + +#### Using Python requests: +```python +import requests + +# Process a document +response = requests.post( + "http://localhost:8000/process", + json={"source": "https://en.wikipedia.org/wiki/Deep_learning"} +) +print(response.json()) + +# Ask a question +response = requests.post( + "http://localhost:8000/ask", + json={"question": "What are neural networks?"} +) +print(response.json()) + +# Get facts +response = requests.get("http://localhost:8000/facts") +print(response.json()) +``` + +--- + +## ๐Ÿ”ง Configuration + +### Model Configuration +Both versions use these models by default: +- **Model ID**: `llama3.2:3b` +- **Llama Stack URL**: `http://localhost:8321/` + +To change the model, edit the `model_id` parameter in the respective files. + +### Supported Document Types +- โœ… **URLs**: Any web page (extracted using readability) +- โœ… **PDF files**: Local or remote PDF documents +- โŒ Plain text files (can be added if needed) + +--- + +## ๐Ÿ› ๏ธ Troubleshooting + +### Common Issues + +#### 1. Connection Refused to Llama Stack +**Error**: `Connection refused to http://localhost:8321/` +**Solution**: +- Ensure Llama Stack server is running +- Check if port 8321 is correct +- Verify network connectivity + +#### 2. Model Not Found +**Error**: `Model not found: llama3.2:3b` +**Solution**: +- Check available models: `curl http://localhost:8321/models/list` +- Update `model_id` in the code to match available models + +#### 3. Ray Serve Port Already in Use +**Error**: `Port 8000 already in use` +**Solution**: +```bash +# Kill process using port 8000 +lsof -ti :8000 | xargs kill -9 + +# Or use a different port by modifying the code +``` + +#### 4. 
Missing Dependencies +**Error**: `ModuleNotFoundError: No module named 'ray'` +**Solution**: +```bash +pip install ray[serve] starlette +``` + +### Debug Mode +To enable verbose logging, add this to the beginning of either file: +```python +import logging +logging.basicConfig(level=logging.DEBUG) +``` + +--- + +## ๐Ÿ“Š Performance Notes + +### CLI Version +- **Pros**: Simple to use, interactive, good for testing +- **Cons**: Single-threaded, session-based only +- **Best for**: Development, testing, manual document analysis + +### Ray Serve Version +- **Pros**: Concurrent requests, persistent service, API integration +- **Cons**: More complex setup, requires Ray +- **Best for**: Production, integration with other services, high throughput + +--- + +## ๐Ÿ›‘ Stopping Services + +### CLI Version +- Press `Ctrl+C` or type `quit` in the interactive prompt + +### Ray Serve Version +- Press `Ctrl+C` in the terminal running the service +- The service will gracefully shutdown and clean up resources + +--- + +## ๐Ÿ“ Examples + +### CLI Workflow +1. Start: `python langchain_llamastack_updated.py` +2. Load document: `load https://arxiv.org/pdf/2103.00020.pdf` +3. Get summary: `summary` +4. Ask questions: `ask What are the main contributions?` +5. Exit: `quit` + +### API Workflow +1. Start: `python langchain_llamastack_ray.py` +2. Process: `curl -X POST http://localhost:8000/process -d '{"source": "https://example.com"}'` +3. Query: `curl -X POST http://localhost:8000/ask -d '{"question": "What is this about?"}'` +4. Stop: `Ctrl+C` + +--- + +## ๐Ÿค Contributing + +To extend functionality: +1. Add new prompt templates for different analysis types +2. Support additional document formats +3. Add caching for processed documents +4. Implement user authentication for API version + +--- + +## ๐Ÿ“œ License + +This project is for educational and research purposes. diff --git a/docs/notebooks/langChain/langchain_llamastack.py b/docs/notebooks/langChain/langchain_llamastack.py new file mode 100644 index 000000000..2f8301ea9 --- /dev/null +++ b/docs/notebooks/langChain/langchain_llamastack.py @@ -0,0 +1,290 @@ +import os +import re +import html +import requests +from bs4 import BeautifulSoup +from readability import Document as ReadabilityDocument +from markdownify import markdownify +from langchain_community.document_loaders import PyPDFLoader, TextLoader +import tempfile + +from llama_stack_client import LlamaStackClient + +from langchain_core.language_models.llms import LLM +from typing import Optional, List, Any +from langchain.chains import LLMChain +from langchain_core.prompts import PromptTemplate +from rich.pretty import pprint + +# Global variables +client = None +llm = None +summary_chain = None +facts_chain = None +qa_chain = None +processed_docs = {} + +# Prompt Templates (defined globally) +summary_template = PromptTemplate( + input_variables=["document"], + template="""Create a concise summary of this document in 5-10 sentences: + +{document} + +SUMMARY:""" +) + +facts_template = PromptTemplate( + input_variables=["document"], + template="""Extract the most important facts from this document. List them as bullet points: + +{document} + +KEY FACTS: +-""" +) + +qa_template = PromptTemplate( + input_variables=["document", "question"], + template="""Based on the following document, answer the question. If the answer isn't in the document, say so. 
+ +DOCUMENT: +{document} + +QUESTION: {question} + +ANSWER:""" +) + +class LlamaStackLLM(LLM): + """Simple LangChain wrapper for Llama Stack""" + + # Pydantic model fields + client: Any = None + #model_id: str = "meta-llama/Llama-4-Maverick-17B-128E-Instruct" + # model_id: str = "meta-llama/Llama-3.3-70B-Instruct" + model_id: str = "llama3:70b-instruct" + + def __init__(self, client, model_id: str = "llama3:70b-instruct"): + # Initialize with field values + super().__init__(client=client, model_id=model_id) + + def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str: + """Make inference call to Llama Stack""" + response = self.client.inference.chat_completion( + model_id=self.model_id, + messages=[{"role": "user", "content": prompt}] + ) + return response.completion_message.content + + @property + def _llm_type(self) -> str: + return "llama_stack" + + +def load_document(source: str) -> str: + is_url = source.startswith(('http://', 'https://')) + is_pdf = source.lower().endswith('.pdf') + if is_pdf: + return load_pdf(source, is_url=is_url) + elif is_url: + return load_from_url(source) + else: + raise ValueError(f"Unsupported format. Use URLs or PDF files.") + + +def load_pdf(source: str, is_url: bool = False) -> str: + if is_url: + response = requests.get(source) + response.raise_for_status() + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: + temp_file.write(response.content) + file_path = temp_file.name + else: + file_path = source + try: + loader = PyPDFLoader(file_path) + docs = loader.load() + return "\\n\\n".join([doc.page_content for doc in docs]) + finally: + if is_url: + os.remove(file_path) + + +def load_from_url(url: str) -> str: + headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'} + response = requests.get(url, headers=headers, timeout=15) + response.raise_for_status() + doc = ReadabilityDocument(response.text) + html_main = doc.summary(html_partial=True) + soup = BeautifulSoup(html_main, "html.parser") + for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]): + tag.decompose() + md_text = markdownify(str(soup), heading_style="ATX") + md_text = html.unescape(md_text) + md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip() + return md_text + +def process_document(source: str): + global summary_chain, facts_chain, processed_docs + + print(f"๐Ÿ“„ Loading document from: {source}") + document = load_document(source) + print(f"โœ… Loaded {len(document):,} characters") + print("\n๐Ÿ“ Generating summary...") + summary = summary_chain.invoke({"document": document})["text"] + print("Summary generated") + print("๐Ÿ” Extracting key facts...") + facts = facts_chain.invoke({"document": document})["text"] + processed_docs[source] = { + "document": document, + "summary": summary, + "facts": facts + } + print(f"\nโœ… Processing complete!") + print(f"๐Ÿ“Š Document: {len(document):,} chars") + print(f"๐Ÿ“ Summary: {summary[:100]}...") + print(f"๐Ÿ” Facts: {facts[:1000]}...") + return processed_docs[source] + +def ask_question(question: str, source: str = None): + """Answer questions about processed documents""" + global qa_chain, processed_docs + + if not processed_docs: + return "No documents processed yet. Use process_document() first." 
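+    # Select which document to answer from: an explicit, already-processed source wins;
+    # otherwise fall back to the most recently processed document (dicts keep insertion order,
+    # so the last value is the newest entry).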
+ if source and source in processed_docs: + doc_data = processed_docs[source] + else: + # Use the most recent document + doc_data = list(processed_docs.values())[-1] + answer = qa_chain.invoke({ + "document": doc_data["document"], + "question": question + })["text"] + return answer + + +def interactive_demo(): + print("\n๐ŸŽฏ Interactive Document Processing Demo") + print("Commands:") + print(" load - Process a document") + print(" ask - Ask about the document") + print(" summary - Show document summary") + print(" facts - Show extracted facts") + print(" help - Show commands") + print(" quit - Exit demo") + + while True: + try: + command = input("\n> ").strip() + if command.lower() in ['quit', 'exit']: + print("๐Ÿ‘‹ Thanks for exploring LangChain chains!") + break + elif command.lower() == 'help': + print("\nCommands:") + print(" load - Process a document") + print(" ask - Ask about the document") + print(" summary - Show document summary") + print(" facts - Show extracted facts") + elif command.startswith('load '): + source = command[5:].strip() + if source: + try: + process_document(source) + except Exception as e: + print(f"โŒ Error processing document: {e}") + else: + print("โ“ Please provide a URL or file path") + elif command.startswith('ask '): + question = command[4:].strip() + if question: + try: + answer = ask_question(question) + print(f"\n๐Ÿ’ฌ Q: {question}") + print(f"๐Ÿ“ A: {answer}") + except Exception as e: + print(f"โŒ Error: {e}") + else: + print("โ“ Please provide a question") + elif command.lower() == 'summary': + if processed_docs: + latest_doc = list(processed_docs.values())[-1] + print(f"\n๐Ÿ“ Summary:\n{latest_doc['summary']}") + else: + print("โ“ No documents processed yet") + elif command.lower() == 'facts': + if processed_docs: + latest_doc = list(processed_docs.values())[-1] + print(f"\n๐Ÿ” Key Facts:\n{latest_doc['facts']}") + else: + print("โ“ No documents processed yet") + else: + print("โ“ Unknown command. 
Type 'help' for options") + except (EOFError, KeyboardInterrupt): + print("\n๐Ÿ‘‹ Goodbye!") + break + + +def main(): + global client, llm, summary_chain, facts_chain, qa_chain, processed_docs + + print("๐Ÿš€ Starting LangChain + Llama Stack Document Processing Demo") + + client = LlamaStackClient( + base_url="http://localhost:8321/", + ) + + # Initialize the LangChain-compatible LLM + llm = LlamaStackLLM(client) + + # Test the wrapper + test_response = llm.invoke("Can you help me with the document processing?") + print(f"โœ… LangChain wrapper working!") + print(f"Response: {test_response[:100]}...") + + print("Available models:") + for m in client.models.list(): + print(f"- {m.identifier}") + + print("----") + print("Available shields (safety models):") + for s in client.shields.list(): + print(s.identifier) + print("----") + + # model_id = "llama3.2:3b" + model_id = "ollama/llama3:70b-instruct" + + response = client.inference.chat_completion( + model_id=model_id, + messages=[ + {"role": "system", "content": "You are a friendly assistant."}, + {"role": "user", "content": "Write a two-sentence poem about llama."}, + ], + ) + + print(response.completion_message.content) + + # Create chains by combining our LLM with prompt templates + summary_chain = LLMChain(llm=llm, prompt=summary_template) + facts_chain = LLMChain(llm=llm, prompt=facts_template) + qa_chain = LLMChain(llm=llm, prompt=qa_template) + + # Initialize storage for processed documents + processed_docs = {} + + print("โœ… Created 3 prompt templates:") + print(" โ€ข Summary: Condenses documents into key points") + print(" โ€ข Facts: Extracts important information as bullets") + print(" โ€ข Q&A: Answers questions based on document content") + + # Test template formatting + test_prompt = summary_template.format(document="This is a sample document about AI...") + print(f"\n๐Ÿ“ Example prompt: {len(test_prompt)} characters") + + # Start the interactive demo + interactive_demo() + +if __name__ == "__main__": + main() diff --git a/docs/notebooks/langChain/langchain_llamastack_ray.py b/docs/notebooks/langChain/langchain_llamastack_ray.py new file mode 100644 index 000000000..7ef42dfd7 --- /dev/null +++ b/docs/notebooks/langChain/langchain_llamastack_ray.py @@ -0,0 +1,403 @@ +import os +import re +import html +import json +import time +import requests +from bs4 import BeautifulSoup +from readability import Document as ReadabilityDocument +from markdownify import markdownify +from langchain_community.document_loaders import PyPDFLoader, TextLoader +import tempfile + +from llama_stack_client import LlamaStackClient +from langchain_core.language_models.llms import LLM +from typing import Optional, List, Any, Dict +from langchain.chains import LLMChain +from langchain_core.prompts import PromptTemplate + +from starlette.requests import Request +from ray import serve + +# Prompt Templates (defined globally) +summary_template = PromptTemplate( + input_variables=["document"], + template="""Create a concise summary of this document in 5-10 sentences: + +{document} + +SUMMARY:""" +) + +facts_template = PromptTemplate( + input_variables=["document"], + template="""Extract the most important facts from this document. List them as bullet points: + +{document} + +KEY FACTS: +-""" +) + +qa_template = PromptTemplate( + input_variables=["document", "question"], + template="""Based on the following document, answer the question. If the answer isn't in the document, say so. 
+ +DOCUMENT: +{document} + +QUESTION: {question} + +ANSWER:""" +) + +class LlamaStackLLM(LLM): + """Simple LangChain wrapper for Llama Stack""" + + # Pydantic model fields + client: Any = None + model_id: str = "llama3.2:3b" + + def __init__(self, client, model_id: str = "llama3.2:3b"): + # Initialize with field values + super().__init__(client=client, model_id=model_id) + + def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str: + """Make inference call to Llama Stack""" + response = self.client.inference.chat_completion( + model_id=self.model_id, + messages=[{"role": "user", "content": prompt}] + ) + return response.completion_message.content + + @property + def _llm_type(self) -> str: + return "llama_stack" + + +def load_document(source: str) -> str: + is_url = source.startswith(('http://', 'https://')) + is_pdf = source.lower().endswith('.pdf') + if is_pdf: + return load_pdf(source, is_url=is_url) + elif is_url: + return load_from_url(source) + else: + raise ValueError(f"Unsupported format. Use URLs or PDF files.") + + +def load_pdf(source: str, is_url: bool = False) -> str: + if is_url: + response = requests.get(source) + response.raise_for_status() + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: + temp_file.write(response.content) + file_path = temp_file.name + else: + file_path = source + try: + loader = PyPDFLoader(file_path) + docs = loader.load() + return "\\n\\n".join([doc.page_content for doc in docs]) + finally: + if is_url: + os.remove(file_path) + + +def load_from_url(url: str) -> str: + headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'} + response = requests.get(url, headers=headers, timeout=15) + response.raise_for_status() + doc = ReadabilityDocument(response.text) + html_main = doc.summary(html_partial=True) + soup = BeautifulSoup(html_main, "html.parser") + for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]): + tag.decompose() + md_text = markdownify(str(soup), heading_style="ATX") + md_text = html.unescape(md_text) + md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip() + return md_text + + +@serve.deployment +class LangChainLlamaStackService: + """Ray Serve deployment for LangChain + Llama Stack document processing""" + + def __init__(self): + print("๐Ÿš€ Initializing LangChain + Llama Stack Service...") + + # Initialize Llama Stack client + self.client = LlamaStackClient(base_url="http://localhost:8321/") + + # Initialize LangChain-compatible LLM + self.llm = LlamaStackLLM(self.client) + + # Create processing chains + self.summary_chain = LLMChain(llm=self.llm, prompt=summary_template) + self.facts_chain = LLMChain(llm=self.llm, prompt=facts_template) + self.qa_chain = LLMChain(llm=self.llm, prompt=qa_template) + + # Storage for processed documents + self.processed_docs = {} + + print("โœ… Service initialized successfully!") + + async def __call__(self, request: Request) -> Dict: + """Handle HTTP requests to different endpoints""" + path = request.url.path + method = request.method + + try: + if path == "/" and method == "GET": + return await self._handle_status() + elif path == "/process" and method == "POST": + return await self._handle_process(request) + elif path == "/ask" and method == "POST": + return await self._handle_ask(request) + elif path == "/summary" and method == "GET": + return await self._handle_summary(request) + elif path == "/facts" and method == "GET": + return await self._handle_facts(request) + elif path == "/docs" and method == "GET": + 
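+                # GET /docs takes no parameters; it lists every processed document with short previews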
return await self._handle_list_docs() + else: + return { + "error": "Not found", + "available_endpoints": { + "GET /": "Service status", + "POST /process": "Process document (body: {\"source\": \"url_or_path\"})", + "POST /ask": "Ask question (body: {\"question\": \"your_question\", \"source\": \"optional_doc_id\"})", + "GET /summary?source=doc_id": "Get document summary", + "GET /facts?source=doc_id": "Get document facts", + "GET /docs": "List processed documents" + } + } + except Exception as e: + return {"error": str(e)} + + async def _handle_status(self) -> Dict: + """Return service status""" + return { + "status": "healthy", + "service": "LangChain + Llama Stack Document Processing", + "documents_processed": len(self.processed_docs), + "available_models": [m.identifier for m in self.client.models.list()], + "endpoints": ["/", "/process", "/ask", "/summary", "/facts", "/docs"] + } + + async def _handle_process(self, request: Request) -> Dict: + """Process a document from URL or file path""" + body = await request.json() + source = body.get("source") + + if not source: + return {"error": "Missing 'source' in request body"} + + try: + # Load document + document = load_document(source) + + # Generate summary and facts + summary = self.summary_chain.invoke({"document": document})["text"] + facts = self.facts_chain.invoke({"document": document})["text"] + + # Store processed document + self.processed_docs[source] = { + "document": document, + "summary": summary, + "facts": facts, + "processed_at": time.time() + } + + return { + "success": True, + "source": source, + "document_length": len(document), + "summary_preview": summary[:200] + "..." if len(summary) > 200 else summary, + "facts_preview": facts[:300] + "..." if len(facts) > 300 else facts + } + + except Exception as e: + return {"error": f"Failed to process document: {str(e)}"} + + async def _handle_ask(self, request: Request) -> Dict: + """Answer questions about processed documents""" + body = await request.json() + question = body.get("question") + source = body.get("source") + + if not question: + return {"error": "Missing 'question' in request body"} + + if not self.processed_docs: + return {"error": "No documents processed yet. 
Use /process endpoint first."} + + try: + # Select document + if source and source in self.processed_docs: + doc_data = self.processed_docs[source] + else: + # Use the most recent document + doc_data = list(self.processed_docs.values())[-1] + source = list(self.processed_docs.keys())[-1] + + # Generate answer + answer = self.qa_chain.invoke({ + "document": doc_data["document"], + "question": question + })["text"] + + return { + "question": question, + "answer": answer, + "source": source + } + + except Exception as e: + return {"error": f"Failed to answer question: {str(e)}"} + + async def _handle_summary(self, request: Request) -> Dict: + """Get summary of a processed document""" + source = request.query_params.get("source") + + if not self.processed_docs: + return {"error": "No documents processed yet"} + + if source and source in self.processed_docs: + doc_data = self.processed_docs[source] + else: + # Use the most recent document + doc_data = list(self.processed_docs.values())[-1] + source = list(self.processed_docs.keys())[-1] + + return { + "source": source, + "summary": doc_data["summary"] + } + + async def _handle_facts(self, request: Request) -> Dict: + """Get facts from a processed document""" + source = request.query_params.get("source") + + if not self.processed_docs: + return {"error": "No documents processed yet"} + + if source and source in self.processed_docs: + doc_data = self.processed_docs[source] + else: + # Use the most recent document + doc_data = list(self.processed_docs.values())[-1] + source = list(self.processed_docs.keys())[-1] + + return { + "source": source, + "facts": doc_data["facts"] + } + + async def _handle_list_docs(self) -> Dict: + """List all processed documents""" + docs_info = [] + for source, data in self.processed_docs.items(): + docs_info.append({ + "source": source, + "document_length": len(data["document"]), + "processed_at": data["processed_at"], + "summary_preview": data["summary"][:100] + "..." 
if len(data["summary"]) > 100 else data["summary"] + }) + + return { + "processed_documents": docs_info, + "total_count": len(self.processed_docs) + } + + +def main(): + """Main function to start the Ray Serve application""" + + # Create the application + app = LangChainLlamaStackService.bind() + + # Deploy the application locally + print("๐Ÿš€ Starting LangChain + Llama Stack Ray Serve application...") + serve.run(app, route_prefix="/") + + # Wait for service to initialize + print("โณ Waiting for service to initialize...") + time.sleep(5) + + # Test the service + try: + response = requests.get("http://localhost:8000/") + print(f"โœ… Service response: {response.json()}") + print("๐ŸŽ‰ Service is running successfully!") + except Exception as e: + print(f"โš ๏ธ Could not test service: {e}") + print(" Service might still be starting up...") + + # Show service information + print("\n" + "="*60) + print("๐ŸŒ LangChain + Llama Stack Service is running on:") + print(" http://localhost:8000/") + print("="*60) + print("๐Ÿ“‹ Available endpoints:") + print(" GET / - Service status") + print(" POST /process - Process document") + print(" POST /ask - Ask questions") + print(" GET /summary - Get document summary") + print(" GET /facts - Get document facts") + print(" GET /docs - List processed documents") + print("="*60) + print("๐Ÿงช Example requests:") + print(" # Process a document:") + print(" curl -X POST http://localhost:8000/process \\") + print(" -H 'Content-Type: application/json' \\") + print(" -d '{\"source\": \"https://example.com/article\"}'") + print("") + print(" # Ask a question:") + print(" curl -X POST http://localhost:8000/ask \\") + print(" -H 'Content-Type: application/json' \\") + print(" -d '{\"question\": \"What is the main topic?\"}'") + print("") + print(" # Get summary:") + print(" curl http://localhost:8000/summary") + print("="*60) + print("๐Ÿ›‘ Press Ctrl+C to stop the service...") + + try: + # Keep the service alive + while True: + time.sleep(1) + except KeyboardInterrupt: + print("\n๐Ÿ›‘ Stopping service...") + serve.shutdown() + print("๐Ÿ‘‹ Service stopped successfully!") + +if __name__ == "__main__": + main() + + + + + + + + +# import requests + +# # Step 1: First, process/load the document +# process_response = requests.post( +# "http://localhost:8000/process", +# json={"source": "https://en.wikipedia.org/wiki/What%27s_Happening!!"} +# ) +# print("Processing result:", process_response.json()) + +# # Step 2: Then get the facts +# facts_response = requests.get("http://localhost:8000/facts") +# print("Facts:", facts_response.json()) + +# # Or get facts for specific document +# facts_response = requests.get( +# "http://localhost:8000/facts", +# params={"source": "https://en.wikipedia.org/wiki/What%27s_Happening!!"} +# ) +# print("Facts for specific doc:", facts_response.json()) From 7d2ccd1d9f5b4e2d3865ffd26773d87359ca86e0 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 21 Aug 2025 18:59:02 -0700 Subject: [PATCH 02/14] Delete docs/notebooks/langChain/langchain_llamastack_ray.py Removed ray example --- .../langChain/langchain_llamastack_ray.py | 403 ------------------ 1 file changed, 403 deletions(-) delete mode 100644 docs/notebooks/langChain/langchain_llamastack_ray.py diff --git a/docs/notebooks/langChain/langchain_llamastack_ray.py b/docs/notebooks/langChain/langchain_llamastack_ray.py deleted file mode 100644 index 7ef42dfd7..000000000 --- a/docs/notebooks/langChain/langchain_llamastack_ray.py +++ /dev/null @@ -1,403 +0,0 @@ -import os -import re -import 
html -import json -import time -import requests -from bs4 import BeautifulSoup -from readability import Document as ReadabilityDocument -from markdownify import markdownify -from langchain_community.document_loaders import PyPDFLoader, TextLoader -import tempfile - -from llama_stack_client import LlamaStackClient -from langchain_core.language_models.llms import LLM -from typing import Optional, List, Any, Dict -from langchain.chains import LLMChain -from langchain_core.prompts import PromptTemplate - -from starlette.requests import Request -from ray import serve - -# Prompt Templates (defined globally) -summary_template = PromptTemplate( - input_variables=["document"], - template="""Create a concise summary of this document in 5-10 sentences: - -{document} - -SUMMARY:""" -) - -facts_template = PromptTemplate( - input_variables=["document"], - template="""Extract the most important facts from this document. List them as bullet points: - -{document} - -KEY FACTS: --""" -) - -qa_template = PromptTemplate( - input_variables=["document", "question"], - template="""Based on the following document, answer the question. If the answer isn't in the document, say so. - -DOCUMENT: -{document} - -QUESTION: {question} - -ANSWER:""" -) - -class LlamaStackLLM(LLM): - """Simple LangChain wrapper for Llama Stack""" - - # Pydantic model fields - client: Any = None - model_id: str = "llama3.2:3b" - - def __init__(self, client, model_id: str = "llama3.2:3b"): - # Initialize with field values - super().__init__(client=client, model_id=model_id) - - def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str: - """Make inference call to Llama Stack""" - response = self.client.inference.chat_completion( - model_id=self.model_id, - messages=[{"role": "user", "content": prompt}] - ) - return response.completion_message.content - - @property - def _llm_type(self) -> str: - return "llama_stack" - - -def load_document(source: str) -> str: - is_url = source.startswith(('http://', 'https://')) - is_pdf = source.lower().endswith('.pdf') - if is_pdf: - return load_pdf(source, is_url=is_url) - elif is_url: - return load_from_url(source) - else: - raise ValueError(f"Unsupported format. 
Use URLs or PDF files.") - - -def load_pdf(source: str, is_url: bool = False) -> str: - if is_url: - response = requests.get(source) - response.raise_for_status() - with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: - temp_file.write(response.content) - file_path = temp_file.name - else: - file_path = source - try: - loader = PyPDFLoader(file_path) - docs = loader.load() - return "\\n\\n".join([doc.page_content for doc in docs]) - finally: - if is_url: - os.remove(file_path) - - -def load_from_url(url: str) -> str: - headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'} - response = requests.get(url, headers=headers, timeout=15) - response.raise_for_status() - doc = ReadabilityDocument(response.text) - html_main = doc.summary(html_partial=True) - soup = BeautifulSoup(html_main, "html.parser") - for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]): - tag.decompose() - md_text = markdownify(str(soup), heading_style="ATX") - md_text = html.unescape(md_text) - md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip() - return md_text - - -@serve.deployment -class LangChainLlamaStackService: - """Ray Serve deployment for LangChain + Llama Stack document processing""" - - def __init__(self): - print("๐Ÿš€ Initializing LangChain + Llama Stack Service...") - - # Initialize Llama Stack client - self.client = LlamaStackClient(base_url="http://localhost:8321/") - - # Initialize LangChain-compatible LLM - self.llm = LlamaStackLLM(self.client) - - # Create processing chains - self.summary_chain = LLMChain(llm=self.llm, prompt=summary_template) - self.facts_chain = LLMChain(llm=self.llm, prompt=facts_template) - self.qa_chain = LLMChain(llm=self.llm, prompt=qa_template) - - # Storage for processed documents - self.processed_docs = {} - - print("โœ… Service initialized successfully!") - - async def __call__(self, request: Request) -> Dict: - """Handle HTTP requests to different endpoints""" - path = request.url.path - method = request.method - - try: - if path == "/" and method == "GET": - return await self._handle_status() - elif path == "/process" and method == "POST": - return await self._handle_process(request) - elif path == "/ask" and method == "POST": - return await self._handle_ask(request) - elif path == "/summary" and method == "GET": - return await self._handle_summary(request) - elif path == "/facts" and method == "GET": - return await self._handle_facts(request) - elif path == "/docs" and method == "GET": - return await self._handle_list_docs() - else: - return { - "error": "Not found", - "available_endpoints": { - "GET /": "Service status", - "POST /process": "Process document (body: {\"source\": \"url_or_path\"})", - "POST /ask": "Ask question (body: {\"question\": \"your_question\", \"source\": \"optional_doc_id\"})", - "GET /summary?source=doc_id": "Get document summary", - "GET /facts?source=doc_id": "Get document facts", - "GET /docs": "List processed documents" - } - } - except Exception as e: - return {"error": str(e)} - - async def _handle_status(self) -> Dict: - """Return service status""" - return { - "status": "healthy", - "service": "LangChain + Llama Stack Document Processing", - "documents_processed": len(self.processed_docs), - "available_models": [m.identifier for m in self.client.models.list()], - "endpoints": ["/", "/process", "/ask", "/summary", "/facts", "/docs"] - } - - async def _handle_process(self, request: Request) -> Dict: - """Process a document from URL or file path""" - body = await 
request.json() - source = body.get("source") - - if not source: - return {"error": "Missing 'source' in request body"} - - try: - # Load document - document = load_document(source) - - # Generate summary and facts - summary = self.summary_chain.invoke({"document": document})["text"] - facts = self.facts_chain.invoke({"document": document})["text"] - - # Store processed document - self.processed_docs[source] = { - "document": document, - "summary": summary, - "facts": facts, - "processed_at": time.time() - } - - return { - "success": True, - "source": source, - "document_length": len(document), - "summary_preview": summary[:200] + "..." if len(summary) > 200 else summary, - "facts_preview": facts[:300] + "..." if len(facts) > 300 else facts - } - - except Exception as e: - return {"error": f"Failed to process document: {str(e)}"} - - async def _handle_ask(self, request: Request) -> Dict: - """Answer questions about processed documents""" - body = await request.json() - question = body.get("question") - source = body.get("source") - - if not question: - return {"error": "Missing 'question' in request body"} - - if not self.processed_docs: - return {"error": "No documents processed yet. Use /process endpoint first."} - - try: - # Select document - if source and source in self.processed_docs: - doc_data = self.processed_docs[source] - else: - # Use the most recent document - doc_data = list(self.processed_docs.values())[-1] - source = list(self.processed_docs.keys())[-1] - - # Generate answer - answer = self.qa_chain.invoke({ - "document": doc_data["document"], - "question": question - })["text"] - - return { - "question": question, - "answer": answer, - "source": source - } - - except Exception as e: - return {"error": f"Failed to answer question: {str(e)}"} - - async def _handle_summary(self, request: Request) -> Dict: - """Get summary of a processed document""" - source = request.query_params.get("source") - - if not self.processed_docs: - return {"error": "No documents processed yet"} - - if source and source in self.processed_docs: - doc_data = self.processed_docs[source] - else: - # Use the most recent document - doc_data = list(self.processed_docs.values())[-1] - source = list(self.processed_docs.keys())[-1] - - return { - "source": source, - "summary": doc_data["summary"] - } - - async def _handle_facts(self, request: Request) -> Dict: - """Get facts from a processed document""" - source = request.query_params.get("source") - - if not self.processed_docs: - return {"error": "No documents processed yet"} - - if source and source in self.processed_docs: - doc_data = self.processed_docs[source] - else: - # Use the most recent document - doc_data = list(self.processed_docs.values())[-1] - source = list(self.processed_docs.keys())[-1] - - return { - "source": source, - "facts": doc_data["facts"] - } - - async def _handle_list_docs(self) -> Dict: - """List all processed documents""" - docs_info = [] - for source, data in self.processed_docs.items(): - docs_info.append({ - "source": source, - "document_length": len(data["document"]), - "processed_at": data["processed_at"], - "summary_preview": data["summary"][:100] + "..." 
if len(data["summary"]) > 100 else data["summary"] - }) - - return { - "processed_documents": docs_info, - "total_count": len(self.processed_docs) - } - - -def main(): - """Main function to start the Ray Serve application""" - - # Create the application - app = LangChainLlamaStackService.bind() - - # Deploy the application locally - print("๐Ÿš€ Starting LangChain + Llama Stack Ray Serve application...") - serve.run(app, route_prefix="/") - - # Wait for service to initialize - print("โณ Waiting for service to initialize...") - time.sleep(5) - - # Test the service - try: - response = requests.get("http://localhost:8000/") - print(f"โœ… Service response: {response.json()}") - print("๐ŸŽ‰ Service is running successfully!") - except Exception as e: - print(f"โš ๏ธ Could not test service: {e}") - print(" Service might still be starting up...") - - # Show service information - print("\n" + "="*60) - print("๐ŸŒ LangChain + Llama Stack Service is running on:") - print(" http://localhost:8000/") - print("="*60) - print("๐Ÿ“‹ Available endpoints:") - print(" GET / - Service status") - print(" POST /process - Process document") - print(" POST /ask - Ask questions") - print(" GET /summary - Get document summary") - print(" GET /facts - Get document facts") - print(" GET /docs - List processed documents") - print("="*60) - print("๐Ÿงช Example requests:") - print(" # Process a document:") - print(" curl -X POST http://localhost:8000/process \\") - print(" -H 'Content-Type: application/json' \\") - print(" -d '{\"source\": \"https://example.com/article\"}'") - print("") - print(" # Ask a question:") - print(" curl -X POST http://localhost:8000/ask \\") - print(" -H 'Content-Type: application/json' \\") - print(" -d '{\"question\": \"What is the main topic?\"}'") - print("") - print(" # Get summary:") - print(" curl http://localhost:8000/summary") - print("="*60) - print("๐Ÿ›‘ Press Ctrl+C to stop the service...") - - try: - # Keep the service alive - while True: - time.sleep(1) - except KeyboardInterrupt: - print("\n๐Ÿ›‘ Stopping service...") - serve.shutdown() - print("๐Ÿ‘‹ Service stopped successfully!") - -if __name__ == "__main__": - main() - - - - - - - - -# import requests - -# # Step 1: First, process/load the document -# process_response = requests.post( -# "http://localhost:8000/process", -# json={"source": "https://en.wikipedia.org/wiki/What%27s_Happening!!"} -# ) -# print("Processing result:", process_response.json()) - -# # Step 2: Then get the facts -# facts_response = requests.get("http://localhost:8000/facts") -# print("Facts:", facts_response.json()) - -# # Or get facts for specific document -# facts_response = requests.get( -# "http://localhost:8000/facts", -# params={"source": "https://en.wikipedia.org/wiki/What%27s_Happening!!"} -# ) -# print("Facts for specific doc:", facts_response.json()) From 4969953852c70fc3ff2840862f71e197b13a7f0e Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 21 Aug 2025 19:35:41 -0700 Subject: [PATCH 03/14] Updated the README and the name of the python script. 
Updated the README and the name of the python script to langchain-llama-stack --- docs/notebooks/langChain/README.md | 77 +------------------ ...llamastack.py => langchain-llama-stack.py} | 0 2 files changed, 2 insertions(+), 75 deletions(-) rename docs/notebooks/langChain/{langchain_llamastack.py => langchain-llama-stack.py} (100%) diff --git a/docs/notebooks/langChain/README.md b/docs/notebooks/langChain/README.md index f36a39ce9..dde6385d6 100644 --- a/docs/notebooks/langChain/README.md +++ b/docs/notebooks/langChain/README.md @@ -1,19 +1,12 @@ # LangChain + Llama Stack Document Processing -This repository contains two different implementations of document processing using LangChain and Llama Stack: - -1. **`langchain_llamastack.py`** - Interactive CLI version -2. **`langchain_llamastack_ray.py`** - Ray Serve API version - -Both versions provide AI-powered document processing capabilities including summarization, fact extraction, and question-answering. - +1. **`langchain-llamastack.py`** - Interactive CLI version --- ## ๐Ÿ“‹ Prerequisites ### System Requirements - Python 3.12+ -- Ray Serve (for API version) - Llama Stack server running on `http://localhost:8321/` - Ollama or compatible model server @@ -21,7 +14,6 @@ Both versions provide AI-powered document processing capabilities including summ ```bash pip install llama-stack-client langchain langchain-core langchain-community pip install beautifulsoup4 markdownify readability-lxml requests -pip install ray[serve] starlette # For Ray Serve version only ``` ### Environment Setup @@ -31,7 +23,7 @@ python3.12 -m venv llama-env-py312 source llama-env-py312/bin/activate # Install dependencies -pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests ray[serve] starlette +pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests ``` --- @@ -106,39 +98,6 @@ Artificial intelligence (AI) is the simulation of human intelligence... ๐Ÿ‘‹ Thanks for exploring LangChain chains! ``` ---- - -## ๐ŸŒ Option 2: Ray Serve API Version (`langchain_llamastack_ray.py`) - -### Features -- โœ… RESTful HTTP API -- โœ… Persistent service (runs indefinitely) -- โœ… Multiple endpoints for different operations -- โœ… JSON request/response format -- โœ… Concurrent request handling - -### How to Run -```bash -# Activate environment -source llama-env-py312/bin/activate - -# Start the Ray Serve API -cd /home/omara/langchain_llamastack -python langchain_llamastack_ray.py -``` - -### Service Endpoints - -| Method | Endpoint | Description | Parameters | -|--------|----------|-------------|------------| -| GET | `/` | Service status | None | -| POST | `/process` | Process document | `{"source": "url_or_path"}` | -| POST | `/ask` | Ask question | `{"question": "text", "source": "optional"}` | -| GET | `/summary` | Get summary | `?source=url` (optional) | -| GET | `/facts` | Get facts | `?source=url` (optional) | -| GET | `/docs` | List documents | None | - -### API Usage Examples #### Using curl: ```bash @@ -223,23 +182,8 @@ To change the model, edit the `model_id` parameter in the respective files. - Check available models: `curl http://localhost:8321/models/list` - Update `model_id` in the code to match available models -#### 3. 
Ray Serve Port Already in Use -**Error**: `Port 8000 already in use` -**Solution**: -```bash -# Kill process using port 8000 -lsof -ti :8000 | xargs kill -9 - -# Or use a different port by modifying the code -``` #### 4. Missing Dependencies -**Error**: `ModuleNotFoundError: No module named 'ray'` -**Solution**: -```bash -pip install ray[serve] starlette -``` - ### Debug Mode To enable verbose logging, add this to the beginning of either file: ```python @@ -255,23 +199,12 @@ logging.basicConfig(level=logging.DEBUG) - **Pros**: Simple to use, interactive, good for testing - **Cons**: Single-threaded, session-based only - **Best for**: Development, testing, manual document analysis - -### Ray Serve Version -- **Pros**: Concurrent requests, persistent service, API integration -- **Cons**: More complex setup, requires Ray -- **Best for**: Production, integration with other services, high throughput - --- ## ๐Ÿ›‘ Stopping Services ### CLI Version - Press `Ctrl+C` or type `quit` in the interactive prompt - -### Ray Serve Version -- Press `Ctrl+C` in the terminal running the service -- The service will gracefully shutdown and clean up resources - --- ## ๐Ÿ“ Examples @@ -283,12 +216,6 @@ logging.basicConfig(level=logging.DEBUG) 4. Ask questions: `ask What are the main contributions?` 5. Exit: `quit` -### API Workflow -1. Start: `python langchain_llamastack_ray.py` -2. Process: `curl -X POST http://localhost:8000/process -d '{"source": "https://example.com"}'` -3. Query: `curl -X POST http://localhost:8000/ask -d '{"question": "What is this about?"}'` -4. Stop: `Ctrl+C` - --- ## ๐Ÿค Contributing diff --git a/docs/notebooks/langChain/langchain_llamastack.py b/docs/notebooks/langChain/langchain-llama-stack.py similarity index 100% rename from docs/notebooks/langChain/langchain_llamastack.py rename to docs/notebooks/langChain/langchain-llama-stack.py From 17597808180cb550eba74cd774ab80ba2ad79f2a Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 21 Aug 2025 19:37:47 -0700 Subject: [PATCH 04/14] Updated the README Added fixes to the README file. --- docs/notebooks/langChain/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/notebooks/langChain/README.md b/docs/notebooks/langChain/README.md index dde6385d6..a6dbd2266 100644 --- a/docs/notebooks/langChain/README.md +++ b/docs/notebooks/langChain/README.md @@ -1,6 +1,6 @@ # LangChain + Llama Stack Document Processing -1. **`langchain-llamastack.py`** - Interactive CLI version +1. **`langchain-llama-stack.py`** - Interactive CLI version --- ## ๐Ÿ“‹ Prerequisites @@ -39,7 +39,7 @@ llama stack run your-config --port 8321 --- -## ๐Ÿ“– Option 1: Interactive CLI Version (`langchain_llamastack_updated.py`) +## ๐Ÿ“– Option 1: Interactive CLI Version (`langchain-llama-stack.py`) ### Features - โœ… Interactive command-line interface @@ -55,7 +55,7 @@ source llama-env-py312/bin/activate # Run the interactive CLI cd /home/omara/langchain_llamastack -python langchain_llamastack_updated.py +python langchain-llama-stack.py ``` ### Usage Commands @@ -210,7 +210,7 @@ logging.basicConfig(level=logging.DEBUG) ## ๐Ÿ“ Examples ### CLI Workflow -1. Start: `python langchain_llamastack_updated.py` +1. Start: `python langchain-llama-stack.py` 2. Load document: `load https://arxiv.org/pdf/2103.00020.pdf` 3. Get summary: `summary` 4. 
Ask questions: `ask What are the main contributions?` From 534bf972a37354458c02e0a718f9c339427541ba Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 21 Aug 2025 19:43:08 -0700 Subject: [PATCH 05/14] removed dead code removed some dead code --- docs/notebooks/langChain/langchain-llama-stack.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/notebooks/langChain/langchain-llama-stack.py b/docs/notebooks/langChain/langchain-llama-stack.py index 2f8301ea9..98aaa8d6c 100644 --- a/docs/notebooks/langChain/langchain-llama-stack.py +++ b/docs/notebooks/langChain/langchain-llama-stack.py @@ -61,8 +61,6 @@ class LlamaStackLLM(LLM): # Pydantic model fields client: Any = None - #model_id: str = "meta-llama/Llama-4-Maverick-17B-128E-Instruct" - # model_id: str = "meta-llama/Llama-3.3-70B-Instruct" model_id: str = "llama3:70b-instruct" def __init__(self, client, model_id: str = "llama3:70b-instruct"): From 84c960e9b508227bb5399acd740ecb3165283014 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 21 Aug 2025 19:50:50 -0700 Subject: [PATCH 06/14] Changing the langchain dir name Changing the langchain dir name --- docs/notebooks/langChain/README.md | 233 -------------- .../langChain/langchain-llama-stack.py | 288 ------------------ 2 files changed, 521 deletions(-) delete mode 100644 docs/notebooks/langChain/README.md delete mode 100644 docs/notebooks/langChain/langchain-llama-stack.py diff --git a/docs/notebooks/langChain/README.md b/docs/notebooks/langChain/README.md deleted file mode 100644 index a6dbd2266..000000000 --- a/docs/notebooks/langChain/README.md +++ /dev/null @@ -1,233 +0,0 @@ -# LangChain + Llama Stack Document Processing - -1. **`langchain-llama-stack.py`** - Interactive CLI version ---- - -## ๐Ÿ“‹ Prerequisites - -### System Requirements -- Python 3.12+ -- Llama Stack server running on `http://localhost:8321/` -- Ollama or compatible model server - -### Required Python Packages -```bash -pip install llama-stack-client langchain langchain-core langchain-community -pip install beautifulsoup4 markdownify readability-lxml requests -``` - -### Environment Setup -```bash -# Create and activate virtual environment -python3.12 -m venv llama-env-py312 -source llama-env-py312/bin/activate - -# Install dependencies -pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests -``` - ---- - -## ๐Ÿš€ Quick Start - -### Start Llama Stack Server -Before running either version, ensure your Llama Stack server is running: -```bash -# Start Llama Stack server (example) -llama stack run your-config --port 8321 -``` - ---- - -## ๐Ÿ“– Option 1: Interactive CLI Version (`langchain-llama-stack.py`) - -### Features -- โœ… Interactive command-line interface -- โœ… Document loading from URLs and PDFs -- โœ… AI-powered summarization and fact extraction -- โœ… Question-answering based on document content -- โœ… Session-based document storage - -### How to Run -```bash -# Activate environment -source llama-env-py312/bin/activate - -# Run the interactive CLI -cd /home/omara/langchain_llamastack -python langchain-llama-stack.py -``` - -### Usage Commands -Once running, you can use these interactive commands: - -``` -๐ŸŽฏ Interactive Document Processing Demo -Commands: - load - Process a document - ask - Ask about the document - summary - Show document summary - facts - Show extracted facts - help - Show commands - quit - Exit demo -``` - -### Example Session -``` -> load https://en.wikipedia.org/wiki/Artificial_intelligence -๐Ÿ“„ 
Loading document from: https://en.wikipedia.org/wiki/Artificial_intelligence -โœ… Loaded 45,832 characters -๐Ÿ“ Generating summary... -๐Ÿ” Extracting key facts... -โœ… Processing complete! - -> summary -๐Ÿ“ Summary: -Artificial intelligence (AI) is the simulation of human intelligence... - -> ask What are the main types of AI? -๐Ÿ’ฌ Q: What are the main types of AI? -๐Ÿ“ A: Based on the document, the main types of AI include... - -> facts -๐Ÿ” Key Facts: -- AI was founded as an academic discipline in 1956 -- Machine learning is a subset of AI... - -> quit -๐Ÿ‘‹ Thanks for exploring LangChain chains! -``` - - -#### Using curl: -```bash -# Check service status -curl http://localhost:8000/ - -# Process a document -curl -X POST http://localhost:8000/process \ - -H 'Content-Type: application/json' \ - -d '{"source": "https://en.wikipedia.org/wiki/Machine_learning"}' - -# Ask a question -curl -X POST http://localhost:8000/ask \ - -H 'Content-Type: application/json' \ - -d '{"question": "What is machine learning?"}' - -# Get summary -curl http://localhost:8000/summary - -# Get facts -curl http://localhost:8000/facts - -# List all processed documents -curl http://localhost:8000/docs -``` - -#### Using Python requests: -```python -import requests - -# Process a document -response = requests.post( - "http://localhost:8000/process", - json={"source": "https://en.wikipedia.org/wiki/Deep_learning"} -) -print(response.json()) - -# Ask a question -response = requests.post( - "http://localhost:8000/ask", - json={"question": "What are neural networks?"} -) -print(response.json()) - -# Get facts -response = requests.get("http://localhost:8000/facts") -print(response.json()) -``` - ---- - -## ๐Ÿ”ง Configuration - -### Model Configuration -Both versions use these models by default: -- **Model ID**: `llama3.2:3b` -- **Llama Stack URL**: `http://localhost:8321/` - -To change the model, edit the `model_id` parameter in the respective files. - -### Supported Document Types -- โœ… **URLs**: Any web page (extracted using readability) -- โœ… **PDF files**: Local or remote PDF documents -- โŒ Plain text files (can be added if needed) - ---- - -## ๐Ÿ› ๏ธ Troubleshooting - -### Common Issues - -#### 1. Connection Refused to Llama Stack -**Error**: `Connection refused to http://localhost:8321/` -**Solution**: -- Ensure Llama Stack server is running -- Check if port 8321 is correct -- Verify network connectivity - -#### 2. Model Not Found -**Error**: `Model not found: llama3.2:3b` -**Solution**: -- Check available models: `curl http://localhost:8321/models/list` -- Update `model_id` in the code to match available models - - -#### 4. Missing Dependencies -### Debug Mode -To enable verbose logging, add this to the beginning of either file: -```python -import logging -logging.basicConfig(level=logging.DEBUG) -``` - ---- - -## ๐Ÿ“Š Performance Notes - -### CLI Version -- **Pros**: Simple to use, interactive, good for testing -- **Cons**: Single-threaded, session-based only -- **Best for**: Development, testing, manual document analysis ---- - -## ๐Ÿ›‘ Stopping Services - -### CLI Version -- Press `Ctrl+C` or type `quit` in the interactive prompt ---- - -## ๐Ÿ“ Examples - -### CLI Workflow -1. Start: `python langchain-llama-stack.py` -2. Load document: `load https://arxiv.org/pdf/2103.00020.pdf` -3. Get summary: `summary` -4. Ask questions: `ask What are the main contributions?` -5. Exit: `quit` - ---- - -## ๐Ÿค Contributing - -To extend functionality: -1. Add new prompt templates for different analysis types -2. 
Support additional document formats -3. Add caching for processed documents -4. Implement user authentication for API version - ---- - -## ๐Ÿ“œ License - -This project is for educational and research purposes. diff --git a/docs/notebooks/langChain/langchain-llama-stack.py b/docs/notebooks/langChain/langchain-llama-stack.py deleted file mode 100644 index 98aaa8d6c..000000000 --- a/docs/notebooks/langChain/langchain-llama-stack.py +++ /dev/null @@ -1,288 +0,0 @@ -import os -import re -import html -import requests -from bs4 import BeautifulSoup -from readability import Document as ReadabilityDocument -from markdownify import markdownify -from langchain_community.document_loaders import PyPDFLoader, TextLoader -import tempfile - -from llama_stack_client import LlamaStackClient - -from langchain_core.language_models.llms import LLM -from typing import Optional, List, Any -from langchain.chains import LLMChain -from langchain_core.prompts import PromptTemplate -from rich.pretty import pprint - -# Global variables -client = None -llm = None -summary_chain = None -facts_chain = None -qa_chain = None -processed_docs = {} - -# Prompt Templates (defined globally) -summary_template = PromptTemplate( - input_variables=["document"], - template="""Create a concise summary of this document in 5-10 sentences: - -{document} - -SUMMARY:""" -) - -facts_template = PromptTemplate( - input_variables=["document"], - template="""Extract the most important facts from this document. List them as bullet points: - -{document} - -KEY FACTS: --""" -) - -qa_template = PromptTemplate( - input_variables=["document", "question"], - template="""Based on the following document, answer the question. If the answer isn't in the document, say so. - -DOCUMENT: -{document} - -QUESTION: {question} - -ANSWER:""" -) - -class LlamaStackLLM(LLM): - """Simple LangChain wrapper for Llama Stack""" - - # Pydantic model fields - client: Any = None - model_id: str = "llama3:70b-instruct" - - def __init__(self, client, model_id: str = "llama3:70b-instruct"): - # Initialize with field values - super().__init__(client=client, model_id=model_id) - - def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str: - """Make inference call to Llama Stack""" - response = self.client.inference.chat_completion( - model_id=self.model_id, - messages=[{"role": "user", "content": prompt}] - ) - return response.completion_message.content - - @property - def _llm_type(self) -> str: - return "llama_stack" - - -def load_document(source: str) -> str: - is_url = source.startswith(('http://', 'https://')) - is_pdf = source.lower().endswith('.pdf') - if is_pdf: - return load_pdf(source, is_url=is_url) - elif is_url: - return load_from_url(source) - else: - raise ValueError(f"Unsupported format. 
Use URLs or PDF files.") - - -def load_pdf(source: str, is_url: bool = False) -> str: - if is_url: - response = requests.get(source) - response.raise_for_status() - with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: - temp_file.write(response.content) - file_path = temp_file.name - else: - file_path = source - try: - loader = PyPDFLoader(file_path) - docs = loader.load() - return "\\n\\n".join([doc.page_content for doc in docs]) - finally: - if is_url: - os.remove(file_path) - - -def load_from_url(url: str) -> str: - headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'} - response = requests.get(url, headers=headers, timeout=15) - response.raise_for_status() - doc = ReadabilityDocument(response.text) - html_main = doc.summary(html_partial=True) - soup = BeautifulSoup(html_main, "html.parser") - for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]): - tag.decompose() - md_text = markdownify(str(soup), heading_style="ATX") - md_text = html.unescape(md_text) - md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip() - return md_text - -def process_document(source: str): - global summary_chain, facts_chain, processed_docs - - print(f"๐Ÿ“„ Loading document from: {source}") - document = load_document(source) - print(f"โœ… Loaded {len(document):,} characters") - print("\n๐Ÿ“ Generating summary...") - summary = summary_chain.invoke({"document": document})["text"] - print("Summary generated") - print("๐Ÿ” Extracting key facts...") - facts = facts_chain.invoke({"document": document})["text"] - processed_docs[source] = { - "document": document, - "summary": summary, - "facts": facts - } - print(f"\nโœ… Processing complete!") - print(f"๐Ÿ“Š Document: {len(document):,} chars") - print(f"๐Ÿ“ Summary: {summary[:100]}...") - print(f"๐Ÿ” Facts: {facts[:1000]}...") - return processed_docs[source] - -def ask_question(question: str, source: str = None): - """Answer questions about processed documents""" - global qa_chain, processed_docs - - if not processed_docs: - return "No documents processed yet. Use process_document() first." 
- if source and source in processed_docs: - doc_data = processed_docs[source] - else: - # Use the most recent document - doc_data = list(processed_docs.values())[-1] - answer = qa_chain.invoke({ - "document": doc_data["document"], - "question": question - })["text"] - return answer - - -def interactive_demo(): - print("\n๐ŸŽฏ Interactive Document Processing Demo") - print("Commands:") - print(" load - Process a document") - print(" ask - Ask about the document") - print(" summary - Show document summary") - print(" facts - Show extracted facts") - print(" help - Show commands") - print(" quit - Exit demo") - - while True: - try: - command = input("\n> ").strip() - if command.lower() in ['quit', 'exit']: - print("๐Ÿ‘‹ Thanks for exploring LangChain chains!") - break - elif command.lower() == 'help': - print("\nCommands:") - print(" load - Process a document") - print(" ask - Ask about the document") - print(" summary - Show document summary") - print(" facts - Show extracted facts") - elif command.startswith('load '): - source = command[5:].strip() - if source: - try: - process_document(source) - except Exception as e: - print(f"โŒ Error processing document: {e}") - else: - print("โ“ Please provide a URL or file path") - elif command.startswith('ask '): - question = command[4:].strip() - if question: - try: - answer = ask_question(question) - print(f"\n๐Ÿ’ฌ Q: {question}") - print(f"๐Ÿ“ A: {answer}") - except Exception as e: - print(f"โŒ Error: {e}") - else: - print("โ“ Please provide a question") - elif command.lower() == 'summary': - if processed_docs: - latest_doc = list(processed_docs.values())[-1] - print(f"\n๐Ÿ“ Summary:\n{latest_doc['summary']}") - else: - print("โ“ No documents processed yet") - elif command.lower() == 'facts': - if processed_docs: - latest_doc = list(processed_docs.values())[-1] - print(f"\n๐Ÿ” Key Facts:\n{latest_doc['facts']}") - else: - print("โ“ No documents processed yet") - else: - print("โ“ Unknown command. 
Type 'help' for options") - except (EOFError, KeyboardInterrupt): - print("\n๐Ÿ‘‹ Goodbye!") - break - - -def main(): - global client, llm, summary_chain, facts_chain, qa_chain, processed_docs - - print("๐Ÿš€ Starting LangChain + Llama Stack Document Processing Demo") - - client = LlamaStackClient( - base_url="http://localhost:8321/", - ) - - # Initialize the LangChain-compatible LLM - llm = LlamaStackLLM(client) - - # Test the wrapper - test_response = llm.invoke("Can you help me with the document processing?") - print(f"โœ… LangChain wrapper working!") - print(f"Response: {test_response[:100]}...") - - print("Available models:") - for m in client.models.list(): - print(f"- {m.identifier}") - - print("----") - print("Available shields (safety models):") - for s in client.shields.list(): - print(s.identifier) - print("----") - - # model_id = "llama3.2:3b" - model_id = "ollama/llama3:70b-instruct" - - response = client.inference.chat_completion( - model_id=model_id, - messages=[ - {"role": "system", "content": "You are a friendly assistant."}, - {"role": "user", "content": "Write a two-sentence poem about llama."}, - ], - ) - - print(response.completion_message.content) - - # Create chains by combining our LLM with prompt templates - summary_chain = LLMChain(llm=llm, prompt=summary_template) - facts_chain = LLMChain(llm=llm, prompt=facts_template) - qa_chain = LLMChain(llm=llm, prompt=qa_template) - - # Initialize storage for processed documents - processed_docs = {} - - print("โœ… Created 3 prompt templates:") - print(" โ€ข Summary: Condenses documents into key points") - print(" โ€ข Facts: Extracts important information as bullets") - print(" โ€ข Q&A: Answers questions based on document content") - - # Test template formatting - test_prompt = summary_template.format(document="This is a sample document about AI...") - print(f"\n๐Ÿ“ Example prompt: {len(test_prompt)} characters") - - # Start the interactive demo - interactive_demo() - -if __name__ == "__main__": - main() From 0da0732b07bfc799af525e61535730ddcb48c72f Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 21 Aug 2025 19:51:33 -0700 Subject: [PATCH 07/14] Changed the langChain dir name to langchain Changed the langChain dir name to langchain --- docs/notebooks/langchain/README.md | 233 ++++++++++++++ .../langchain/langchain-llama-stack.py | 288 ++++++++++++++++++ 2 files changed, 521 insertions(+) create mode 100644 docs/notebooks/langchain/README.md create mode 100644 docs/notebooks/langchain/langchain-llama-stack.py diff --git a/docs/notebooks/langchain/README.md b/docs/notebooks/langchain/README.md new file mode 100644 index 000000000..a6dbd2266 --- /dev/null +++ b/docs/notebooks/langchain/README.md @@ -0,0 +1,233 @@ +# LangChain + Llama Stack Document Processing + +1. 
**`langchain-llama-stack.py`** - Interactive CLI version +--- + +## ๐Ÿ“‹ Prerequisites + +### System Requirements +- Python 3.12+ +- Llama Stack server running on `http://localhost:8321/` +- Ollama or compatible model server + +### Required Python Packages +```bash +pip install llama-stack-client langchain langchain-core langchain-community +pip install beautifulsoup4 markdownify readability-lxml requests +``` + +### Environment Setup +```bash +# Create and activate virtual environment +python3.12 -m venv llama-env-py312 +source llama-env-py312/bin/activate + +# Install dependencies +pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests +``` + +--- + +## ๐Ÿš€ Quick Start + +### Start Llama Stack Server +Before running either version, ensure your Llama Stack server is running: +```bash +# Start Llama Stack server (example) +llama stack run your-config --port 8321 +``` + +--- + +## ๐Ÿ“– Option 1: Interactive CLI Version (`langchain-llama-stack.py`) + +### Features +- โœ… Interactive command-line interface +- โœ… Document loading from URLs and PDFs +- โœ… AI-powered summarization and fact extraction +- โœ… Question-answering based on document content +- โœ… Session-based document storage + +### How to Run +```bash +# Activate environment +source llama-env-py312/bin/activate + +# Run the interactive CLI +cd /home/omara/langchain_llamastack +python langchain-llama-stack.py +``` + +### Usage Commands +Once running, you can use these interactive commands: + +``` +๐ŸŽฏ Interactive Document Processing Demo +Commands: + load - Process a document + ask - Ask about the document + summary - Show document summary + facts - Show extracted facts + help - Show commands + quit - Exit demo +``` + +### Example Session +``` +> load https://en.wikipedia.org/wiki/Artificial_intelligence +๐Ÿ“„ Loading document from: https://en.wikipedia.org/wiki/Artificial_intelligence +โœ… Loaded 45,832 characters +๐Ÿ“ Generating summary... +๐Ÿ” Extracting key facts... +โœ… Processing complete! + +> summary +๐Ÿ“ Summary: +Artificial intelligence (AI) is the simulation of human intelligence... + +> ask What are the main types of AI? +๐Ÿ’ฌ Q: What are the main types of AI? +๐Ÿ“ A: Based on the document, the main types of AI include... + +> facts +๐Ÿ” Key Facts: +- AI was founded as an academic discipline in 1956 +- Machine learning is a subset of AI... + +> quit +๐Ÿ‘‹ Thanks for exploring LangChain chains! 
+``` + + +#### Using curl: +```bash +# Check service status +curl http://localhost:8000/ + +# Process a document +curl -X POST http://localhost:8000/process \ + -H 'Content-Type: application/json' \ + -d '{"source": "https://en.wikipedia.org/wiki/Machine_learning"}' + +# Ask a question +curl -X POST http://localhost:8000/ask \ + -H 'Content-Type: application/json' \ + -d '{"question": "What is machine learning?"}' + +# Get summary +curl http://localhost:8000/summary + +# Get facts +curl http://localhost:8000/facts + +# List all processed documents +curl http://localhost:8000/docs +``` + +#### Using Python requests: +```python +import requests + +# Process a document +response = requests.post( + "http://localhost:8000/process", + json={"source": "https://en.wikipedia.org/wiki/Deep_learning"} +) +print(response.json()) + +# Ask a question +response = requests.post( + "http://localhost:8000/ask", + json={"question": "What are neural networks?"} +) +print(response.json()) + +# Get facts +response = requests.get("http://localhost:8000/facts") +print(response.json()) +``` + +--- + +## ๐Ÿ”ง Configuration + +### Model Configuration +Both versions use these models by default: +- **Model ID**: `llama3.2:3b` +- **Llama Stack URL**: `http://localhost:8321/` + +To change the model, edit the `model_id` parameter in the respective files. + +### Supported Document Types +- โœ… **URLs**: Any web page (extracted using readability) +- โœ… **PDF files**: Local or remote PDF documents +- โŒ Plain text files (can be added if needed) + +--- + +## ๐Ÿ› ๏ธ Troubleshooting + +### Common Issues + +#### 1. Connection Refused to Llama Stack +**Error**: `Connection refused to http://localhost:8321/` +**Solution**: +- Ensure Llama Stack server is running +- Check if port 8321 is correct +- Verify network connectivity + +#### 2. Model Not Found +**Error**: `Model not found: llama3.2:3b` +**Solution**: +- Check available models: `curl http://localhost:8321/models/list` +- Update `model_id` in the code to match available models + + +#### 4. Missing Dependencies +### Debug Mode +To enable verbose logging, add this to the beginning of either file: +```python +import logging +logging.basicConfig(level=logging.DEBUG) +``` + +--- + +## ๐Ÿ“Š Performance Notes + +### CLI Version +- **Pros**: Simple to use, interactive, good for testing +- **Cons**: Single-threaded, session-based only +- **Best for**: Development, testing, manual document analysis +--- + +## ๐Ÿ›‘ Stopping Services + +### CLI Version +- Press `Ctrl+C` or type `quit` in the interactive prompt +--- + +## ๐Ÿ“ Examples + +### CLI Workflow +1. Start: `python langchain-llama-stack.py` +2. Load document: `load https://arxiv.org/pdf/2103.00020.pdf` +3. Get summary: `summary` +4. Ask questions: `ask What are the main contributions?` +5. Exit: `quit` + +--- + +## ๐Ÿค Contributing + +To extend functionality: +1. Add new prompt templates for different analysis types +2. Support additional document formats +3. Add caching for processed documents +4. Implement user authentication for API version + +--- + +## ๐Ÿ“œ License + +This project is for educational and research purposes. 
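[Editor's note] The Contributing list above suggests adding new prompt templates for different analysis types. A new analysis chain can follow the same `PromptTemplate` + `LLMChain` pattern already used for `summary_chain` and `facts_chain` in `langchain-llama-stack.py`. The sketch below is illustrative only: it assumes the `llm` object constructed in that script's `main()`, and the `keywords_template` / `keywords_chain` names are invented for this example (newer LangChain releases may prefer `prompt | llm` composition over `LLMChain`).

```python
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate

# Hypothetical extra analysis type: keyword extraction.
# Mirrors the module-level summary/facts templates in langchain-llama-stack.py.
keywords_template = PromptTemplate(
    input_variables=["document"],
    template="""List the 10 most important keywords or phrases in this document, one per line:

{document}

KEYWORDS:""",
)

# Inside main(), build and call the chain the same way summary_chain and
# facts_chain are built there (llm comes from the existing setup code):
#
#   keywords_chain = LLMChain(llm=llm, prompt=keywords_template)
#   keywords = keywords_chain.invoke({"document": document})["text"]
```

Results from such a chain could be stored alongside `summary` and `facts` in the existing `processed_docs` dictionary, which already serves as a simple in-memory cache for processed documents.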
diff --git a/docs/notebooks/langchain/langchain-llama-stack.py b/docs/notebooks/langchain/langchain-llama-stack.py new file mode 100644 index 000000000..98aaa8d6c --- /dev/null +++ b/docs/notebooks/langchain/langchain-llama-stack.py @@ -0,0 +1,288 @@ +import os +import re +import html +import requests +from bs4 import BeautifulSoup +from readability import Document as ReadabilityDocument +from markdownify import markdownify +from langchain_community.document_loaders import PyPDFLoader, TextLoader +import tempfile + +from llama_stack_client import LlamaStackClient + +from langchain_core.language_models.llms import LLM +from typing import Optional, List, Any +from langchain.chains import LLMChain +from langchain_core.prompts import PromptTemplate +from rich.pretty import pprint + +# Global variables +client = None +llm = None +summary_chain = None +facts_chain = None +qa_chain = None +processed_docs = {} + +# Prompt Templates (defined globally) +summary_template = PromptTemplate( + input_variables=["document"], + template="""Create a concise summary of this document in 5-10 sentences: + +{document} + +SUMMARY:""" +) + +facts_template = PromptTemplate( + input_variables=["document"], + template="""Extract the most important facts from this document. List them as bullet points: + +{document} + +KEY FACTS: +-""" +) + +qa_template = PromptTemplate( + input_variables=["document", "question"], + template="""Based on the following document, answer the question. If the answer isn't in the document, say so. + +DOCUMENT: +{document} + +QUESTION: {question} + +ANSWER:""" +) + +class LlamaStackLLM(LLM): + """Simple LangChain wrapper for Llama Stack""" + + # Pydantic model fields + client: Any = None + model_id: str = "llama3:70b-instruct" + + def __init__(self, client, model_id: str = "llama3:70b-instruct"): + # Initialize with field values + super().__init__(client=client, model_id=model_id) + + def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str: + """Make inference call to Llama Stack""" + response = self.client.inference.chat_completion( + model_id=self.model_id, + messages=[{"role": "user", "content": prompt}] + ) + return response.completion_message.content + + @property + def _llm_type(self) -> str: + return "llama_stack" + + +def load_document(source: str) -> str: + is_url = source.startswith(('http://', 'https://')) + is_pdf = source.lower().endswith('.pdf') + if is_pdf: + return load_pdf(source, is_url=is_url) + elif is_url: + return load_from_url(source) + else: + raise ValueError(f"Unsupported format. 
Use URLs or PDF files.") + + +def load_pdf(source: str, is_url: bool = False) -> str: + if is_url: + response = requests.get(source) + response.raise_for_status() + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: + temp_file.write(response.content) + file_path = temp_file.name + else: + file_path = source + try: + loader = PyPDFLoader(file_path) + docs = loader.load() + return "\\n\\n".join([doc.page_content for doc in docs]) + finally: + if is_url: + os.remove(file_path) + + +def load_from_url(url: str) -> str: + headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'} + response = requests.get(url, headers=headers, timeout=15) + response.raise_for_status() + doc = ReadabilityDocument(response.text) + html_main = doc.summary(html_partial=True) + soup = BeautifulSoup(html_main, "html.parser") + for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]): + tag.decompose() + md_text = markdownify(str(soup), heading_style="ATX") + md_text = html.unescape(md_text) + md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip() + return md_text + +def process_document(source: str): + global summary_chain, facts_chain, processed_docs + + print(f"๐Ÿ“„ Loading document from: {source}") + document = load_document(source) + print(f"โœ… Loaded {len(document):,} characters") + print("\n๐Ÿ“ Generating summary...") + summary = summary_chain.invoke({"document": document})["text"] + print("Summary generated") + print("๐Ÿ” Extracting key facts...") + facts = facts_chain.invoke({"document": document})["text"] + processed_docs[source] = { + "document": document, + "summary": summary, + "facts": facts + } + print(f"\nโœ… Processing complete!") + print(f"๐Ÿ“Š Document: {len(document):,} chars") + print(f"๐Ÿ“ Summary: {summary[:100]}...") + print(f"๐Ÿ” Facts: {facts[:1000]}...") + return processed_docs[source] + +def ask_question(question: str, source: str = None): + """Answer questions about processed documents""" + global qa_chain, processed_docs + + if not processed_docs: + return "No documents processed yet. Use process_document() first." 
+ if source and source in processed_docs: + doc_data = processed_docs[source] + else: + # Use the most recent document + doc_data = list(processed_docs.values())[-1] + answer = qa_chain.invoke({ + "document": doc_data["document"], + "question": question + })["text"] + return answer + + +def interactive_demo(): + print("\n๐ŸŽฏ Interactive Document Processing Demo") + print("Commands:") + print(" load - Process a document") + print(" ask - Ask about the document") + print(" summary - Show document summary") + print(" facts - Show extracted facts") + print(" help - Show commands") + print(" quit - Exit demo") + + while True: + try: + command = input("\n> ").strip() + if command.lower() in ['quit', 'exit']: + print("๐Ÿ‘‹ Thanks for exploring LangChain chains!") + break + elif command.lower() == 'help': + print("\nCommands:") + print(" load - Process a document") + print(" ask - Ask about the document") + print(" summary - Show document summary") + print(" facts - Show extracted facts") + elif command.startswith('load '): + source = command[5:].strip() + if source: + try: + process_document(source) + except Exception as e: + print(f"โŒ Error processing document: {e}") + else: + print("โ“ Please provide a URL or file path") + elif command.startswith('ask '): + question = command[4:].strip() + if question: + try: + answer = ask_question(question) + print(f"\n๐Ÿ’ฌ Q: {question}") + print(f"๐Ÿ“ A: {answer}") + except Exception as e: + print(f"โŒ Error: {e}") + else: + print("โ“ Please provide a question") + elif command.lower() == 'summary': + if processed_docs: + latest_doc = list(processed_docs.values())[-1] + print(f"\n๐Ÿ“ Summary:\n{latest_doc['summary']}") + else: + print("โ“ No documents processed yet") + elif command.lower() == 'facts': + if processed_docs: + latest_doc = list(processed_docs.values())[-1] + print(f"\n๐Ÿ” Key Facts:\n{latest_doc['facts']}") + else: + print("โ“ No documents processed yet") + else: + print("โ“ Unknown command. 
Type 'help' for options") + except (EOFError, KeyboardInterrupt): + print("\n๐Ÿ‘‹ Goodbye!") + break + + +def main(): + global client, llm, summary_chain, facts_chain, qa_chain, processed_docs + + print("๐Ÿš€ Starting LangChain + Llama Stack Document Processing Demo") + + client = LlamaStackClient( + base_url="http://localhost:8321/", + ) + + # Initialize the LangChain-compatible LLM + llm = LlamaStackLLM(client) + + # Test the wrapper + test_response = llm.invoke("Can you help me with the document processing?") + print(f"โœ… LangChain wrapper working!") + print(f"Response: {test_response[:100]}...") + + print("Available models:") + for m in client.models.list(): + print(f"- {m.identifier}") + + print("----") + print("Available shields (safety models):") + for s in client.shields.list(): + print(s.identifier) + print("----") + + # model_id = "llama3.2:3b" + model_id = "ollama/llama3:70b-instruct" + + response = client.inference.chat_completion( + model_id=model_id, + messages=[ + {"role": "system", "content": "You are a friendly assistant."}, + {"role": "user", "content": "Write a two-sentence poem about llama."}, + ], + ) + + print(response.completion_message.content) + + # Create chains by combining our LLM with prompt templates + summary_chain = LLMChain(llm=llm, prompt=summary_template) + facts_chain = LLMChain(llm=llm, prompt=facts_template) + qa_chain = LLMChain(llm=llm, prompt=qa_template) + + # Initialize storage for processed documents + processed_docs = {} + + print("โœ… Created 3 prompt templates:") + print(" โ€ข Summary: Condenses documents into key points") + print(" โ€ข Facts: Extracts important information as bullets") + print(" โ€ข Q&A: Answers questions based on document content") + + # Test template formatting + test_prompt = summary_template.format(document="This is a sample document about AI...") + print(f"\n๐Ÿ“ Example prompt: {len(test_prompt)} characters") + + # Start the interactive demo + interactive_demo() + +if __name__ == "__main__": + main() From 63375b8f45b09182fd4493e23c74410cf7ebdf57 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Tue, 26 Aug 2025 22:28:22 -0700 Subject: [PATCH 08/14] Update langchain-llama-stack.py --- .../langchain/langchain-llama-stack.py | 109 +++++++----------- 1 file changed, 39 insertions(+), 70 deletions(-) diff --git a/docs/notebooks/langchain/langchain-llama-stack.py b/docs/notebooks/langchain/langchain-llama-stack.py index 98aaa8d6c..aa0d9bd1c 100644 --- a/docs/notebooks/langchain/langchain-llama-stack.py +++ b/docs/notebooks/langchain/langchain-llama-stack.py @@ -1,19 +1,21 @@ +import html import os import re -import html +import tempfile +from typing import Any, List, Optional + import requests from bs4 import BeautifulSoup -from readability import Document as ReadabilityDocument -from markdownify import markdownify +from langchain.chains import LLMChain from langchain_community.document_loaders import PyPDFLoader, TextLoader -import tempfile - -from llama_stack_client import LlamaStackClient from langchain_core.language_models.llms import LLM -from typing import Optional, List, Any -from langchain.chains import LLMChain from langchain_core.prompts import PromptTemplate +from langchain_openai import ChatOpenAI + +from llama_stack_client import LlamaStackClient +from markdownify import markdownify +from readability import Document as ReadabilityDocument from rich.pretty import pprint # Global variables @@ -31,7 +33,7 @@ summary_template = PromptTemplate( {document} -SUMMARY:""" +SUMMARY:""", ) facts_template = 
PromptTemplate( @@ -41,7 +43,7 @@ facts_template = PromptTemplate( {document} KEY FACTS: --""" +-""", ) qa_template = PromptTemplate( @@ -53,36 +55,13 @@ DOCUMENT: QUESTION: {question} -ANSWER:""" +ANSWER:""", ) -class LlamaStackLLM(LLM): - """Simple LangChain wrapper for Llama Stack""" - - # Pydantic model fields - client: Any = None - model_id: str = "llama3:70b-instruct" - - def __init__(self, client, model_id: str = "llama3:70b-instruct"): - # Initialize with field values - super().__init__(client=client, model_id=model_id) - - def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str: - """Make inference call to Llama Stack""" - response = self.client.inference.chat_completion( - model_id=self.model_id, - messages=[{"role": "user", "content": prompt}] - ) - return response.completion_message.content - - @property - def _llm_type(self) -> str: - return "llama_stack" - def load_document(source: str) -> str: - is_url = source.startswith(('http://', 'https://')) - is_pdf = source.lower().endswith('.pdf') + is_url = source.startswith(("http://", "https://")) + is_pdf = source.lower().endswith(".pdf") if is_pdf: return load_pdf(source, is_url=is_url) elif is_url: @@ -110,19 +89,22 @@ def load_pdf(source: str, is_url: bool = False) -> str: def load_from_url(url: str) -> str: - headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'} + headers = {"User-Agent": "Mozilla/5.0 (compatible; DocumentLoader/1.0)"} response = requests.get(url, headers=headers, timeout=15) response.raise_for_status() doc = ReadabilityDocument(response.text) html_main = doc.summary(html_partial=True) soup = BeautifulSoup(html_main, "html.parser") - for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]): + for tag in soup( + ["script", "style", "noscript", "header", "footer", "nav", "aside"] + ): tag.decompose() md_text = markdownify(str(soup), heading_style="ATX") md_text = html.unescape(md_text) md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip() return md_text + def process_document(source: str): global summary_chain, facts_chain, processed_docs @@ -134,17 +116,14 @@ def process_document(source: str): print("Summary generated") print("๐Ÿ” Extracting key facts...") facts = facts_chain.invoke({"document": document})["text"] - processed_docs[source] = { - "document": document, - "summary": summary, - "facts": facts - } + processed_docs[source] = {"document": document, "summary": summary, "facts": facts} print(f"\nโœ… Processing complete!") print(f"๐Ÿ“Š Document: {len(document):,} chars") print(f"๐Ÿ“ Summary: {summary[:100]}...") print(f"๐Ÿ” Facts: {facts[:1000]}...") return processed_docs[source] + def ask_question(question: str, source: str = None): """Answer questions about processed documents""" global qa_chain, processed_docs @@ -156,10 +135,9 @@ def ask_question(question: str, source: str = None): else: # Use the most recent document doc_data = list(processed_docs.values())[-1] - answer = qa_chain.invoke({ - "document": doc_data["document"], - "question": question - })["text"] + answer = qa_chain.invoke({"document": doc_data["document"], "question": question})[ + "text" + ] return answer @@ -176,16 +154,16 @@ def interactive_demo(): while True: try: command = input("\n> ").strip() - if command.lower() in ['quit', 'exit']: + if command.lower() in ["quit", "exit"]: print("๐Ÿ‘‹ Thanks for exploring LangChain chains!") break - elif command.lower() == 'help': + elif command.lower() == "help": print("\nCommands:") print(" load - Process a document") 
print(" ask - Ask about the document") print(" summary - Show document summary") print(" facts - Show extracted facts") - elif command.startswith('load '): + elif command.startswith("load "): source = command[5:].strip() if source: try: @@ -194,7 +172,7 @@ def interactive_demo(): print(f"โŒ Error processing document: {e}") else: print("โ“ Please provide a URL or file path") - elif command.startswith('ask '): + elif command.startswith("ask "): question = command[4:].strip() if question: try: @@ -205,13 +183,13 @@ def interactive_demo(): print(f"โŒ Error: {e}") else: print("โ“ Please provide a question") - elif command.lower() == 'summary': + elif command.lower() == "summary": if processed_docs: latest_doc = list(processed_docs.values())[-1] print(f"\n๐Ÿ“ Summary:\n{latest_doc['summary']}") else: print("โ“ No documents processed yet") - elif command.lower() == 'facts': + elif command.lower() == "facts": if processed_docs: latest_doc = list(processed_docs.values())[-1] print(f"\n๐Ÿ” Key Facts:\n{latest_doc['facts']}") @@ -232,14 +210,14 @@ def main(): client = LlamaStackClient( base_url="http://localhost:8321/", ) - - # Initialize the LangChain-compatible LLM - llm = LlamaStackLLM(client) + os.environ["OPENAI_API_KEY"] = "dummy" + os.environ["OPENAI_BASE_URL"] = "http://0.0.0.0:8321/v1/openai/v1" + llm = ChatOpenAI(model="ollama/llama3:70b-instruct") # Test the wrapper test_response = llm.invoke("Can you help me with the document processing?") print(f"โœ… LangChain wrapper working!") - print(f"Response: {test_response[:100]}...") + print(f"Response: {test_response.content[:100]}...") print("Available models:") for m in client.models.list(): @@ -251,19 +229,7 @@ def main(): print(s.identifier) print("----") - # model_id = "llama3.2:3b" model_id = "ollama/llama3:70b-instruct" - - response = client.inference.chat_completion( - model_id=model_id, - messages=[ - {"role": "system", "content": "You are a friendly assistant."}, - {"role": "user", "content": "Write a two-sentence poem about llama."}, - ], - ) - - print(response.completion_message.content) - # Create chains by combining our LLM with prompt templates summary_chain = LLMChain(llm=llm, prompt=summary_template) facts_chain = LLMChain(llm=llm, prompt=facts_template) @@ -278,11 +244,14 @@ def main(): print(" โ€ข Q&A: Answers questions based on document content") # Test template formatting - test_prompt = summary_template.format(document="This is a sample document about AI...") + test_prompt = summary_template.format( + document="This is a sample document about AI..." 
+ ) print(f"\n๐Ÿ“ Example prompt: {len(test_prompt)} characters") # Start the interactive demo interactive_demo() + if __name__ == "__main__": main() From 74e95524d76483721dfd27eabeb395c2aa2c9fbe Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 28 Aug 2025 15:36:38 -0700 Subject: [PATCH 09/14] Update README.md --- docs/notebooks/langchain/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks/langchain/README.md b/docs/notebooks/langchain/README.md index a6dbd2266..bf03ef5a4 100644 --- a/docs/notebooks/langchain/README.md +++ b/docs/notebooks/langchain/README.md @@ -54,7 +54,7 @@ llama stack run your-config --port 8321 source llama-env-py312/bin/activate # Run the interactive CLI -cd /home/omara/langchain_llamastack +cd /langchain_llamastack python langchain-llama-stack.py ``` From 035ac8af41c3e8783b833ba25b3a25da004a036d Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 28 Aug 2025 17:30:33 -0700 Subject: [PATCH 10/14] Update README.md --- docs/notebooks/langchain/README.md | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/docs/notebooks/langchain/README.md b/docs/notebooks/langchain/README.md index bf03ef5a4..6661c586e 100644 --- a/docs/notebooks/langchain/README.md +++ b/docs/notebooks/langchain/README.md @@ -10,12 +10,6 @@ - Llama Stack server running on `http://localhost:8321/` - Ollama or compatible model server -### Required Python Packages -```bash -pip install llama-stack-client langchain langchain-core langchain-community -pip install beautifulsoup4 markdownify readability-lxml requests -``` - ### Environment Setup ```bash # Create and activate virtual environment @@ -50,11 +44,9 @@ llama stack run your-config --port 8321 ### How to Run ```bash -# Activate environment -source llama-env-py312/bin/activate # Run the interactive CLI -cd /langchain_llamastack +cd /docs/notebooks/langchain/ python langchain-llama-stack.py ``` From 4f87a5dd72d2b6b137d8b461aaef4aab42a70ff1 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 28 Aug 2025 17:39:37 -0700 Subject: [PATCH 11/14] Update README.md --- docs/notebooks/langchain/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks/langchain/README.md b/docs/notebooks/langchain/README.md index 6661c586e..fb0fe21a7 100644 --- a/docs/notebooks/langchain/README.md +++ b/docs/notebooks/langchain/README.md @@ -17,7 +17,7 @@ python3.12 -m venv llama-env-py312 source llama-env-py312/bin/activate # Install dependencies -pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests +pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests langchain_openai ``` --- From 9cf73a6bdff0634d9664dcfc02274eff537d3863 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Tue, 2 Sep 2025 10:30:17 -0700 Subject: [PATCH 12/14] Update langchain-llama-stack.py --- docs/notebooks/langchain/langchain-llama-stack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks/langchain/langchain-llama-stack.py b/docs/notebooks/langchain/langchain-llama-stack.py index aa0d9bd1c..8f61b33c2 100644 --- a/docs/notebooks/langchain/langchain-llama-stack.py +++ b/docs/notebooks/langchain/langchain-llama-stack.py @@ -212,7 +212,7 @@ def main(): ) os.environ["OPENAI_API_KEY"] = "dummy" os.environ["OPENAI_BASE_URL"] = "http://0.0.0.0:8321/v1/openai/v1" - llm = ChatOpenAI(model="ollama/llama3:70b-instruct") + 
llm = ChatOpenAI(model="ollama/llama3:70b-instruct", base_url="http://localhost:8321/v1/openai/v1") # Test the wrapper test_response = llm.invoke("Can you help me with the document processing?") From 875069f535763b9924549c023552e918e632832c Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 18 Sep 2025 13:57:14 -0700 Subject: [PATCH 13/14] Update langchain-llama-stack.py --- docs/notebooks/langchain/langchain-llama-stack.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/notebooks/langchain/langchain-llama-stack.py b/docs/notebooks/langchain/langchain-llama-stack.py index 8f61b33c2..99e4ebab6 100644 --- a/docs/notebooks/langchain/langchain-llama-stack.py +++ b/docs/notebooks/langchain/langchain-llama-stack.py @@ -210,8 +210,6 @@ def main(): client = LlamaStackClient( base_url="http://localhost:8321/", ) - os.environ["OPENAI_API_KEY"] = "dummy" - os.environ["OPENAI_BASE_URL"] = "http://0.0.0.0:8321/v1/openai/v1" llm = ChatOpenAI(model="ollama/llama3:70b-instruct", base_url="http://localhost:8321/v1/openai/v1") # Test the wrapper From d064c9e99e698e7a5487660ac5e7ae7af9259317 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 18 Sep 2025 14:03:43 -0700 Subject: [PATCH 14/14] ran pre-commit ran pre-commit --- docs/notebooks/langchain/README.md | 6 +++--- docs/notebooks/langchain/langchain-llama-stack.py | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/notebooks/langchain/README.md b/docs/notebooks/langchain/README.md index fb0fe21a7..b72a5e65b 100644 --- a/docs/notebooks/langchain/README.md +++ b/docs/notebooks/langchain/README.md @@ -123,14 +123,13 @@ import requests # Process a document response = requests.post( "http://localhost:8000/process", - json={"source": "https://en.wikipedia.org/wiki/Deep_learning"} + json={"source": "https://en.wikipedia.org/wiki/Deep_learning"}, ) print(response.json()) # Ask a question response = requests.post( - "http://localhost:8000/ask", - json={"question": "What are neural networks?"} + "http://localhost:8000/ask", json={"question": "What are neural networks?"} ) print(response.json()) @@ -180,6 +179,7 @@ To change the model, edit the `model_id` parameter in the respective files. To enable verbose logging, add this to the beginning of either file: ```python import logging + logging.basicConfig(level=logging.DEBUG) ``` diff --git a/docs/notebooks/langchain/langchain-llama-stack.py b/docs/notebooks/langchain/langchain-llama-stack.py index 99e4ebab6..d67f23f50 100644 --- a/docs/notebooks/langchain/langchain-llama-stack.py +++ b/docs/notebooks/langchain/langchain-llama-stack.py @@ -1,3 +1,9 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + import html import os import re