From 849c12b9ac072f8d37645945d2534b7c8d2c0dc6 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Wed, 20 Aug 2025 11:15:31 -0700 Subject: [PATCH 01/14] Added llama stack-langChain integration example scripts --- docs/notebooks/langChain/README.md | 306 +++++++++++++ .../langChain/langchain_llamastack.py | 290 +++++++++++++ .../langChain/langchain_llamastack_ray.py | 403 ++++++++++++++++++ 3 files changed, 999 insertions(+) create mode 100644 docs/notebooks/langChain/README.md create mode 100644 docs/notebooks/langChain/langchain_llamastack.py create mode 100644 docs/notebooks/langChain/langchain_llamastack_ray.py diff --git a/docs/notebooks/langChain/README.md b/docs/notebooks/langChain/README.md new file mode 100644 index 000000000..f36a39ce9 --- /dev/null +++ b/docs/notebooks/langChain/README.md @@ -0,0 +1,306 @@ +# LangChain + Llama Stack Document Processing + +This repository contains two different implementations of document processing using LangChain and Llama Stack: + +1. **`langchain_llamastack.py`** - Interactive CLI version +2. **`langchain_llamastack_ray.py`** - Ray Serve API version + +Both versions provide AI-powered document processing capabilities including summarization, fact extraction, and question-answering. + +--- + +## ๐Ÿ“‹ Prerequisites + +### System Requirements +- Python 3.12+ +- Ray Serve (for API version) +- Llama Stack server running on `http://localhost:8321/` +- Ollama or compatible model server + +### Required Python Packages +```bash +pip install llama-stack-client langchain langchain-core langchain-community +pip install beautifulsoup4 markdownify readability-lxml requests +pip install ray[serve] starlette # For Ray Serve version only +``` + +### Environment Setup +```bash +# Create and activate virtual environment +python3.12 -m venv llama-env-py312 +source llama-env-py312/bin/activate + +# Install dependencies +pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests ray[serve] starlette +``` + +--- + +## ๐Ÿš€ Quick Start + +### Start Llama Stack Server +Before running either version, ensure your Llama Stack server is running: +```bash +# Start Llama Stack server (example) +llama stack run your-config --port 8321 +``` + +--- + +## ๐Ÿ“– Option 1: Interactive CLI Version (`langchain_llamastack_updated.py`) + +### Features +- โœ… Interactive command-line interface +- โœ… Document loading from URLs and PDFs +- โœ… AI-powered summarization and fact extraction +- โœ… Question-answering based on document content +- โœ… Session-based document storage + +### How to Run +```bash +# Activate environment +source llama-env-py312/bin/activate + +# Run the interactive CLI +cd /home/omara/langchain_llamastack +python langchain_llamastack_updated.py +``` + +### Usage Commands +Once running, you can use these interactive commands: + +``` +๐ŸŽฏ Interactive Document Processing Demo +Commands: + load - Process a document + ask - Ask about the document + summary - Show document summary + facts - Show extracted facts + help - Show commands + quit - Exit demo +``` + +### Example Session +``` +> load https://en.wikipedia.org/wiki/Artificial_intelligence +๐Ÿ“„ Loading document from: https://en.wikipedia.org/wiki/Artificial_intelligence +โœ… Loaded 45,832 characters +๐Ÿ“ Generating summary... +๐Ÿ” Extracting key facts... +โœ… Processing complete! + +> summary +๐Ÿ“ Summary: +Artificial intelligence (AI) is the simulation of human intelligence... + +> ask What are the main types of AI? 
+๐Ÿ’ฌ Q: What are the main types of AI? +๐Ÿ“ A: Based on the document, the main types of AI include... + +> facts +๐Ÿ” Key Facts: +- AI was founded as an academic discipline in 1956 +- Machine learning is a subset of AI... + +> quit +๐Ÿ‘‹ Thanks for exploring LangChain chains! +``` + +--- + +## ๐ŸŒ Option 2: Ray Serve API Version (`langchain_llamastack_ray.py`) + +### Features +- โœ… RESTful HTTP API +- โœ… Persistent service (runs indefinitely) +- โœ… Multiple endpoints for different operations +- โœ… JSON request/response format +- โœ… Concurrent request handling + +### How to Run +```bash +# Activate environment +source llama-env-py312/bin/activate + +# Start the Ray Serve API +cd /home/omara/langchain_llamastack +python langchain_llamastack_ray.py +``` + +### Service Endpoints + +| Method | Endpoint | Description | Parameters | +|--------|----------|-------------|------------| +| GET | `/` | Service status | None | +| POST | `/process` | Process document | `{"source": "url_or_path"}` | +| POST | `/ask` | Ask question | `{"question": "text", "source": "optional"}` | +| GET | `/summary` | Get summary | `?source=url` (optional) | +| GET | `/facts` | Get facts | `?source=url` (optional) | +| GET | `/docs` | List documents | None | + +### API Usage Examples + +#### Using curl: +```bash +# Check service status +curl http://localhost:8000/ + +# Process a document +curl -X POST http://localhost:8000/process \ + -H 'Content-Type: application/json' \ + -d '{"source": "https://en.wikipedia.org/wiki/Machine_learning"}' + +# Ask a question +curl -X POST http://localhost:8000/ask \ + -H 'Content-Type: application/json' \ + -d '{"question": "What is machine learning?"}' + +# Get summary +curl http://localhost:8000/summary + +# Get facts +curl http://localhost:8000/facts + +# List all processed documents +curl http://localhost:8000/docs +``` + +#### Using Python requests: +```python +import requests + +# Process a document +response = requests.post( + "http://localhost:8000/process", + json={"source": "https://en.wikipedia.org/wiki/Deep_learning"} +) +print(response.json()) + +# Ask a question +response = requests.post( + "http://localhost:8000/ask", + json={"question": "What are neural networks?"} +) +print(response.json()) + +# Get facts +response = requests.get("http://localhost:8000/facts") +print(response.json()) +``` + +--- + +## ๐Ÿ”ง Configuration + +### Model Configuration +Both versions use these models by default: +- **Model ID**: `llama3.2:3b` +- **Llama Stack URL**: `http://localhost:8321/` + +To change the model, edit the `model_id` parameter in the respective files. + +### Supported Document Types +- โœ… **URLs**: Any web page (extracted using readability) +- โœ… **PDF files**: Local or remote PDF documents +- โŒ Plain text files (can be added if needed) + +--- + +## ๐Ÿ› ๏ธ Troubleshooting + +### Common Issues + +#### 1. Connection Refused to Llama Stack +**Error**: `Connection refused to http://localhost:8321/` +**Solution**: +- Ensure Llama Stack server is running +- Check if port 8321 is correct +- Verify network connectivity + +#### 2. Model Not Found +**Error**: `Model not found: llama3.2:3b` +**Solution**: +- Check available models: `curl http://localhost:8321/models/list` +- Update `model_id` in the code to match available models + +#### 3. Ray Serve Port Already in Use +**Error**: `Port 8000 already in use` +**Solution**: +```bash +# Kill process using port 8000 +lsof -ti :8000 | xargs kill -9 + +# Or use a different port by modifying the code +``` + +#### 4. 
Missing Dependencies +**Error**: `ModuleNotFoundError: No module named 'ray'` +**Solution**: +```bash +pip install ray[serve] starlette +``` + +### Debug Mode +To enable verbose logging, add this to the beginning of either file: +```python +import logging +logging.basicConfig(level=logging.DEBUG) +``` + +--- + +## ๐Ÿ“Š Performance Notes + +### CLI Version +- **Pros**: Simple to use, interactive, good for testing +- **Cons**: Single-threaded, session-based only +- **Best for**: Development, testing, manual document analysis + +### Ray Serve Version +- **Pros**: Concurrent requests, persistent service, API integration +- **Cons**: More complex setup, requires Ray +- **Best for**: Production, integration with other services, high throughput + +--- + +## ๐Ÿ›‘ Stopping Services + +### CLI Version +- Press `Ctrl+C` or type `quit` in the interactive prompt + +### Ray Serve Version +- Press `Ctrl+C` in the terminal running the service +- The service will gracefully shutdown and clean up resources + +--- + +## ๐Ÿ“ Examples + +### CLI Workflow +1. Start: `python langchain_llamastack_updated.py` +2. Load document: `load https://arxiv.org/pdf/2103.00020.pdf` +3. Get summary: `summary` +4. Ask questions: `ask What are the main contributions?` +5. Exit: `quit` + +### API Workflow +1. Start: `python langchain_llamastack_ray.py` +2. Process: `curl -X POST http://localhost:8000/process -d '{"source": "https://example.com"}'` +3. Query: `curl -X POST http://localhost:8000/ask -d '{"question": "What is this about?"}'` +4. Stop: `Ctrl+C` + +--- + +## ๐Ÿค Contributing + +To extend functionality: +1. Add new prompt templates for different analysis types +2. Support additional document formats +3. Add caching for processed documents +4. Implement user authentication for API version + +--- + +## ๐Ÿ“œ License + +This project is for educational and research purposes. diff --git a/docs/notebooks/langChain/langchain_llamastack.py b/docs/notebooks/langChain/langchain_llamastack.py new file mode 100644 index 000000000..2f8301ea9 --- /dev/null +++ b/docs/notebooks/langChain/langchain_llamastack.py @@ -0,0 +1,290 @@ +import os +import re +import html +import requests +from bs4 import BeautifulSoup +from readability import Document as ReadabilityDocument +from markdownify import markdownify +from langchain_community.document_loaders import PyPDFLoader, TextLoader +import tempfile + +from llama_stack_client import LlamaStackClient + +from langchain_core.language_models.llms import LLM +from typing import Optional, List, Any +from langchain.chains import LLMChain +from langchain_core.prompts import PromptTemplate +from rich.pretty import pprint + +# Global variables +client = None +llm = None +summary_chain = None +facts_chain = None +qa_chain = None +processed_docs = {} + +# Prompt Templates (defined globally) +summary_template = PromptTemplate( + input_variables=["document"], + template="""Create a concise summary of this document in 5-10 sentences: + +{document} + +SUMMARY:""" +) + +facts_template = PromptTemplate( + input_variables=["document"], + template="""Extract the most important facts from this document. List them as bullet points: + +{document} + +KEY FACTS: +-""" +) + +qa_template = PromptTemplate( + input_variables=["document", "question"], + template="""Based on the following document, answer the question. If the answer isn't in the document, say so. 
+ +DOCUMENT: +{document} + +QUESTION: {question} + +ANSWER:""" +) + +class LlamaStackLLM(LLM): + """Simple LangChain wrapper for Llama Stack""" + + # Pydantic model fields + client: Any = None + #model_id: str = "meta-llama/Llama-4-Maverick-17B-128E-Instruct" + # model_id: str = "meta-llama/Llama-3.3-70B-Instruct" + model_id: str = "llama3:70b-instruct" + + def __init__(self, client, model_id: str = "llama3:70b-instruct"): + # Initialize with field values + super().__init__(client=client, model_id=model_id) + + def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str: + """Make inference call to Llama Stack""" + response = self.client.inference.chat_completion( + model_id=self.model_id, + messages=[{"role": "user", "content": prompt}] + ) + return response.completion_message.content + + @property + def _llm_type(self) -> str: + return "llama_stack" + + +def load_document(source: str) -> str: + is_url = source.startswith(('http://', 'https://')) + is_pdf = source.lower().endswith('.pdf') + if is_pdf: + return load_pdf(source, is_url=is_url) + elif is_url: + return load_from_url(source) + else: + raise ValueError(f"Unsupported format. Use URLs or PDF files.") + + +def load_pdf(source: str, is_url: bool = False) -> str: + if is_url: + response = requests.get(source) + response.raise_for_status() + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: + temp_file.write(response.content) + file_path = temp_file.name + else: + file_path = source + try: + loader = PyPDFLoader(file_path) + docs = loader.load() + return "\\n\\n".join([doc.page_content for doc in docs]) + finally: + if is_url: + os.remove(file_path) + + +def load_from_url(url: str) -> str: + headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'} + response = requests.get(url, headers=headers, timeout=15) + response.raise_for_status() + doc = ReadabilityDocument(response.text) + html_main = doc.summary(html_partial=True) + soup = BeautifulSoup(html_main, "html.parser") + for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]): + tag.decompose() + md_text = markdownify(str(soup), heading_style="ATX") + md_text = html.unescape(md_text) + md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip() + return md_text + +def process_document(source: str): + global summary_chain, facts_chain, processed_docs + + print(f"๐Ÿ“„ Loading document from: {source}") + document = load_document(source) + print(f"โœ… Loaded {len(document):,} characters") + print("\n๐Ÿ“ Generating summary...") + summary = summary_chain.invoke({"document": document})["text"] + print("Summary generated") + print("๐Ÿ” Extracting key facts...") + facts = facts_chain.invoke({"document": document})["text"] + processed_docs[source] = { + "document": document, + "summary": summary, + "facts": facts + } + print(f"\nโœ… Processing complete!") + print(f"๐Ÿ“Š Document: {len(document):,} chars") + print(f"๐Ÿ“ Summary: {summary[:100]}...") + print(f"๐Ÿ” Facts: {facts[:1000]}...") + return processed_docs[source] + +def ask_question(question: str, source: str = None): + """Answer questions about processed documents""" + global qa_chain, processed_docs + + if not processed_docs: + return "No documents processed yet. Use process_document() first." 
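+    # Select which document to answer from: an explicit, already-processed source wins;
+    # otherwise fall back to the most recently processed document (dicts keep insertion order,
+    # so the last value is the newest entry).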
+ if source and source in processed_docs: + doc_data = processed_docs[source] + else: + # Use the most recent document + doc_data = list(processed_docs.values())[-1] + answer = qa_chain.invoke({ + "document": doc_data["document"], + "question": question + })["text"] + return answer + + +def interactive_demo(): + print("\n๐ŸŽฏ Interactive Document Processing Demo") + print("Commands:") + print(" load - Process a document") + print(" ask - Ask about the document") + print(" summary - Show document summary") + print(" facts - Show extracted facts") + print(" help - Show commands") + print(" quit - Exit demo") + + while True: + try: + command = input("\n> ").strip() + if command.lower() in ['quit', 'exit']: + print("๐Ÿ‘‹ Thanks for exploring LangChain chains!") + break + elif command.lower() == 'help': + print("\nCommands:") + print(" load - Process a document") + print(" ask - Ask about the document") + print(" summary - Show document summary") + print(" facts - Show extracted facts") + elif command.startswith('load '): + source = command[5:].strip() + if source: + try: + process_document(source) + except Exception as e: + print(f"โŒ Error processing document: {e}") + else: + print("โ“ Please provide a URL or file path") + elif command.startswith('ask '): + question = command[4:].strip() + if question: + try: + answer = ask_question(question) + print(f"\n๐Ÿ’ฌ Q: {question}") + print(f"๐Ÿ“ A: {answer}") + except Exception as e: + print(f"โŒ Error: {e}") + else: + print("โ“ Please provide a question") + elif command.lower() == 'summary': + if processed_docs: + latest_doc = list(processed_docs.values())[-1] + print(f"\n๐Ÿ“ Summary:\n{latest_doc['summary']}") + else: + print("โ“ No documents processed yet") + elif command.lower() == 'facts': + if processed_docs: + latest_doc = list(processed_docs.values())[-1] + print(f"\n๐Ÿ” Key Facts:\n{latest_doc['facts']}") + else: + print("โ“ No documents processed yet") + else: + print("โ“ Unknown command. 
Type 'help' for options") + except (EOFError, KeyboardInterrupt): + print("\n๐Ÿ‘‹ Goodbye!") + break + + +def main(): + global client, llm, summary_chain, facts_chain, qa_chain, processed_docs + + print("๐Ÿš€ Starting LangChain + Llama Stack Document Processing Demo") + + client = LlamaStackClient( + base_url="http://localhost:8321/", + ) + + # Initialize the LangChain-compatible LLM + llm = LlamaStackLLM(client) + + # Test the wrapper + test_response = llm.invoke("Can you help me with the document processing?") + print(f"โœ… LangChain wrapper working!") + print(f"Response: {test_response[:100]}...") + + print("Available models:") + for m in client.models.list(): + print(f"- {m.identifier}") + + print("----") + print("Available shields (safety models):") + for s in client.shields.list(): + print(s.identifier) + print("----") + + # model_id = "llama3.2:3b" + model_id = "ollama/llama3:70b-instruct" + + response = client.inference.chat_completion( + model_id=model_id, + messages=[ + {"role": "system", "content": "You are a friendly assistant."}, + {"role": "user", "content": "Write a two-sentence poem about llama."}, + ], + ) + + print(response.completion_message.content) + + # Create chains by combining our LLM with prompt templates + summary_chain = LLMChain(llm=llm, prompt=summary_template) + facts_chain = LLMChain(llm=llm, prompt=facts_template) + qa_chain = LLMChain(llm=llm, prompt=qa_template) + + # Initialize storage for processed documents + processed_docs = {} + + print("โœ… Created 3 prompt templates:") + print(" โ€ข Summary: Condenses documents into key points") + print(" โ€ข Facts: Extracts important information as bullets") + print(" โ€ข Q&A: Answers questions based on document content") + + # Test template formatting + test_prompt = summary_template.format(document="This is a sample document about AI...") + print(f"\n๐Ÿ“ Example prompt: {len(test_prompt)} characters") + + # Start the interactive demo + interactive_demo() + +if __name__ == "__main__": + main() diff --git a/docs/notebooks/langChain/langchain_llamastack_ray.py b/docs/notebooks/langChain/langchain_llamastack_ray.py new file mode 100644 index 000000000..7ef42dfd7 --- /dev/null +++ b/docs/notebooks/langChain/langchain_llamastack_ray.py @@ -0,0 +1,403 @@ +import os +import re +import html +import json +import time +import requests +from bs4 import BeautifulSoup +from readability import Document as ReadabilityDocument +from markdownify import markdownify +from langchain_community.document_loaders import PyPDFLoader, TextLoader +import tempfile + +from llama_stack_client import LlamaStackClient +from langchain_core.language_models.llms import LLM +from typing import Optional, List, Any, Dict +from langchain.chains import LLMChain +from langchain_core.prompts import PromptTemplate + +from starlette.requests import Request +from ray import serve + +# Prompt Templates (defined globally) +summary_template = PromptTemplate( + input_variables=["document"], + template="""Create a concise summary of this document in 5-10 sentences: + +{document} + +SUMMARY:""" +) + +facts_template = PromptTemplate( + input_variables=["document"], + template="""Extract the most important facts from this document. List them as bullet points: + +{document} + +KEY FACTS: +-""" +) + +qa_template = PromptTemplate( + input_variables=["document", "question"], + template="""Based on the following document, answer the question. If the answer isn't in the document, say so. 
+ +DOCUMENT: +{document} + +QUESTION: {question} + +ANSWER:""" +) + +class LlamaStackLLM(LLM): + """Simple LangChain wrapper for Llama Stack""" + + # Pydantic model fields + client: Any = None + model_id: str = "llama3.2:3b" + + def __init__(self, client, model_id: str = "llama3.2:3b"): + # Initialize with field values + super().__init__(client=client, model_id=model_id) + + def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str: + """Make inference call to Llama Stack""" + response = self.client.inference.chat_completion( + model_id=self.model_id, + messages=[{"role": "user", "content": prompt}] + ) + return response.completion_message.content + + @property + def _llm_type(self) -> str: + return "llama_stack" + + +def load_document(source: str) -> str: + is_url = source.startswith(('http://', 'https://')) + is_pdf = source.lower().endswith('.pdf') + if is_pdf: + return load_pdf(source, is_url=is_url) + elif is_url: + return load_from_url(source) + else: + raise ValueError(f"Unsupported format. Use URLs or PDF files.") + + +def load_pdf(source: str, is_url: bool = False) -> str: + if is_url: + response = requests.get(source) + response.raise_for_status() + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: + temp_file.write(response.content) + file_path = temp_file.name + else: + file_path = source + try: + loader = PyPDFLoader(file_path) + docs = loader.load() + return "\\n\\n".join([doc.page_content for doc in docs]) + finally: + if is_url: + os.remove(file_path) + + +def load_from_url(url: str) -> str: + headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'} + response = requests.get(url, headers=headers, timeout=15) + response.raise_for_status() + doc = ReadabilityDocument(response.text) + html_main = doc.summary(html_partial=True) + soup = BeautifulSoup(html_main, "html.parser") + for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]): + tag.decompose() + md_text = markdownify(str(soup), heading_style="ATX") + md_text = html.unescape(md_text) + md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip() + return md_text + + +@serve.deployment +class LangChainLlamaStackService: + """Ray Serve deployment for LangChain + Llama Stack document processing""" + + def __init__(self): + print("๐Ÿš€ Initializing LangChain + Llama Stack Service...") + + # Initialize Llama Stack client + self.client = LlamaStackClient(base_url="http://localhost:8321/") + + # Initialize LangChain-compatible LLM + self.llm = LlamaStackLLM(self.client) + + # Create processing chains + self.summary_chain = LLMChain(llm=self.llm, prompt=summary_template) + self.facts_chain = LLMChain(llm=self.llm, prompt=facts_template) + self.qa_chain = LLMChain(llm=self.llm, prompt=qa_template) + + # Storage for processed documents + self.processed_docs = {} + + print("โœ… Service initialized successfully!") + + async def __call__(self, request: Request) -> Dict: + """Handle HTTP requests to different endpoints""" + path = request.url.path + method = request.method + + try: + if path == "/" and method == "GET": + return await self._handle_status() + elif path == "/process" and method == "POST": + return await self._handle_process(request) + elif path == "/ask" and method == "POST": + return await self._handle_ask(request) + elif path == "/summary" and method == "GET": + return await self._handle_summary(request) + elif path == "/facts" and method == "GET": + return await self._handle_facts(request) + elif path == "/docs" and method == "GET": + 
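+                # GET /docs takes no parameters; it lists every processed document with short previews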
return await self._handle_list_docs() + else: + return { + "error": "Not found", + "available_endpoints": { + "GET /": "Service status", + "POST /process": "Process document (body: {\"source\": \"url_or_path\"})", + "POST /ask": "Ask question (body: {\"question\": \"your_question\", \"source\": \"optional_doc_id\"})", + "GET /summary?source=doc_id": "Get document summary", + "GET /facts?source=doc_id": "Get document facts", + "GET /docs": "List processed documents" + } + } + except Exception as e: + return {"error": str(e)} + + async def _handle_status(self) -> Dict: + """Return service status""" + return { + "status": "healthy", + "service": "LangChain + Llama Stack Document Processing", + "documents_processed": len(self.processed_docs), + "available_models": [m.identifier for m in self.client.models.list()], + "endpoints": ["/", "/process", "/ask", "/summary", "/facts", "/docs"] + } + + async def _handle_process(self, request: Request) -> Dict: + """Process a document from URL or file path""" + body = await request.json() + source = body.get("source") + + if not source: + return {"error": "Missing 'source' in request body"} + + try: + # Load document + document = load_document(source) + + # Generate summary and facts + summary = self.summary_chain.invoke({"document": document})["text"] + facts = self.facts_chain.invoke({"document": document})["text"] + + # Store processed document + self.processed_docs[source] = { + "document": document, + "summary": summary, + "facts": facts, + "processed_at": time.time() + } + + return { + "success": True, + "source": source, + "document_length": len(document), + "summary_preview": summary[:200] + "..." if len(summary) > 200 else summary, + "facts_preview": facts[:300] + "..." if len(facts) > 300 else facts + } + + except Exception as e: + return {"error": f"Failed to process document: {str(e)}"} + + async def _handle_ask(self, request: Request) -> Dict: + """Answer questions about processed documents""" + body = await request.json() + question = body.get("question") + source = body.get("source") + + if not question: + return {"error": "Missing 'question' in request body"} + + if not self.processed_docs: + return {"error": "No documents processed yet. 
Use /process endpoint first."} + + try: + # Select document + if source and source in self.processed_docs: + doc_data = self.processed_docs[source] + else: + # Use the most recent document + doc_data = list(self.processed_docs.values())[-1] + source = list(self.processed_docs.keys())[-1] + + # Generate answer + answer = self.qa_chain.invoke({ + "document": doc_data["document"], + "question": question + })["text"] + + return { + "question": question, + "answer": answer, + "source": source + } + + except Exception as e: + return {"error": f"Failed to answer question: {str(e)}"} + + async def _handle_summary(self, request: Request) -> Dict: + """Get summary of a processed document""" + source = request.query_params.get("source") + + if not self.processed_docs: + return {"error": "No documents processed yet"} + + if source and source in self.processed_docs: + doc_data = self.processed_docs[source] + else: + # Use the most recent document + doc_data = list(self.processed_docs.values())[-1] + source = list(self.processed_docs.keys())[-1] + + return { + "source": source, + "summary": doc_data["summary"] + } + + async def _handle_facts(self, request: Request) -> Dict: + """Get facts from a processed document""" + source = request.query_params.get("source") + + if not self.processed_docs: + return {"error": "No documents processed yet"} + + if source and source in self.processed_docs: + doc_data = self.processed_docs[source] + else: + # Use the most recent document + doc_data = list(self.processed_docs.values())[-1] + source = list(self.processed_docs.keys())[-1] + + return { + "source": source, + "facts": doc_data["facts"] + } + + async def _handle_list_docs(self) -> Dict: + """List all processed documents""" + docs_info = [] + for source, data in self.processed_docs.items(): + docs_info.append({ + "source": source, + "document_length": len(data["document"]), + "processed_at": data["processed_at"], + "summary_preview": data["summary"][:100] + "..." 
if len(data["summary"]) > 100 else data["summary"] + }) + + return { + "processed_documents": docs_info, + "total_count": len(self.processed_docs) + } + + +def main(): + """Main function to start the Ray Serve application""" + + # Create the application + app = LangChainLlamaStackService.bind() + + # Deploy the application locally + print("๐Ÿš€ Starting LangChain + Llama Stack Ray Serve application...") + serve.run(app, route_prefix="/") + + # Wait for service to initialize + print("โณ Waiting for service to initialize...") + time.sleep(5) + + # Test the service + try: + response = requests.get("http://localhost:8000/") + print(f"โœ… Service response: {response.json()}") + print("๐ŸŽ‰ Service is running successfully!") + except Exception as e: + print(f"โš ๏ธ Could not test service: {e}") + print(" Service might still be starting up...") + + # Show service information + print("\n" + "="*60) + print("๐ŸŒ LangChain + Llama Stack Service is running on:") + print(" http://localhost:8000/") + print("="*60) + print("๐Ÿ“‹ Available endpoints:") + print(" GET / - Service status") + print(" POST /process - Process document") + print(" POST /ask - Ask questions") + print(" GET /summary - Get document summary") + print(" GET /facts - Get document facts") + print(" GET /docs - List processed documents") + print("="*60) + print("๐Ÿงช Example requests:") + print(" # Process a document:") + print(" curl -X POST http://localhost:8000/process \\") + print(" -H 'Content-Type: application/json' \\") + print(" -d '{\"source\": \"https://example.com/article\"}'") + print("") + print(" # Ask a question:") + print(" curl -X POST http://localhost:8000/ask \\") + print(" -H 'Content-Type: application/json' \\") + print(" -d '{\"question\": \"What is the main topic?\"}'") + print("") + print(" # Get summary:") + print(" curl http://localhost:8000/summary") + print("="*60) + print("๐Ÿ›‘ Press Ctrl+C to stop the service...") + + try: + # Keep the service alive + while True: + time.sleep(1) + except KeyboardInterrupt: + print("\n๐Ÿ›‘ Stopping service...") + serve.shutdown() + print("๐Ÿ‘‹ Service stopped successfully!") + +if __name__ == "__main__": + main() + + + + + + + + +# import requests + +# # Step 1: First, process/load the document +# process_response = requests.post( +# "http://localhost:8000/process", +# json={"source": "https://en.wikipedia.org/wiki/What%27s_Happening!!"} +# ) +# print("Processing result:", process_response.json()) + +# # Step 2: Then get the facts +# facts_response = requests.get("http://localhost:8000/facts") +# print("Facts:", facts_response.json()) + +# # Or get facts for specific document +# facts_response = requests.get( +# "http://localhost:8000/facts", +# params={"source": "https://en.wikipedia.org/wiki/What%27s_Happening!!"} +# ) +# print("Facts for specific doc:", facts_response.json()) From 7d2ccd1d9f5b4e2d3865ffd26773d87359ca86e0 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 21 Aug 2025 18:59:02 -0700 Subject: [PATCH 02/14] Delete docs/notebooks/langChain/langchain_llamastack_ray.py Removed ray example --- .../langChain/langchain_llamastack_ray.py | 403 ------------------ 1 file changed, 403 deletions(-) delete mode 100644 docs/notebooks/langChain/langchain_llamastack_ray.py diff --git a/docs/notebooks/langChain/langchain_llamastack_ray.py b/docs/notebooks/langChain/langchain_llamastack_ray.py deleted file mode 100644 index 7ef42dfd7..000000000 --- a/docs/notebooks/langChain/langchain_llamastack_ray.py +++ /dev/null @@ -1,403 +0,0 @@ -import os -import re -import 
html -import json -import time -import requests -from bs4 import BeautifulSoup -from readability import Document as ReadabilityDocument -from markdownify import markdownify -from langchain_community.document_loaders import PyPDFLoader, TextLoader -import tempfile - -from llama_stack_client import LlamaStackClient -from langchain_core.language_models.llms import LLM -from typing import Optional, List, Any, Dict -from langchain.chains import LLMChain -from langchain_core.prompts import PromptTemplate - -from starlette.requests import Request -from ray import serve - -# Prompt Templates (defined globally) -summary_template = PromptTemplate( - input_variables=["document"], - template="""Create a concise summary of this document in 5-10 sentences: - -{document} - -SUMMARY:""" -) - -facts_template = PromptTemplate( - input_variables=["document"], - template="""Extract the most important facts from this document. List them as bullet points: - -{document} - -KEY FACTS: --""" -) - -qa_template = PromptTemplate( - input_variables=["document", "question"], - template="""Based on the following document, answer the question. If the answer isn't in the document, say so. - -DOCUMENT: -{document} - -QUESTION: {question} - -ANSWER:""" -) - -class LlamaStackLLM(LLM): - """Simple LangChain wrapper for Llama Stack""" - - # Pydantic model fields - client: Any = None - model_id: str = "llama3.2:3b" - - def __init__(self, client, model_id: str = "llama3.2:3b"): - # Initialize with field values - super().__init__(client=client, model_id=model_id) - - def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str: - """Make inference call to Llama Stack""" - response = self.client.inference.chat_completion( - model_id=self.model_id, - messages=[{"role": "user", "content": prompt}] - ) - return response.completion_message.content - - @property - def _llm_type(self) -> str: - return "llama_stack" - - -def load_document(source: str) -> str: - is_url = source.startswith(('http://', 'https://')) - is_pdf = source.lower().endswith('.pdf') - if is_pdf: - return load_pdf(source, is_url=is_url) - elif is_url: - return load_from_url(source) - else: - raise ValueError(f"Unsupported format. 
Use URLs or PDF files.") - - -def load_pdf(source: str, is_url: bool = False) -> str: - if is_url: - response = requests.get(source) - response.raise_for_status() - with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: - temp_file.write(response.content) - file_path = temp_file.name - else: - file_path = source - try: - loader = PyPDFLoader(file_path) - docs = loader.load() - return "\\n\\n".join([doc.page_content for doc in docs]) - finally: - if is_url: - os.remove(file_path) - - -def load_from_url(url: str) -> str: - headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'} - response = requests.get(url, headers=headers, timeout=15) - response.raise_for_status() - doc = ReadabilityDocument(response.text) - html_main = doc.summary(html_partial=True) - soup = BeautifulSoup(html_main, "html.parser") - for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]): - tag.decompose() - md_text = markdownify(str(soup), heading_style="ATX") - md_text = html.unescape(md_text) - md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip() - return md_text - - -@serve.deployment -class LangChainLlamaStackService: - """Ray Serve deployment for LangChain + Llama Stack document processing""" - - def __init__(self): - print("๐Ÿš€ Initializing LangChain + Llama Stack Service...") - - # Initialize Llama Stack client - self.client = LlamaStackClient(base_url="http://localhost:8321/") - - # Initialize LangChain-compatible LLM - self.llm = LlamaStackLLM(self.client) - - # Create processing chains - self.summary_chain = LLMChain(llm=self.llm, prompt=summary_template) - self.facts_chain = LLMChain(llm=self.llm, prompt=facts_template) - self.qa_chain = LLMChain(llm=self.llm, prompt=qa_template) - - # Storage for processed documents - self.processed_docs = {} - - print("โœ… Service initialized successfully!") - - async def __call__(self, request: Request) -> Dict: - """Handle HTTP requests to different endpoints""" - path = request.url.path - method = request.method - - try: - if path == "/" and method == "GET": - return await self._handle_status() - elif path == "/process" and method == "POST": - return await self._handle_process(request) - elif path == "/ask" and method == "POST": - return await self._handle_ask(request) - elif path == "/summary" and method == "GET": - return await self._handle_summary(request) - elif path == "/facts" and method == "GET": - return await self._handle_facts(request) - elif path == "/docs" and method == "GET": - return await self._handle_list_docs() - else: - return { - "error": "Not found", - "available_endpoints": { - "GET /": "Service status", - "POST /process": "Process document (body: {\"source\": \"url_or_path\"})", - "POST /ask": "Ask question (body: {\"question\": \"your_question\", \"source\": \"optional_doc_id\"})", - "GET /summary?source=doc_id": "Get document summary", - "GET /facts?source=doc_id": "Get document facts", - "GET /docs": "List processed documents" - } - } - except Exception as e: - return {"error": str(e)} - - async def _handle_status(self) -> Dict: - """Return service status""" - return { - "status": "healthy", - "service": "LangChain + Llama Stack Document Processing", - "documents_processed": len(self.processed_docs), - "available_models": [m.identifier for m in self.client.models.list()], - "endpoints": ["/", "/process", "/ask", "/summary", "/facts", "/docs"] - } - - async def _handle_process(self, request: Request) -> Dict: - """Process a document from URL or file path""" - body = await 
request.json() - source = body.get("source") - - if not source: - return {"error": "Missing 'source' in request body"} - - try: - # Load document - document = load_document(source) - - # Generate summary and facts - summary = self.summary_chain.invoke({"document": document})["text"] - facts = self.facts_chain.invoke({"document": document})["text"] - - # Store processed document - self.processed_docs[source] = { - "document": document, - "summary": summary, - "facts": facts, - "processed_at": time.time() - } - - return { - "success": True, - "source": source, - "document_length": len(document), - "summary_preview": summary[:200] + "..." if len(summary) > 200 else summary, - "facts_preview": facts[:300] + "..." if len(facts) > 300 else facts - } - - except Exception as e: - return {"error": f"Failed to process document: {str(e)}"} - - async def _handle_ask(self, request: Request) -> Dict: - """Answer questions about processed documents""" - body = await request.json() - question = body.get("question") - source = body.get("source") - - if not question: - return {"error": "Missing 'question' in request body"} - - if not self.processed_docs: - return {"error": "No documents processed yet. Use /process endpoint first."} - - try: - # Select document - if source and source in self.processed_docs: - doc_data = self.processed_docs[source] - else: - # Use the most recent document - doc_data = list(self.processed_docs.values())[-1] - source = list(self.processed_docs.keys())[-1] - - # Generate answer - answer = self.qa_chain.invoke({ - "document": doc_data["document"], - "question": question - })["text"] - - return { - "question": question, - "answer": answer, - "source": source - } - - except Exception as e: - return {"error": f"Failed to answer question: {str(e)}"} - - async def _handle_summary(self, request: Request) -> Dict: - """Get summary of a processed document""" - source = request.query_params.get("source") - - if not self.processed_docs: - return {"error": "No documents processed yet"} - - if source and source in self.processed_docs: - doc_data = self.processed_docs[source] - else: - # Use the most recent document - doc_data = list(self.processed_docs.values())[-1] - source = list(self.processed_docs.keys())[-1] - - return { - "source": source, - "summary": doc_data["summary"] - } - - async def _handle_facts(self, request: Request) -> Dict: - """Get facts from a processed document""" - source = request.query_params.get("source") - - if not self.processed_docs: - return {"error": "No documents processed yet"} - - if source and source in self.processed_docs: - doc_data = self.processed_docs[source] - else: - # Use the most recent document - doc_data = list(self.processed_docs.values())[-1] - source = list(self.processed_docs.keys())[-1] - - return { - "source": source, - "facts": doc_data["facts"] - } - - async def _handle_list_docs(self) -> Dict: - """List all processed documents""" - docs_info = [] - for source, data in self.processed_docs.items(): - docs_info.append({ - "source": source, - "document_length": len(data["document"]), - "processed_at": data["processed_at"], - "summary_preview": data["summary"][:100] + "..." 
if len(data["summary"]) > 100 else data["summary"] - }) - - return { - "processed_documents": docs_info, - "total_count": len(self.processed_docs) - } - - -def main(): - """Main function to start the Ray Serve application""" - - # Create the application - app = LangChainLlamaStackService.bind() - - # Deploy the application locally - print("๐Ÿš€ Starting LangChain + Llama Stack Ray Serve application...") - serve.run(app, route_prefix="/") - - # Wait for service to initialize - print("โณ Waiting for service to initialize...") - time.sleep(5) - - # Test the service - try: - response = requests.get("http://localhost:8000/") - print(f"โœ… Service response: {response.json()}") - print("๐ŸŽ‰ Service is running successfully!") - except Exception as e: - print(f"โš ๏ธ Could not test service: {e}") - print(" Service might still be starting up...") - - # Show service information - print("\n" + "="*60) - print("๐ŸŒ LangChain + Llama Stack Service is running on:") - print(" http://localhost:8000/") - print("="*60) - print("๐Ÿ“‹ Available endpoints:") - print(" GET / - Service status") - print(" POST /process - Process document") - print(" POST /ask - Ask questions") - print(" GET /summary - Get document summary") - print(" GET /facts - Get document facts") - print(" GET /docs - List processed documents") - print("="*60) - print("๐Ÿงช Example requests:") - print(" # Process a document:") - print(" curl -X POST http://localhost:8000/process \\") - print(" -H 'Content-Type: application/json' \\") - print(" -d '{\"source\": \"https://example.com/article\"}'") - print("") - print(" # Ask a question:") - print(" curl -X POST http://localhost:8000/ask \\") - print(" -H 'Content-Type: application/json' \\") - print(" -d '{\"question\": \"What is the main topic?\"}'") - print("") - print(" # Get summary:") - print(" curl http://localhost:8000/summary") - print("="*60) - print("๐Ÿ›‘ Press Ctrl+C to stop the service...") - - try: - # Keep the service alive - while True: - time.sleep(1) - except KeyboardInterrupt: - print("\n๐Ÿ›‘ Stopping service...") - serve.shutdown() - print("๐Ÿ‘‹ Service stopped successfully!") - -if __name__ == "__main__": - main() - - - - - - - - -# import requests - -# # Step 1: First, process/load the document -# process_response = requests.post( -# "http://localhost:8000/process", -# json={"source": "https://en.wikipedia.org/wiki/What%27s_Happening!!"} -# ) -# print("Processing result:", process_response.json()) - -# # Step 2: Then get the facts -# facts_response = requests.get("http://localhost:8000/facts") -# print("Facts:", facts_response.json()) - -# # Or get facts for specific document -# facts_response = requests.get( -# "http://localhost:8000/facts", -# params={"source": "https://en.wikipedia.org/wiki/What%27s_Happening!!"} -# ) -# print("Facts for specific doc:", facts_response.json()) From 4969953852c70fc3ff2840862f71e197b13a7f0e Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 21 Aug 2025 19:35:41 -0700 Subject: [PATCH 03/14] Updated the README and the name of the python script. 
Updated the README and the name of the python script to langchain-llama-stack --- docs/notebooks/langChain/README.md | 77 +------------------ ...llamastack.py => langchain-llama-stack.py} | 0 2 files changed, 2 insertions(+), 75 deletions(-) rename docs/notebooks/langChain/{langchain_llamastack.py => langchain-llama-stack.py} (100%) diff --git a/docs/notebooks/langChain/README.md b/docs/notebooks/langChain/README.md index f36a39ce9..dde6385d6 100644 --- a/docs/notebooks/langChain/README.md +++ b/docs/notebooks/langChain/README.md @@ -1,19 +1,12 @@ # LangChain + Llama Stack Document Processing -This repository contains two different implementations of document processing using LangChain and Llama Stack: - -1. **`langchain_llamastack.py`** - Interactive CLI version -2. **`langchain_llamastack_ray.py`** - Ray Serve API version - -Both versions provide AI-powered document processing capabilities including summarization, fact extraction, and question-answering. - +1. **`langchain-llamastack.py`** - Interactive CLI version --- ## ๐Ÿ“‹ Prerequisites ### System Requirements - Python 3.12+ -- Ray Serve (for API version) - Llama Stack server running on `http://localhost:8321/` - Ollama or compatible model server @@ -21,7 +14,6 @@ Both versions provide AI-powered document processing capabilities including summ ```bash pip install llama-stack-client langchain langchain-core langchain-community pip install beautifulsoup4 markdownify readability-lxml requests -pip install ray[serve] starlette # For Ray Serve version only ``` ### Environment Setup @@ -31,7 +23,7 @@ python3.12 -m venv llama-env-py312 source llama-env-py312/bin/activate # Install dependencies -pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests ray[serve] starlette +pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests ``` --- @@ -106,39 +98,6 @@ Artificial intelligence (AI) is the simulation of human intelligence... ๐Ÿ‘‹ Thanks for exploring LangChain chains! ``` ---- - -## ๐ŸŒ Option 2: Ray Serve API Version (`langchain_llamastack_ray.py`) - -### Features -- โœ… RESTful HTTP API -- โœ… Persistent service (runs indefinitely) -- โœ… Multiple endpoints for different operations -- โœ… JSON request/response format -- โœ… Concurrent request handling - -### How to Run -```bash -# Activate environment -source llama-env-py312/bin/activate - -# Start the Ray Serve API -cd /home/omara/langchain_llamastack -python langchain_llamastack_ray.py -``` - -### Service Endpoints - -| Method | Endpoint | Description | Parameters | -|--------|----------|-------------|------------| -| GET | `/` | Service status | None | -| POST | `/process` | Process document | `{"source": "url_or_path"}` | -| POST | `/ask` | Ask question | `{"question": "text", "source": "optional"}` | -| GET | `/summary` | Get summary | `?source=url` (optional) | -| GET | `/facts` | Get facts | `?source=url` (optional) | -| GET | `/docs` | List documents | None | - -### API Usage Examples #### Using curl: ```bash @@ -223,23 +182,8 @@ To change the model, edit the `model_id` parameter in the respective files. - Check available models: `curl http://localhost:8321/models/list` - Update `model_id` in the code to match available models -#### 3. 
Ray Serve Port Already in Use -**Error**: `Port 8000 already in use` -**Solution**: -```bash -# Kill process using port 8000 -lsof -ti :8000 | xargs kill -9 - -# Or use a different port by modifying the code -``` #### 4. Missing Dependencies -**Error**: `ModuleNotFoundError: No module named 'ray'` -**Solution**: -```bash -pip install ray[serve] starlette -``` - ### Debug Mode To enable verbose logging, add this to the beginning of either file: ```python @@ -255,23 +199,12 @@ logging.basicConfig(level=logging.DEBUG) - **Pros**: Simple to use, interactive, good for testing - **Cons**: Single-threaded, session-based only - **Best for**: Development, testing, manual document analysis - -### Ray Serve Version -- **Pros**: Concurrent requests, persistent service, API integration -- **Cons**: More complex setup, requires Ray -- **Best for**: Production, integration with other services, high throughput - --- ## ๐Ÿ›‘ Stopping Services ### CLI Version - Press `Ctrl+C` or type `quit` in the interactive prompt - -### Ray Serve Version -- Press `Ctrl+C` in the terminal running the service -- The service will gracefully shutdown and clean up resources - --- ## ๐Ÿ“ Examples @@ -283,12 +216,6 @@ logging.basicConfig(level=logging.DEBUG) 4. Ask questions: `ask What are the main contributions?` 5. Exit: `quit` -### API Workflow -1. Start: `python langchain_llamastack_ray.py` -2. Process: `curl -X POST http://localhost:8000/process -d '{"source": "https://example.com"}'` -3. Query: `curl -X POST http://localhost:8000/ask -d '{"question": "What is this about?"}'` -4. Stop: `Ctrl+C` - --- ## ๐Ÿค Contributing diff --git a/docs/notebooks/langChain/langchain_llamastack.py b/docs/notebooks/langChain/langchain-llama-stack.py similarity index 100% rename from docs/notebooks/langChain/langchain_llamastack.py rename to docs/notebooks/langChain/langchain-llama-stack.py From 17597808180cb550eba74cd774ab80ba2ad79f2a Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 21 Aug 2025 19:37:47 -0700 Subject: [PATCH 04/14] Updated the README Added fixes to the README file. --- docs/notebooks/langChain/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/notebooks/langChain/README.md b/docs/notebooks/langChain/README.md index dde6385d6..a6dbd2266 100644 --- a/docs/notebooks/langChain/README.md +++ b/docs/notebooks/langChain/README.md @@ -1,6 +1,6 @@ # LangChain + Llama Stack Document Processing -1. **`langchain-llamastack.py`** - Interactive CLI version +1. **`langchain-llama-stack.py`** - Interactive CLI version --- ## ๐Ÿ“‹ Prerequisites @@ -39,7 +39,7 @@ llama stack run your-config --port 8321 --- -## ๐Ÿ“– Option 1: Interactive CLI Version (`langchain_llamastack_updated.py`) +## ๐Ÿ“– Option 1: Interactive CLI Version (`langchain-llama-stack.py`) ### Features - โœ… Interactive command-line interface @@ -55,7 +55,7 @@ source llama-env-py312/bin/activate # Run the interactive CLI cd /home/omara/langchain_llamastack -python langchain_llamastack_updated.py +python langchain-llama-stack.py ``` ### Usage Commands @@ -210,7 +210,7 @@ logging.basicConfig(level=logging.DEBUG) ## ๐Ÿ“ Examples ### CLI Workflow -1. Start: `python langchain_llamastack_updated.py` +1. Start: `python langchain-llama-stack.py` 2. Load document: `load https://arxiv.org/pdf/2103.00020.pdf` 3. Get summary: `summary` 4. 
Ask questions: `ask What are the main contributions?` From 534bf972a37354458c02e0a718f9c339427541ba Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 21 Aug 2025 19:43:08 -0700 Subject: [PATCH 05/14] removed dead code removed some dead code --- docs/notebooks/langChain/langchain-llama-stack.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/notebooks/langChain/langchain-llama-stack.py b/docs/notebooks/langChain/langchain-llama-stack.py index 2f8301ea9..98aaa8d6c 100644 --- a/docs/notebooks/langChain/langchain-llama-stack.py +++ b/docs/notebooks/langChain/langchain-llama-stack.py @@ -61,8 +61,6 @@ class LlamaStackLLM(LLM): # Pydantic model fields client: Any = None - #model_id: str = "meta-llama/Llama-4-Maverick-17B-128E-Instruct" - # model_id: str = "meta-llama/Llama-3.3-70B-Instruct" model_id: str = "llama3:70b-instruct" def __init__(self, client, model_id: str = "llama3:70b-instruct"): From 84c960e9b508227bb5399acd740ecb3165283014 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 21 Aug 2025 19:50:50 -0700 Subject: [PATCH 06/14] Changing the langchain dir name Changing the langchain dir name --- docs/notebooks/langChain/README.md | 233 -------------- .../langChain/langchain-llama-stack.py | 288 ------------------ 2 files changed, 521 deletions(-) delete mode 100644 docs/notebooks/langChain/README.md delete mode 100644 docs/notebooks/langChain/langchain-llama-stack.py diff --git a/docs/notebooks/langChain/README.md b/docs/notebooks/langChain/README.md deleted file mode 100644 index a6dbd2266..000000000 --- a/docs/notebooks/langChain/README.md +++ /dev/null @@ -1,233 +0,0 @@ -# LangChain + Llama Stack Document Processing - -1. **`langchain-llama-stack.py`** - Interactive CLI version ---- - -## ๐Ÿ“‹ Prerequisites - -### System Requirements -- Python 3.12+ -- Llama Stack server running on `http://localhost:8321/` -- Ollama or compatible model server - -### Required Python Packages -```bash -pip install llama-stack-client langchain langchain-core langchain-community -pip install beautifulsoup4 markdownify readability-lxml requests -``` - -### Environment Setup -```bash -# Create and activate virtual environment -python3.12 -m venv llama-env-py312 -source llama-env-py312/bin/activate - -# Install dependencies -pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests -``` - ---- - -## ๐Ÿš€ Quick Start - -### Start Llama Stack Server -Before running either version, ensure your Llama Stack server is running: -```bash -# Start Llama Stack server (example) -llama stack run your-config --port 8321 -``` - ---- - -## ๐Ÿ“– Option 1: Interactive CLI Version (`langchain-llama-stack.py`) - -### Features -- โœ… Interactive command-line interface -- โœ… Document loading from URLs and PDFs -- โœ… AI-powered summarization and fact extraction -- โœ… Question-answering based on document content -- โœ… Session-based document storage - -### How to Run -```bash -# Activate environment -source llama-env-py312/bin/activate - -# Run the interactive CLI -cd /home/omara/langchain_llamastack -python langchain-llama-stack.py -``` - -### Usage Commands -Once running, you can use these interactive commands: - -``` -๐ŸŽฏ Interactive Document Processing Demo -Commands: - load - Process a document - ask - Ask about the document - summary - Show document summary - facts - Show extracted facts - help - Show commands - quit - Exit demo -``` - -### Example Session -``` -> load https://en.wikipedia.org/wiki/Artificial_intelligence -๐Ÿ“„ 
Loading document from: https://en.wikipedia.org/wiki/Artificial_intelligence -โœ… Loaded 45,832 characters -๐Ÿ“ Generating summary... -๐Ÿ” Extracting key facts... -โœ… Processing complete! - -> summary -๐Ÿ“ Summary: -Artificial intelligence (AI) is the simulation of human intelligence... - -> ask What are the main types of AI? -๐Ÿ’ฌ Q: What are the main types of AI? -๐Ÿ“ A: Based on the document, the main types of AI include... - -> facts -๐Ÿ” Key Facts: -- AI was founded as an academic discipline in 1956 -- Machine learning is a subset of AI... - -> quit -๐Ÿ‘‹ Thanks for exploring LangChain chains! -``` - - -#### Using curl: -```bash -# Check service status -curl http://localhost:8000/ - -# Process a document -curl -X POST http://localhost:8000/process \ - -H 'Content-Type: application/json' \ - -d '{"source": "https://en.wikipedia.org/wiki/Machine_learning"}' - -# Ask a question -curl -X POST http://localhost:8000/ask \ - -H 'Content-Type: application/json' \ - -d '{"question": "What is machine learning?"}' - -# Get summary -curl http://localhost:8000/summary - -# Get facts -curl http://localhost:8000/facts - -# List all processed documents -curl http://localhost:8000/docs -``` - -#### Using Python requests: -```python -import requests - -# Process a document -response = requests.post( - "http://localhost:8000/process", - json={"source": "https://en.wikipedia.org/wiki/Deep_learning"} -) -print(response.json()) - -# Ask a question -response = requests.post( - "http://localhost:8000/ask", - json={"question": "What are neural networks?"} -) -print(response.json()) - -# Get facts -response = requests.get("http://localhost:8000/facts") -print(response.json()) -``` - ---- - -## ๐Ÿ”ง Configuration - -### Model Configuration -Both versions use these models by default: -- **Model ID**: `llama3.2:3b` -- **Llama Stack URL**: `http://localhost:8321/` - -To change the model, edit the `model_id` parameter in the respective files. - -### Supported Document Types -- โœ… **URLs**: Any web page (extracted using readability) -- โœ… **PDF files**: Local or remote PDF documents -- โŒ Plain text files (can be added if needed) - ---- - -## ๐Ÿ› ๏ธ Troubleshooting - -### Common Issues - -#### 1. Connection Refused to Llama Stack -**Error**: `Connection refused to http://localhost:8321/` -**Solution**: -- Ensure Llama Stack server is running -- Check if port 8321 is correct -- Verify network connectivity - -#### 2. Model Not Found -**Error**: `Model not found: llama3.2:3b` -**Solution**: -- Check available models: `curl http://localhost:8321/models/list` -- Update `model_id` in the code to match available models - - -#### 4. Missing Dependencies -### Debug Mode -To enable verbose logging, add this to the beginning of either file: -```python -import logging -logging.basicConfig(level=logging.DEBUG) -``` - ---- - -## ๐Ÿ“Š Performance Notes - -### CLI Version -- **Pros**: Simple to use, interactive, good for testing -- **Cons**: Single-threaded, session-based only -- **Best for**: Development, testing, manual document analysis ---- - -## ๐Ÿ›‘ Stopping Services - -### CLI Version -- Press `Ctrl+C` or type `quit` in the interactive prompt ---- - -## ๐Ÿ“ Examples - -### CLI Workflow -1. Start: `python langchain-llama-stack.py` -2. Load document: `load https://arxiv.org/pdf/2103.00020.pdf` -3. Get summary: `summary` -4. Ask questions: `ask What are the main contributions?` -5. Exit: `quit` - ---- - -## ๐Ÿค Contributing - -To extend functionality: -1. Add new prompt templates for different analysis types -2. 
Support additional document formats -3. Add caching for processed documents -4. Implement user authentication for API version - ---- - -## ๐Ÿ“œ License - -This project is for educational and research purposes. diff --git a/docs/notebooks/langChain/langchain-llama-stack.py b/docs/notebooks/langChain/langchain-llama-stack.py deleted file mode 100644 index 98aaa8d6c..000000000 --- a/docs/notebooks/langChain/langchain-llama-stack.py +++ /dev/null @@ -1,288 +0,0 @@ -import os -import re -import html -import requests -from bs4 import BeautifulSoup -from readability import Document as ReadabilityDocument -from markdownify import markdownify -from langchain_community.document_loaders import PyPDFLoader, TextLoader -import tempfile - -from llama_stack_client import LlamaStackClient - -from langchain_core.language_models.llms import LLM -from typing import Optional, List, Any -from langchain.chains import LLMChain -from langchain_core.prompts import PromptTemplate -from rich.pretty import pprint - -# Global variables -client = None -llm = None -summary_chain = None -facts_chain = None -qa_chain = None -processed_docs = {} - -# Prompt Templates (defined globally) -summary_template = PromptTemplate( - input_variables=["document"], - template="""Create a concise summary of this document in 5-10 sentences: - -{document} - -SUMMARY:""" -) - -facts_template = PromptTemplate( - input_variables=["document"], - template="""Extract the most important facts from this document. List them as bullet points: - -{document} - -KEY FACTS: --""" -) - -qa_template = PromptTemplate( - input_variables=["document", "question"], - template="""Based on the following document, answer the question. If the answer isn't in the document, say so. - -DOCUMENT: -{document} - -QUESTION: {question} - -ANSWER:""" -) - -class LlamaStackLLM(LLM): - """Simple LangChain wrapper for Llama Stack""" - - # Pydantic model fields - client: Any = None - model_id: str = "llama3:70b-instruct" - - def __init__(self, client, model_id: str = "llama3:70b-instruct"): - # Initialize with field values - super().__init__(client=client, model_id=model_id) - - def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str: - """Make inference call to Llama Stack""" - response = self.client.inference.chat_completion( - model_id=self.model_id, - messages=[{"role": "user", "content": prompt}] - ) - return response.completion_message.content - - @property - def _llm_type(self) -> str: - return "llama_stack" - - -def load_document(source: str) -> str: - is_url = source.startswith(('http://', 'https://')) - is_pdf = source.lower().endswith('.pdf') - if is_pdf: - return load_pdf(source, is_url=is_url) - elif is_url: - return load_from_url(source) - else: - raise ValueError(f"Unsupported format. 
Use URLs or PDF files.") - - -def load_pdf(source: str, is_url: bool = False) -> str: - if is_url: - response = requests.get(source) - response.raise_for_status() - with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: - temp_file.write(response.content) - file_path = temp_file.name - else: - file_path = source - try: - loader = PyPDFLoader(file_path) - docs = loader.load() - return "\\n\\n".join([doc.page_content for doc in docs]) - finally: - if is_url: - os.remove(file_path) - - -def load_from_url(url: str) -> str: - headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'} - response = requests.get(url, headers=headers, timeout=15) - response.raise_for_status() - doc = ReadabilityDocument(response.text) - html_main = doc.summary(html_partial=True) - soup = BeautifulSoup(html_main, "html.parser") - for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]): - tag.decompose() - md_text = markdownify(str(soup), heading_style="ATX") - md_text = html.unescape(md_text) - md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip() - return md_text - -def process_document(source: str): - global summary_chain, facts_chain, processed_docs - - print(f"๐Ÿ“„ Loading document from: {source}") - document = load_document(source) - print(f"โœ… Loaded {len(document):,} characters") - print("\n๐Ÿ“ Generating summary...") - summary = summary_chain.invoke({"document": document})["text"] - print("Summary generated") - print("๐Ÿ” Extracting key facts...") - facts = facts_chain.invoke({"document": document})["text"] - processed_docs[source] = { - "document": document, - "summary": summary, - "facts": facts - } - print(f"\nโœ… Processing complete!") - print(f"๐Ÿ“Š Document: {len(document):,} chars") - print(f"๐Ÿ“ Summary: {summary[:100]}...") - print(f"๐Ÿ” Facts: {facts[:1000]}...") - return processed_docs[source] - -def ask_question(question: str, source: str = None): - """Answer questions about processed documents""" - global qa_chain, processed_docs - - if not processed_docs: - return "No documents processed yet. Use process_document() first." 
- if source and source in processed_docs: - doc_data = processed_docs[source] - else: - # Use the most recent document - doc_data = list(processed_docs.values())[-1] - answer = qa_chain.invoke({ - "document": doc_data["document"], - "question": question - })["text"] - return answer - - -def interactive_demo(): - print("\n๐ŸŽฏ Interactive Document Processing Demo") - print("Commands:") - print(" load - Process a document") - print(" ask - Ask about the document") - print(" summary - Show document summary") - print(" facts - Show extracted facts") - print(" help - Show commands") - print(" quit - Exit demo") - - while True: - try: - command = input("\n> ").strip() - if command.lower() in ['quit', 'exit']: - print("๐Ÿ‘‹ Thanks for exploring LangChain chains!") - break - elif command.lower() == 'help': - print("\nCommands:") - print(" load - Process a document") - print(" ask - Ask about the document") - print(" summary - Show document summary") - print(" facts - Show extracted facts") - elif command.startswith('load '): - source = command[5:].strip() - if source: - try: - process_document(source) - except Exception as e: - print(f"โŒ Error processing document: {e}") - else: - print("โ“ Please provide a URL or file path") - elif command.startswith('ask '): - question = command[4:].strip() - if question: - try: - answer = ask_question(question) - print(f"\n๐Ÿ’ฌ Q: {question}") - print(f"๐Ÿ“ A: {answer}") - except Exception as e: - print(f"โŒ Error: {e}") - else: - print("โ“ Please provide a question") - elif command.lower() == 'summary': - if processed_docs: - latest_doc = list(processed_docs.values())[-1] - print(f"\n๐Ÿ“ Summary:\n{latest_doc['summary']}") - else: - print("โ“ No documents processed yet") - elif command.lower() == 'facts': - if processed_docs: - latest_doc = list(processed_docs.values())[-1] - print(f"\n๐Ÿ” Key Facts:\n{latest_doc['facts']}") - else: - print("โ“ No documents processed yet") - else: - print("โ“ Unknown command. 
Type 'help' for options") - except (EOFError, KeyboardInterrupt): - print("\n๐Ÿ‘‹ Goodbye!") - break - - -def main(): - global client, llm, summary_chain, facts_chain, qa_chain, processed_docs - - print("๐Ÿš€ Starting LangChain + Llama Stack Document Processing Demo") - - client = LlamaStackClient( - base_url="http://localhost:8321/", - ) - - # Initialize the LangChain-compatible LLM - llm = LlamaStackLLM(client) - - # Test the wrapper - test_response = llm.invoke("Can you help me with the document processing?") - print(f"โœ… LangChain wrapper working!") - print(f"Response: {test_response[:100]}...") - - print("Available models:") - for m in client.models.list(): - print(f"- {m.identifier}") - - print("----") - print("Available shields (safety models):") - for s in client.shields.list(): - print(s.identifier) - print("----") - - # model_id = "llama3.2:3b" - model_id = "ollama/llama3:70b-instruct" - - response = client.inference.chat_completion( - model_id=model_id, - messages=[ - {"role": "system", "content": "You are a friendly assistant."}, - {"role": "user", "content": "Write a two-sentence poem about llama."}, - ], - ) - - print(response.completion_message.content) - - # Create chains by combining our LLM with prompt templates - summary_chain = LLMChain(llm=llm, prompt=summary_template) - facts_chain = LLMChain(llm=llm, prompt=facts_template) - qa_chain = LLMChain(llm=llm, prompt=qa_template) - - # Initialize storage for processed documents - processed_docs = {} - - print("โœ… Created 3 prompt templates:") - print(" โ€ข Summary: Condenses documents into key points") - print(" โ€ข Facts: Extracts important information as bullets") - print(" โ€ข Q&A: Answers questions based on document content") - - # Test template formatting - test_prompt = summary_template.format(document="This is a sample document about AI...") - print(f"\n๐Ÿ“ Example prompt: {len(test_prompt)} characters") - - # Start the interactive demo - interactive_demo() - -if __name__ == "__main__": - main() From 0da0732b07bfc799af525e61535730ddcb48c72f Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 21 Aug 2025 19:51:33 -0700 Subject: [PATCH 07/14] Changed the langChain dir name to langchain Changed the langChain dir name to langchain --- docs/notebooks/langchain/README.md | 233 ++++++++++++++ .../langchain/langchain-llama-stack.py | 288 ++++++++++++++++++ 2 files changed, 521 insertions(+) create mode 100644 docs/notebooks/langchain/README.md create mode 100644 docs/notebooks/langchain/langchain-llama-stack.py diff --git a/docs/notebooks/langchain/README.md b/docs/notebooks/langchain/README.md new file mode 100644 index 000000000..a6dbd2266 --- /dev/null +++ b/docs/notebooks/langchain/README.md @@ -0,0 +1,233 @@ +# LangChain + Llama Stack Document Processing + +1. 
**`langchain-llama-stack.py`** - Interactive CLI version +--- + +## ๐Ÿ“‹ Prerequisites + +### System Requirements +- Python 3.12+ +- Llama Stack server running on `http://localhost:8321/` +- Ollama or compatible model server + +### Required Python Packages +```bash +pip install llama-stack-client langchain langchain-core langchain-community +pip install beautifulsoup4 markdownify readability-lxml requests +``` + +### Environment Setup +```bash +# Create and activate virtual environment +python3.12 -m venv llama-env-py312 +source llama-env-py312/bin/activate + +# Install dependencies +pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests +``` + +--- + +## ๐Ÿš€ Quick Start + +### Start Llama Stack Server +Before running either version, ensure your Llama Stack server is running: +```bash +# Start Llama Stack server (example) +llama stack run your-config --port 8321 +``` + +--- + +## ๐Ÿ“– Option 1: Interactive CLI Version (`langchain-llama-stack.py`) + +### Features +- โœ… Interactive command-line interface +- โœ… Document loading from URLs and PDFs +- โœ… AI-powered summarization and fact extraction +- โœ… Question-answering based on document content +- โœ… Session-based document storage + +### How to Run +```bash +# Activate environment +source llama-env-py312/bin/activate + +# Run the interactive CLI +cd /home/omara/langchain_llamastack +python langchain-llama-stack.py +``` + +### Usage Commands +Once running, you can use these interactive commands: + +``` +๐ŸŽฏ Interactive Document Processing Demo +Commands: + load - Process a document + ask - Ask about the document + summary - Show document summary + facts - Show extracted facts + help - Show commands + quit - Exit demo +``` + +### Example Session +``` +> load https://en.wikipedia.org/wiki/Artificial_intelligence +๐Ÿ“„ Loading document from: https://en.wikipedia.org/wiki/Artificial_intelligence +โœ… Loaded 45,832 characters +๐Ÿ“ Generating summary... +๐Ÿ” Extracting key facts... +โœ… Processing complete! + +> summary +๐Ÿ“ Summary: +Artificial intelligence (AI) is the simulation of human intelligence... + +> ask What are the main types of AI? +๐Ÿ’ฌ Q: What are the main types of AI? +๐Ÿ“ A: Based on the document, the main types of AI include... + +> facts +๐Ÿ” Key Facts: +- AI was founded as an academic discipline in 1956 +- Machine learning is a subset of AI... + +> quit +๐Ÿ‘‹ Thanks for exploring LangChain chains! 
+``` + + +#### Using curl: +```bash +# Check service status +curl http://localhost:8000/ + +# Process a document +curl -X POST http://localhost:8000/process \ + -H 'Content-Type: application/json' \ + -d '{"source": "https://en.wikipedia.org/wiki/Machine_learning"}' + +# Ask a question +curl -X POST http://localhost:8000/ask \ + -H 'Content-Type: application/json' \ + -d '{"question": "What is machine learning?"}' + +# Get summary +curl http://localhost:8000/summary + +# Get facts +curl http://localhost:8000/facts + +# List all processed documents +curl http://localhost:8000/docs +``` + +#### Using Python requests: +```python +import requests + +# Process a document +response = requests.post( + "http://localhost:8000/process", + json={"source": "https://en.wikipedia.org/wiki/Deep_learning"} +) +print(response.json()) + +# Ask a question +response = requests.post( + "http://localhost:8000/ask", + json={"question": "What are neural networks?"} +) +print(response.json()) + +# Get facts +response = requests.get("http://localhost:8000/facts") +print(response.json()) +``` + +--- + +## ๐Ÿ”ง Configuration + +### Model Configuration +Both versions use these models by default: +- **Model ID**: `llama3.2:3b` +- **Llama Stack URL**: `http://localhost:8321/` + +To change the model, edit the `model_id` parameter in the respective files. + +### Supported Document Types +- โœ… **URLs**: Any web page (extracted using readability) +- โœ… **PDF files**: Local or remote PDF documents +- โŒ Plain text files (can be added if needed) + +--- + +## ๐Ÿ› ๏ธ Troubleshooting + +### Common Issues + +#### 1. Connection Refused to Llama Stack +**Error**: `Connection refused to http://localhost:8321/` +**Solution**: +- Ensure Llama Stack server is running +- Check if port 8321 is correct +- Verify network connectivity + +#### 2. Model Not Found +**Error**: `Model not found: llama3.2:3b` +**Solution**: +- Check available models: `curl http://localhost:8321/models/list` +- Update `model_id` in the code to match available models + + +#### 4. Missing Dependencies +### Debug Mode +To enable verbose logging, add this to the beginning of either file: +```python +import logging +logging.basicConfig(level=logging.DEBUG) +``` + +--- + +## ๐Ÿ“Š Performance Notes + +### CLI Version +- **Pros**: Simple to use, interactive, good for testing +- **Cons**: Single-threaded, session-based only +- **Best for**: Development, testing, manual document analysis +--- + +## ๐Ÿ›‘ Stopping Services + +### CLI Version +- Press `Ctrl+C` or type `quit` in the interactive prompt +--- + +## ๐Ÿ“ Examples + +### CLI Workflow +1. Start: `python langchain-llama-stack.py` +2. Load document: `load https://arxiv.org/pdf/2103.00020.pdf` +3. Get summary: `summary` +4. Ask questions: `ask What are the main contributions?` +5. Exit: `quit` + +--- + +## ๐Ÿค Contributing + +To extend functionality: +1. Add new prompt templates for different analysis types +2. Support additional document formats +3. Add caching for processed documents +4. Implement user authentication for API version + +--- + +## ๐Ÿ“œ License + +This project is for educational and research purposes. 
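[Editor's note] The Contributing list above suggests adding new prompt templates for different analysis types. A new analysis chain can follow the same `PromptTemplate` + `LLMChain` pattern already used for `summary_chain` and `facts_chain` in `langchain-llama-stack.py`. The sketch below is illustrative only: it assumes the `llm` object constructed in that script's `main()`, and the `keywords_template` / `keywords_chain` names are invented for this example (newer LangChain releases may prefer `prompt | llm` composition over `LLMChain`).

```python
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate

# Hypothetical extra analysis type: keyword extraction.
# Mirrors the module-level summary/facts templates in langchain-llama-stack.py.
keywords_template = PromptTemplate(
    input_variables=["document"],
    template="""List the 10 most important keywords or phrases in this document, one per line:

{document}

KEYWORDS:""",
)

# Inside main(), build and call the chain the same way summary_chain and
# facts_chain are built there (llm comes from the existing setup code):
#
#   keywords_chain = LLMChain(llm=llm, prompt=keywords_template)
#   keywords = keywords_chain.invoke({"document": document})["text"]
```

Results from such a chain could be stored alongside `summary` and `facts` in the existing `processed_docs` dictionary, which already serves as a simple in-memory cache for processed documents.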
diff --git a/docs/notebooks/langchain/langchain-llama-stack.py b/docs/notebooks/langchain/langchain-llama-stack.py new file mode 100644 index 000000000..98aaa8d6c --- /dev/null +++ b/docs/notebooks/langchain/langchain-llama-stack.py @@ -0,0 +1,288 @@ +import os +import re +import html +import requests +from bs4 import BeautifulSoup +from readability import Document as ReadabilityDocument +from markdownify import markdownify +from langchain_community.document_loaders import PyPDFLoader, TextLoader +import tempfile + +from llama_stack_client import LlamaStackClient + +from langchain_core.language_models.llms import LLM +from typing import Optional, List, Any +from langchain.chains import LLMChain +from langchain_core.prompts import PromptTemplate +from rich.pretty import pprint + +# Global variables +client = None +llm = None +summary_chain = None +facts_chain = None +qa_chain = None +processed_docs = {} + +# Prompt Templates (defined globally) +summary_template = PromptTemplate( + input_variables=["document"], + template="""Create a concise summary of this document in 5-10 sentences: + +{document} + +SUMMARY:""" +) + +facts_template = PromptTemplate( + input_variables=["document"], + template="""Extract the most important facts from this document. List them as bullet points: + +{document} + +KEY FACTS: +-""" +) + +qa_template = PromptTemplate( + input_variables=["document", "question"], + template="""Based on the following document, answer the question. If the answer isn't in the document, say so. + +DOCUMENT: +{document} + +QUESTION: {question} + +ANSWER:""" +) + +class LlamaStackLLM(LLM): + """Simple LangChain wrapper for Llama Stack""" + + # Pydantic model fields + client: Any = None + model_id: str = "llama3:70b-instruct" + + def __init__(self, client, model_id: str = "llama3:70b-instruct"): + # Initialize with field values + super().__init__(client=client, model_id=model_id) + + def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str: + """Make inference call to Llama Stack""" + response = self.client.inference.chat_completion( + model_id=self.model_id, + messages=[{"role": "user", "content": prompt}] + ) + return response.completion_message.content + + @property + def _llm_type(self) -> str: + return "llama_stack" + + +def load_document(source: str) -> str: + is_url = source.startswith(('http://', 'https://')) + is_pdf = source.lower().endswith('.pdf') + if is_pdf: + return load_pdf(source, is_url=is_url) + elif is_url: + return load_from_url(source) + else: + raise ValueError(f"Unsupported format. 
Use URLs or PDF files.") + + +def load_pdf(source: str, is_url: bool = False) -> str: + if is_url: + response = requests.get(source) + response.raise_for_status() + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: + temp_file.write(response.content) + file_path = temp_file.name + else: + file_path = source + try: + loader = PyPDFLoader(file_path) + docs = loader.load() + return "\\n\\n".join([doc.page_content for doc in docs]) + finally: + if is_url: + os.remove(file_path) + + +def load_from_url(url: str) -> str: + headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'} + response = requests.get(url, headers=headers, timeout=15) + response.raise_for_status() + doc = ReadabilityDocument(response.text) + html_main = doc.summary(html_partial=True) + soup = BeautifulSoup(html_main, "html.parser") + for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]): + tag.decompose() + md_text = markdownify(str(soup), heading_style="ATX") + md_text = html.unescape(md_text) + md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip() + return md_text + +def process_document(source: str): + global summary_chain, facts_chain, processed_docs + + print(f"๐Ÿ“„ Loading document from: {source}") + document = load_document(source) + print(f"โœ… Loaded {len(document):,} characters") + print("\n๐Ÿ“ Generating summary...") + summary = summary_chain.invoke({"document": document})["text"] + print("Summary generated") + print("๐Ÿ” Extracting key facts...") + facts = facts_chain.invoke({"document": document})["text"] + processed_docs[source] = { + "document": document, + "summary": summary, + "facts": facts + } + print(f"\nโœ… Processing complete!") + print(f"๐Ÿ“Š Document: {len(document):,} chars") + print(f"๐Ÿ“ Summary: {summary[:100]}...") + print(f"๐Ÿ” Facts: {facts[:1000]}...") + return processed_docs[source] + +def ask_question(question: str, source: str = None): + """Answer questions about processed documents""" + global qa_chain, processed_docs + + if not processed_docs: + return "No documents processed yet. Use process_document() first." 
+ if source and source in processed_docs: + doc_data = processed_docs[source] + else: + # Use the most recent document + doc_data = list(processed_docs.values())[-1] + answer = qa_chain.invoke({ + "document": doc_data["document"], + "question": question + })["text"] + return answer + + +def interactive_demo(): + print("\n๐ŸŽฏ Interactive Document Processing Demo") + print("Commands:") + print(" load - Process a document") + print(" ask - Ask about the document") + print(" summary - Show document summary") + print(" facts - Show extracted facts") + print(" help - Show commands") + print(" quit - Exit demo") + + while True: + try: + command = input("\n> ").strip() + if command.lower() in ['quit', 'exit']: + print("๐Ÿ‘‹ Thanks for exploring LangChain chains!") + break + elif command.lower() == 'help': + print("\nCommands:") + print(" load - Process a document") + print(" ask - Ask about the document") + print(" summary - Show document summary") + print(" facts - Show extracted facts") + elif command.startswith('load '): + source = command[5:].strip() + if source: + try: + process_document(source) + except Exception as e: + print(f"โŒ Error processing document: {e}") + else: + print("โ“ Please provide a URL or file path") + elif command.startswith('ask '): + question = command[4:].strip() + if question: + try: + answer = ask_question(question) + print(f"\n๐Ÿ’ฌ Q: {question}") + print(f"๐Ÿ“ A: {answer}") + except Exception as e: + print(f"โŒ Error: {e}") + else: + print("โ“ Please provide a question") + elif command.lower() == 'summary': + if processed_docs: + latest_doc = list(processed_docs.values())[-1] + print(f"\n๐Ÿ“ Summary:\n{latest_doc['summary']}") + else: + print("โ“ No documents processed yet") + elif command.lower() == 'facts': + if processed_docs: + latest_doc = list(processed_docs.values())[-1] + print(f"\n๐Ÿ” Key Facts:\n{latest_doc['facts']}") + else: + print("โ“ No documents processed yet") + else: + print("โ“ Unknown command. 
Type 'help' for options") + except (EOFError, KeyboardInterrupt): + print("\n๐Ÿ‘‹ Goodbye!") + break + + +def main(): + global client, llm, summary_chain, facts_chain, qa_chain, processed_docs + + print("๐Ÿš€ Starting LangChain + Llama Stack Document Processing Demo") + + client = LlamaStackClient( + base_url="http://localhost:8321/", + ) + + # Initialize the LangChain-compatible LLM + llm = LlamaStackLLM(client) + + # Test the wrapper + test_response = llm.invoke("Can you help me with the document processing?") + print(f"โœ… LangChain wrapper working!") + print(f"Response: {test_response[:100]}...") + + print("Available models:") + for m in client.models.list(): + print(f"- {m.identifier}") + + print("----") + print("Available shields (safety models):") + for s in client.shields.list(): + print(s.identifier) + print("----") + + # model_id = "llama3.2:3b" + model_id = "ollama/llama3:70b-instruct" + + response = client.inference.chat_completion( + model_id=model_id, + messages=[ + {"role": "system", "content": "You are a friendly assistant."}, + {"role": "user", "content": "Write a two-sentence poem about llama."}, + ], + ) + + print(response.completion_message.content) + + # Create chains by combining our LLM with prompt templates + summary_chain = LLMChain(llm=llm, prompt=summary_template) + facts_chain = LLMChain(llm=llm, prompt=facts_template) + qa_chain = LLMChain(llm=llm, prompt=qa_template) + + # Initialize storage for processed documents + processed_docs = {} + + print("โœ… Created 3 prompt templates:") + print(" โ€ข Summary: Condenses documents into key points") + print(" โ€ข Facts: Extracts important information as bullets") + print(" โ€ข Q&A: Answers questions based on document content") + + # Test template formatting + test_prompt = summary_template.format(document="This is a sample document about AI...") + print(f"\n๐Ÿ“ Example prompt: {len(test_prompt)} characters") + + # Start the interactive demo + interactive_demo() + +if __name__ == "__main__": + main() From 63375b8f45b09182fd4493e23c74410cf7ebdf57 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Tue, 26 Aug 2025 22:28:22 -0700 Subject: [PATCH 08/14] Update langchain-llama-stack.py --- .../langchain/langchain-llama-stack.py | 109 +++++++----------- 1 file changed, 39 insertions(+), 70 deletions(-) diff --git a/docs/notebooks/langchain/langchain-llama-stack.py b/docs/notebooks/langchain/langchain-llama-stack.py index 98aaa8d6c..aa0d9bd1c 100644 --- a/docs/notebooks/langchain/langchain-llama-stack.py +++ b/docs/notebooks/langchain/langchain-llama-stack.py @@ -1,19 +1,21 @@ +import html import os import re -import html +import tempfile +from typing import Any, List, Optional + import requests from bs4 import BeautifulSoup -from readability import Document as ReadabilityDocument -from markdownify import markdownify +from langchain.chains import LLMChain from langchain_community.document_loaders import PyPDFLoader, TextLoader -import tempfile - -from llama_stack_client import LlamaStackClient from langchain_core.language_models.llms import LLM -from typing import Optional, List, Any -from langchain.chains import LLMChain from langchain_core.prompts import PromptTemplate +from langchain_openai import ChatOpenAI + +from llama_stack_client import LlamaStackClient +from markdownify import markdownify +from readability import Document as ReadabilityDocument from rich.pretty import pprint # Global variables @@ -31,7 +33,7 @@ summary_template = PromptTemplate( {document} -SUMMARY:""" +SUMMARY:""", ) facts_template = 
PromptTemplate( @@ -41,7 +43,7 @@ facts_template = PromptTemplate( {document} KEY FACTS: --""" +-""", ) qa_template = PromptTemplate( @@ -53,36 +55,13 @@ DOCUMENT: QUESTION: {question} -ANSWER:""" +ANSWER:""", ) -class LlamaStackLLM(LLM): - """Simple LangChain wrapper for Llama Stack""" - - # Pydantic model fields - client: Any = None - model_id: str = "llama3:70b-instruct" - - def __init__(self, client, model_id: str = "llama3:70b-instruct"): - # Initialize with field values - super().__init__(client=client, model_id=model_id) - - def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str: - """Make inference call to Llama Stack""" - response = self.client.inference.chat_completion( - model_id=self.model_id, - messages=[{"role": "user", "content": prompt}] - ) - return response.completion_message.content - - @property - def _llm_type(self) -> str: - return "llama_stack" - def load_document(source: str) -> str: - is_url = source.startswith(('http://', 'https://')) - is_pdf = source.lower().endswith('.pdf') + is_url = source.startswith(("http://", "https://")) + is_pdf = source.lower().endswith(".pdf") if is_pdf: return load_pdf(source, is_url=is_url) elif is_url: @@ -110,19 +89,22 @@ def load_pdf(source: str, is_url: bool = False) -> str: def load_from_url(url: str) -> str: - headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'} + headers = {"User-Agent": "Mozilla/5.0 (compatible; DocumentLoader/1.0)"} response = requests.get(url, headers=headers, timeout=15) response.raise_for_status() doc = ReadabilityDocument(response.text) html_main = doc.summary(html_partial=True) soup = BeautifulSoup(html_main, "html.parser") - for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]): + for tag in soup( + ["script", "style", "noscript", "header", "footer", "nav", "aside"] + ): tag.decompose() md_text = markdownify(str(soup), heading_style="ATX") md_text = html.unescape(md_text) md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip() return md_text + def process_document(source: str): global summary_chain, facts_chain, processed_docs @@ -134,17 +116,14 @@ def process_document(source: str): print("Summary generated") print("๐Ÿ” Extracting key facts...") facts = facts_chain.invoke({"document": document})["text"] - processed_docs[source] = { - "document": document, - "summary": summary, - "facts": facts - } + processed_docs[source] = {"document": document, "summary": summary, "facts": facts} print(f"\nโœ… Processing complete!") print(f"๐Ÿ“Š Document: {len(document):,} chars") print(f"๐Ÿ“ Summary: {summary[:100]}...") print(f"๐Ÿ” Facts: {facts[:1000]}...") return processed_docs[source] + def ask_question(question: str, source: str = None): """Answer questions about processed documents""" global qa_chain, processed_docs @@ -156,10 +135,9 @@ def ask_question(question: str, source: str = None): else: # Use the most recent document doc_data = list(processed_docs.values())[-1] - answer = qa_chain.invoke({ - "document": doc_data["document"], - "question": question - })["text"] + answer = qa_chain.invoke({"document": doc_data["document"], "question": question})[ + "text" + ] return answer @@ -176,16 +154,16 @@ def interactive_demo(): while True: try: command = input("\n> ").strip() - if command.lower() in ['quit', 'exit']: + if command.lower() in ["quit", "exit"]: print("๐Ÿ‘‹ Thanks for exploring LangChain chains!") break - elif command.lower() == 'help': + elif command.lower() == "help": print("\nCommands:") print(" load - Process a document") 
print(" ask - Ask about the document") print(" summary - Show document summary") print(" facts - Show extracted facts") - elif command.startswith('load '): + elif command.startswith("load "): source = command[5:].strip() if source: try: @@ -194,7 +172,7 @@ def interactive_demo(): print(f"โŒ Error processing document: {e}") else: print("โ“ Please provide a URL or file path") - elif command.startswith('ask '): + elif command.startswith("ask "): question = command[4:].strip() if question: try: @@ -205,13 +183,13 @@ def interactive_demo(): print(f"โŒ Error: {e}") else: print("โ“ Please provide a question") - elif command.lower() == 'summary': + elif command.lower() == "summary": if processed_docs: latest_doc = list(processed_docs.values())[-1] print(f"\n๐Ÿ“ Summary:\n{latest_doc['summary']}") else: print("โ“ No documents processed yet") - elif command.lower() == 'facts': + elif command.lower() == "facts": if processed_docs: latest_doc = list(processed_docs.values())[-1] print(f"\n๐Ÿ” Key Facts:\n{latest_doc['facts']}") @@ -232,14 +210,14 @@ def main(): client = LlamaStackClient( base_url="http://localhost:8321/", ) - - # Initialize the LangChain-compatible LLM - llm = LlamaStackLLM(client) + os.environ["OPENAI_API_KEY"] = "dummy" + os.environ["OPENAI_BASE_URL"] = "http://0.0.0.0:8321/v1/openai/v1" + llm = ChatOpenAI(model="ollama/llama3:70b-instruct") # Test the wrapper test_response = llm.invoke("Can you help me with the document processing?") print(f"โœ… LangChain wrapper working!") - print(f"Response: {test_response[:100]}...") + print(f"Response: {test_response.content[:100]}...") print("Available models:") for m in client.models.list(): @@ -251,19 +229,7 @@ def main(): print(s.identifier) print("----") - # model_id = "llama3.2:3b" model_id = "ollama/llama3:70b-instruct" - - response = client.inference.chat_completion( - model_id=model_id, - messages=[ - {"role": "system", "content": "You are a friendly assistant."}, - {"role": "user", "content": "Write a two-sentence poem about llama."}, - ], - ) - - print(response.completion_message.content) - # Create chains by combining our LLM with prompt templates summary_chain = LLMChain(llm=llm, prompt=summary_template) facts_chain = LLMChain(llm=llm, prompt=facts_template) @@ -278,11 +244,14 @@ def main(): print(" โ€ข Q&A: Answers questions based on document content") # Test template formatting - test_prompt = summary_template.format(document="This is a sample document about AI...") + test_prompt = summary_template.format( + document="This is a sample document about AI..." 
+ ) print(f"\n๐Ÿ“ Example prompt: {len(test_prompt)} characters") # Start the interactive demo interactive_demo() + if __name__ == "__main__": main() From 74e95524d76483721dfd27eabeb395c2aa2c9fbe Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 28 Aug 2025 15:36:38 -0700 Subject: [PATCH 09/14] Update README.md --- docs/notebooks/langchain/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks/langchain/README.md b/docs/notebooks/langchain/README.md index a6dbd2266..bf03ef5a4 100644 --- a/docs/notebooks/langchain/README.md +++ b/docs/notebooks/langchain/README.md @@ -54,7 +54,7 @@ llama stack run your-config --port 8321 source llama-env-py312/bin/activate # Run the interactive CLI -cd /home/omara/langchain_llamastack +cd /langchain_llamastack python langchain-llama-stack.py ``` From 035ac8af41c3e8783b833ba25b3a25da004a036d Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 28 Aug 2025 17:30:33 -0700 Subject: [PATCH 10/14] Update README.md --- docs/notebooks/langchain/README.md | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/docs/notebooks/langchain/README.md b/docs/notebooks/langchain/README.md index bf03ef5a4..6661c586e 100644 --- a/docs/notebooks/langchain/README.md +++ b/docs/notebooks/langchain/README.md @@ -10,12 +10,6 @@ - Llama Stack server running on `http://localhost:8321/` - Ollama or compatible model server -### Required Python Packages -```bash -pip install llama-stack-client langchain langchain-core langchain-community -pip install beautifulsoup4 markdownify readability-lxml requests -``` - ### Environment Setup ```bash # Create and activate virtual environment @@ -50,11 +44,9 @@ llama stack run your-config --port 8321 ### How to Run ```bash -# Activate environment -source llama-env-py312/bin/activate # Run the interactive CLI -cd /langchain_llamastack +cd /docs/notebooks/langchain/ python langchain-llama-stack.py ``` From 4f87a5dd72d2b6b137d8b461aaef4aab42a70ff1 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 28 Aug 2025 17:39:37 -0700 Subject: [PATCH 11/14] Update README.md --- docs/notebooks/langchain/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks/langchain/README.md b/docs/notebooks/langchain/README.md index 6661c586e..fb0fe21a7 100644 --- a/docs/notebooks/langchain/README.md +++ b/docs/notebooks/langchain/README.md @@ -17,7 +17,7 @@ python3.12 -m venv llama-env-py312 source llama-env-py312/bin/activate # Install dependencies -pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests +pip install llama-stack-client langchain langchain-core langchain-community beautifulsoup4 markdownify readability-lxml requests langchain_openai ``` --- From 9cf73a6bdff0634d9664dcfc02274eff537d3863 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Tue, 2 Sep 2025 10:30:17 -0700 Subject: [PATCH 12/14] Update langchain-llama-stack.py --- docs/notebooks/langchain/langchain-llama-stack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks/langchain/langchain-llama-stack.py b/docs/notebooks/langchain/langchain-llama-stack.py index aa0d9bd1c..8f61b33c2 100644 --- a/docs/notebooks/langchain/langchain-llama-stack.py +++ b/docs/notebooks/langchain/langchain-llama-stack.py @@ -212,7 +212,7 @@ def main(): ) os.environ["OPENAI_API_KEY"] = "dummy" os.environ["OPENAI_BASE_URL"] = "http://0.0.0.0:8321/v1/openai/v1" - llm = ChatOpenAI(model="ollama/llama3:70b-instruct") + 
llm = ChatOpenAI(model="ollama/llama3:70b-instruct", base_url="http://localhost:8321/v1/openai/v1") # Test the wrapper test_response = llm.invoke("Can you help me with the document processing?") From 875069f535763b9924549c023552e918e632832c Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 18 Sep 2025 13:57:14 -0700 Subject: [PATCH 13/14] Update langchain-llama-stack.py --- docs/notebooks/langchain/langchain-llama-stack.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/notebooks/langchain/langchain-llama-stack.py b/docs/notebooks/langchain/langchain-llama-stack.py index 8f61b33c2..99e4ebab6 100644 --- a/docs/notebooks/langchain/langchain-llama-stack.py +++ b/docs/notebooks/langchain/langchain-llama-stack.py @@ -210,8 +210,6 @@ def main(): client = LlamaStackClient( base_url="http://localhost:8321/", ) - os.environ["OPENAI_API_KEY"] = "dummy" - os.environ["OPENAI_BASE_URL"] = "http://0.0.0.0:8321/v1/openai/v1" llm = ChatOpenAI(model="ollama/llama3:70b-instruct", base_url="http://localhost:8321/v1/openai/v1") # Test the wrapper From d064c9e99e698e7a5487660ac5e7ae7af9259317 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Thu, 18 Sep 2025 14:03:43 -0700 Subject: [PATCH 14/14] ran pre-commit ran pre-commit --- docs/notebooks/langchain/README.md | 6 +++--- docs/notebooks/langchain/langchain-llama-stack.py | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/notebooks/langchain/README.md b/docs/notebooks/langchain/README.md index fb0fe21a7..b72a5e65b 100644 --- a/docs/notebooks/langchain/README.md +++ b/docs/notebooks/langchain/README.md @@ -123,14 +123,13 @@ import requests # Process a document response = requests.post( "http://localhost:8000/process", - json={"source": "https://en.wikipedia.org/wiki/Deep_learning"} + json={"source": "https://en.wikipedia.org/wiki/Deep_learning"}, ) print(response.json()) # Ask a question response = requests.post( - "http://localhost:8000/ask", - json={"question": "What are neural networks?"} + "http://localhost:8000/ask", json={"question": "What are neural networks?"} ) print(response.json()) @@ -180,6 +179,7 @@ To change the model, edit the `model_id` parameter in the respective files. To enable verbose logging, add this to the beginning of either file: ```python import logging + logging.basicConfig(level=logging.DEBUG) ``` diff --git a/docs/notebooks/langchain/langchain-llama-stack.py b/docs/notebooks/langchain/langchain-llama-stack.py index 99e4ebab6..d67f23f50 100644 --- a/docs/notebooks/langchain/langchain-llama-stack.py +++ b/docs/notebooks/langchain/langchain-llama-stack.py @@ -1,3 +1,9 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + import html import os import re