Update langchain-llama-stack.py

Omar Abdelwahab 2025-08-26 22:28:22 -07:00
parent 0da0732b07
commit 63375b8f45


@@ -1,19 +1,21 @@
+import html
 import os
 import re
-import html
+import tempfile
+from typing import Any, List, Optional
+
 import requests
 from bs4 import BeautifulSoup
-from readability import Document as ReadabilityDocument
-from markdownify import markdownify
+from langchain.chains import LLMChain
 from langchain_community.document_loaders import PyPDFLoader, TextLoader
-import tempfile
-from llama_stack_client import LlamaStackClient
 from langchain_core.language_models.llms import LLM
-from typing import Optional, List, Any
-from langchain.chains import LLMChain
 from langchain_core.prompts import PromptTemplate
+from langchain_openai import ChatOpenAI
+from llama_stack_client import LlamaStackClient
+from markdownify import markdownify
+from readability import Document as ReadabilityDocument
 from rich.pretty import pprint

 # Global variables
@@ -31,7 +33,7 @@ summary_template = PromptTemplate(

 {document}

-SUMMARY:"""
+SUMMARY:""",
 )

 facts_template = PromptTemplate(
@@ -41,7 +43,7 @@ facts_template = PromptTemplate(

 {document}

 KEY FACTS:
--"""
+-""",
 )

 qa_template = PromptTemplate(
@@ -53,36 +55,13 @@ DOCUMENT:

 QUESTION: {question}

-ANSWER:"""
+ANSWER:""",
 )

-class LlamaStackLLM(LLM):
-    """Simple LangChain wrapper for Llama Stack"""
-
-    # Pydantic model fields
-    client: Any = None
-    model_id: str = "llama3:70b-instruct"
-
-    def __init__(self, client, model_id: str = "llama3:70b-instruct"):
-        # Initialize with field values
-        super().__init__(client=client, model_id=model_id)
-
-    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str:
-        """Make inference call to Llama Stack"""
-        response = self.client.inference.chat_completion(
-            model_id=self.model_id,
-            messages=[{"role": "user", "content": prompt}]
-        )
-        return response.completion_message.content
-
-    @property
-    def _llm_type(self) -> str:
-        return "llama_stack"

 def load_document(source: str) -> str:
-    is_url = source.startswith(('http://', 'https://'))
-    is_pdf = source.lower().endswith('.pdf')
+    is_url = source.startswith(("http://", "https://"))
+    is_pdf = source.lower().endswith(".pdf")
     if is_pdf:
         return load_pdf(source, is_url=is_url)
     elif is_url:
@@ -110,19 +89,22 @@ def load_pdf(source: str, is_url: bool = False) -> str:

 def load_from_url(url: str) -> str:
-    headers = {'User-Agent': 'Mozilla/5.0 (compatible; DocumentLoader/1.0)'}
+    headers = {"User-Agent": "Mozilla/5.0 (compatible; DocumentLoader/1.0)"}
     response = requests.get(url, headers=headers, timeout=15)
     response.raise_for_status()
     doc = ReadabilityDocument(response.text)
     html_main = doc.summary(html_partial=True)
     soup = BeautifulSoup(html_main, "html.parser")
-    for tag in soup(["script", "style", "noscript", "header", "footer", "nav", "aside"]):
+    for tag in soup(
+        ["script", "style", "noscript", "header", "footer", "nav", "aside"]
+    ):
         tag.decompose()
     md_text = markdownify(str(soup), heading_style="ATX")
     md_text = html.unescape(md_text)
     md_text = re.sub(r"\n{3,}", "\n\n", md_text).strip()
     return md_text

 def process_document(source: str):
     global summary_chain, facts_chain, processed_docs
@@ -134,17 +116,14 @@ def process_document(source: str):
     print("Summary generated")
     print("🔍 Extracting key facts...")
     facts = facts_chain.invoke({"document": document})["text"]
-    processed_docs[source] = {
-        "document": document,
-        "summary": summary,
-        "facts": facts
-    }
+    processed_docs[source] = {"document": document, "summary": summary, "facts": facts}
     print(f"\n✅ Processing complete!")
     print(f"📊 Document: {len(document):,} chars")
     print(f"📝 Summary: {summary[:100]}...")
     print(f"🔍 Facts: {facts[:1000]}...")
     return processed_docs[source]

 def ask_question(question: str, source: str = None):
     """Answer questions about processed documents"""
     global qa_chain, processed_docs
@@ -156,10 +135,9 @@ def ask_question(question: str, source: str = None):
     else:
         # Use the most recent document
         doc_data = list(processed_docs.values())[-1]
-    answer = qa_chain.invoke({
-        "document": doc_data["document"],
-        "question": question
-    })["text"]
+    answer = qa_chain.invoke({"document": doc_data["document"], "question": question})[
+        "text"
+    ]
     return answer
@@ -176,16 +154,16 @@ def interactive_demo():
     while True:
         try:
             command = input("\n> ").strip()
-            if command.lower() in ['quit', 'exit']:
+            if command.lower() in ["quit", "exit"]:
                 print("👋 Thanks for exploring LangChain chains!")
                 break
-            elif command.lower() == 'help':
+            elif command.lower() == "help":
                 print("\nCommands:")
                 print(" load <url_or_path> - Process a document")
                 print(" ask <question> - Ask about the document")
                 print(" summary - Show document summary")
                 print(" facts - Show extracted facts")
-            elif command.startswith('load '):
+            elif command.startswith("load "):
                 source = command[5:].strip()
                 if source:
                     try:
@@ -194,7 +172,7 @@ def interactive_demo():
                         print(f"❌ Error processing document: {e}")
                 else:
                     print("❓ Please provide a URL or file path")
-            elif command.startswith('ask '):
+            elif command.startswith("ask "):
                 question = command[4:].strip()
                 if question:
                     try:
@@ -205,13 +183,13 @@ def interactive_demo():
                         print(f"❌ Error: {e}")
                 else:
                     print("❓ Please provide a question")
-            elif command.lower() == 'summary':
+            elif command.lower() == "summary":
                 if processed_docs:
                     latest_doc = list(processed_docs.values())[-1]
                     print(f"\n📝 Summary:\n{latest_doc['summary']}")
                 else:
                     print("❓ No documents processed yet")
-            elif command.lower() == 'facts':
+            elif command.lower() == "facts":
                 if processed_docs:
                     latest_doc = list(processed_docs.values())[-1]
                     print(f"\n🔍 Key Facts:\n{latest_doc['facts']}")
@@ -232,14 +210,14 @@ def main():
     client = LlamaStackClient(
         base_url="http://localhost:8321/",
     )
-    # Initialize the LangChain-compatible LLM
-    llm = LlamaStackLLM(client)
+    os.environ["OPENAI_API_KEY"] = "dummy"
+    os.environ["OPENAI_BASE_URL"] = "http://0.0.0.0:8321/v1/openai/v1"
+    llm = ChatOpenAI(model="ollama/llama3:70b-instruct")

     # Test the wrapper
     test_response = llm.invoke("Can you help me with the document processing?")
     print(f"✅ LangChain wrapper working!")
-    print(f"Response: {test_response[:100]}...")
+    print(f"Response: {test_response.content[:100]}...")

     print("Available models:")
     for m in client.models.list():
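
The hunk above is the heart of this change: Llama Stack exposes an OpenAI-compatible endpoint, so the custom LlamaStackLLM wrapper deleted earlier in this diff is no longer needed. A minimal standalone sketch of the same pattern, using only values that appear in this diff (the test prompt is illustrative):

    import os

    from langchain_openai import ChatOpenAI

    # Point the OpenAI client at the Llama Stack server's OpenAI-compatible
    # route; the API key is unused locally but the client requires one.
    os.environ["OPENAI_API_KEY"] = "dummy"
    os.environ["OPENAI_BASE_URL"] = "http://0.0.0.0:8321/v1/openai/v1"

    llm = ChatOpenAI(model="ollama/llama3:70b-instruct")

    # ChatOpenAI returns an AIMessage rather than a plain string, which is
    # why the print above switches to test_response.content.
    reply = llm.invoke("Write a one-sentence greeting.")
    print(reply.content)
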
@@ -251,19 +229,7 @@ def main():
         print(s.identifier)
         print("----")

-    # model_id = "llama3.2:3b"
     model_id = "ollama/llama3:70b-instruct"
-    response = client.inference.chat_completion(
-        model_id=model_id,
-        messages=[
-            {"role": "system", "content": "You are a friendly assistant."},
-            {"role": "user", "content": "Write a two-sentence poem about llama."},
-        ],
-    )
-    print(response.completion_message.content)

     # Create chains by combining our LLM with prompt templates
     summary_chain = LLMChain(llm=llm, prompt=summary_template)
     facts_chain = LLMChain(llm=llm, prompt=facts_template)
@@ -278,11 +244,14 @@ def main():
     print(" • Q&A: Answers questions based on document content")

     # Test template formatting
-    test_prompt = summary_template.format(document="This is a sample document about AI...")
+    test_prompt = summary_template.format(
+        document="This is a sample document about AI..."
+    )
     print(f"\n📝 Example prompt: {len(test_prompt)} characters")

     # Start the interactive demo
     interactive_demo()

 if __name__ == "__main__":
     main()
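
For reference, a short end-to-end sketch of the LLMChain pattern this file relies on, assuming the same local Llama Stack endpoint configured above (the sample document string is made up):

    from langchain.chains import LLMChain
    from langchain_core.prompts import PromptTemplate
    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI(model="ollama/llama3:70b-instruct")

    summary_template = PromptTemplate(
        input_variables=["document"],
        template="Summarize this document in one sentence:\n{document}\nSUMMARY:",
    )
    summary_chain = LLMChain(llm=llm, prompt=summary_template)

    # LLMChain.invoke returns a dict; the completion lives under "text",
    # hence the ["text"] lookups in process_document and ask_question.
    result = summary_chain.invoke({"document": "Llamas are domesticated camelids."})
    print(result["text"])
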