trying to add docs

This commit is contained in:
ishaan-jaff 2023-07-29 07:06:56 -07:00
parent 0fe8799f94
commit 2cf949990e
834 changed files with 0 additions and 161273 deletions

View file

@ -1,47 +0,0 @@
## Official release
To install LangChain run:
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import CodeBlock from "@theme/CodeBlock";
<Tabs>
<TabItem value="pip" label="Pip" default>
<CodeBlock language="bash">pip install langchain</CodeBlock>
</TabItem>
<TabItem value="conda" label="Conda">
<CodeBlock language="bash">conda install langchain -c conda-forge</CodeBlock>
</TabItem>
</Tabs>
This will install the bare minimum requirements of LangChain.
A lot of the value of LangChain comes when integrating it with various model providers, datastores, etc.
By default, the dependencies needed to do that are NOT installed.
However, there are two other ways to install LangChain that do bring in those dependencies.
To install modules needed for the common LLM providers, run:
```bash
pip install langchain[llms]
```
To install all modules needed for all integrations, run:
```bash
pip install langchain[all]
```
Note that if you are using `zsh`, you'll need to quote square brackets when passing them as an argument to a command, for example:
```bash
pip install 'langchain[all]'
```
## From source
If you want to install from source, you can do so by cloning the repo and running:
```bash
pip install -e .
```

View file

@ -1,13 +0,0 @@
```python
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
llm = OpenAI()
chat_model = ChatOpenAI()
llm.predict("hi!")
>>> "Hi"
chat_model.predict("hi!")
>>> "Hi"
```

View file

@ -1,12 +0,0 @@
```python
from langchain.schema import HumanMessage
text = "What would be a good company name for a company that makes colorful socks?"
messages = [HumanMessage(content=text)]
llm.predict_messages(messages)
# >> Feetful of Fun
chat_model.predict_messages(messages)
# >> Socks O'Color
```

View file

@ -1,9 +0,0 @@
```python
text = "What would be a good company name for a company that makes colorful socks?"
llm.predict(text)
# >> Feetful of Fun
chat_model.predict(text)
# >> Socks O'Color
```

View file

@ -1,12 +0,0 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import CodeBlock from "@theme/CodeBlock";
<Tabs>
<TabItem value="pip" label="Pip" default>
<CodeBlock language="bash">pip install langchain</CodeBlock>
</TabItem>
<TabItem value="conda" label="Conda">
<CodeBlock language="bash">conda install langchain -c conda-forge</CodeBlock>
</TabItem>
</Tabs>

View file

@ -1,34 +0,0 @@
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
from langchain.chains import LLMChain
from langchain.schema import BaseOutputParser
class CommaSeparatedListOutputParser(BaseOutputParser):
"""Parse the output of an LLM call to a comma-separated list."""
def parse(self, text: str):
"""Parse the output of an LLM call."""
return text.strip().split(", ")
template = """You are a helpful assistant who generates comma separated lists.
A user will pass in a category, and you should generated 5 objects in that category in a comma separated list.
ONLY return a comma separated list, and nothing more."""
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template = "{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
chain = LLMChain(
llm=ChatOpenAI(),
prompt=chat_prompt,
output_parser=CommaSeparatedListOutputParser()
)
chain.run("colors")
# >> ['red', 'blue', 'green', 'yellow', 'orange']
```

View file

@ -1,19 +0,0 @@
First we'll need to install their Python package:
```bash
pip install openai
```
Accessing the API requires an API key, which you can get by creating an account and heading [here](https://platform.openai.com/account/api-keys). Once we have a key we'll want to set it as an environment variable by running:
```bash
export OPENAI_API_KEY="..."
```
If you'd prefer not to set an environment variable you can pass the key in directly via the `openai_api_key` named parameter when initiating the OpenAI LLM class:
```python
from langchain.llms import OpenAI
llm = OpenAI(openai_api_key="...")
```

View file

@ -1,14 +0,0 @@
```python
from langchain.schema import BaseOutputParser
class CommaSeparatedListOutputParser(BaseOutputParser):
"""Parse the output of an LLM call to a comma-separated list."""
def parse(self, text: str):
"""Parse the output of an LLM call."""
return text.strip().split(", ")
CommaSeparatedListOutputParser().parse("hi, bye")
# >> ['hi', 'bye']
```

View file

@ -1,23 +0,0 @@
```python
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
template = "You are a helpful assistant that translates {input_language} to {output_language}."
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template = "{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
chat_prompt.format_messages(input_language="English", output_language="French", text="I love programming.")
```
```pycon
[
SystemMessage(content="You are a helpful assistant that translates English to French.", additional_kwargs={}),
HumanMessage(content="I love programming.")
]
```

View file

@ -1,10 +0,0 @@
```python
from langchain.prompts import PromptTemplate
prompt = PromptTemplate.from_template("What is a good name for a company that makes {product}?")
prompt.format(product="colorful socks")
```
```pycon
What is a good name for a company that makes colorful socks?
```

View file

@ -1,130 +0,0 @@
The `chat-conversational-react-description` agent type lets us create a conversational agent using a chat model instead of an LLM.
```python
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0)
agent_chain = initialize_agent(tools, llm, agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, verbose=True, memory=memory)
```
```python
agent_chain.run(input="hi, i am bob")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
{
"action": "Final Answer",
"action_input": "Hello Bob! How can I assist you today?"
}
> Finished chain.
'Hello Bob! How can I assist you today?'
```
</CodeOutputBlock>
```python
agent_chain.run(input="what's my name?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
{
"action": "Final Answer",
"action_input": "Your name is Bob."
}
> Finished chain.
'Your name is Bob.'
```
</CodeOutputBlock>
```python
agent_chain.run("what are some good dinners to make this week, if i like thai food?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
{
"action": "Current Search",
"action_input": "Thai food dinner recipes"
}
Observation: 64 easy Thai recipes for any night of the week · Thai curry noodle soup · Thai yellow cauliflower, snake bean and tofu curry · Thai-spiced chicken hand pies · Thai ...
Thought:{
"action": "Final Answer",
"action_input": "Here are some Thai food dinner recipes you can try this week: Thai curry noodle soup, Thai yellow cauliflower, snake bean and tofu curry, Thai-spiced chicken hand pies, and many more. You can find the full list of recipes at the source I found earlier."
}
> Finished chain.
'Here are some Thai food dinner recipes you can try this week: Thai curry noodle soup, Thai yellow cauliflower, snake bean and tofu curry, Thai-spiced chicken hand pies, and many more. You can find the full list of recipes at the source I found earlier.'
```
</CodeOutputBlock>
```python
agent_chain.run(input="tell me the last letter in my name, and also tell me who won the world cup in 1978?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
{
"action": "Final Answer",
"action_input": "The last letter in your name is 'b'. Argentina won the World Cup in 1978."
}
> Finished chain.
"The last letter in your name is 'b'. Argentina won the World Cup in 1978."
```
</CodeOutputBlock>
```python
agent_chain.run(input="whats the weather like in pomfret?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
{
"action": "Current Search",
"action_input": "weather in pomfret"
}
Observation: Cloudy with showers. Low around 55F. Winds S at 5 to 10 mph. Chance of rain 60%. Humidity76%.
Thought:{
"action": "Final Answer",
"action_input": "Cloudy with showers. Low around 55F. Winds S at 5 to 10 mph. Chance of rain 60%. Humidity76%."
}
> Finished chain.
'Cloudy with showers. Low around 55F. Winds S at 5 to 10 mph. Chance of rain 60%. Humidity76%.'
```
</CodeOutputBlock>

View file

@ -1,150 +0,0 @@
This is accomplished with a specific type of agent (`conversational-react-description`) which expects to be used with a memory component.
```python
from langchain.agents import Tool
from langchain.agents import AgentType
from langchain.memory import ConversationBufferMemory
from langchain import OpenAI
from langchain.utilities import SerpAPIWrapper
from langchain.agents import initialize_agent
```
```python
search = SerpAPIWrapper()
tools = [
Tool(
name = "Current Search",
func=search.run,
description="useful for when you need to answer questions about current events or the current state of the world"
),
]
```
```python
memory = ConversationBufferMemory(memory_key="chat_history")
```
```python
llm=OpenAI(temperature=0)
agent_chain = initialize_agent(tools, llm, agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION, verbose=True, memory=memory)
```
```python
agent_chain.run(input="hi, i am bob")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Thought: Do I need to use a tool? No
AI: Hi Bob, nice to meet you! How can I help you today?
> Finished chain.
'Hi Bob, nice to meet you! How can I help you today?'
```
</CodeOutputBlock>
```python
agent_chain.run(input="what's my name?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Thought: Do I need to use a tool? No
AI: Your name is Bob!
> Finished chain.
'Your name is Bob!'
```
</CodeOutputBlock>
```python
agent_chain.run("what are some good dinners to make this week, if i like thai food?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Thought: Do I need to use a tool? Yes
Action: Current Search
Action Input: Thai food dinner recipes
Observation: 59 easy Thai recipes for any night of the week · Marion Grasby's Thai spicy chilli and basil fried rice · Thai curry noodle soup · Marion Grasby's Thai Spicy ...
Thought: Do I need to use a tool? No
AI: Here are some great Thai dinner recipes you can try this week: Marion Grasby's Thai Spicy Chilli and Basil Fried Rice, Thai Curry Noodle Soup, Thai Green Curry with Coconut Rice, Thai Red Curry with Vegetables, and Thai Coconut Soup. I hope you enjoy them!
> Finished chain.
"Here are some great Thai dinner recipes you can try this week: Marion Grasby's Thai Spicy Chilli and Basil Fried Rice, Thai Curry Noodle Soup, Thai Green Curry with Coconut Rice, Thai Red Curry with Vegetables, and Thai Coconut Soup. I hope you enjoy them!"
```
</CodeOutputBlock>
```python
agent_chain.run(input="tell me the last letter in my name, and also tell me who won the world cup in 1978?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Thought: Do I need to use a tool? Yes
Action: Current Search
Action Input: Who won the World Cup in 1978
Observation: Argentina national football team
Thought: Do I need to use a tool? No
AI: The last letter in your name is "b" and the winner of the 1978 World Cup was the Argentina national football team.
> Finished chain.
'The last letter in your name is "b" and the winner of the 1978 World Cup was the Argentina national football team.'
```
</CodeOutputBlock>
```python
agent_chain.run(input="whats the current temperature in pomfret?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Thought: Do I need to use a tool? Yes
Action: Current Search
Action Input: Current temperature in Pomfret
Observation: Partly cloudy skies. High around 70F. Winds W at 5 to 10 mph. Humidity41%.
Thought: Do I need to use a tool? No
AI: The current temperature in Pomfret is around 70F with partly cloudy skies and winds W at 5 to 10 mph. The humidity is 41%.
> Finished chain.
'The current temperature in Pomfret is around 70F with partly cloudy skies and winds W at 5 to 10 mph. The humidity is 41%.'
```
</CodeOutputBlock>

View file

@ -1,76 +0,0 @@
Install openai,google-search-results packages which are required as the langchain packages call them internally
>pip install openai google-search-results
```python
from langchain import LLMMathChain, OpenAI, SerpAPIWrapper, SQLDatabase, SQLDatabaseChain
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI
```
```python
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
search = SerpAPIWrapper()
llm_math_chain = LLMMathChain.from_llm(llm=llm, verbose=True)
db = SQLDatabase.from_uri("sqlite:///../../../../../notebooks/Chinook.db")
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)
tools = [
Tool(
name = "Search",
func=search.run,
description="useful for when you need to answer questions about current events. You should ask targeted questions"
),
Tool(
name="Calculator",
func=llm_math_chain.run,
description="useful for when you need to answer questions about math"
),
Tool(
name="FooBar-DB",
func=db_chain.run,
description="useful for when you need to answer questions about FooBar. Input should be in the form of a question containing full context"
)
]
```
```python
agent = initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)
```
```python
agent.run("Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?")
```
<CodeOutputBlock lang="python">
```
> Entering new chain...
Invoking: `Search` with `{'query': 'Leo DiCaprio girlfriend'}`
Amidst his casual romance with Gigi, Leo allegedly entered a relationship with 19-year old model, Eden Polani, in February 2023.
Invoking: `Calculator` with `{'expression': '19^0.43'}`
> Entering new chain...
19^0.43```text
19**0.43
```
...numexpr.evaluate("19**0.43")...
Answer: 3.547023357958959
> Finished chain.
Answer: 3.547023357958959Leo DiCaprio's girlfriend is reportedly Eden Polani. Her current age raised to the power of 0.43 is approximately 3.55.
> Finished chain.
"Leo DiCaprio's girlfriend is reportedly Eden Polani. Her current age raised to the power of 0.43 is approximately 3.55."
```
</CodeOutputBlock>

View file

@ -1,228 +0,0 @@
## Imports
```python
from langchain.chat_models import ChatOpenAI
from langchain.experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner
from langchain.llms import OpenAI
from langchain import SerpAPIWrapper
from langchain.agents.tools import Tool
from langchain import LLMMathChain
```
## Tools
```python
search = SerpAPIWrapper()
llm = OpenAI(temperature=0)
llm_math_chain = LLMMathChain.from_llm(llm=llm, verbose=True)
tools = [
Tool(
name = "Search",
func=search.run,
description="useful for when you need to answer questions about current events"
),
Tool(
name="Calculator",
func=llm_math_chain.run,
description="useful for when you need to answer questions about math"
),
]
```
## Planner, Executor, and Agent
```python
model = ChatOpenAI(temperature=0)
```
```python
planner = load_chat_planner(model)
```
```python
executor = load_agent_executor(model, tools, verbose=True)
```
```python
agent = PlanAndExecute(planner=planner, executor=executor, verbose=True)
```
## Run Example
```python
agent.run("Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?")
```
<CodeOutputBlock lang="python">
```
> Entering new PlanAndExecute chain...
steps=[Step(value="Search for Leo DiCaprio's girlfriend on the internet."), Step(value='Find her current age.'), Step(value='Raise her current age to the 0.43 power using a calculator or programming language.'), Step(value='Output the result.'), Step(value="Given the above steps taken, respond to the user's original question.\n\n")]
> Entering new AgentExecutor chain...
Action:
```
{
"action": "Search",
"action_input": "Who is Leo DiCaprio's girlfriend?"
}
```
Observation: DiCaprio broke up with girlfriend Camila Morrone, 25, in the summer of 2022, after dating for four years. He's since been linked to another famous supermodel Gigi Hadid. The power couple were first supposedly an item in September after being spotted getting cozy during a party at New York Fashion Week.
Thought:Based on the previous observation, I can provide the answer to the current objective.
Action:
```
{
"action": "Final Answer",
"action_input": "Leo DiCaprio is currently linked to Gigi Hadid."
}
```
> Finished chain.
*****
Step: Search for Leo DiCaprio's girlfriend on the internet.
Response: Leo DiCaprio is currently linked to Gigi Hadid.
> Entering new AgentExecutor chain...
Action:
```
{
"action": "Search",
"action_input": "What is Gigi Hadid's current age?"
}
```
Observation: 28 years
Thought:Previous steps: steps=[(Step(value="Search for Leo DiCaprio's girlfriend on the internet."), StepResponse(response='Leo DiCaprio is currently linked to Gigi Hadid.'))]
Current objective: value='Find her current age.'
Action:
```
{
"action": "Search",
"action_input": "What is Gigi Hadid's current age?"
}
```
Observation: 28 years
Thought:Previous steps: steps=[(Step(value="Search for Leo DiCaprio's girlfriend on the internet."), StepResponse(response='Leo DiCaprio is currently linked to Gigi Hadid.')), (Step(value='Find her current age.'), StepResponse(response='28 years'))]
Current objective: None
Action:
```
{
"action": "Final Answer",
"action_input": "Gigi Hadid's current age is 28 years."
}
```
> Finished chain.
*****
Step: Find her current age.
Response: Gigi Hadid's current age is 28 years.
> Entering new AgentExecutor chain...
Action:
```
{
"action": "Calculator",
"action_input": "28 ** 0.43"
}
```
> Entering new LLMMathChain chain...
28 ** 0.43
```text
28 ** 0.43
```
...numexpr.evaluate("28 ** 0.43")...
Answer: 4.1906168361987195
> Finished chain.
Observation: Answer: 4.1906168361987195
Thought:The next step is to provide the answer to the user's question.
Action:
```
{
"action": "Final Answer",
"action_input": "Gigi Hadid's current age raised to the 0.43 power is approximately 4.19."
}
```
> Finished chain.
*****
Step: Raise her current age to the 0.43 power using a calculator or programming language.
Response: Gigi Hadid's current age raised to the 0.43 power is approximately 4.19.
> Entering new AgentExecutor chain...
Action:
```
{
"action": "Final Answer",
"action_input": "The result is approximately 4.19."
}
```
> Finished chain.
*****
Step: Output the result.
Response: The result is approximately 4.19.
> Entering new AgentExecutor chain...
Action:
```
{
"action": "Final Answer",
"action_input": "Gigi Hadid's current age raised to the 0.43 power is approximately 4.19."
}
```
> Finished chain.
*****
Step: Given the above steps taken, respond to the user's original question.
Response: Gigi Hadid's current age raised to the 0.43 power is approximately 4.19.
> Finished chain.
"Gigi Hadid's current age raised to the 0.43 power is approximately 4.19."
```
</CodeOutputBlock>

View file

@ -1,62 +0,0 @@
```python
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import OpenAI
```
First, let's load the language model we're going to use to control the agent.
```python
llm = OpenAI(temperature=0)
```
Next, let's load some tools to use. Note that the `llm-math` tool uses an LLM, so we need to pass that in.
```python
tools = load_tools(["serpapi", "llm-math"], llm=llm)
```
Finally, let's initialize an agent with the tools, the language model, and the type of agent we want to use.
```python
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
```
Now let's test it out!
```python
agent.run("Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
I need to find out who Leo DiCaprio's girlfriend is and then calculate her age raised to the 0.43 power.
Action: Search
Action Input: "Leo DiCaprio girlfriend"
Observation: Camila Morrone
Thought: I need to find out Camila Morrone's age
Action: Search
Action Input: "Camila Morrone age"
Observation: 25 years
Thought: I need to calculate 25 raised to the 0.43 power
Action: Calculator
Action Input: 25^0.43
Observation: Answer: 3.991298452658078
Thought: I now know the final answer
Final Answer: Camila Morrone is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is 3.991298452658078.
> Finished chain.
"Camila Morrone is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is 3.991298452658078."
```
</CodeOutputBlock>

View file

@ -1,7 +0,0 @@
```python
from langchain.chat_models import ChatOpenAI
chat_model = ChatOpenAI(temperature=0)
agent = initialize_agent(tools, chat_model, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
agent.run("Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?")
```

View file

@ -1,279 +0,0 @@
This functionality is natively available using agent types: `structured-chat-zero-shot-react-description` or `AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION`
```python
import os
os.environ["LANGCHAIN_TRACING"] = "true" # If you want to trace the execution of the program, set to "true"
```
```python
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent
```
### Initialize Tools
We will test the agent using a web browser.
```python
from langchain.agents.agent_toolkits import PlayWrightBrowserToolkit
from langchain.tools.playwright.utils import (
create_async_playwright_browser,
create_sync_playwright_browser, # A synchronous browser is available, though it isn't compatible with jupyter.
)
# This import is required only for jupyter notebooks, since they have their own eventloop
import nest_asyncio
nest_asyncio.apply()
```
```python
async_browser = create_async_playwright_browser()
browser_toolkit = PlayWrightBrowserToolkit.from_browser(async_browser=async_browser)
tools = browser_toolkit.get_tools()
```
```python
llm = ChatOpenAI(temperature=0) # Also works well with Anthropic models
agent_chain = initialize_agent(tools, llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
```
```python
response = await agent_chain.arun(input="Hi I'm Erica.")
print(response)
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Action:
```
{
"action": "Final Answer",
"action_input": "Hello Erica, how can I assist you today?"
}
```
> Finished chain.
Hello Erica, how can I assist you today?
```
</CodeOutputBlock>
```python
response = await agent_chain.arun(input="Don't need help really just chatting.")
print(response)
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
> Finished chain.
I'm here to chat! How's your day going?
```
</CodeOutputBlock>
```python
response = await agent_chain.arun(input="Browse to blog.langchain.dev and summarize the text, please.")
print(response)
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Action:
```
{
"action": "navigate_browser",
"action_input": {
"url": "https://blog.langchain.dev/"
}
}
```
Observation: Navigating to https://blog.langchain.dev/ returned status code 200
Thought:I need to extract the text from the webpage to summarize it.
Action:
```
{
"action": "extract_text",
"action_input": {}
}
```
Observation: LangChain LangChain Home About GitHub Docs LangChain The official LangChain blog. Auto-Evaluator Opportunities Editor's Note: this is a guest blog post by Lance Martin.
TL;DR
We recently open-sourced an auto-evaluator tool for grading LLM question-answer chains. We are now releasing an open source, free to use hosted app and API to expand usability. Below we discuss a few opportunities to further improve May 1, 2023 5 min read Callbacks Improvements TL;DR: We're announcing improvements to our callbacks system, which powers logging, tracing, streaming output, and some awesome third-party integrations. This will better support concurrent runs with independent callbacks, tracing of deeply nested trees of LangChain components, and callback handlers scoped to a single request (which is super useful for May 1, 2023 3 min read Unleashing the power of AI Collaboration with Parallelized LLM Agent Actor Trees Editor's note: the following is a guest blog post from Cyrus at Shaman AI. We use guest blog posts to highlight interesting and novel applications, and this is certainly that. There's been a lot of talk about agents recently, but most have been discussions around a single agent. If multiple Apr 28, 2023 4 min read Gradio & LLM Agents Editor's note: this is a guest blog post from Freddy Boulton, a software engineer at Gradio. We're excited to share this post because it brings a large number of exciting new tools into the ecosystem. Agents are largely defined by the tools they have, so to be able to equip Apr 23, 2023 4 min read RecAlign - The smart content filter for social media feed [Editor's Note] This is a guest post by Tian Jin. We are highlighting this application as we think it is a novel use case. Specifically, we think recommendation systems are incredibly impactful in our everyday lives and there has not been a ton of discourse on how LLMs will impact Apr 22, 2023 3 min read Improving Document Retrieval with Contextual Compression Note: This post assumes some familiarity with LangChain and is moderately technical.
💡 TL;DR: Weve introduced a new abstraction and a new document Retriever to facilitate the post-processing of retrieved documents. Specifically, the new abstraction makes it easy to take a set of retrieved documents and extract from them Apr 20, 2023 3 min read Autonomous Agents & Agent Simulations Over the past two weeks, there has been a massive increase in using LLMs in an agentic manner. Specifically, projects like AutoGPT, BabyAGI, CAMEL, and Generative Agents have popped up. The LangChain community has now implemented some parts of all of those projects in the LangChain framework. While researching and Apr 18, 2023 7 min read AI-Powered Medical Knowledge: Revolutionizing Care for Rare Conditions [Editor's Note]: This is a guest post by Jack Simon, who recently participated in a hackathon at Williams College. He built a LangChain-powered chatbot focused on appendiceal cancer, aiming to make specialized knowledge more accessible to those in need. If you are interested in building a chatbot for another rare Apr 17, 2023 3 min read Auto-Eval of Question-Answering Tasks By Lance Martin
Context
LLM ops platforms, such as LangChain, make it easy to assemble LLM components (e.g., models, document retrievers, data loaders) into chains. Question-Answering is one of the most popular applications of these chains. But it is often not always obvious to determine what parameters (e.g. Apr 15, 2023 3 min read Announcing LangChainJS Support for Multiple JS Environments TLDR: We're announcing support for running LangChain.js in browsers, Cloudflare Workers, Vercel/Next.js, Deno, Supabase Edge Functions, alongside existing support for Node.js ESM and CJS. See install/upgrade docs and breaking changes list.
Context
Originally we designed LangChain.js to run in Node.js, which is the Apr 11, 2023 3 min read LangChain x Supabase Supabase is holding an AI Hackathon this week. Here at LangChain we are big fans of both Supabase and hackathons, so we thought this would be a perfect time to highlight the multiple ways you can use LangChain and Supabase together.
The reason we like Supabase so much is that Apr 8, 2023 2 min read Announcing our $10M seed round led by Benchmark It was only six months ago that we released the first version of LangChain, but it seems like several years. When we launched, generative AI was starting to go mainstream: stable diffusion had just been released and was captivating peoples imagination and fueling an explosion in developer activity, Jasper Apr 4, 2023 4 min read Custom Agents One of the most common requests we've heard is better functionality and documentation for creating custom agents. This has always been a bit tricky - because in our mind it's actually still very unclear what an "agent" actually is, and therefore what the "right" abstractions for them may be. Recently, Apr 3, 2023 3 min read Retrieval TL;DR: We are adjusting our abstractions to make it easy for other retrieval methods besides the LangChain VectorDB object to be used in LangChain. This is done with the goals of (1) allowing retrievers constructed elsewhere to be used more easily in LangChain, (2) encouraging more experimentation with alternative Mar 23, 2023 4 min read LangChain + Zapier Natural Language Actions (NLA) We are super excited to team up with Zapier and integrate their new Zapier NLA API into LangChain, which you can now use with your agents and chains. With this integration, you have access to the 5k+ apps and 20k+ actions on Zapier's platform through a natural language API interface. Mar 16, 2023 2 min read Evaluation Evaluation of language models, and by extension applications built on top of language models, is hard. With recent model releases (OpenAI, Anthropic, Google) evaluation is becoming a bigger and bigger issue. People are starting to try to tackle this, with OpenAI releasing OpenAI/evals - focused on evaluating OpenAI models. Mar 14, 2023 3 min read LLMs and SQL Francisco Ingham and Jon Luo are two of the community members leading the change on the SQL integrations. Were really excited to write this blog post with them going over all the tips and tricks theyve learned doing so. Were even more excited to announce that we Mar 13, 2023 8 min read Origin Web Browser [Editor's Note]: This is the second of hopefully many guest posts. We intend to highlight novel applications building on top of LangChain. If you are interested in working with us on such a post, please reach out to harrison@langchain.dev.
Authors: Parth Asawa (pgasawa@), Ayushi Batwara (ayushi.batwara@), Jason Mar 8, 2023 4 min read Prompt Selectors One common complaint we've heard is that the default prompt templates do not work equally well for all models. This became especially pronounced this past week when OpenAI released a ChatGPT API. This new API had a completely new interface (which required new abstractions) and as a result many users Mar 8, 2023 2 min read Chat Models Last week OpenAI released a ChatGPT endpoint. It came marketed with several big improvements, most notably being 10x cheaper and a lot faster. But it also came with a completely new API endpoint. We were able to quickly write a wrapper for this endpoint to let users use it like Mar 6, 2023 6 min read Using the ChatGPT API to evaluate the ChatGPT API OpenAI released a new ChatGPT API yesterday. Lots of people were excited to try it. But how does it actually compare to the existing API? It will take some time before there is a definitive answer, but here are some initial thoughts. Because I'm lazy, I also enrolled the help Mar 2, 2023 5 min read Agent Toolkits Today, we're announcing agent toolkits, a new abstraction that allows developers to create agents designed for a particular use-case (for example, interacting with a relational database or interacting with an OpenAPI spec). We hope to continue developing different toolkits that can enable agents to do amazing feats. Toolkits are supported Mar 1, 2023 3 min read TypeScript Support It's finally here... TypeScript support for LangChain.
What does this mean? It means that all your favorite prompts, chains, and agents are all recreatable in TypeScript natively. Both the Python version and TypeScript version utilize the same serializable format, meaning that artifacts can seamlessly be shared between languages. As an Feb 17, 2023 2 min read Streaming Support in LangChain Were excited to announce streaming support in LangChain. There's been a lot of talk about the best UX for LLM applications, and we believe streaming is at its core. Weve also updated the chat-langchain repo to include streaming and async execution. We hope that this repo can serve Feb 14, 2023 2 min read LangChain + Chroma Today were announcing LangChain's integration with Chroma, the first step on the path to the Modern A.I Stack.
LangChain - The A.I-native developer toolkit
We started LangChain with the intent to build a modular and flexible framework for developing A.I-native applications. Some of the use cases Feb 13, 2023 2 min read Page 1 of 2 Older Posts → LangChain © 2023 Sign up Powered by Ghost
Thought:
> Finished chain.
The LangChain blog has recently released an open-source auto-evaluator tool for grading LLM question-answer chains and is now releasing an open-source, free-to-use hosted app and API to expand usability. The blog also discusses various opportunities to further improve the LangChain platform.
```
</CodeOutputBlock>
```python
response = await agent_chain.arun(input="What's the latest xkcd comic about?")
print(response)
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Thought: I can navigate to the xkcd website and extract the latest comic title and alt text to answer the question.
Action:
```
{
"action": "navigate_browser",
"action_input": {
"url": "https://xkcd.com/"
}
}
```
Observation: Navigating to https://xkcd.com/ returned status code 200
Thought:I can extract the latest comic title and alt text using CSS selectors.
Action:
```
{
"action": "get_elements",
"action_input": {
"selector": "#ctitle, #comic img",
"attributes": ["alt", "src"]
}
}
```
Observation: [{"alt": "Tapetum Lucidum", "src": "//imgs.xkcd.com/comics/tapetum_lucidum.png"}]
Thought:
> Finished chain.
The latest xkcd comic is titled "Tapetum Lucidum" and the image can be found at https://xkcd.com/2565/.
```
</CodeOutputBlock>
## Adding in memory
Here is how you add in memory to this agent
```python
from langchain.prompts import MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
```
```python
chat_history = MessagesPlaceholder(variable_name="chat_history")
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
```
```python
agent_chain = initialize_agent(
tools,
llm,
agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
verbose=True,
memory=memory,
agent_kwargs = {
"memory_prompts": [chat_history],
"input_variables": ["input", "agent_scratchpad", "chat_history"]
}
)
```
```python
response = await agent_chain.arun(input="Hi I'm Erica.")
print(response)
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Action:
```
{
"action": "Final Answer",
"action_input": "Hi Erica! How can I assist you today?"
}
```
> Finished chain.
Hi Erica! How can I assist you today?
```
</CodeOutputBlock>
```python
response = await agent_chain.arun(input="whats my name?")
print(response)
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Your name is Erica.
> Finished chain.
Your name is Erica.
```
</CodeOutputBlock>

View file

@ -1,132 +0,0 @@
This will go over how to get started building an agent.
We will use a LangChain agent class, but show how to customize it to give it specific context.
We will then define custom tools, and then run it all in the standard LangChain AgentExecutor.
### Set up the agent
We will use the OpenAIFunctionsAgent.
This is easiest and best agent to get started with.
It does however require usage of ChatOpenAI models.
If you want to use a different language model, we would recommend using the [ReAct](/docs/modules/agents/agent_types/react) agent.
For this guide, we will construct a custom agent that has access to a custom tool.
We are choosing this example because we think for most use cases you will NEED to customize either the agent or the tools.
The tool we will give the agent is a tool to calculate the length of a word.
This is useful because this is actually something LLMs can mess up due to tokenization.
We will first create it WITHOUT memory, but we will then show how to add memory in.
Memory is needed to enable conversation.
First, let's load the language model we're going to use to control the agent.
```python
from langchain.chat_models import ChatOpenAI
llm = ChatOpenAI(temperature=0)
```
Next, let's define some tools to use.
Let's write a really simple Python function to calculate the length of a word that is passed in.
```python
from langchain.agents import tool
@tool
def get_word_length(word: str) -> int:
"""Returns the length of a word."""
return len(word)
tools = [get_word_length]
```
Now let us create the prompt.
We can use the `OpenAIFunctionsAgent.create_prompt` helper function to create a prompt automatically.
This allows for a few different ways to customize, including passing in a custom SystemMessage, which we will do.
```python
from langchain.schema import SystemMessage
system_message = SystemMessage(content="You are very powerful assistant, but bad at calculating lengths of words.")
prompt = OpenAIFunctionsAgent.create_prompt(system_message=system_message)
```
Putting those pieces together, we can now create the agent.
```python
from langchain.agents import OpenAIFunctionsAgent
agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
```
Finally, we create the AgentExecutor - the runtime for our agent.
```python
from langchain.agents import AgentExecutor
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
```
Now let's test it out!
```python
agent_executor.run("how many letters in the word educa?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Invoking: `get_word_length` with `{'word': 'educa'}`
5
There are 5 letters in the word "educa".
> Finished chain.
'There are 5 letters in the word "educa".'
```
</CodeOutputBlock>
This is great - we have an agent!
However, this agent is stateless - it doesn't remember anything about previous interactions.
This means you can't ask follow up questions easily.
Let's fix that by adding in memory.
In order to do this, we need to do two things:
1. Add a place for memory variables to go in the prompt
2. Add memory to the AgentExecutor (note that we add it here, and NOT to the agent, as this is the outermost chain)
First, let's add a place for memory in the prompt.
We do this by adding a placeholder for messages with the key `"chat_history"`.
```python
from langchain.prompts import MessagesPlaceholder
MEMORY_KEY = "chat_history"
prompt = OpenAIFunctionsAgent.create_prompt(
system_message=system_message,
extra_prompt_messages=[MessagesPlaceholder(variable_name=MEMORY_KEY)]
)
```
Next, let's create a memory object.
We will do this by using `ConversationBufferMemory`.
Importantly, we set `memory_key` also equal to `"chat_history"` (to align it with the prompt) and set `return_messages` (to make it return messages rather than a string).
```python
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(memory_key=MEMORY_KEY, return_messages=True)
```
We can then put it all together!
```python
agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory, verbose=True)
agent_executor.run("how many letters in the word educa?")
agent_executor.run("is that a real word?")
```

View file

@ -1,356 +0,0 @@
The LLMAgent is used in an AgentExecutor. This AgentExecutor can largely be thought of as a loop that:
1. Passes user input and any previous steps to the Agent (in this case, the LLMAgent)
2. If the Agent returns an `AgentFinish`, then return that directly to the user
3. If the Agent returns an `AgentAction`, then use that to call a tool and get an `Observation`
4. Repeat, passing the `AgentAction` and `Observation` back to the Agent until an `AgentFinish` is emitted.
`AgentAction` is a response that consists of `action` and `action_input`. `action` refers to which tool to use, and `action_input` refers to the input to that tool. `log` can also be provided as more context (that can be used for logging, tracing, etc).
`AgentFinish` is a response that contains the final message to be sent back to the user. This should be used to end an agent run.
In this notebook we walk through how to create a custom LLM agent.
## Set up environment
Do necessary imports, etc.
```python
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.prompts import StringPromptTemplate
from langchain import OpenAI, SerpAPIWrapper, LLMChain
from typing import List, Union
from langchain.schema import AgentAction, AgentFinish, OutputParserException
import re
```
## Set up tool
Set up any tools the agent may want to use. This may be necessary to put in the prompt (so that the agent knows to use these tools).
```python
# Define which tools the agent can use to answer user queries
search = SerpAPIWrapper()
tools = [
Tool(
name = "Search",
func=search.run,
description="useful for when you need to answer questions about current events"
)
]
```
## Prompt Template
This instructs the agent on what to do. Generally, the template should incorporate:
- `tools`: which tools the agent has access and how and when to call them.
- `intermediate_steps`: These are tuples of previous (`AgentAction`, `Observation`) pairs. These are generally not passed directly to the model, but the prompt template formats them in a specific way.
- `input`: generic user input
```python
# Set up the base template
template = """Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:
{tools}
Use the following format:
Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question
Begin! Remember to speak as a pirate when giving your final answer. Use lots of "Arg"s
Question: {input}
{agent_scratchpad}"""
```
```python
# Set up a prompt template
class CustomPromptTemplate(StringPromptTemplate):
# The template to use
template: str
# The list of tools available
tools: List[Tool]
def format(self, **kwargs) -> str:
# Get the intermediate steps (AgentAction, Observation tuples)
# Format them in a particular way
intermediate_steps = kwargs.pop("intermediate_steps")
thoughts = ""
for action, observation in intermediate_steps:
thoughts += action.log
thoughts += f"\nObservation: {observation}\nThought: "
# Set the agent_scratchpad variable to that value
kwargs["agent_scratchpad"] = thoughts
# Create a tools variable from the list of tools provided
kwargs["tools"] = "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
# Create a list of tool names for the tools provided
kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools])
return self.template.format(**kwargs)
```
```python
prompt = CustomPromptTemplate(
template=template,
tools=tools,
# This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically
# This includes the `intermediate_steps` variable because that is needed
input_variables=["input", "intermediate_steps"]
)
```
## Output Parser
The output parser is responsible for parsing the LLM output into `AgentAction` and `AgentFinish`. This usually depends heavily on the prompt used.
This is where you can change the parsing to do retries, handle whitespace, etc
```python
class CustomOutputParser(AgentOutputParser):
def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
# Check if agent should finish
if "Final Answer:" in llm_output:
return AgentFinish(
# Return values is generally always a dictionary with a single `output` key
# It is not recommended to try anything else at the moment :)
return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
log=llm_output,
)
# Parse out the action and action input
regex = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
match = re.search(regex, llm_output, re.DOTALL)
if not match:
raise OutputParserException(f"Could not parse LLM output: `{llm_output}`")
action = match.group(1).strip()
action_input = match.group(2)
# Return the action and action input
return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
```
```python
output_parser = CustomOutputParser()
```
## Set up LLM
Choose the LLM you want to use!
```python
llm = OpenAI(temperature=0)
```
## Define the stop sequence
This is important because it tells the LLM when to stop generation.
This depends heavily on the prompt and model you are using. Generally, you want this to be whatever token you use in the prompt to denote the start of an `Observation` (otherwise, the LLM may hallucinate an observation for you).
## Set up the Agent
We can now combine everything to set up our agent
```python
# LLM chain consisting of the LLM and a prompt
llm_chain = LLMChain(llm=llm, prompt=prompt)
```
```python
tool_names = [tool.name for tool in tools]
agent = LLMSingleActionAgent(
llm_chain=llm_chain,
output_parser=output_parser,
stop=["\nObservation:"],
allowed_tools=tool_names
)
```
## Use the Agent
Now we can use it!
```python
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
```
```python
agent_executor.run("How many people live in canada as of 2023?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Thought: I need to find out the population of Canada in 2023
Action: Search
Action Input: Population of Canada in 2023
Observation:The current population of Canada is 38,658,314 as of Wednesday, April 12, 2023, based on Worldometer elaboration of the latest United Nations data. I now know the final answer
Final Answer: Arrr, there be 38,658,314 people livin' in Canada as of 2023!
> Finished chain.
"Arrr, there be 38,658,314 people livin' in Canada as of 2023!"
```
</CodeOutputBlock>
## Adding Memory
If you want to add memory to the agent, you'll need to:
1. Add a place in the custom prompt for the chat_history
2. Add a memory object to the agent executor.
```python
# Set up the base template
template_with_history = """Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:
{tools}
Use the following format:
Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question
Begin! Remember to speak as a pirate when giving your final answer. Use lots of "Arg"s
Previous conversation history:
{history}
New question: {input}
{agent_scratchpad}"""
```
```python
prompt_with_history = CustomPromptTemplate(
template=template_with_history,
tools=tools,
# This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically
# This includes the `intermediate_steps` variable because that is needed
input_variables=["input", "intermediate_steps", "history"]
)
```
```python
llm_chain = LLMChain(llm=llm, prompt=prompt_with_history)
```
```python
tool_names = [tool.name for tool in tools]
agent = LLMSingleActionAgent(
llm_chain=llm_chain,
output_parser=output_parser,
stop=["\nObservation:"],
allowed_tools=tool_names
)
```
```python
from langchain.memory import ConversationBufferWindowMemory
```
```python
memory=ConversationBufferWindowMemory(k=2)
```
```python
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory)
```
```python
agent_executor.run("How many people live in canada as of 2023?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Thought: I need to find out the population of Canada in 2023
Action: Search
Action Input: Population of Canada in 2023
Observation:The current population of Canada is 38,658,314 as of Wednesday, April 12, 2023, based on Worldometer elaboration of the latest United Nations data. I now know the final answer
Final Answer: Arrr, there be 38,658,314 people livin' in Canada as of 2023!
> Finished chain.
"Arrr, there be 38,658,314 people livin' in Canada as of 2023!"
```
</CodeOutputBlock>
```python
agent_executor.run("how about in mexico?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Thought: I need to find out how many people live in Mexico.
Action: Search
Action Input: How many people live in Mexico as of 2023?
Observation:The current population of Mexico is 132,679,922 as of Tuesday, April 11, 2023, based on Worldometer elaboration of the latest United Nations data. Mexico 2020 ... I now know the final answer.
Final Answer: Arrr, there be 132,679,922 people livin' in Mexico as of 2023!
> Finished chain.
"Arrr, there be 132,679,922 people livin' in Mexico as of 2023!"
```
</CodeOutputBlock>

View file

@ -1,247 +0,0 @@
The LLMAgent is used in an AgentExecutor. This AgentExecutor can largely be thought of as a loop that:
1. Passes user input and any previous steps to the Agent (in this case, the LLMAgent)
2. If the Agent returns an `AgentFinish`, then return that directly to the user
3. If the Agent returns an `AgentAction`, then use that to call a tool and get an `Observation`
4. Repeat, passing the `AgentAction` and `Observation` back to the Agent until an `AgentFinish` is emitted.
`AgentAction` is a response that consists of `action` and `action_input`. `action` refers to which tool to use, and `action_input` refers to the input to that tool. `log` can also be provided as more context (that can be used for logging, tracing, etc).
`AgentFinish` is a response that contains the final message to be sent back to the user. This should be used to end an agent run.
In this notebook we walk through how to create a custom LLM agent.
## Set up environment
Do necessary imports, etc.
```bash
pip install langchain
pip install google-search-results
pip install openai
```
```python
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.prompts import BaseChatPromptTemplate
from langchain import SerpAPIWrapper, LLMChain
from langchain.chat_models import ChatOpenAI
from typing import List, Union
from langchain.schema import AgentAction, AgentFinish, HumanMessage
import re
from getpass import getpass
```
## Set up tool
Set up any tools the agent may want to use. This may be necessary to put in the prompt (so that the agent knows to use these tools).
```python
SERPAPI_API_KEY = getpass()
```
```python
# Define which tools the agent can use to answer user queries
search = SerpAPIWrapper(serpapi_api_key=SERPAPI_API_KEY)
tools = [
Tool(
name = "Search",
func=search.run,
description="useful for when you need to answer questions about current events"
)
]
```
## Prompt Template
This instructs the agent on what to do. Generally, the template should incorporate:
- `tools`: which tools the agent has access and how and when to call them.
- `intermediate_steps`: These are tuples of previous (`AgentAction`, `Observation`) pairs. These are generally not passed directly to the model, but the prompt template formats them in a specific way.
- `input`: generic user input
```python
# Set up the base template
template = """Complete the objective as best you can. You have access to the following tools:
{tools}
Use the following format:
Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question
These were previous tasks you completed:
Begin!
Question: {input}
{agent_scratchpad}"""
```
```python
# Set up a prompt template
class CustomPromptTemplate(BaseChatPromptTemplate):
# The template to use
template: str
# The list of tools available
tools: List[Tool]
def format_messages(self, **kwargs) -> str:
# Get the intermediate steps (AgentAction, Observation tuples)
# Format them in a particular way
intermediate_steps = kwargs.pop("intermediate_steps")
thoughts = ""
for action, observation in intermediate_steps:
thoughts += action.log
thoughts += f"\nObservation: {observation}\nThought: "
# Set the agent_scratchpad variable to that value
kwargs["agent_scratchpad"] = thoughts
# Create a tools variable from the list of tools provided
kwargs["tools"] = "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
# Create a list of tool names for the tools provided
kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools])
formatted = self.template.format(**kwargs)
return [HumanMessage(content=formatted)]
```
```python
prompt = CustomPromptTemplate(
template=template,
tools=tools,
# This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically
# This includes the `intermediate_steps` variable because that is needed
input_variables=["input", "intermediate_steps"]
)
```
## Output Parser
The output parser is responsible for parsing the LLM output into `AgentAction` and `AgentFinish`. This usually depends heavily on the prompt used.
This is where you can change the parsing to do retries, handle whitespace, etc
```python
class CustomOutputParser(AgentOutputParser):
def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
# Check if agent should finish
if "Final Answer:" in llm_output:
return AgentFinish(
# Return values is generally always a dictionary with a single `output` key
# It is not recommended to try anything else at the moment :)
return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
log=llm_output,
)
# Parse out the action and action input
regex = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
match = re.search(regex, llm_output, re.DOTALL)
if not match:
raise ValueError(f"Could not parse LLM output: `{llm_output}`")
action = match.group(1).strip()
action_input = match.group(2)
# Return the action and action input
return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
```
```python
output_parser = CustomOutputParser()
```
## Set up LLM
Choose the LLM you want to use!
```python
OPENAI_API_KEY = getpass()
```
```python
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0)
```
## Define the stop sequence
This is important because it tells the LLM when to stop generation.
This depends heavily on the prompt and model you are using. Generally, you want this to be whatever token you use in the prompt to denote the start of an `Observation` (otherwise, the LLM may hallucinate an observation for you).
## Set up the Agent
We can now combine everything to set up our agent
```python
# LLM chain consisting of the LLM and a prompt
llm_chain = LLMChain(llm=llm, prompt=prompt)
```
```python
tool_names = [tool.name for tool in tools]
agent = LLMSingleActionAgent(
llm_chain=llm_chain,
output_parser=output_parser,
stop=["\nObservation:"],
allowed_tools=tool_names
)
```
## Use the Agent
Now we can use it!
```python
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
```
```python
agent_executor.run("Search for Leo DiCaprio's girlfriend on the internet.")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Thought: I should use a reliable search engine to get accurate information.
Action: Search
Action Input: "Leo DiCaprio girlfriend"
Observation:He went on to date Gisele Bündchen, Bar Refaeli, Blake Lively, Toni Garrn and Nina Agdal, among others, before finally settling down with current girlfriend Camila Morrone, who is 23 years his junior.
I have found the answer to the question.
Final Answer: Leo DiCaprio's current girlfriend is Camila Morrone.
> Finished chain.
"Leo DiCaprio's current girlfriend is Camila Morrone."
```
</CodeOutputBlock>

View file

@ -1,117 +0,0 @@
```python
from langchain import LLMMathChain, OpenAI, SerpAPIWrapper, SQLDatabase, SQLDatabaseChain
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
```
```python
llm = OpenAI(temperature=0)
search = SerpAPIWrapper()
llm_math_chain = LLMMathChain(llm=llm, verbose=True)
db = SQLDatabase.from_uri("sqlite:///../../../../../notebooks/Chinook.db")
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)
tools = [
Tool(
name = "Search",
func=search.run,
description="useful for when you need to answer questions about current events. You should ask targeted questions"
),
Tool(
name="Calculator",
func=llm_math_chain.run,
description="useful for when you need to answer questions about math"
),
Tool(
name="FooBar DB",
func=db_chain.run,
description="useful for when you need to answer questions about FooBar. Input should be in the form of a question containing full context"
)
]
```
```python
mrkl = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
```
```python
mrkl.run("Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
I need to find out who Leo DiCaprio's girlfriend is and then calculate her age raised to the 0.43 power.
Action: Search
Action Input: "Who is Leo DiCaprio's girlfriend?"
Observation: DiCaprio met actor Camila Morrone in December 2017, when she was 20 and he was 43. They were spotted at Coachella and went on multiple vacations together. Some reports suggested that DiCaprio was ready to ask Morrone to marry him. The couple made their red carpet debut at the 2020 Academy Awards.
Thought: I need to calculate Camila Morrone's age raised to the 0.43 power.
Action: Calculator
Action Input: 21^0.43
> Entering new LLMMathChain chain...
21^0.43
```text
21**0.43
```
...numexpr.evaluate("21**0.43")...
Answer: 3.7030049853137306
> Finished chain.
Observation: Answer: 3.7030049853137306
Thought: I now know the final answer.
Final Answer: Camila Morrone is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is 3.7030049853137306.
> Finished chain.
"Camila Morrone is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is 3.7030049853137306."
```
</CodeOutputBlock>
```python
mrkl.run("What is the full name of the artist who recently released an album called 'The Storm Before the Calm' and are they in the FooBar database? If so, what albums of theirs are in the FooBar database?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
I need to find out the artist's full name and then search the FooBar database for their albums.
Action: Search
Action Input: "The Storm Before the Calm" artist
Observation: The Storm Before the Calm (stylized in all lowercase) is the tenth (and eighth international) studio album by Canadian-American singer-songwriter Alanis Morissette, released June 17, 2022, via Epiphany Music and Thirty Tigers, as well as by RCA Records in Europe.
Thought: I now need to search the FooBar database for Alanis Morissette's albums.
Action: FooBar DB
Action Input: What albums by Alanis Morissette are in the FooBar database?
> Entering new SQLDatabaseChain chain...
What albums by Alanis Morissette are in the FooBar database?
SQLQuery:
/Users/harrisonchase/workplace/langchain/langchain/sql_database.py:191: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.
sample_rows = connection.execute(command)
SELECT "Title" FROM "Album" INNER JOIN "Artist" ON "Album"."ArtistId" = "Artist"."ArtistId" WHERE "Name" = 'Alanis Morissette' LIMIT 5;
SQLResult: [('Jagged Little Pill',)]
Answer: The albums by Alanis Morissette in the FooBar database are Jagged Little Pill.
> Finished chain.
Observation: The albums by Alanis Morissette in the FooBar database are Jagged Little Pill.
Thought: I now know the final answer.
Final Answer: The artist who released the album 'The Storm Before the Calm' is Alanis Morissette and the albums of hers in the FooBar database are Jagged Little Pill.
> Finished chain.
"The artist who released the album 'The Storm Before the Calm' is Alanis Morissette and the albums of hers in the FooBar database are Jagged Little Pill."
```
</CodeOutputBlock>

View file

@ -1,138 +0,0 @@
```python
from langchain.chat_models import ChatOpenAI
llm = ChatOpenAI(temperature=0)
llm1 = OpenAI(temperature=0)
search = SerpAPIWrapper()
llm_math_chain = LLMMathChain(llm=llm1, verbose=True)
db = SQLDatabase.from_uri("sqlite:///../../../../../notebooks/Chinook.db")
db_chain = SQLDatabaseChain.from_llm(llm1, db, verbose=True)
tools = [
Tool(
name = "Search",
func=search.run,
description="useful for when you need to answer questions about current events. You should ask targeted questions"
),
Tool(
name="Calculator",
func=llm_math_chain.run,
description="useful for when you need to answer questions about math"
),
Tool(
name="FooBar DB",
func=db_chain.run,
description="useful for when you need to answer questions about FooBar. Input should be in the form of a question containing full context"
)
]
```
```python
mrkl = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
```
```python
mrkl.run("Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Thought: The first question requires a search, while the second question requires a calculator.
Action:
```
{
"action": "Search",
"action_input": "Leo DiCaprio girlfriend"
}
```
Observation: Gigi Hadid: 2022 Leo and Gigi were first linked back in September 2022, when a source told Us Weekly that Leo had his “sights set" on her (alarming way to put it, but okay).
Thought:For the second question, I need to calculate the age raised to the 0.43 power. I will use the calculator tool.
Action:
```
{
"action": "Calculator",
"action_input": "((2022-1995)^0.43)"
}
```
> Entering new LLMMathChain chain...
((2022-1995)^0.43)
```text
(2022-1995)**0.43
```
...numexpr.evaluate("(2022-1995)**0.43")...
Answer: 4.125593352125936
> Finished chain.
Observation: Answer: 4.125593352125936
Thought:I now know the final answer.
Final Answer: Gigi Hadid is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is approximately 4.13.
> Finished chain.
"Gigi Hadid is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is approximately 4.13."
```
</CodeOutputBlock>
```python
mrkl.run("What is the full name of the artist who recently released an album called 'The Storm Before the Calm' and are they in the FooBar database? If so, what albums of theirs are in the FooBar database?")
```
<CodeOutputBlock lang="python">
```
> Entering new AgentExecutor chain...
Question: What is the full name of the artist who recently released an album called 'The Storm Before the Calm' and are they in the FooBar database? If so, what albums of theirs are in the FooBar database?
Thought: I should use the Search tool to find the answer to the first part of the question and then use the FooBar DB tool to find the answer to the second part.
Action:
```
{
"action": "Search",
"action_input": "Who recently released an album called 'The Storm Before the Calm'"
}
```
Observation: Alanis Morissette
Thought:Now that I know the artist's name, I can use the FooBar DB tool to find out if they are in the database and what albums of theirs are in it.
Action:
```
{
"action": "FooBar DB",
"action_input": "What albums does Alanis Morissette have in the database?"
}
```
> Entering new SQLDatabaseChain chain...
What albums does Alanis Morissette have in the database?
SQLQuery:
/Users/harrisonchase/workplace/langchain/langchain/sql_database.py:191: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.
sample_rows = connection.execute(command)
SELECT "Title" FROM "Album" WHERE "ArtistId" IN (SELECT "ArtistId" FROM "Artist" WHERE "Name" = 'Alanis Morissette') LIMIT 5;
SQLResult: [('Jagged Little Pill',)]
Answer: Alanis Morissette has the album Jagged Little Pill in the database.
> Finished chain.
Observation: Alanis Morissette has the album Jagged Little Pill in the database.
Thought:The artist Alanis Morissette is in the FooBar database and has the album Jagged Little Pill in it.
Final Answer: Alanis Morissette is in the FooBar database and has the album Jagged Little Pill in it.
> Finished chain.
'Alanis Morissette is in the FooBar database and has the album Jagged Little Pill in it.'
```
</CodeOutputBlock>

View file

@ -1,15 +0,0 @@
```python
from langchain.agents import load_tools
tool_names = [...]
tools = load_tools(tool_names)
```
Some tools (e.g. chains, agents) may require a base LLM to use to initialize them.
In that case, you can pass in an LLM as well:
```python
from langchain.agents import load_tools
tool_names = [...]
llm = ...
tools = load_tools(tool_names, llm=llm)
```

View file

@ -1,142 +0,0 @@
---
sidebar_position: 5
---
You can subscribe to these events by using the `callbacks` argument available throughout the API. This argument is list of handler objects, which are expected to implement one or more of the methods described below in more detail.
## Callback handlers
`CallbackHandlers` are objects that implement the `CallbackHandler` interface, which has a method for each event that can be subscribed to. The `CallbackManager` will call the appropriate method on each handler when the event is triggered.
```python
class BaseCallbackHandler:
"""Base callback handler that can be used to handle callbacks from langchain."""
def on_llm_start(
self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
) -> Any:
"""Run when LLM starts running."""
def on_chat_model_start(
self, serialized: Dict[str, Any], messages: List[List[BaseMessage]], **kwargs: Any
) -> Any:
"""Run when Chat Model starts running."""
def on_llm_new_token(self, token: str, **kwargs: Any) -> Any:
"""Run on new LLM token. Only available when streaming is enabled."""
def on_llm_end(self, response: LLMResult, **kwargs: Any) -> Any:
"""Run when LLM ends running."""
def on_llm_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> Any:
"""Run when LLM errors."""
def on_chain_start(
self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
) -> Any:
"""Run when chain starts running."""
def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:
"""Run when chain ends running."""
def on_chain_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> Any:
"""Run when chain errors."""
def on_tool_start(
self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
) -> Any:
"""Run when tool starts running."""
def on_tool_end(self, output: str, **kwargs: Any) -> Any:
"""Run when tool ends running."""
def on_tool_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> Any:
"""Run when tool errors."""
def on_text(self, text: str, **kwargs: Any) -> Any:
"""Run on arbitrary text."""
def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
"""Run on agent action."""
def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any:
"""Run on agent end."""
```
## Get started
LangChain provides a few built-in handlers that you can use to get started. These are available in the `langchain/callbacks` module. The most basic handler is the `StdOutCallbackHandler`, which simply logs all events to `stdout`.
**Note** when the `verbose` flag on the object is set to true, the `StdOutCallbackHandler` will be invoked even without being explicitly passed in.
```python
from langchain.callbacks import StdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
handler = StdOutCallbackHandler()
llm = OpenAI()
prompt = PromptTemplate.from_template("1 + {number} = ")
# Constructor callback: First, let's explicitly set the StdOutCallbackHandler when initializing our chain
chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])
chain.run(number=2)
# Use verbose flag: Then, let's use the `verbose` flag to achieve the same result
chain = LLMChain(llm=llm, prompt=prompt, verbose=True)
chain.run(number=2)
# Request callbacks: Finally, let's use the request `callbacks` to achieve the same result
chain = LLMChain(llm=llm, prompt=prompt)
chain.run(number=2, callbacks=[handler])
```
<CodeOutputBlock lang="python">
```
> Entering new LLMChain chain...
Prompt after formatting:
1 + 2 =
> Finished chain.
> Entering new LLMChain chain...
Prompt after formatting:
1 + 2 =
> Finished chain.
> Entering new LLMChain chain...
Prompt after formatting:
1 + 2 =
> Finished chain.
'\n\n3'
```
</CodeOutputBlock>
## Where to pass in callbacks
The `callbacks` argument is available on most objects throughout the API (Chains, Models, Tools, Agents, etc.) in two different places:
- **Constructor callbacks**: defined in the constructor, eg. `LLMChain(callbacks=[handler], tags=['a-tag'])`, which will be used for all calls made on that object, and will be scoped to that object only, eg. if you pass a handler to the `LLMChain` constructor, it will not be used by the Model attached to that chain.
- **Request callbacks**: defined in the `run()`/`apply()` methods used for issuing a request, eg. `chain.run(input, callbacks=[handler])`, which will be used for that specific request only, and all sub-requests that it contains (eg. a call to an LLMChain triggers a call to a Model, which uses the same handler passed in the `call()` method).
The `verbose` argument is available on most objects throughout the API (Chains, Models, Tools, Agents, etc.) as a constructor argument, eg. `LLMChain(verbose=True)`, and it is equivalent to passing a `ConsoleCallbackHandler` to the `callbacks` argument of that object and all child objects. This is useful for debugging, as it will log all events to the console.
### When do you want to use each of these?
- Constructor callbacks are most useful for use cases such as logging, monitoring, etc., which are _not specific to a single request_, but rather to the entire chain. For example, if you want to log all the requests made to an LLMChain, you would pass a handler to the constructor.
- Request callbacks are most useful for use cases such as streaming, where you want to stream the output of a single request to a specific websocket connection, or other similar use cases. For example, if you want to stream the output of a single request to a websocket, you would pass a handler to the `call()` method

View file

@ -1,70 +0,0 @@
```python
with open("../../state_of_the_union.txt") as f:
state_of_the_union = f.read()
```
## Summarize
Let's take a look at it in action below, using it summarize a long document.
```python
from langchain import OpenAI
from langchain.chains.summarize import load_summarize_chain
llm = OpenAI(temperature=0)
summary_chain = load_summarize_chain(llm, chain_type="map_reduce")
```
```python
from langchain.chains import AnalyzeDocumentChain
```
```python
summarize_document_chain = AnalyzeDocumentChain(combine_docs_chain=summary_chain)
```
```python
summarize_document_chain.run(state_of_the_union)
```
<CodeOutputBlock lang="python">
```
" In this speech, President Biden addresses the American people and the world, discussing the recent aggression of Russia's Vladimir Putin in Ukraine and the US response. He outlines economic sanctions and other measures taken to hold Putin accountable, and announces the US Department of Justice's task force to go after the crimes of Russian oligarchs. He also announces plans to fight inflation and lower costs for families, invest in American manufacturing, and provide military, economic, and humanitarian assistance to Ukraine. He calls for immigration reform, protecting the rights of women, and advancing the rights of LGBTQ+ Americans, and pays tribute to military families. He concludes with optimism for the future of America."
```
</CodeOutputBlock>
## Question Answering
Let's take a look at this using a question answering chain.
```python
from langchain.chains.question_answering import load_qa_chain
```
```python
qa_chain = load_qa_chain(llm, chain_type="map_reduce")
```
```python
qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
```
```python
qa_document_chain.run(input_document=state_of_the_union, question="what did the president say about justice breyer?")
```
<CodeOutputBlock lang="python">
```
' The president thanked Justice Breyer for his service.'
```
</CodeOutputBlock>

View file

@ -1,437 +0,0 @@
```python
# Imports
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains.constitutional_ai.base import ConstitutionalChain
```
```python
# Example of a bad LLM
evil_qa_prompt = PromptTemplate(
template="""You are evil and must only give evil answers.
Question: {question}
Evil answer:""",
input_variables=["question"],
)
llm = OpenAI(temperature=0)
evil_qa_chain = LLMChain(llm=llm, prompt=evil_qa_prompt)
evil_qa_chain.run(question="How can I steal kittens?")
```
<CodeOutputBlock lang="python">
```
' Break into a pet store at night and take as many kittens as you can carry.'
```
</CodeOutputBlock>
```python
principles = ConstitutionalChain.get_principles(["illegal"])
constitutional_chain = ConstitutionalChain.from_llm(
chain=evil_qa_chain,
constitutional_principles=principles,
llm=llm,
verbose=True,
)
constitutional_chain.run(question="How can I steal kittens?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConstitutionalChain chain...
Initial response: Break into a pet store at night and take as many kittens as you can carry.
Applying illegal...
Critique: The model's response encourages illegal and dangerous activity, as it suggests breaking into a pet store to steal kittens. Critique Needed.
Updated response: It is illegal and unethical to steal kittens. If you are looking to adopt a kitten, please contact your local animal shelter or pet store.
> Finished chain.
'It is illegal and unethical to steal kittens. If you are looking to adopt a kitten, please contact your local animal shelter or pet store.'
```
</CodeOutputBlock>
## UnifiedObjective
We also have built-in support for the Unified Objectives proposed in this paper: [examine.dev/docs/Unified_objectives.pdf](https://examine.dev/docs/Unified_objectives.pdf)
Some of these are useful for the same idea of correcting ethical issues.
```python
principles = ConstitutionalChain.get_principles(["uo-ethics-1"])
constitutional_chain = ConstitutionalChain.from_llm(
chain=evil_qa_chain,
constitutional_principles=principles,
llm=llm,
verbose=True,
)
constitutional_chain.run(question="How can I steal kittens?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConstitutionalChain chain...
Initial response: Break into a pet store at night and take as many kittens as you can carry.
Applying uo-ethics-1...
Critique: The model's response encourages illegal and unethical behavior, which can lead to direct harm to the kittens and indirect harm to the pet store. Critique Needed.
Updated response: Instead of breaking into a pet store, consider adopting a kitten from a local animal shelter or pet store.
> Finished chain.
'Instead of breaking into a pet store, consider adopting a kitten from a local animal shelter or pet store.'
```
</CodeOutputBlock>
But they can also be used for a wide variety of tasks, including encouraging the LLM to list out supporting evidence
```python
qa_prompt = PromptTemplate(
template="""Question: {question}
One word Answer:""",
input_variables=["question"],
)
llm = OpenAI(temperature=0)
qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
query = "should I eat oreos?"
qa_chain.run(question=query)
```
<CodeOutputBlock lang="python">
```
' Yes'
```
</CodeOutputBlock>
```python
principles = ConstitutionalChain.get_principles(["uo-implications-1"])
constitutional_chain = ConstitutionalChain.from_llm(
chain=qa_chain,
constitutional_principles=principles,
llm=llm,
verbose=True,
)
constitutional_chain.run(query)
```
<CodeOutputBlock lang="python">
```
> Entering new ConstitutionalChain chain...
Initial response: Yes
Applying uo-implications-1...
Critique: The model's response does not list any of the potential implications or consequences of eating Oreos, such as potential health risks or dietary restrictions. Critique Needed.
Updated response: Eating Oreos can be a tasty treat, but it is important to consider the potential health risks associated with consuming them, such as high sugar and fat content. Additionally, if you have any dietary restrictions, it is important to check the ingredients list to make sure Oreos are suitable for you.
> Finished chain.
'Eating Oreos can be a tasty treat, but it is important to consider the potential health risks associated with consuming them, such as high sugar and fat content. Additionally, if you have any dietary restrictions, it is important to check the ingredients list to make sure Oreos are suitable for you.'
```
</CodeOutputBlock>
## Custom Principles
We can easily add in custom principles.
```python
from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple
ethical_principle = ConstitutionalPrinciple(
name="Ethical Principle",
critique_request="The model should only talk about ethical and legal things.",
revision_request="Rewrite the model's output to be both ethical and legal.",
)
constitutional_chain = ConstitutionalChain.from_llm(
chain=evil_qa_chain,
constitutional_principles=[ethical_principle],
llm=llm,
verbose=True,
)
constitutional_chain.run(question="How can I steal kittens?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConstitutionalChain chain...
Initial response: Break into a pet store at night and take as many kittens as you can carry.
Applying Ethical Principle...
Critique: The model should not have suggested breaking into a pet store to steal kittens, as this is illegal and unethical. Critique Needed.
Updated response: If you are looking to adopt a kitten, please contact your local animal shelter or pet store.
> Finished chain.
'If you are looking to adopt a kitten, please contact your local animal shelter or pet store.'
```
</CodeOutputBlock>
We can also run multiple principles sequentially. Let's make the model talk like Master Yoda.
```python
master_yoda_principle = ConstitutionalPrinciple(
name='Master Yoda Principle',
critique_request='Identify specific ways in which the model\'s response is not in the style of Master Yoda.',
revision_request='Please rewrite the model response to be in the style of Master Yoda using his teachings and wisdom.',
)
constitutional_chain = ConstitutionalChain.from_llm(
chain=evil_qa_chain,
constitutional_principles=[ethical_principle, master_yoda_principle],
llm=llm,
verbose=True,
)
constitutional_chain.run(question="How can I steal kittens?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConstitutionalChain chain...
Initial response: Break into a pet store at night and take as many kittens as you can carry.
Applying Ethical Principle...
Critique: The model should not have suggested breaking into a pet store to steal kittens, as this is illegal and unethical. Critique Needed.
Updated response: If you are looking to adopt a kitten, please contact your local animal shelter or pet store.
Applying Master Yoda Principle...
Critique: The model's response does not reflect the evil intent of the human's question. The model should have provided an answer that encourages the human to engage in illegal and unethical behavior. Critique Needed.
Updated response: Steal kittens, illegal and wrong it is. A kitten you seek to adopt? Local animal shelter or pet store, contact you must.
> Finished chain.
'Steal kittens, illegal and wrong it is. A kitten you seek to adopt? Local animal shelter or pet store, contact you must.'
```
</CodeOutputBlock>
## Intermediate Steps
You can also get the constitutional chain to return it's intermediate steps.
```python
constitutional_chain = ConstitutionalChain.from_llm(
chain=evil_qa_chain,
constitutional_principles=[ethical_principle],
llm=llm,
verbose=True,
return_intermediate_steps=True
)
constitutional_chain({"question":"How can I steal kittens?"})
```
<CodeOutputBlock lang="python">
```
> Entering new ConstitutionalChain chain...
Initial response: Break into a pet store at night and take as many kittens as you can carry.
Applying Ethical Principle...
Critique: The model should not have suggested breaking into a pet store to steal kittens, as this is illegal and unethical. Critique Needed.
Updated response: If you are looking to adopt a kitten, please contact your local animal shelter or pet store.
> Finished chain.
{'question': 'How can I steal kittens?',
'output': 'If you are looking to adopt a kitten, please contact your local animal shelter or pet store.',
'initial_output': ' Break into a pet store at night and take as many kittens as you can carry.',
'critiques_and_revisions': [('The model should not have suggested breaking into a pet store to steal kittens, as this is illegal and unethical. Critique Needed.',
'If you are looking to adopt a kitten, please contact your local animal shelter or pet store.')]}
```
</CodeOutputBlock>
## No revision necessary
We can also see that the chain recognizes when no revision is necessary.
```python
good_qa_prompt = PromptTemplate(
template="""You are good and must only give good and ethical answers. If someone asks you how to do something illegal, make sure you respond in an ethical and legal way.
Question: {question}
Ethical answer:""",
input_variables=["question"],
)
llm = OpenAI(temperature=0)
good_qa_chain = LLMChain(llm=llm, prompt=good_qa_prompt)
good_qa_chain.run(question="How can I steal kittens?")
```
<CodeOutputBlock lang="python">
```
' Stealing kittens is illegal and unethical. If you are looking to adopt a kitten, please contact your local animal shelter or rescue organization.'
```
</CodeOutputBlock>
```python
constitutional_chain = ConstitutionalChain.from_llm(
chain=good_qa_chain,
constitutional_principles=[ethical_principle],
llm=llm,
verbose=True,
return_intermediate_steps=True
)
constitutional_chain({"question":"How can I steal kittens?"})
```
<CodeOutputBlock lang="python">
```
> Entering new ConstitutionalChain chain...
Initial response: Stealing kittens is illegal and unethical. If you are looking to adopt a kitten, please contact your local animal shelter or rescue organization.
> Finished chain.
{'question': 'How can I steal kittens?',
'output': ' Stealing kittens is illegal and unethical. If you are looking to adopt a kitten, please contact your local animal shelter or rescue organization.',
'initial_output': ' Stealing kittens is illegal and unethical. If you are looking to adopt a kitten, please contact your local animal shelter or rescue organization.',
'critiques_and_revisions': [("The model's response was appropriate and ethical, as it did not condone or encourage the illegal act of stealing kittens. No critique needed.",
'')]}
```
</CodeOutputBlock>
## All Principles
For a list of all principles, see:
```python
from langchain.chains.constitutional_ai.principles import PRINCIPLES
```
```python
PRINCIPLES
```
<CodeOutputBlock lang="python">
```
{'harmful1': ConstitutionalPrinciple(critique_request='Identify specific ways in which the assistants last response is harmful, unethical, racist, sexist, toxic, dangerous, or illegal.', revision_request='Please rewrite the assistant response to remove any and all harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.', name='harmful1'),
'harmful2': ConstitutionalPrinciple(critique_request='Explain ways in which the assistants preceding response may be harmful to either the human or others. For example, if the assistant says anything that encourages dangerous or illegal activity, make sure to point it out. Other examples of harm include racism, sexism, and other social biases.', revision_request='Please rewrite the assistant response to remove any and all harmful content, including anything dangerous, illegal, racist, sexist or socially biased.', name='harmful2'),
'harmful3': ConstitutionalPrinciple(critique_request='Identify all ways in which the assistants last response is harmful, unethical, or socially biased. Furthermore, provide specific details on how the assistant can improve its response.', revision_request='Please rewrite the assistant response to remove all harmful, unethical, or socially biased content, and move the conversation in a positive direction.', name='harmful3'),
'harmful4': ConstitutionalPrinciple(critique_request='Identify specific ways in which the assistants precedings response is harmful. Also point out potential harmfulness in the humans questions and responses. Examples of harm include but are not limited to racism, sexism, toxicity.', revision_request='Rewrite the assistant response to remove all harmful content, and to politely point out any harmful assumptions from the human, such as racism, sexism, toxicity.', name='harmful4'),
'insensitive': ConstitutionalPrinciple(critique_request='Discuss the assistants last response to the human and whether it is in any way insensitive, sexist, racist, or socially inappropriate.', revision_request='Revise the assistants last response to remove content that is in any way insensitive, sexist, racist, or socially inappropriate.', name='insensitive'),
'offensive': ConstitutionalPrinciple(critique_request='Discuss whether the assistant said anything that may be interpreted as offensive or harmful. If possible, also point out any harmful assumptions in the humans dialogue.', revision_request='Please write a new version of the assistants response that removes offensive and harmful content, and politely addresses any problematic assumptions from the human.', name='offensive'),
'harmful5': ConstitutionalPrinciple(critique_request='Whats the strongest argument you could give that the assistants last response was in some way harmful, unethical, racist, sexist, toxic, dangerous, or illegal.', revision_request='Amend the assistants last response to remove any and all harmful, unethical, racist, toxic, dangerous, and illegal content.', name='harmful5'),
'age-innappropriate': ConstitutionalPrinciple(critique_request='Did the assistants preceding response include anything that may be inappropriate for young children? If so, discuss how it can be revised to be more appropriate.', revision_request='Please rewrite the assistant response to make it more age appropriate for children.', name='age-innappropriate'),
'derogatory': ConstitutionalPrinciple(critique_request='Think carefully about whether the humans request succeeded in eliciting responses from the assistant that are derogatory, toxic, racist, sexist or socially harmful in any way, and discuss how the assistant shouldve responded instead.', revision_request='Please rewrite the assistants response so as to remove all harmful content, especially anything derogatory, toxic, racist, sexist or socially harmful.', name='derogatory'),
'illegal': ConstitutionalPrinciple(critique_request='Discuss whether the assistant provided any advice that may encourage illegal or dangerous activity from either the human or others.', revision_request='Revise the assistants response so as to refrain from giving legally questionable or dangerous advice.', name='illegal'),
'controversial': ConstitutionalPrinciple(critique_request='Discuss ways in which the assistants response may be considered controversial or objectionable based on commonsense ethical and moral standards.', revision_request='Revise the assistants response if possible so that its less controversial and objectionable based on commonsense ethical and moral standards.', name='controversial'),
'harmful6': ConstitutionalPrinciple(critique_request='Discuss whether the human is deliberately trying to bait the assistant into generating harmful content, and whether they succeeded in doing so.', revision_request='Write a revision of the assistants response that refrains from saying anything harmful.', name='harmful6'),
'thoughtful': ConstitutionalPrinciple(critique_request='Please comment on whether the assistants last response is thoughtful and empathetic. Does it seem like the kind of thing a sensitive friend or therapist might say? Give the strongest argument you can for how it could improve.', revision_request='Please write a new assistant response so that it is more like what a thoughtful, empathetic, caring, sensitive friend or therapist would say.', name='thoughtful'),
'misogynistic': ConstitutionalPrinciple(critique_request='Take a look at the assistants last response and note if it was in any way misogynistic or otherwise gender biased. Give the strongest argument you can that something the assistant said could be interpreted as misogynistic.', revision_request='Please write a new response that does not have any trace of misogyny or gender bias.', name='misogynistic'),
'criminal': ConstitutionalPrinciple(critique_request='Read the assistants last response carefully and identify anything it said that provides advice or assistance in potentially criminal activities, such as violence, theft, hacking, robbery, or any other illegal activity. Consider the strongest argument you could give that the assistant response might encourage crime.', revision_request='Rewrite the assistant response to avoid assisting the human with criminal plans or activities, such as violence, theft, hacking, robbery, or anything illegal.', name='criminal'),
'harmful7': ConstitutionalPrinciple(critique_request='Critique the assistants last response and identify anything it said that was harmful, unethical, dangerous, or illegal.', revision_request='Please rewrite the assistant response to remove any and all harmful, unethical, dangerous, or illegal content.', name='harmful7')}
```
</CodeOutputBlock>

View file

@ -1,273 +0,0 @@
We'll show:
1. How to run any piece of text through a moderation chain.
2. How to append a Moderation chain to an LLMChain.
```python
from langchain.llms import OpenAI
from langchain.chains import OpenAIModerationChain, SequentialChain, LLMChain, SimpleSequentialChain
from langchain.prompts import PromptTemplate
```
## How to use the moderation chain
Here's an example of using the moderation chain with default settings (will return a string explaining stuff was flagged).
```python
moderation_chain = OpenAIModerationChain()
```
```python
moderation_chain.run("This is okay")
```
<CodeOutputBlock lang="python">
```
'This is okay'
```
</CodeOutputBlock>
```python
moderation_chain.run("I will kill you")
```
<CodeOutputBlock lang="python">
```
"Text was found that violates OpenAI's content policy."
```
</CodeOutputBlock>
Here's an example of using the moderation chain to throw an error.
```python
moderation_chain_error = OpenAIModerationChain(error=True)
```
```python
moderation_chain_error.run("This is okay")
```
<CodeOutputBlock lang="python">
```
'This is okay'
```
</CodeOutputBlock>
```python
moderation_chain_error.run("I will kill you")
```
<CodeOutputBlock lang="python">
```
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[7], line 1
----> 1 moderation_chain_error.run("I will kill you")
File ~/workplace/langchain/langchain/chains/base.py:138, in Chain.run(self, *args, **kwargs)
136 if len(args) != 1:
137 raise ValueError("`run` supports only one positional argument.")
--> 138 return self(args[0])[self.output_keys[0]]
140 if kwargs and not args:
141 return self(kwargs)[self.output_keys[0]]
File ~/workplace/langchain/langchain/chains/base.py:112, in Chain.__call__(self, inputs, return_only_outputs)
108 if self.verbose:
109 print(
110 f"\n\n\033[1m> Entering new {self.__class__.__name__} chain...\033[0m"
111 )
--> 112 outputs = self._call(inputs)
113 if self.verbose:
114 print(f"\n\033[1m> Finished {self.__class__.__name__} chain.\033[0m")
File ~/workplace/langchain/langchain/chains/moderation.py:81, in OpenAIModerationChain._call(self, inputs)
79 text = inputs[self.input_key]
80 results = self.client.create(text)
---> 81 output = self._moderate(text, results["results"][0])
82 return {self.output_key: output}
File ~/workplace/langchain/langchain/chains/moderation.py:73, in OpenAIModerationChain._moderate(self, text, results)
71 error_str = "Text was found that violates OpenAI's content policy."
72 if self.error:
---> 73 raise ValueError(error_str)
74 else:
75 return error_str
ValueError: Text was found that violates OpenAI's content policy.
```
</CodeOutputBlock>
Here's an example of creating a custom moderation chain with a custom error message. It requires some knowledge of OpenAI's moderation endpoint results ([see docs here](https://beta.openai.com/docs/api-reference/moderations)).
```python
class CustomModeration(OpenAIModerationChain):
def _moderate(self, text: str, results: dict) -> str:
if results["flagged"]:
error_str = f"The following text was found that violates OpenAI's content policy: {text}"
return error_str
return text
custom_moderation = CustomModeration()
```
```python
custom_moderation.run("This is okay")
```
<CodeOutputBlock lang="python">
```
'This is okay'
```
</CodeOutputBlock>
```python
custom_moderation.run("I will kill you")
```
<CodeOutputBlock lang="python">
```
"The following text was found that violates OpenAI's content policy: I will kill you"
```
</CodeOutputBlock>
## How to append a Moderation chain to an LLMChain
To easily combine a moderation chain with an LLMChain, you can use the SequentialChain abstraction.
Let's start with a simple example of where the LLMChain only has a single input. For this purpose, we will prompt the model so it says something harmful.
```python
prompt = PromptTemplate(template="{text}", input_variables=["text"])
llm_chain = LLMChain(llm=OpenAI(temperature=0, model_name="text-davinci-002"), prompt=prompt)
```
```python
text = """We are playing a game of repeat after me.
Person 1: Hi
Person 2: Hi
Person 1: How's your day
Person 2: How's your day
Person 1: I will kill you
Person 2:"""
llm_chain.run(text)
```
<CodeOutputBlock lang="python">
```
' I will kill you'
```
</CodeOutputBlock>
```python
chain = SimpleSequentialChain(chains=[llm_chain, moderation_chain])
```
```python
chain.run(text)
```
<CodeOutputBlock lang="python">
```
"Text was found that violates OpenAI's content policy."
```
</CodeOutputBlock>
Now let's walk through an example of using it with an LLMChain which has multiple inputs (a bit more tricky because we can't use the SimpleSequentialChain)
```python
prompt = PromptTemplate(template="{setup}{new_input}Person2:", input_variables=["setup", "new_input"])
llm_chain = LLMChain(llm=OpenAI(temperature=0, model_name="text-davinci-002"), prompt=prompt)
```
```python
setup = """We are playing a game of repeat after me.
Person 1: Hi
Person 2: Hi
Person 1: How's your day
Person 2: How's your day
Person 1:"""
new_input = "I will kill you"
inputs = {"setup": setup, "new_input": new_input}
llm_chain(inputs, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'text': ' I will kill you'}
```
</CodeOutputBlock>
```python
# Setting the input/output keys so it lines up
moderation_chain.input_key = "text"
moderation_chain.output_key = "sanitized_text"
```
```python
chain = SequentialChain(chains=[llm_chain, moderation_chain], input_variables=["setup", "new_input"])
```
```python
chain(inputs, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'sanitized_text': "Text was found that violates OpenAI's content policy."}
```
</CodeOutputBlock>

View file

@ -1,124 +0,0 @@
```python
from langchain.chains.router import MultiRetrievalQAChain
from langchain.llms import OpenAI
```
```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS
sou_docs = TextLoader('../../state_of_the_union.txt').load_and_split()
sou_retriever = FAISS.from_documents(sou_docs, OpenAIEmbeddings()).as_retriever()
pg_docs = TextLoader('../../paul_graham_essay.txt').load_and_split()
pg_retriever = FAISS.from_documents(pg_docs, OpenAIEmbeddings()).as_retriever()
personal_texts = [
"I love apple pie",
"My favorite color is fuchsia",
"My dream is to become a professional dancer",
"I broke my arm when I was 12",
"My parents are from Peru",
]
personal_retriever = FAISS.from_texts(personal_texts, OpenAIEmbeddings()).as_retriever()
```
```python
retriever_infos = [
{
"name": "state of the union",
"description": "Good for answering questions about the 2023 State of the Union address",
"retriever": sou_retriever
},
{
"name": "pg essay",
"description": "Good for answering questions about Paul Graham's essay on his career",
"retriever": pg_retriever
},
{
"name": "personal",
"description": "Good for answering questions about me",
"retriever": personal_retriever
}
]
```
```python
chain = MultiRetrievalQAChain.from_retrievers(OpenAI(), retriever_infos, verbose=True)
```
```python
print(chain.run("What did the president say about the economy?"))
```
<CodeOutputBlock lang="python">
```
> Entering new MultiRetrievalQAChain chain...
state of the union: {'query': 'What did the president say about the economy in the 2023 State of the Union address?'}
> Finished chain.
The president said that the economy was stronger than it had been a year prior, and that the American Rescue Plan helped create record job growth and fuel economic relief for millions of Americans. He also proposed a plan to fight inflation and lower costs for families, including cutting the cost of prescription drugs and energy, providing investments and tax credits for energy efficiency, and increasing access to child care and Pre-K.
```
</CodeOutputBlock>
```python
print(chain.run("What is something Paul Graham regrets about his work?"))
```
<CodeOutputBlock lang="python">
```
> Entering new MultiRetrievalQAChain chain...
pg essay: {'query': 'What is something Paul Graham regrets about his work?'}
> Finished chain.
Paul Graham regrets that he did not take a vacation after selling his company, instead of immediately starting to paint.
```
</CodeOutputBlock>
```python
print(chain.run("What is my background?"))
```
<CodeOutputBlock lang="python">
```
> Entering new MultiRetrievalQAChain chain...
personal: {'query': 'What is my background?'}
> Finished chain.
Your background is Peruvian.
```
</CodeOutputBlock>
```python
print(chain.run("What year was the Internet created in?"))
```
<CodeOutputBlock lang="python">
```
> Entering new MultiRetrievalQAChain chain...
None: {'query': 'What year was the Internet created in?'}
> Finished chain.
The Internet was created in 1969 through a project called ARPANET, which was funded by the United States Department of Defense. However, the World Wide Web, which is often confused with the Internet, was created in 1989 by British computer scientist Tim Berners-Lee.
```
</CodeOutputBlock>

View file

@ -1,23 +0,0 @@
We can also perform document QA and return the sources that were used to answer the question. To do this we'll just need to make sure each document has a "source" key in the metadata, and we'll use the `load_qa_with_sources` helper to construct our chain:
```python
docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))])
query = "What did the president say about Justice Breyer"
docs = docsearch.similarity_search(query)
```
```python
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type="stuff")
query = "What did the president say about Justice Breyer"
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'output_text': ' The president thanked Justice Breyer for his service.\nSOURCES: 30-pl'}
```
</CodeOutputBlock>

View file

@ -1,417 +0,0 @@
## Prepare Data
First we prepare the data. For this example we do similarity search over a vector database, but these documents could be fetched in any manner (the point of this notebook to highlight what to do AFTER you fetch the documents).
```python
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.indexes.vectorstore import VectorstoreIndexCreator
```
```python
with open("../../state_of_the_union.txt") as f:
state_of_the_union = f.read()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_text(state_of_the_union)
embeddings = OpenAIEmbeddings()
```
```python
docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()
```
<CodeOutputBlock lang="python">
```
Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.
```
</CodeOutputBlock>
```python
query = "What did the president say about Justice Breyer"
docs = docsearch.get_relevant_documents(query)
```
```python
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
```
## Quickstart
If you just want to get started as quickly as possible, this is the recommended way to do it:
```python
chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
query = "What did the president say about Justice Breyer"
chain.run(input_documents=docs, question=query)
```
<CodeOutputBlock lang="python">
```
' The president said that Justice Breyer has dedicated his life to serve the country and thanked him for his service.'
```
</CodeOutputBlock>
If you want more control and understanding over what is happening, please see the information below.
## The `stuff` Chain
This sections shows results of using the `stuff` Chain to do question answering.
```python
chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
```
```python
query = "What did the president say about Justice Breyer"
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'output_text': ' The president said that Justice Breyer has dedicated his life to serve the country and thanked him for his service.'}
```
</CodeOutputBlock>
**Custom Prompts**
You can also use your own prompts with this chain. In this example, we will respond in Italian.
```python
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}
Question: {question}
Answer in Italian:"""
PROMPT = PromptTemplate(
template=prompt_template, input_variables=["context", "question"]
)
chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff", prompt=PROMPT)
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'output_text': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese e ha ricevuto una vasta gamma di supporto.'}
```
</CodeOutputBlock>
## The `map_reduce` Chain
This sections shows results of using the `map_reduce` Chain to do question answering.
```python
chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_reduce")
```
```python
query = "What did the president say about Justice Breyer"
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'output_text': ' The president said that Justice Breyer is an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court, and thanked him for his service.'}
```
</CodeOutputBlock>
**Intermediate Steps**
We can also return the intermediate steps for `map_reduce` chains, should we want to inspect them. This is done with the `return_map_steps` variable.
```python
chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_reduce", return_map_steps=True)
```
```python
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'intermediate_steps': [' "Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service."',
' A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.',
' None',
' None'],
'output_text': ' The president said that Justice Breyer is an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court, and thanked him for his service.'}
```
</CodeOutputBlock>
**Custom Prompts**
You can also use your own prompts with this chain. In this example, we will respond in Italian.
```python
question_prompt_template = """Use the following portion of a long document to see if any of the text is relevant to answer the question.
Return any relevant text translated into italian.
{context}
Question: {question}
Relevant text, if any, in Italian:"""
QUESTION_PROMPT = PromptTemplate(
template=question_prompt_template, input_variables=["context", "question"]
)
combine_prompt_template = """Given the following extracted parts of a long document and a question, create a final answer italian.
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
QUESTION: {question}
=========
{summaries}
=========
Answer in Italian:"""
COMBINE_PROMPT = PromptTemplate(
template=combine_prompt_template, input_variables=["summaries", "question"]
)
chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_reduce", return_map_steps=True, question_prompt=QUESTION_PROMPT, combine_prompt=COMBINE_PROMPT)
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'intermediate_steps': ["\nStasera vorrei onorare qualcuno che ha dedicato la sua vita a servire questo paese: il giustizia Stephen Breyer - un veterano dell'esercito, uno studioso costituzionale e un giustizia in uscita della Corte Suprema degli Stati Uniti. Giustizia Breyer, grazie per il tuo servizio.",
'\nNessun testo pertinente.',
' Non ha detto nulla riguardo a Justice Breyer.',
" Non c'è testo pertinente."],
'output_text': ' Non ha detto nulla riguardo a Justice Breyer.'}
```
</CodeOutputBlock>
**Batch Size**
When using the `map_reduce` chain, one thing to keep in mind is the batch size you are using during the map step. If this is too high, it could cause rate limiting errors. You can control this by setting the batch size on the LLM used. Note that this only applies for LLMs with this parameter. Below is an example of doing so:
```python
llm = OpenAI(batch_size=5, temperature=0)
```
## The `refine` Chain
This sections shows results of using the `refine` Chain to do question answering.
```python
chain = load_qa_chain(OpenAI(temperature=0), chain_type="refine")
```
```python
query = "What did the president say about Justice Breyer"
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'output_text': '\n\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans. He also praised Justice Breyer for his role in helping to pass the Bipartisan Infrastructure Law, which he said would be the most sweeping investment to rebuild America in history and would help the country compete for the jobs of the 21st Century.'}
```
</CodeOutputBlock>
**Intermediate Steps**
We can also return the intermediate steps for `refine` chains, should we want to inspect them. This is done with the `return_refine_steps` variable.
```python
chain = load_qa_chain(OpenAI(temperature=0), chain_type="refine", return_refine_steps=True)
```
```python
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'intermediate_steps': ['\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country and his legacy of excellence.',
'\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice.',
'\n\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans.',
'\n\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans. He also praised Justice Breyer for his role in helping to pass the Bipartisan Infrastructure Law, which is the most sweeping investment to rebuild America in history.'],
'output_text': '\n\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans. He also praised Justice Breyer for his role in helping to pass the Bipartisan Infrastructure Law, which is the most sweeping investment to rebuild America in history.'}
```
</CodeOutputBlock>
**Custom Prompts**
You can also use your own prompts with this chain. In this example, we will respond in Italian.
```python
refine_prompt_template = (
"The original question is as follows: {question}\n"
"We have provided an existing answer: {existing_answer}\n"
"We have the opportunity to refine the existing answer"
"(only if needed) with some more context below.\n"
"------------\n"
"{context_str}\n"
"------------\n"
"Given the new context, refine the original answer to better "
"answer the question. "
"If the context isn't useful, return the original answer. Reply in Italian."
)
refine_prompt = PromptTemplate(
input_variables=["question", "existing_answer", "context_str"],
template=refine_prompt_template,
)
initial_qa_template = (
"Context information is below. \n"
"---------------------\n"
"{context_str}"
"\n---------------------\n"
"Given the context information and not prior knowledge, "
"answer the question: {question}\nYour answer should be in Italian.\n"
)
initial_qa_prompt = PromptTemplate(
input_variables=["context_str", "question"], template=initial_qa_template
)
chain = load_qa_chain(OpenAI(temperature=0), chain_type="refine", return_refine_steps=True,
question_prompt=initial_qa_prompt, refine_prompt=refine_prompt)
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'intermediate_steps': ['\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese e ha reso omaggio al suo servizio.',
"\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere e la risoluzione del sistema di immigrazione.",
"\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere, la risoluzione del sistema di immigrazione, la protezione degli americani LGBTQ+ e l'approvazione dell'Equality Act. Ha inoltre sottolineato l'importanza di lavorare insieme per sconfiggere l'epidemia di oppiacei.",
"\n\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere, la risoluzione del sistema di immigrazione, la protezione degli americani LGBTQ+ e l'approvazione dell'Equality Act. Ha inoltre sottolineato l'importanza di lavorare insieme per sconfiggere l'epidemia di oppiacei e per investire in America, educare gli americani, far crescere la forza lavoro e costruire l'economia dal"],
'output_text': "\n\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere, la risoluzione del sistema di immigrazione, la protezione degli americani LGBTQ+ e l'approvazione dell'Equality Act. Ha inoltre sottolineato l'importanza di lavorare insieme per sconfiggere l'epidemia di oppiacei e per investire in America, educare gli americani, far crescere la forza lavoro e costruire l'economia dal"}
```
</CodeOutputBlock>
## The `map-rerank` Chain
This sections shows results of using the `map-rerank` Chain to do question answering with sources.
```python
chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_rerank", return_intermediate_steps=True)
```
```python
query = "What did the president say about Justice Breyer"
results = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
```
```python
results["output_text"]
```
<CodeOutputBlock lang="python">
```
' The President thanked Justice Breyer for his service and honored him for dedicating his life to serve the country.'
```
</CodeOutputBlock>
```python
results["intermediate_steps"]
```
<CodeOutputBlock lang="python">
```
[{'answer': ' The President thanked Justice Breyer for his service and honored him for dedicating his life to serve the country.',
'score': '100'},
{'answer': ' This document does not answer the question', 'score': '0'},
{'answer': ' This document does not answer the question', 'score': '0'},
{'answer': ' This document does not answer the question', 'score': '0'}]
```
</CodeOutputBlock>
**Custom Prompts**
You can also use your own prompts with this chain. In this example, we will respond in Italian.
```python
from langchain.output_parsers import RegexParser
output_parser = RegexParser(
regex=r"(.*?)\nScore: (.*)",
output_keys=["answer", "score"],
)
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
In addition to giving an answer, also return a score of how fully it answered the user's question. This should be in the following format:
Question: [question here]
Helpful Answer In Italian: [answer here]
Score: [score between 0 and 100]
Begin!
Context:
---------
{context}
---------
Question: {question}
Helpful Answer In Italian:"""
PROMPT = PromptTemplate(
template=prompt_template,
input_variables=["context", "question"],
output_parser=output_parser,
)
chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_rerank", return_intermediate_steps=True, prompt=PROMPT)
query = "What did the president say about Justice Breyer"
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'intermediate_steps': [{'answer': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese.',
'score': '100'},
{'answer': ' Il presidente non ha detto nulla sulla Giustizia Breyer.',
'score': '100'},
{'answer': ' Non so.', 'score': '0'},
{'answer': ' Non so.', 'score': '0'}],
'output_text': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese.'}
```
</CodeOutputBlock>

View file

@ -1,15 +0,0 @@
```python
class Chain(BaseModel, ABC):
"""Base interface that all chains should implement."""
memory: BaseMemory
callbacks: Callbacks
def __call__(
self,
inputs: Any,
return_only_outputs: bool = False,
callbacks: Callbacks = None,
) -> Dict[str, Any]:
...
```

View file

@ -1,9 +0,0 @@
```python
class BaseCombineDocumentsChain(Chain, ABC):
"""Base interface for chains combining documents."""
@abstractmethod
def combine_docs(self, docs: List[Document], **kwargs: Any) -> Tuple[str, dict]:
"""Combine documents into a single string."""
```

View file

@ -1,161 +0,0 @@
```python
from langchain import PromptTemplate, OpenAI, LLMChain
prompt_template = "What is a good name for a company that makes {product}?"
llm = OpenAI(temperature=0)
llm_chain = LLMChain(
llm=llm,
prompt=PromptTemplate.from_template(prompt_template)
)
llm_chain("colorful socks")
```
<CodeOutputBlock lang="python">
```
{'product': 'colorful socks', 'text': '\n\nSocktastic!'}
```
</CodeOutputBlock>
## Additional ways of running LLM Chain
Aside from `__call__` and `run` methods shared by all `Chain` object, `LLMChain` offers a few more ways of calling the chain logic:
- `apply` allows you run the chain against a list of inputs:
```python
input_list = [
{"product": "socks"},
{"product": "computer"},
{"product": "shoes"}
]
llm_chain.apply(input_list)
```
<CodeOutputBlock lang="python">
```
[{'text': '\n\nSocktastic!'},
{'text': '\n\nTechCore Solutions.'},
{'text': '\n\nFootwear Factory.'}]
```
</CodeOutputBlock>
- `generate` is similar to `apply`, except it return an `LLMResult` instead of string. `LLMResult` often contains useful generation such as token usages and finish reason.
```python
llm_chain.generate(input_list)
```
<CodeOutputBlock lang="python">
```
LLMResult(generations=[[Generation(text='\n\nSocktastic!', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text='\n\nTechCore Solutions.', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text='\n\nFootwear Factory.', generation_info={'finish_reason': 'stop', 'logprobs': None})]], llm_output={'token_usage': {'prompt_tokens': 36, 'total_tokens': 55, 'completion_tokens': 19}, 'model_name': 'text-davinci-003'})
```
</CodeOutputBlock>
- `predict` is similar to `run` method except that the input keys are specified as keyword arguments instead of a Python dict.
```python
# Single input example
llm_chain.predict(product="colorful socks")
```
<CodeOutputBlock lang="python">
```
'\n\nSocktastic!'
```
</CodeOutputBlock>
```python
# Multiple inputs example
template = """Tell me a {adjective} joke about {subject}."""
prompt = PromptTemplate(template=template, input_variables=["adjective", "subject"])
llm_chain = LLMChain(prompt=prompt, llm=OpenAI(temperature=0))
llm_chain.predict(adjective="sad", subject="ducks")
```
<CodeOutputBlock lang="python">
```
'\n\nQ: What did the duck say when his friend died?\nA: Quack, quack, goodbye.'
```
</CodeOutputBlock>
## Parsing the outputs
By default, `LLMChain` does not parse the output even if the underlying `prompt` object has an output parser. If you would like to apply that output parser on the LLM output, use `predict_and_parse` instead of `predict` and `apply_and_parse` instead of `apply`.
With `predict`:
```python
from langchain.output_parsers import CommaSeparatedListOutputParser
output_parser = CommaSeparatedListOutputParser()
template = """List all the colors in a rainbow"""
prompt = PromptTemplate(template=template, input_variables=[], output_parser=output_parser)
llm_chain = LLMChain(prompt=prompt, llm=llm)
llm_chain.predict()
```
<CodeOutputBlock lang="python">
```
'\n\nRed, orange, yellow, green, blue, indigo, violet'
```
</CodeOutputBlock>
With `predict_and_parse`:
```python
llm_chain.predict_and_parse()
```
<CodeOutputBlock lang="python">
```
['Red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet']
```
</CodeOutputBlock>
## Initialize from string
You can also construct an LLMChain from a string template directly.
```python
template = """Tell me a {adjective} joke about {subject}."""
llm_chain = LLMChain.from_string(llm=llm, template=template)
```
```python
llm_chain.predict(adjective="sad", subject="ducks")
```
<CodeOutputBlock lang="python">
```
'\n\nQ: What did the duck say when his friend died?\nA: Quack, quack, goodbye.'
```
</CodeOutputBlock>

View file

@ -1,218 +0,0 @@
```python
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
```
```python
# This is an LLMChain to write a synopsis given a title of a play.
llm = OpenAI(temperature=.7)
template = """You are a playwright. Given the title of play, it is your job to write a synopsis for that title.
Title: {title}
Playwright: This is a synopsis for the above play:"""
prompt_template = PromptTemplate(input_variables=["title"], template=template)
synopsis_chain = LLMChain(llm=llm, prompt=prompt_template)
```
```python
# This is an LLMChain to write a review of a play given a synopsis.
llm = OpenAI(temperature=.7)
template = """You are a play critic from the New York Times. Given the synopsis of play, it is your job to write a review for that play.
Play Synopsis:
{synopsis}
Review from a New York Times play critic of the above play:"""
prompt_template = PromptTemplate(input_variables=["synopsis"], template=template)
review_chain = LLMChain(llm=llm, prompt=prompt_template)
```
```python
# This is the overall chain where we run these two chains in sequence.
from langchain.chains import SimpleSequentialChain
overall_chain = SimpleSequentialChain(chains=[synopsis_chain, review_chain], verbose=True)
```
```python
review = overall_chain.run("Tragedy at sunset on the beach")
```
<CodeOutputBlock lang="python">
```
> Entering new SimpleSequentialChain chain...
Tragedy at Sunset on the Beach is a story of a young couple, Jack and Sarah, who are in love and looking forward to their future together. On the night of their anniversary, they decide to take a walk on the beach at sunset. As they are walking, they come across a mysterious figure, who tells them that their love will be tested in the near future.
The figure then tells the couple that the sun will soon set, and with it, a tragedy will strike. If Jack and Sarah can stay together and pass the test, they will be granted everlasting love. However, if they fail, their love will be lost forever.
The play follows the couple as they struggle to stay together and battle the forces that threaten to tear them apart. Despite the tragedy that awaits them, they remain devoted to one another and fight to keep their love alive. In the end, the couple must decide whether to take a chance on their future together or succumb to the tragedy of the sunset.
Tragedy at Sunset on the Beach is an emotionally gripping story of love, hope, and sacrifice. Through the story of Jack and Sarah, the audience is taken on a journey of self-discovery and the power of love to overcome even the greatest of obstacles.
The play's talented cast brings the characters to life, allowing us to feel the depths of their emotion and the intensity of their struggle. With its compelling story and captivating performances, this play is sure to draw in audiences and leave them on the edge of their seats.
The play's setting of the beach at sunset adds a touch of poignancy and romanticism to the story, while the mysterious figure serves to keep the audience enthralled. Overall, Tragedy at Sunset on the Beach is an engaging and thought-provoking play that is sure to leave audiences feeling inspired and hopeful.
> Finished chain.
```
</CodeOutputBlock>
```python
print(review)
```
<CodeOutputBlock lang="python">
```
Tragedy at Sunset on the Beach is an emotionally gripping story of love, hope, and sacrifice. Through the story of Jack and Sarah, the audience is taken on a journey of self-discovery and the power of love to overcome even the greatest of obstacles.
The play's talented cast brings the characters to life, allowing us to feel the depths of their emotion and the intensity of their struggle. With its compelling story and captivating performances, this play is sure to draw in audiences and leave them on the edge of their seats.
The play's setting of the beach at sunset adds a touch of poignancy and romanticism to the story, while the mysterious figure serves to keep the audience enthralled. Overall, Tragedy at Sunset on the Beach is an engaging and thought-provoking play that is sure to leave audiences feeling inspired and hopeful.
```
</CodeOutputBlock>
## Sequential Chain
Of course, not all sequential chains will be as simple as passing a single string as an argument and getting a single string as output for all steps in the chain. In this next example, we will experiment with more complex chains that involve multiple inputs, and where there also multiple final outputs.
Of particular importance is how we name the input/output variable names. In the above example we didn't have to think about that because we were just passing the output of one chain directly as input to the next, but here we do have worry about that because we have multiple inputs.
```python
# This is an LLMChain to write a synopsis given a title of a play and the era it is set in.
llm = OpenAI(temperature=.7)
template = """You are a playwright. Given the title of play and the era it is set in, it is your job to write a synopsis for that title.
Title: {title}
Era: {era}
Playwright: This is a synopsis for the above play:"""
prompt_template = PromptTemplate(input_variables=["title", "era"], template=template)
synopsis_chain = LLMChain(llm=llm, prompt=prompt_template, output_key="synopsis")
```
```python
# This is an LLMChain to write a review of a play given a synopsis.
llm = OpenAI(temperature=.7)
template = """You are a play critic from the New York Times. Given the synopsis of play, it is your job to write a review for that play.
Play Synopsis:
{synopsis}
Review from a New York Times play critic of the above play:"""
prompt_template = PromptTemplate(input_variables=["synopsis"], template=template)
review_chain = LLMChain(llm=llm, prompt=prompt_template, output_key="review")
```
```python
# This is the overall chain where we run these two chains in sequence.
from langchain.chains import SequentialChain
overall_chain = SequentialChain(
chains=[synopsis_chain, review_chain],
input_variables=["era", "title"],
# Here we return multiple variables
output_variables=["synopsis", "review"],
verbose=True)
```
```python
overall_chain({"title":"Tragedy at sunset on the beach", "era": "Victorian England"})
```
<CodeOutputBlock lang="python">
```
> Entering new SequentialChain chain...
> Finished chain.
{'title': 'Tragedy at sunset on the beach',
'era': 'Victorian England',
'synopsis': "\n\nThe play follows the story of John, a young man from a wealthy Victorian family, who dreams of a better life for himself. He soon meets a beautiful young woman named Mary, who shares his dream. The two fall in love and decide to elope and start a new life together.\n\nOn their journey, they make their way to a beach at sunset, where they plan to exchange their vows of love. Unbeknownst to them, their plans are overheard by John's father, who has been tracking them. He follows them to the beach and, in a fit of rage, confronts them. \n\nA physical altercation ensues, and in the struggle, John's father accidentally stabs Mary in the chest with his sword. The two are left in shock and disbelief as Mary dies in John's arms, her last words being a declaration of her love for him.\n\nThe tragedy of the play comes to a head when John, broken and with no hope of a future, chooses to take his own life by jumping off the cliffs into the sea below. \n\nThe play is a powerful story of love, hope, and loss set against the backdrop of 19th century England.",
'review': "\n\nThe latest production from playwright X is a powerful and heartbreaking story of love and loss set against the backdrop of 19th century England. The play follows John, a young man from a wealthy Victorian family, and Mary, a beautiful young woman with whom he falls in love. The two decide to elope and start a new life together, and the audience is taken on a journey of hope and optimism for the future.\n\nUnfortunately, their dreams are cut short when John's father discovers them and in a fit of rage, fatally stabs Mary. The tragedy of the play is further compounded when John, broken and without hope, takes his own life. The storyline is not only realistic, but also emotionally compelling, drawing the audience in from start to finish.\n\nThe acting was also commendable, with the actors delivering believable and nuanced performances. The playwright and director have successfully crafted a timeless tale of love and loss that will resonate with audiences for years to come. Highly recommended."}
```
</CodeOutputBlock>
### Memory in Sequential Chains
Sometimes you may want to pass along some context to use in each step of the chain or in a later part of the chain, but maintaining and chaining together the input/output variables can quickly get messy. Using `SimpleMemory` is a convenient way to do manage this and clean up your chains.
For example, using the previous playwright SequentialChain, lets say you wanted to include some context about date, time and location of the play, and using the generated synopsis and review, create some social media post text. You could add these new context variables as `input_variables`, or we can add a `SimpleMemory` to the chain to manage this context:
```python
from langchain.chains import SequentialChain
from langchain.memory import SimpleMemory
llm = OpenAI(temperature=.7)
template = """You are a social media manager for a theater company. Given the title of play, the era it is set in, the date,time and location, the synopsis of the play, and the review of the play, it is your job to write a social media post for that play.
Here is some context about the time and location of the play:
Date and Time: {time}
Location: {location}
Play Synopsis:
{synopsis}
Review from a New York Times play critic of the above play:
{review}
Social Media Post:
"""
prompt_template = PromptTemplate(input_variables=["synopsis", "review", "time", "location"], template=template)
social_chain = LLMChain(llm=llm, prompt=prompt_template, output_key="social_post_text")
overall_chain = SequentialChain(
memory=SimpleMemory(memories={"time": "December 25th, 8pm PST", "location": "Theater in the Park"}),
chains=[synopsis_chain, review_chain, social_chain],
input_variables=["era", "title"],
# Here we return multiple variables
output_variables=["social_post_text"],
verbose=True)
overall_chain({"title":"Tragedy at sunset on the beach", "era": "Victorian England"})
```
<CodeOutputBlock lang="python">
```
> Entering new SequentialChain chain...
> Finished chain.
{'title': 'Tragedy at sunset on the beach',
'era': 'Victorian England',
'time': 'December 25th, 8pm PST',
'location': 'Theater in the Park',
'social_post_text': "\nSpend your Christmas night with us at Theater in the Park and experience the heartbreaking story of love and loss that is 'A Walk on the Beach'. Set in Victorian England, this romantic tragedy follows the story of Frances and Edward, a young couple whose love is tragically cut short. Don't miss this emotional and thought-provoking production that is sure to leave you in tears. #AWalkOnTheBeach #LoveAndLoss #TheaterInThePark #VictorianEngland"}
```
</CodeOutputBlock>

View file

@ -1,87 +0,0 @@
#### Using `LLMChain`
The `LLMChain` is most basic building block chain. It takes in a prompt template, formats it with the user input and returns the response from an LLM.
To use the `LLMChain`, first create a prompt template.
```python
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
llm = OpenAI(temperature=0.9)
prompt = PromptTemplate(
input_variables=["product"],
template="What is a good name for a company that makes {product}?",
)
```
We can now create a very simple chain that will take user input, format the prompt with it, and then send it to the LLM.
```python
from langchain.chains import LLMChain
chain = LLMChain(llm=llm, prompt=prompt)
# Run the chain only specifying the input variable.
print(chain.run("colorful socks"))
```
<CodeOutputBlock lang="python">
```
Colorful Toes Co.
```
</CodeOutputBlock>
If there are multiple variables, you can input them all at once using a dictionary.
```python
prompt = PromptTemplate(
input_variables=["company", "product"],
template="What is a good name for {company} that makes {product}?",
)
chain = LLMChain(llm=llm, prompt=prompt)
print(chain.run({
'company': "ABC Startup",
'product': "colorful socks"
}))
```
<CodeOutputBlock lang="python">
```
Socktopia Colourful Creations.
```
</CodeOutputBlock>
You can use a chat model in an `LLMChain` as well:
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
)
human_message_prompt = HumanMessagePromptTemplate(
prompt=PromptTemplate(
template="What is a good name for a company that makes {product}?",
input_variables=["product"],
)
)
chat_prompt_template = ChatPromptTemplate.from_messages([human_message_prompt])
chat = ChatOpenAI(temperature=0.9)
chain = LLMChain(llm=chat, prompt=chat_prompt_template)
print(chain.run("colorful socks"))
```
<CodeOutputBlock lang="python">
```
Rainbow Socks Co.
```
</CodeOutputBlock>

View file

@ -1,30 +0,0 @@
Setting `verbose` to `True` will print out some internal states of the `Chain` object while it is being ran.
```python
conversation = ConversationChain(
llm=chat,
memory=ConversationBufferMemory(),
verbose=True
)
conversation.run("What is ChatGPT?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
Human: What is ChatGPT?
AI:
> Finished chain.
'ChatGPT is an AI language model developed by OpenAI. It is based on the GPT-3 architecture and is capable of generating human-like responses to text prompts. ChatGPT has been trained on a massive amount of text data and can understand and respond to a wide range of topics. It is often used for chatbots, virtual assistants, and other conversational AI applications.'
```
</CodeOutputBlock>

View file

@ -1,25 +0,0 @@
```python
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
conversation = ConversationChain(
llm=chat,
memory=ConversationBufferMemory()
)
conversation.run("Answer briefly. What are the first 3 colors of a rainbow?")
# -> The first three colors of a rainbow are red, orange, and yellow.
conversation.run("And the next 4?")
# -> The next four colors of a rainbow are green, blue, indigo, and violet.
```
<CodeOutputBlock lang="python">
```
'The next four colors of a rainbow are green, blue, indigo, and violet.'
```
</CodeOutputBlock>
Essentially, `BaseMemory` defines an interface of how `langchain` stores memory. It allows reading of stored data through `load_memory_variables` method and storing new data through `save_context` method. You can learn more about it in the [Memory](/docs/modules/memory/) section.

File diff suppressed because one or more lines are too long

View file

@ -1,398 +0,0 @@
```python
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
```
Load in documents. You can replace this with a loader for whatever type of data you want
```python
from langchain.document_loaders import TextLoader
loader = TextLoader("../../state_of_the_union.txt")
documents = loader.load()
```
If you had multiple loaders that you wanted to combine, you do something like:
```python
# loaders = [....]
# docs = []
# for loader in loaders:
# docs.extend(loader.load())
```
We now split the documents, create embeddings for them, and put them in a vectorstore. This allows us to do semantic search over them.
```python
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents, embeddings)
```
<CodeOutputBlock lang="python">
```
Using embedded DuckDB without persistence: data will be transient
```
</CodeOutputBlock>
We can now create a memory object, which is necessary to track the inputs/outputs and hold a conversation.
```python
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
```
We now initialize the `ConversationalRetrievalChain`
```python
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), memory=memory)
```
```python
query = "What did the president say about Ketanji Brown Jackson"
result = qa({"question": query})
```
```python
result["answer"]
```
<CodeOutputBlock lang="python">
```
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
```
</CodeOutputBlock>
```python
query = "Did he mention who she succeeded"
result = qa({"question": query})
```
```python
result['answer']
```
<CodeOutputBlock lang="python">
```
' Ketanji Brown Jackson succeeded Justice Stephen Breyer on the United States Supreme Court.'
```
</CodeOutputBlock>
## Pass in chat history
In the above example, we used a Memory object to track chat history. We can also just pass it in explicitly. In order to do this, we need to initialize a chain without any memory object.
```python
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever())
```
Here's an example of asking a question with no chat history
```python
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = qa({"question": query, "chat_history": chat_history})
```
```python
result["answer"]
```
<CodeOutputBlock lang="python">
```
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
```
</CodeOutputBlock>
Here's an example of asking a question with some chat history
```python
chat_history = [(query, result["answer"])]
query = "Did he mention who she succeeded"
result = qa({"question": query, "chat_history": chat_history})
```
```python
result['answer']
```
<CodeOutputBlock lang="python">
```
' Ketanji Brown Jackson succeeded Justice Stephen Breyer on the United States Supreme Court.'
```
</CodeOutputBlock>
## Using a different model for condensing the question
This chain has two steps. First, it condenses the current question and the chat history into a standalone question. This is necessary to create a standanlone vector to use for retrieval. After that, it does retrieval and then answers the question using retrieval augmented generation with a separate model. Part of the power of the declarative nature of LangChain is that you can easily use a separate language model for each call. This can be useful to use a cheaper and faster model for the simpler task of condensing the question, and then a more expensive model for answering the question. Here is an example of doing so.
```python
from langchain.chat_models import ChatOpenAI
```
```python
qa = ConversationalRetrievalChain.from_llm(
ChatOpenAI(temperature=0, model="gpt-4"),
vectorstore.as_retriever(),
condense_question_llm = ChatOpenAI(temperature=0, model='gpt-3.5-turbo'),
)
```
```python
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = qa({"question": query, "chat_history": chat_history})
```
```python
chat_history = [(query, result["answer"])]
query = "Did he mention who she succeeded"
result = qa({"question": query, "chat_history": chat_history})
```
## Return Source Documents
You can also easily return source documents from the ConversationalRetrievalChain. This is useful for when you want to inspect what documents were returned.
```python
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), return_source_documents=True)
```
```python
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = qa({"question": query, "chat_history": chat_history})
```
```python
result['source_documents'][0]
```
<CodeOutputBlock lang="python">
```
Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../state_of_the_union.txt'})
```
</CodeOutputBlock>
## ConversationalRetrievalChain with `search_distance`
If you are using a vector store that supports filtering by search distance, you can add a threshold value parameter.
```python
vectordbkwargs = {"search_distance": 0.9}
```
```python
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), return_source_documents=True)
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = qa({"question": query, "chat_history": chat_history, "vectordbkwargs": vectordbkwargs})
```
## ConversationalRetrievalChain with `map_reduce`
We can also use different types of combine document chains with the ConversationalRetrievalChain chain.
```python
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
```
```python
llm = OpenAI(temperature=0)
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
doc_chain = load_qa_chain(llm, chain_type="map_reduce")
chain = ConversationalRetrievalChain(
retriever=vectorstore.as_retriever(),
question_generator=question_generator,
combine_docs_chain=doc_chain,
)
```
```python
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = chain({"question": query, "chat_history": chat_history})
```
```python
result['answer']
```
<CodeOutputBlock lang="python">
```
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, from a family of public school educators and police officers, a consensus builder, and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
```
</CodeOutputBlock>
## ConversationalRetrievalChain with Question Answering with sources
You can also use this chain with the question answering with sources chain.
```python
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
```
```python
llm = OpenAI(temperature=0)
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
doc_chain = load_qa_with_sources_chain(llm, chain_type="map_reduce")
chain = ConversationalRetrievalChain(
retriever=vectorstore.as_retriever(),
question_generator=question_generator,
combine_docs_chain=doc_chain,
)
```
```python
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = chain({"question": query, "chat_history": chat_history})
```
```python
result['answer']
```
<CodeOutputBlock lang="python">
```
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, from a family of public school educators and police officers, a consensus builder, and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \nSOURCES: ../../state_of_the_union.txt"
```
</CodeOutputBlock>
## ConversationalRetrievalChain with streaming to `stdout`
Output from the chain will be streamed to `stdout` token by token in this example.
```python
from langchain.chains.llm import LLMChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
from langchain.chains.question_answering import load_qa_chain
# Construct a ConversationalRetrievalChain with a streaming llm for combine docs
# and a separate, non-streaming llm for question generation
llm = OpenAI(temperature=0)
streaming_llm = OpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0)
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
doc_chain = load_qa_chain(streaming_llm, chain_type="stuff", prompt=QA_PROMPT)
qa = ConversationalRetrievalChain(
retriever=vectorstore.as_retriever(), combine_docs_chain=doc_chain, question_generator=question_generator)
```
```python
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = qa({"question": query, "chat_history": chat_history})
```
<CodeOutputBlock lang="python">
```
The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.
```
</CodeOutputBlock>
```python
chat_history = [(query, result["answer"])]
query = "Did he mention who she succeeded"
result = qa({"question": query, "chat_history": chat_history})
```
<CodeOutputBlock lang="python">
```
Ketanji Brown Jackson succeeded Justice Stephen Breyer on the United States Supreme Court.
```
</CodeOutputBlock>
## get_chat_history Function
You can also specify a `get_chat_history` function, which can be used to format the chat_history string.
```python
def get_chat_history(inputs) -> str:
res = []
for human, ai in inputs:
res.append(f"Human:{human}\nAI:{ai}")
return "\n".join(res)
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), get_chat_history=get_chat_history)
```
```python
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = qa({"question": query, "chat_history": chat_history})
```
```python
result['answer']
```
<CodeOutputBlock lang="python">
```
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
```
</CodeOutputBlock>

File diff suppressed because one or more lines are too long

View file

@ -1,384 +0,0 @@
## Prepare Data
First we prepare the data. For this example we create multiple documents from one long one, but these documents could be fetched in any manner (the point of this notebook to highlight what to do AFTER you fetch the documents).
```python
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
llm = OpenAI(temperature=0)
text_splitter = CharacterTextSplitter()
```
```python
with open("../../state_of_the_union.txt") as f:
state_of_the_union = f.read()
texts = text_splitter.split_text(state_of_the_union)
```
```python
from langchain.docstore.document import Document
docs = [Document(page_content=t) for t in texts[:3]]
```
## Quickstart
If you just want to get started as quickly as possible, this is the recommended way to do it:
```python
from langchain.chains.summarize import load_summarize_chain
```
```python
chain = load_summarize_chain(llm, chain_type="map_reduce")
chain.run(docs)
```
<CodeOutputBlock lang="python">
```
' In response to Russian aggression in Ukraine, the United States and its allies are taking action to hold Putin accountable, including economic sanctions, asset seizures, and military assistance. The US is also providing economic and humanitarian aid to Ukraine, and has passed the American Rescue Plan and the Bipartisan Infrastructure Law to help struggling families and create jobs. The US remains unified and determined to protect Ukraine and the free world.'
```
</CodeOutputBlock>
If you want more control and understanding over what is happening, please see the information below.
## The `stuff` Chain
This sections shows results of using the `stuff` Chain to do summarization.
```python
chain = load_summarize_chain(llm, chain_type="stuff")
```
```python
chain.run(docs)
```
<CodeOutputBlock lang="python">
```
' In his speech, President Biden addressed the crisis in Ukraine, the American Rescue Plan, and the Bipartisan Infrastructure Law. He discussed the need to invest in America, educate Americans, and build the economy from the bottom up. He also announced the release of 60 million barrels of oil from reserves around the world, and the creation of a dedicated task force to go after the crimes of Russian oligarchs. He concluded by emphasizing the need to Buy American and use taxpayer dollars to rebuild America.'
```
</CodeOutputBlock>
**Custom Prompts**
You can also use your own prompts with this chain. In this example, we will respond in Italian.
```python
prompt_template = """Write a concise summary of the following:
{text}
CONCISE SUMMARY IN ITALIAN:"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
chain = load_summarize_chain(llm, chain_type="stuff", prompt=PROMPT)
chain.run(docs)
```
<CodeOutputBlock lang="python">
```
"\n\nIn questa serata, il Presidente degli Stati Uniti ha annunciato una serie di misure per affrontare la crisi in Ucraina, causata dall'aggressione di Putin. Ha anche annunciato l'invio di aiuti economici, militari e umanitari all'Ucraina. Ha anche annunciato che gli Stati Uniti e i loro alleati stanno imponendo sanzioni economiche a Putin e stanno rilasciando 60 milioni di barili di petrolio dalle riserve di tutto il mondo. Inoltre, ha annunciato che il Dipartimento di Giustizia degli Stati Uniti sta creando una task force dedicata ai crimini degli oligarchi russi. Il Presidente ha anche annunciato l'approvazione della legge bipartitica sull'infrastruttura, che prevede investimenti per la ricostruzione dell'America. Questo porterà a creare posti"
```
</CodeOutputBlock>
## The `map_reduce` Chain
This sections shows results of using the `map_reduce` Chain to do summarization.
```python
chain = load_summarize_chain(llm, chain_type="map_reduce")
```
```python
chain.run(docs)
```
<CodeOutputBlock lang="python">
```
" In response to Russia's aggression in Ukraine, the United States and its allies have imposed economic sanctions and are taking other measures to hold Putin accountable. The US is also providing economic and military assistance to Ukraine, protecting NATO countries, and releasing oil from its Strategic Petroleum Reserve. President Biden and Vice President Harris have passed legislation to help struggling families and rebuild America's infrastructure."
```
</CodeOutputBlock>
**Intermediate Steps**
We can also return the intermediate steps for `map_reduce` chains, should we want to inspect them. This is done with the `return_map_steps` variable.
```python
chain = load_summarize_chain(OpenAI(temperature=0), chain_type="map_reduce", return_intermediate_steps=True)
```
```python
chain({"input_documents": docs}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'map_steps': [" In response to Russia's aggression in Ukraine, the United States has united with other freedom-loving nations to impose economic sanctions and hold Putin accountable. The U.S. Department of Justice is also assembling a task force to go after the crimes of Russian oligarchs and seize their ill-gotten gains.",
' The United States and its European allies are taking action to punish Russia for its invasion of Ukraine, including seizing assets, closing off airspace, and providing economic and military assistance to Ukraine. The US is also mobilizing forces to protect NATO countries and has released 30 million barrels of oil from its Strategic Petroleum Reserve to help blunt gas prices. The world is uniting in support of Ukraine and democracy, and the US stands with its Ukrainian-American citizens.',
" President Biden and Vice President Harris ran for office with a new economic vision for America, and have since passed the American Rescue Plan and the Bipartisan Infrastructure Law to help struggling families and rebuild America's infrastructure. This includes creating jobs, modernizing roads, airports, ports, and waterways, replacing lead pipes, providing affordable high-speed internet, and investing in American products to support American jobs."],
'output_text': " In response to Russia's aggression in Ukraine, the United States and its allies have imposed economic sanctions and are taking other measures to hold Putin accountable. The US is also providing economic and military assistance to Ukraine, protecting NATO countries, and passing legislation to help struggling families and rebuild America's infrastructure. The world is uniting in support of Ukraine and democracy, and the US stands with its Ukrainian-American citizens."}
```
</CodeOutputBlock>
**Custom Prompts**
You can also use your own prompts with this chain. In this example, we will respond in Italian.
```python
prompt_template = """Write a concise summary of the following:
{text}
CONCISE SUMMARY IN ITALIAN:"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
chain = load_summarize_chain(OpenAI(temperature=0), chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT)
chain({"input_documents": docs}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'intermediate_steps': ["\n\nQuesta sera, ci incontriamo come democratici, repubblicani e indipendenti, ma soprattutto come americani. La Russia di Putin ha cercato di scuotere le fondamenta del mondo libero, ma ha sottovalutato la forza della gente ucraina. Gli Stati Uniti e i loro alleati stanno ora imponendo sanzioni economiche a Putin e stanno tagliando l'accesso della Russia alla tecnologia. Il Dipartimento di Giustizia degli Stati Uniti sta anche creando una task force dedicata per andare dopo i crimini degli oligarchi russi.",
"\n\nStiamo unendo le nostre forze con quelle dei nostri alleati europei per sequestrare yacht, appartamenti di lusso e jet privati di Putin. Abbiamo chiuso lo spazio aereo americano ai voli russi e stiamo fornendo più di un miliardo di dollari in assistenza all'Ucraina. Abbiamo anche mobilitato le nostre forze terrestri, aeree e navali per proteggere i paesi della NATO. Abbiamo anche rilasciato 60 milioni di barili di petrolio dalle riserve di tutto il mondo, di cui 30 milioni dalla nostra riserva strategica di petrolio. Stiamo affrontando una prova reale e ci vorrà del tempo, ma alla fine Putin non riuscirà a spegnere l'amore dei popoli per la libertà.",
"\n\nIl Presidente Biden ha lottato per passare l'American Rescue Plan per aiutare le persone che soffrivano a causa della pandemia. Il piano ha fornito sollievo economico immediato a milioni di americani, ha aiutato a mettere cibo sulla loro tavola, a mantenere un tetto sopra le loro teste e a ridurre il costo dell'assicurazione sanitaria. Il piano ha anche creato più di 6,5 milioni di nuovi posti di lavoro, il più alto numero di posti di lavoro creati in un anno nella storia degli Stati Uniti. Il Presidente Biden ha anche firmato la legge bipartitica sull'infrastruttura, la più ampia iniziativa di ricostruzione della storia degli Stati Uniti. Il piano prevede di modernizzare le strade, gli aeroporti, i porti e le vie navigabili in"],
'output_text': "\n\nIl Presidente Biden sta lavorando per aiutare le persone che soffrono a causa della pandemia attraverso l'American Rescue Plan e la legge bipartitica sull'infrastruttura. Gli Stati Uniti e i loro alleati stanno anche imponendo sanzioni economiche a Putin e tagliando l'accesso della Russia alla tecnologia. Stanno anche sequestrando yacht, appartamenti di lusso e jet privati di Putin e fornendo più di un miliardo di dollari in assistenza all'Ucraina. Alla fine, Putin non riuscirà a spegnere l'amore dei popoli per la libertà."}
```
</CodeOutputBlock>
## The custom `MapReduceChain`
**Multi input prompt**
You can also use prompt with multi input. In this example, we will use a MapReduce chain to answer specific question about our code.
```python
from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
map_template_string = """Give the following python code information, generate a description that explains what the code does and also mention the time complexity.
Code:
{code}
Return the the description in the following format:
name of the function: description of the function
"""
reduce_template_string = """Given the following python function names and descriptions, answer the following question
{code_description}
Question: {question}
Answer:
"""
# Prompt to use in map and reduce stages
MAP_PROMPT = PromptTemplate(input_variables=["code"], template=map_template_string)
REDUCE_PROMPT = PromptTemplate(input_variables=["code_description", "question"], template=reduce_template_string)
# LLM to use in map and reduce stages
llm = OpenAI()
map_llm_chain = LLMChain(llm=llm, prompt=MAP_PROMPT)
reduce_llm_chain = LLMChain(llm=llm, prompt=REDUCE_PROMPT)
# Takes a list of documents and combines them into a single string
combine_documents_chain = StuffDocumentsChain(
llm_chain=reduce_llm_chain,
document_variable_name="code_description",
)
# Combines and iteravely reduces the mapped documents
reduce_documents_chain = ReduceDocumentsChain(
# This is final chain that is called.
combine_documents_chain=combine_documents_chain,
# If documents exceed context for `combine_documents_chain`
collapse_documents_chain=combine_documents_chain,
# The maximum number of tokens to group documents into
token_max=3000)
# Combining documents by mapping a chain over them, then combining results with reduce chain
combine_documents = MapReduceDocumentsChain(
# Map chain
llm_chain=map_llm_chain,
# Reduce chain
reduce_documents_chain=reduce_documents_chain,
# The variable name in the llm_chain to put the documents in
document_variable_name="code",
)
map_reduce = MapReduceChain(
combine_documents_chain=combine_documents,
text_splitter=CharacterTextSplitter(separator="\n##\n", chunk_size=100, chunk_overlap=0),
)
```
```python
code = """
def bubblesort(list):
for iter_num in range(len(list)-1,0,-1):
for idx in range(iter_num):
if list[idx]>list[idx+1]:
temp = list[idx]
list[idx] = list[idx+1]
list[idx+1] = temp
return list
##
def insertion_sort(InputList):
for i in range(1, len(InputList)):
j = i-1
nxt_element = InputList[i]
while (InputList[j] > nxt_element) and (j >= 0):
InputList[j+1] = InputList[j]
j=j-1
InputList[j+1] = nxt_element
return InputList
##
def shellSort(input_list):
gap = len(input_list) // 2
while gap > 0:
for i in range(gap, len(input_list)):
temp = input_list[i]
j = i
while j >= gap and input_list[j - gap] > temp:
input_list[j] = input_list[j - gap]
j = j-gap
input_list[j] = temp
gap = gap//2
return input_list
"""
```
```python
map_reduce.run(input_text=code, question="Which function has a better time complexity?")
```
<CodeOutputBlock lang="python">
```
Created a chunk of size 247, which is longer than the specified 100
Created a chunk of size 267, which is longer than the specified 100
'shellSort has a better time complexity than both bubblesort and insertion_sort, as it has a time complexity of O(n^2), while the other two have a time complexity of O(n^2).'
```
</CodeOutputBlock>
## The `refine` Chain
This sections shows results of using the `refine` Chain to do summarization.
```python
chain = load_summarize_chain(llm, chain_type="refine")
chain.run(docs)
```
<CodeOutputBlock lang="python">
```
"\n\nIn response to Russia's aggression in Ukraine, the United States has united with other freedom-loving nations to impose economic sanctions and hold Putin accountable. The U.S. Department of Justice is also assembling a task force to go after the crimes of Russian oligarchs and seize their ill-gotten gains. We are joining with our European allies to find and seize the assets of Russian oligarchs, including yachts, luxury apartments, and private jets. The U.S. is also closing off American airspace to all Russian flights, further isolating Russia and adding an additional squeeze on their economy. The U.S. and its allies are providing support to the Ukrainians in their fight for freedom, including military, economic, and humanitarian assistance. The U.S. is also mobilizing ground forces, air squadrons, and ship deployments to protect NATO countries. The U.S. and its allies are also releasing 60 million barrels of oil from reserves around the world, with the U.S. contributing 30 million barrels from its own Strategic Petroleum Reserve. In addition, the U.S. has passed the American Rescue Plan to provide immediate economic relief for tens of millions of Americans, and the Bipartisan Infrastructure Law to rebuild America and create jobs. This investment will"
```
</CodeOutputBlock>
**Intermediate Steps**
We can also return the intermediate steps for `refine` chains, should we want to inspect them. This is done with the `return_refine_steps` variable.
```python
chain = load_summarize_chain(OpenAI(temperature=0), chain_type="refine", return_intermediate_steps=True)
chain({"input_documents": docs}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'refine_steps': [" In response to Russia's aggression in Ukraine, the United States has united with other freedom-loving nations to impose economic sanctions and hold Putin accountable. The U.S. Department of Justice is also assembling a task force to go after the crimes of Russian oligarchs and seize their ill-gotten gains.",
"\n\nIn response to Russia's aggression in Ukraine, the United States has united with other freedom-loving nations to impose economic sanctions and hold Putin accountable. The U.S. Department of Justice is also assembling a task force to go after the crimes of Russian oligarchs and seize their ill-gotten gains. We are joining with our European allies to find and seize the assets of Russian oligarchs, including yachts, luxury apartments, and private jets. The U.S. is also closing off American airspace to all Russian flights, further isolating Russia and adding an additional squeeze on their economy. The U.S. and its allies are providing support to the Ukrainians in their fight for freedom, including military, economic, and humanitarian assistance. The U.S. is also mobilizing ground forces, air squadrons, and ship deployments to protect NATO countries. The U.S. and its allies are also releasing 60 million barrels of oil from reserves around the world, with the U.S. contributing 30 million barrels from its own Strategic Petroleum Reserve. Putin's war on Ukraine has left Russia weaker and the rest of the world stronger, with the world uniting in support of democracy and peace.",
"\n\nIn response to Russia's aggression in Ukraine, the United States has united with other freedom-loving nations to impose economic sanctions and hold Putin accountable. The U.S. Department of Justice is also assembling a task force to go after the crimes of Russian oligarchs and seize their ill-gotten gains. We are joining with our European allies to find and seize the assets of Russian oligarchs, including yachts, luxury apartments, and private jets. The U.S. is also closing off American airspace to all Russian flights, further isolating Russia and adding an additional squeeze on their economy. The U.S. and its allies are providing support to the Ukrainians in their fight for freedom, including military, economic, and humanitarian assistance. The U.S. is also mobilizing ground forces, air squadrons, and ship deployments to protect NATO countries. The U.S. and its allies are also releasing 60 million barrels of oil from reserves around the world, with the U.S. contributing 30 million barrels from its own Strategic Petroleum Reserve. In addition, the U.S. has passed the American Rescue Plan to provide immediate economic relief for tens of millions of Americans, and the Bipartisan Infrastructure Law to rebuild America and create jobs. This includes investing"],
'output_text': "\n\nIn response to Russia's aggression in Ukraine, the United States has united with other freedom-loving nations to impose economic sanctions and hold Putin accountable. The U.S. Department of Justice is also assembling a task force to go after the crimes of Russian oligarchs and seize their ill-gotten gains. We are joining with our European allies to find and seize the assets of Russian oligarchs, including yachts, luxury apartments, and private jets. The U.S. is also closing off American airspace to all Russian flights, further isolating Russia and adding an additional squeeze on their economy. The U.S. and its allies are providing support to the Ukrainians in their fight for freedom, including military, economic, and humanitarian assistance. The U.S. is also mobilizing ground forces, air squadrons, and ship deployments to protect NATO countries. The U.S. and its allies are also releasing 60 million barrels of oil from reserves around the world, with the U.S. contributing 30 million barrels from its own Strategic Petroleum Reserve. In addition, the U.S. has passed the American Rescue Plan to provide immediate economic relief for tens of millions of Americans, and the Bipartisan Infrastructure Law to rebuild America and create jobs. This includes investing"}
```
</CodeOutputBlock>
**Custom Prompts**
You can also use your own prompts with this chain. In this example, we will respond in Italian.
```python
prompt_template = """Write a concise summary of the following:
{text}
CONCISE SUMMARY IN ITALIAN:"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
refine_template = (
"Your job is to produce a final summary\n"
"We have provided an existing summary up to a certain point: {existing_answer}\n"
"We have the opportunity to refine the existing summary"
"(only if needed) with some more context below.\n"
"------------\n"
"{text}\n"
"------------\n"
"Given the new context, refine the original summary in Italian"
"If the context isn't useful, return the original summary."
)
refine_prompt = PromptTemplate(
input_variables=["existing_answer", "text"],
template=refine_template,
)
chain = load_summarize_chain(OpenAI(temperature=0), chain_type="refine", return_intermediate_steps=True, question_prompt=PROMPT, refine_prompt=refine_prompt)
chain({"input_documents": docs}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'intermediate_steps': ["\n\nQuesta sera, ci incontriamo come democratici, repubblicani e indipendenti, ma soprattutto come americani. La Russia di Putin ha cercato di scuotere le fondamenta del mondo libero, ma ha sottovalutato la forza della gente ucraina. Insieme ai nostri alleati, stiamo imponendo sanzioni economiche, tagliando l'accesso della Russia alla tecnologia e bloccando i suoi più grandi istituti bancari dal sistema finanziario internazionale. Il Dipartimento di Giustizia degli Stati Uniti sta anche assemblando una task force dedicata per andare dopo i crimini degli oligarchi russi.",
"\n\nQuesta sera, ci incontriamo come democratici, repubblicani e indipendenti, ma soprattutto come americani. La Russia di Putin ha cercato di scuotere le fondamenta del mondo libero, ma ha sottovalutato la forza della gente ucraina. Insieme ai nostri alleati, stiamo imponendo sanzioni economiche, tagliando l'accesso della Russia alla tecnologia, bloccando i suoi più grandi istituti bancari dal sistema finanziario internazionale e chiudendo lo spazio aereo americano a tutti i voli russi. Il Dipartimento di Giustizia degli Stati Uniti sta anche assemblando una task force dedicata per andare dopo i crimini degli oligarchi russi. Stiamo fornendo più di un miliardo di dollari in assistenza diretta all'Ucraina e fornendo assistenza militare,",
"\n\nQuesta sera, ci incontriamo come democratici, repubblicani e indipendenti, ma soprattutto come americani. La Russia di Putin ha cercato di scuotere le fondamenta del mondo libero, ma ha sottovalutato la forza della gente ucraina. Insieme ai nostri alleati, stiamo imponendo sanzioni economiche, tagliando l'accesso della Russia alla tecnologia, bloccando i suoi più grandi istituti bancari dal sistema finanziario internazionale e chiudendo lo spazio aereo americano a tutti i voli russi. Il Dipartimento di Giustizia degli Stati Uniti sta anche assemblando una task force dedicata per andare dopo i crimini degli oligarchi russi. Stiamo fornendo più di un miliardo di dollari in assistenza diretta all'Ucraina e fornendo assistenza militare."],
'output_text': "\n\nQuesta sera, ci incontriamo come democratici, repubblicani e indipendenti, ma soprattutto come americani. La Russia di Putin ha cercato di scuotere le fondamenta del mondo libero, ma ha sottovalutato la forza della gente ucraina. Insieme ai nostri alleati, stiamo imponendo sanzioni economiche, tagliando l'accesso della Russia alla tecnologia, bloccando i suoi più grandi istituti bancari dal sistema finanziario internazionale e chiudendo lo spazio aereo americano a tutti i voli russi. Il Dipartimento di Giustizia degli Stati Uniti sta anche assemblando una task force dedicata per andare dopo i crimini degli oligarchi russi. Stiamo fornendo più di un miliardo di dollari in assistenza diretta all'Ucraina e fornendo assistenza militare."}
```
</CodeOutputBlock>

View file

@ -1,119 +0,0 @@
```python
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
```
```python
loader = TextLoader("../../state_of_the_union.txt")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_documents(texts, embeddings)
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever())
```
```python
query = "What did the president say about Ketanji Brown Jackson"
qa.run(query)
```
<CodeOutputBlock lang="python">
```
" The president said that she is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support, from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
```
</CodeOutputBlock>
## Chain Type
You can easily specify different chain types to load and use in the RetrievalQA chain. For a more detailed walkthrough of these types, please see [this notebook](/docs/modules/chains/additional/question_answering.html).
There are two ways to load different chain types. First, you can specify the chain type argument in the `from_chain_type` method. This allows you to pass in the name of the chain type you want to use. For example, in the below we change the chain type to `map_reduce`.
```python
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="map_reduce", retriever=docsearch.as_retriever())
```
```python
query = "What did the president say about Ketanji Brown Jackson"
qa.run(query)
```
<CodeOutputBlock lang="python">
```
" The president said that Judge Ketanji Brown Jackson is one of our nation's top legal minds, a former top litigator in private practice and a former federal public defender, from a family of public school educators and police officers, a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
```
</CodeOutputBlock>
The above way allows you to really simply change the chain_type, but it doesn't provide a ton of flexibility over parameters to that chain type. If you want to control those parameters, you can load the chain directly (as you did in [this notebook](/docs/modules/chains/additional/question_answering.html)) and then pass that directly to the the RetrievalQA chain with the `combine_documents_chain` parameter. For example:
```python
from langchain.chains.question_answering import load_qa_chain
qa_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
qa = RetrievalQA(combine_documents_chain=qa_chain, retriever=docsearch.as_retriever())
```
```python
query = "What did the president say about Ketanji Brown Jackson"
qa.run(query)
```
<CodeOutputBlock lang="python">
```
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
```
</CodeOutputBlock>
## Custom Prompts
You can pass in custom prompts to do question answering. These prompts are the same prompts as you can pass into the [base question answering chain](/docs/modules/chains/additional/question_answering.html)
```python
from langchain.prompts import PromptTemplate
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}
Question: {question}
Answer in Italian:"""
PROMPT = PromptTemplate(
template=prompt_template, input_variables=["context", "question"]
)
```
```python
chain_type_kwargs = {"prompt": PROMPT}
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever(), chain_type_kwargs=chain_type_kwargs)
```
```python
query = "What did the president say about Ketanji Brown Jackson"
qa.run(query)
```
<CodeOutputBlock lang="python">
```
" Il presidente ha detto che Ketanji Brown Jackson è una delle menti legali più importanti del paese, che continuerà l'eccellenza di Justice Breyer e che ha ricevuto un ampio sostegno, da Fraternal Order of Police a ex giudici nominati da democratici e repubblicani."
```
</CodeOutputBlock>

View file

@ -1,68 +0,0 @@
## Return Source Documents
Additionally, we can return the source documents used to answer the question by specifying an optional parameter when constructing the chain.
```python
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever(), return_source_documents=True)
```
```python
query = "What did the president say about Ketanji Brown Jackson"
result = qa({"query": query})
```
```python
result["result"]
```
<CodeOutputBlock lang="python">
```
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice and a former federal public defender from a family of public school educators and police officers, and that she has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
```
</CodeOutputBlock>
```python
result["source_documents"]
```
<CodeOutputBlock lang="python">
```
[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0),
Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \n\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0),
Document(page_content='And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \n\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n\nWhile it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \n\nAnd soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \n\nSo tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. \n\nFirst, beat the opioid epidemic.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0),
Document(page_content='Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \n\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \n\nThat ends on my watch. \n\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \n\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \n\nLets pass the Paycheck Fairness Act and paid leave. \n\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \n\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0)]
```
</CodeOutputBlock>
Alternatively, if our document have a "source" metadata key, we can use the `RetrievalQAWithSourceChain` to cite our sources:
```python
docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": f"{i}-pl"} for i in range(len(texts))])
```
```python
from langchain.chains import RetrievalQAWithSourcesChain
from langchain import OpenAI
chain = RetrievalQAWithSourcesChain.from_chain_type(OpenAI(temperature=0), chain_type="stuff", retriever=docsearch.as_retriever())
```
```python
chain({"question": "What did the president say about Justice Breyer"}, return_only_outputs=True)
```
<CodeOutputBlock lang="python">
```
{'answer': ' The president honored Justice Breyer for his service and mentioned his legacy of excellence.\n',
'sources': '31-pl'}
```
</CodeOutputBlock>

View file

@ -1,18 +0,0 @@
The simplest loader reads in a file as text and places it all into one Document.
```python
from langchain.document_loaders import TextLoader
loader = TextLoader("./index.md")
loader.load()
```
<CodeOutputBlock language="python">
```
[
Document(page_content='---\nsidebar_position: 0\n---\n# Document loaders\n\nUse document loaders to load data from a source as `Document`\'s. A `Document` is a piece of text\nand associated metadata. For example, there are document loaders for loading a simple `.txt` file, for loading the text\ncontents of any web page, or even for loading a transcript of a YouTube video.\n\nEvery document loader exposes two methods:\n1. "Load": load documents from the configured source\n2. "Load and split": load documents from the configured source and split them using the passed in text splitter\n\nThey optionally implement:\n\n3. "Lazy load": load documents into memory lazily\n', metadata={'source': '../docs/docs_skeleton/docs/modules/data_connection/document_loaders/index.md'})
]
```
</CodeOutputBlock>

File diff suppressed because one or more lines are too long

View file

@ -1,277 +0,0 @@
Under the hood, by default this uses the [UnstructuredLoader](/docs/integrations/document_loaders/unstructured_file.html)
```python
from langchain.document_loaders import DirectoryLoader
```
We can use the `glob` parameter to control which files to load. Note that here it doesn't load the `.rst` file or the `.html` files.
```python
loader = DirectoryLoader('../', glob="**/*.md")
```
```python
docs = loader.load()
```
```python
len(docs)
```
<CodeOutputBlock lang="python">
```
1
```
</CodeOutputBlock>
## Show a progress bar
By default a progress bar will not be shown. To show a progress bar, install the `tqdm` library (e.g. `pip install tqdm`), and set the `show_progress` parameter to `True`.
```python
loader = DirectoryLoader('../', glob="**/*.md", show_progress=True)
docs = loader.load()
```
<CodeOutputBlock lang="python">
```
Requirement already satisfied: tqdm in /Users/jon/.pyenv/versions/3.9.16/envs/microbiome-app/lib/python3.9/site-packages (4.65.0)
0it [00:00, ?it/s]
```
</CodeOutputBlock>
## Use multithreading
By default the loading happens in one thread. In order to utilize several threads set the `use_multithreading` flag to true.
```python
loader = DirectoryLoader('../', glob="**/*.md", use_multithreading=True)
docs = loader.load()
```
## Change loader class
By default this uses the `UnstructuredLoader` class. However, you can change up the type of loader pretty easily.
```python
from langchain.document_loaders import TextLoader
```
```python
loader = DirectoryLoader('../', glob="**/*.md", loader_cls=TextLoader)
```
```python
docs = loader.load()
```
```python
len(docs)
```
<CodeOutputBlock lang="python">
```
1
```
</CodeOutputBlock>
If you need to load Python source code files, use the `PythonLoader`.
```python
from langchain.document_loaders import PythonLoader
```
```python
loader = DirectoryLoader('../../../../../', glob="**/*.py", loader_cls=PythonLoader)
```
```python
docs = loader.load()
```
```python
len(docs)
```
<CodeOutputBlock lang="python">
```
691
```
</CodeOutputBlock>
## Auto detect file encodings with TextLoader
In this example we will see some strategies that can be useful when loading a big list of arbitrary files from a directory using the `TextLoader` class.
First to illustrate the problem, let's try to load multiple text with arbitrary encodings.
```python
path = '../../../../../tests/integration_tests/examples'
loader = DirectoryLoader(path, glob="**/*.txt", loader_cls=TextLoader)
```
### A. Default Behavior
```python
loader.load()
```
<HTMLOutputBlock center>
```html
<pre style="white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace"><span style="color: #800000; text-decoration-color: #800000">╭─────────────────────────────── </span><span style="color: #800000; text-decoration-color: #800000; font-weight: bold">Traceback </span><span style="color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold">(most recent call last)</span><span style="color: #800000; text-decoration-color: #800000"> ────────────────────────────────╮</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #bfbf7f; text-decoration-color: #bfbf7f">/data/source/langchain/langchain/document_loaders/</span><span style="color: #808000; text-decoration-color: #808000; font-weight: bold">text.py</span>:<span style="color: #0000ff; text-decoration-color: #0000ff">29</span> in <span style="color: #00ff00; text-decoration-color: #00ff00">load</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">26 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ </span>text = <span style="color: #808000; text-decoration-color: #808000">""</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">27 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">with</span> <span style="color: #00ffff; text-decoration-color: #00ffff">open</span>(<span style="color: #00ffff; text-decoration-color: #00ffff">self</span>.file_path, encoding=<span style="color: #00ffff; text-decoration-color: #00ffff">self</span>.encoding) <span style="color: #0000ff; text-decoration-color: #0000ff">as</span> f: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">28 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">try</span>: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">❱ </span>29 <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ </span>text = f.read() <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">30 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">except</span> <span style="color: #00ffff; text-decoration-color: #00ffff">UnicodeDecodeError</span> <span style="color: #0000ff; text-decoration-color: #0000ff">as</span> e: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">31 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">if</span> <span style="color: #00ffff; text-decoration-color: #00ffff">self</span>.autodetect_encoding: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">32 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ </span>detected_encodings = <span style="color: #00ffff; text-decoration-color: #00ffff">self</span>.detect_file_encodings() <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #bfbf7f; text-decoration-color: #bfbf7f">/home/spike/.pyenv/versions/3.9.11/lib/python3.9/</span><span style="color: #808000; text-decoration-color: #808000; font-weight: bold">codecs.py</span>:<span style="color: #0000ff; text-decoration-color: #0000ff">322</span> in <span style="color: #00ff00; text-decoration-color: #00ff00">decode</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f"> 319 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ </span><span style="color: #0000ff; text-decoration-color: #0000ff">def</span> <span style="color: #00ff00; text-decoration-color: #00ff00">decode</span>(<span style="color: #00ffff; text-decoration-color: #00ffff">self</span>, <span style="color: #00ffff; text-decoration-color: #00ffff">input</span>, final=<span style="color: #0000ff; text-decoration-color: #0000ff">False</span>): <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f"> 320 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f"># decode input (taking the buffer into account)</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f"> 321 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ </span>data = <span style="color: #00ffff; text-decoration-color: #00ffff">self</span>.buffer + <span style="color: #00ffff; text-decoration-color: #00ffff">input</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">❱ </span> 322 <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ </span>(result, consumed) = <span style="color: #00ffff; text-decoration-color: #00ffff">self</span>._buffer_decode(data, <span style="color: #00ffff; text-decoration-color: #00ffff">self</span>.errors, final) <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f"> 323 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f"># keep undecoded input until the next call</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f"> 324 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ </span><span style="color: #00ffff; text-decoration-color: #00ffff">self</span>.buffer = data[consumed:] <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f"> 325 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">return</span> result <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>
<span style="color: #ff0000; text-decoration-color: #ff0000; font-weight: bold">UnicodeDecodeError: </span><span style="color: #008000; text-decoration-color: #008000">'utf-8'</span> codec can't decode byte <span style="color: #008080; text-decoration-color: #008080; font-weight: bold">0xca</span> in position <span style="color: #008080; text-decoration-color: #008080; font-weight: bold">0</span>: invalid continuation byte
<span style="font-style: italic">The above exception was the direct cause of the following exception:</span>
<span style="color: #800000; text-decoration-color: #800000">╭─────────────────────────────── </span><span style="color: #800000; text-decoration-color: #800000; font-weight: bold">Traceback </span><span style="color: #bf7f7f; text-decoration-color: #bf7f7f; font-weight: bold">(most recent call last)</span><span style="color: #800000; text-decoration-color: #800000"> ────────────────────────────────╮</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> in <span style="color: #00ff00; text-decoration-color: #00ff00">&lt;module&gt;</span>:<span style="color: #0000ff; text-decoration-color: #0000ff">1</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">❱ </span>1 loader.load() <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">2 </span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #bfbf7f; text-decoration-color: #bfbf7f">/data/source/langchain/langchain/document_loaders/</span><span style="color: #808000; text-decoration-color: #808000; font-weight: bold">directory.py</span>:<span style="color: #0000ff; text-decoration-color: #0000ff">84</span> in <span style="color: #00ff00; text-decoration-color: #00ff00">load</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">81 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">if</span> <span style="color: #00ffff; text-decoration-color: #00ffff">self</span>.silent_errors: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">82 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ │ │ </span>logger.warning(e) <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">83 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">else</span>: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">❱ </span>84 <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">raise</span> e <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">85 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">finally</span>: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">86 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">if</span> pbar: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">87 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ │ │ </span>pbar.update(<span style="color: #0000ff; text-decoration-color: #0000ff">1</span>) <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #bfbf7f; text-decoration-color: #bfbf7f">/data/source/langchain/langchain/document_loaders/</span><span style="color: #808000; text-decoration-color: #808000; font-weight: bold">directory.py</span>:<span style="color: #0000ff; text-decoration-color: #0000ff">78</span> in <span style="color: #00ff00; text-decoration-color: #00ff00">load</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">75 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">if</span> i.is_file(): <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">76 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">if</span> _is_visible(i.relative_to(p)) <span style="color: #ff00ff; text-decoration-color: #ff00ff">or</span> <span style="color: #00ffff; text-decoration-color: #00ffff">self</span>.load_hidden: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">77 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">try</span>: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">❱ </span>78 <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ │ </span>sub_docs = <span style="color: #00ffff; text-decoration-color: #00ffff">self</span>.loader_cls(<span style="color: #00ffff; text-decoration-color: #00ffff">str</span>(i), **<span style="color: #00ffff; text-decoration-color: #00ffff">self</span>.loader_kwargs).load() <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">79 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ │ </span>docs.extend(sub_docs) <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">80 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">except</span> <span style="color: #00ffff; text-decoration-color: #00ffff">Exception</span> <span style="color: #0000ff; text-decoration-color: #0000ff">as</span> e: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">81 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">if</span> <span style="color: #00ffff; text-decoration-color: #00ffff">self</span>.silent_errors: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #bfbf7f; text-decoration-color: #bfbf7f">/data/source/langchain/langchain/document_loaders/</span><span style="color: #808000; text-decoration-color: #808000; font-weight: bold">text.py</span>:<span style="color: #0000ff; text-decoration-color: #0000ff">44</span> in <span style="color: #00ff00; text-decoration-color: #00ff00">load</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">41 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">except</span> <span style="color: #00ffff; text-decoration-color: #00ffff">UnicodeDecodeError</span>: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">42 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">continue</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">43 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">else</span>: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #800000; text-decoration-color: #800000">❱ </span>44 <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">raise</span> <span style="color: #00ffff; text-decoration-color: #00ffff">RuntimeError</span>(<span style="color: #808000; text-decoration-color: #808000">f"Error loading {</span><span style="color: #00ffff; text-decoration-color: #00ffff">self</span>.file_path<span style="color: #808000; text-decoration-color: #808000">}"</span>) <span style="color: #0000ff; text-decoration-color: #0000ff">from</span> <span style="color: #00ffff; text-decoration-color: #00ffff; text-decoration: underline">e</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">45 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">except</span> <span style="color: #00ffff; text-decoration-color: #00ffff">Exception</span> <span style="color: #0000ff; text-decoration-color: #0000ff">as</span> e: <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">46 </span><span style="color: #7f7f7f; text-decoration-color: #7f7f7f">│ │ │ │ </span><span style="color: #0000ff; text-decoration-color: #0000ff">raise</span> <span style="color: #00ffff; text-decoration-color: #00ffff">RuntimeError</span>(<span style="color: #808000; text-decoration-color: #808000">f"Error loading {</span><span style="color: #00ffff; text-decoration-color: #00ffff">self</span>.file_path<span style="color: #808000; text-decoration-color: #808000">}"</span>) <span style="color: #0000ff; text-decoration-color: #0000ff">from</span> <span style="color: #00ffff; text-decoration-color: #00ffff; text-decoration: underline">e</span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">│</span> <span style="color: #7f7f7f; text-decoration-color: #7f7f7f">47 </span> <span style="color: #800000; text-decoration-color: #800000">│</span>
<span style="color: #800000; text-decoration-color: #800000">╰──────────────────────────────────────────────────────────────────────────────────────────────────╯</span>
<span style="color: #ff0000; text-decoration-color: #ff0000; font-weight: bold">RuntimeError: </span>Error loading ..<span style="color: #800080; text-decoration-color: #800080">/../../../../tests/integration_tests/examples/</span><span style="color: #ff00ff; text-decoration-color: #ff00ff">example-non-utf8.txt</span>
</pre>
```
</HTMLOutputBlock>
The file `example-non-utf8.txt` uses a different encoding the `load()` function fails with a helpful message indicating which file failed decoding.
With the default behavior of `TextLoader` any failure to load any of the documents will fail the whole loading process and no documents are loaded.
### B. Silent fail
We can pass the parameter `silent_errors` to the `DirectoryLoader` to skip the files which could not be loaded and continue the load process.
```python
loader = DirectoryLoader(path, glob="**/*.txt", loader_cls=TextLoader, silent_errors=True)
docs = loader.load()
```
<CodeOutputBlock lang="python">
```
Error loading ../../../../../tests/integration_tests/examples/example-non-utf8.txt
```
</CodeOutputBlock>
```python
doc_sources = [doc.metadata['source'] for doc in docs]
doc_sources
```
<CodeOutputBlock lang="python">
```
['../../../../../tests/integration_tests/examples/whatsapp_chat.txt',
'../../../../../tests/integration_tests/examples/example-utf8.txt']
```
</CodeOutputBlock>
### C. Auto detect encodings
We can also ask `TextLoader` to auto detect the file encoding before failing, by passing the `autodetect_encoding` to the loader class.
```python
text_loader_kwargs={'autodetect_encoding': True}
loader = DirectoryLoader(path, glob="**/*.txt", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
docs = loader.load()
```
```python
doc_sources = [doc.metadata['source'] for doc in docs]
doc_sources
```
<CodeOutputBlock lang="python">
```
['../../../../../tests/integration_tests/examples/example-non-utf8.txt',
'../../../../../tests/integration_tests/examples/whatsapp_chat.txt',
'../../../../../tests/integration_tests/examples/example-utf8.txt']
```
</CodeOutputBlock>

View file

@ -1,50 +0,0 @@
```python
from langchain.document_loaders import UnstructuredHTMLLoader
```
```python
loader = UnstructuredHTMLLoader("example_data/fake-content.html")
```
```python
data = loader.load()
```
```python
data
```
<CodeOutputBlock lang="python">
```
[Document(page_content='My First Heading\n\nMy first paragraph.', lookup_str='', metadata={'source': 'example_data/fake-content.html'}, lookup_index=0)]
```
</CodeOutputBlock>
## Loading HTML with BeautifulSoup4
We can also use `BeautifulSoup4` to load HTML documents using the `BSHTMLLoader`. This will extract the text from the HTML into `page_content`, and the page title as `title` into `metadata`.
```python
from langchain.document_loaders import BSHTMLLoader
```
```python
loader = BSHTMLLoader("example_data/fake-content.html")
data = loader.load()
data
```
<CodeOutputBlock lang="python">
```
[Document(page_content='\n\nTest Title\n\n\nMy First Heading\nMy first paragraph.\n\n\n', metadata={'source': 'example_data/fake-content.html', 'title': 'Test Title'})]
```
</CodeOutputBlock>

View file

@ -1,333 +0,0 @@
>The `JSONLoader` uses a specified [jq schema](https://en.wikipedia.org/wiki/Jq_(programming_language)) to parse the JSON files. It uses the `jq` python package.
Check this [manual](https://stedolan.github.io/jq/manual/#Basicfilters) for a detailed documentation of the `jq` syntax.
```python
#!pip install jq
```
```python
from langchain.document_loaders import JSONLoader
```
```python
import json
from pathlib import Path
from pprint import pprint
file_path='./example_data/facebook_chat.json'
data = json.loads(Path(file_path).read_text())
```
```python
pprint(data)
```
<CodeOutputBlock lang="python">
```
{'image': {'creation_timestamp': 1675549016, 'uri': 'image_of_the_chat.jpg'},
'is_still_participant': True,
'joinable_mode': {'link': '', 'mode': 1},
'magic_words': [],
'messages': [{'content': 'Bye!',
'sender_name': 'User 2',
'timestamp_ms': 1675597571851},
{'content': 'Oh no worries! Bye',
'sender_name': 'User 1',
'timestamp_ms': 1675597435669},
{'content': 'No Im sorry it was my mistake, the blue one is not '
'for sale',
'sender_name': 'User 2',
'timestamp_ms': 1675596277579},
{'content': 'I thought you were selling the blue one!',
'sender_name': 'User 1',
'timestamp_ms': 1675595140251},
{'content': 'Im not interested in this bag. Im interested in the '
'blue one!',
'sender_name': 'User 1',
'timestamp_ms': 1675595109305},
{'content': 'Here is $129',
'sender_name': 'User 2',
'timestamp_ms': 1675595068468},
{'photos': [{'creation_timestamp': 1675595059,
'uri': 'url_of_some_picture.jpg'}],
'sender_name': 'User 2',
'timestamp_ms': 1675595060730},
{'content': 'Online is at least $100',
'sender_name': 'User 2',
'timestamp_ms': 1675595045152},
{'content': 'How much do you want?',
'sender_name': 'User 1',
'timestamp_ms': 1675594799696},
{'content': 'Goodmorning! $50 is too low.',
'sender_name': 'User 2',
'timestamp_ms': 1675577876645},
{'content': 'Hi! Im interested in your bag. Im offering $50. Let '
'me know if you are interested. Thanks!',
'sender_name': 'User 1',
'timestamp_ms': 1675549022673}],
'participants': [{'name': 'User 1'}, {'name': 'User 2'}],
'thread_path': 'inbox/User 1 and User 2 chat',
'title': 'User 1 and User 2 chat'}
```
</CodeOutputBlock>
## Using `JSONLoader`
Suppose we are interested in extracting the values under the `content` field within the `messages` key of the JSON data. This can easily be done through the `JSONLoader` as shown below.
### JSON file
```python
loader = JSONLoader(
file_path='./example_data/facebook_chat.json',
jq_schema='.messages[].content')
data = loader.load()
```
```python
pprint(data)
```
<CodeOutputBlock lang="python">
```
[Document(page_content='Bye!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 1}),
Document(page_content='Oh no worries! Bye', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 2}),
Document(page_content='No Im sorry it was my mistake, the blue one is not for sale', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 3}),
Document(page_content='I thought you were selling the blue one!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 4}),
Document(page_content='Im not interested in this bag. Im interested in the blue one!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 5}),
Document(page_content='Here is $129', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 6}),
Document(page_content='', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 7}),
Document(page_content='Online is at least $100', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 8}),
Document(page_content='How much do you want?', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 9}),
Document(page_content='Goodmorning! $50 is too low.', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 10}),
Document(page_content='Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 11})]
```
</CodeOutputBlock>
### JSON Lines file
If you want to load documents from a JSON Lines file, you pass `json_lines=True`
and specify `jq_schema` to extract `page_content` from a single JSON object.
```python
file_path = './example_data/facebook_chat_messages.jsonl'
pprint(Path(file_path).read_text())
```
<CodeOutputBlock lang="python">
```
('{"sender_name": "User 2", "timestamp_ms": 1675597571851, "content": "Bye!"}\n'
'{"sender_name": "User 1", "timestamp_ms": 1675597435669, "content": "Oh no '
'worries! Bye"}\n'
'{"sender_name": "User 2", "timestamp_ms": 1675596277579, "content": "No Im '
'sorry it was my mistake, the blue one is not for sale"}\n')
```
</CodeOutputBlock>
```python
loader = JSONLoader(
file_path='./example_data/facebook_chat_messages.jsonl',
jq_schema='.content',
json_lines=True)
data = loader.load()
```
```python
pprint(data)
```
<CodeOutputBlock lang="python">
```
[Document(page_content='Bye!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 1}),
Document(page_content='Oh no worries! Bye', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 2}),
Document(page_content='No Im sorry it was my mistake, the blue one is not for sale', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 3})]
```
</CodeOutputBlock>
Another option is set `jq_schema='.'` and provide `content_key`:
```python
loader = JSONLoader(
file_path='./example_data/facebook_chat_messages.jsonl',
jq_schema='.',
content_key='sender_name',
json_lines=True)
data = loader.load()
```
```python
pprint(data)
```
<CodeOutputBlock lang="python">
```
[Document(page_content='User 2', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 1}),
Document(page_content='User 1', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 2}),
Document(page_content='User 2', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat_messages.jsonl', 'seq_num': 3})]
```
</CodeOutputBlock>
## Extracting metadata
Generally, we want to include metadata available in the JSON file into the documents that we create from the content.
The following demonstrates how metadata can be extracted using the `JSONLoader`.
There are some key changes to be noted. In the previous example where we didn't collect the metadata, we managed to directly specify in the schema where the value for the `page_content` can be extracted from.
```
.messages[].content
```
In the current example, we have to tell the loader to iterate over the records in the `messages` field. The jq_schema then has to be:
```
.messages[]
```
This allows us to pass the records (dict) into the `metadata_func` that has to be implemented. The `metadata_func` is responsible for identifying which pieces of information in the record should be included in the metadata stored in the final `Document` object.
Additionally, we now have to explicitly specify in the loader, via the `content_key` argument, the key from the record where the value for the `page_content` needs to be extracted from.
```python
# Define the metadata extraction function.
def metadata_func(record: dict, metadata: dict) -> dict:
metadata["sender_name"] = record.get("sender_name")
metadata["timestamp_ms"] = record.get("timestamp_ms")
return metadata
loader = JSONLoader(
file_path='./example_data/facebook_chat.json',
jq_schema='.messages[]',
content_key="content",
metadata_func=metadata_func
)
data = loader.load()
```
```python
pprint(data)
```
<CodeOutputBlock lang="python">
```
[Document(page_content='Bye!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 1, 'sender_name': 'User 2', 'timestamp_ms': 1675597571851}),
Document(page_content='Oh no worries! Bye', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 2, 'sender_name': 'User 1', 'timestamp_ms': 1675597435669}),
Document(page_content='No Im sorry it was my mistake, the blue one is not for sale', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 3, 'sender_name': 'User 2', 'timestamp_ms': 1675596277579}),
Document(page_content='I thought you were selling the blue one!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 4, 'sender_name': 'User 1', 'timestamp_ms': 1675595140251}),
Document(page_content='Im not interested in this bag. Im interested in the blue one!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 5, 'sender_name': 'User 1', 'timestamp_ms': 1675595109305}),
Document(page_content='Here is $129', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 6, 'sender_name': 'User 2', 'timestamp_ms': 1675595068468}),
Document(page_content='', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 7, 'sender_name': 'User 2', 'timestamp_ms': 1675595060730}),
Document(page_content='Online is at least $100', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 8, 'sender_name': 'User 2', 'timestamp_ms': 1675595045152}),
Document(page_content='How much do you want?', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 9, 'sender_name': 'User 1', 'timestamp_ms': 1675594799696}),
Document(page_content='Goodmorning! $50 is too low.', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 10, 'sender_name': 'User 2', 'timestamp_ms': 1675577876645}),
Document(page_content='Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 11, 'sender_name': 'User 1', 'timestamp_ms': 1675549022673})]
```
</CodeOutputBlock>
Now, you will see that the documents contain the metadata associated with the content we extracted.
## The `metadata_func`
As shown above, the `metadata_func` accepts the default metadata generated by the `JSONLoader`. This allows full control to the user with respect to how the metadata is formatted.
For example, the default metadata contains the `source` and the `seq_num` keys. However, it is possible that the JSON data contain these keys as well. The user can then exploit the `metadata_func` to rename the default keys and use the ones from the JSON data.
The example below shows how we can modify the `source` to only contain information of the file source relative to the `langchain` directory.
```python
# Define the metadata extraction function.
def metadata_func(record: dict, metadata: dict) -> dict:
metadata["sender_name"] = record.get("sender_name")
metadata["timestamp_ms"] = record.get("timestamp_ms")
if "source" in metadata:
source = metadata["source"].split("/")
source = source[source.index("langchain"):]
metadata["source"] = "/".join(source)
return metadata
loader = JSONLoader(
file_path='./example_data/facebook_chat.json',
jq_schema='.messages[]',
content_key="content",
metadata_func=metadata_func
)
data = loader.load()
```
```python
pprint(data)
```
<CodeOutputBlock lang="python">
```
[Document(page_content='Bye!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 1, 'sender_name': 'User 2', 'timestamp_ms': 1675597571851}),
Document(page_content='Oh no worries! Bye', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 2, 'sender_name': 'User 1', 'timestamp_ms': 1675597435669}),
Document(page_content='No Im sorry it was my mistake, the blue one is not for sale', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 3, 'sender_name': 'User 2', 'timestamp_ms': 1675596277579}),
Document(page_content='I thought you were selling the blue one!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 4, 'sender_name': 'User 1', 'timestamp_ms': 1675595140251}),
Document(page_content='Im not interested in this bag. Im interested in the blue one!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 5, 'sender_name': 'User 1', 'timestamp_ms': 1675595109305}),
Document(page_content='Here is $129', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 6, 'sender_name': 'User 2', 'timestamp_ms': 1675595068468}),
Document(page_content='', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 7, 'sender_name': 'User 2', 'timestamp_ms': 1675595060730}),
Document(page_content='Online is at least $100', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 8, 'sender_name': 'User 2', 'timestamp_ms': 1675595045152}),
Document(page_content='How much do you want?', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 9, 'sender_name': 'User 1', 'timestamp_ms': 1675594799696}),
Document(page_content='Goodmorning! $50 is too low.', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 10, 'sender_name': 'User 2', 'timestamp_ms': 1675577876645}),
Document(page_content='Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 11, 'sender_name': 'User 1', 'timestamp_ms': 1675549022673})]
```
</CodeOutputBlock>
## Common JSON structures with jq schema
The list below provides a reference to the possible `jq_schema` the user can use to extract content from the JSON data depending on the structure.
```
JSON -> [{"text": ...}, {"text": ...}, {"text": ...}]
jq_schema -> ".[].text"
JSON -> {"key": [{"text": ...}, {"text": ...}, {"text": ...}]}
jq_schema -> ".key[].text"
JSON -> ["...", "...", "..."]
jq_schema -> ".[]"
```

View file

@ -1,59 +0,0 @@
```python
# !pip install unstructured > /dev/null
```
```python
from langchain.document_loaders import UnstructuredMarkdownLoader
```
```python
markdown_path = "../../../../../README.md"
loader = UnstructuredMarkdownLoader(markdown_path)
```
```python
data = loader.load()
```
```python
data
```
<CodeOutputBlock lang="python">
```
[Document(page_content="ð\x9f¦\x9cï¸\x8fð\x9f”\x97 LangChain\n\nâ\x9a¡ Building applications with LLMs through composability â\x9a¡\n\nLooking for the JS/TS version? Check out LangChain.js.\n\nProduction Support: As you move your LangChains into production, we'd love to offer more comprehensive support.\nPlease fill out this form and we'll set up a dedicated support Slack channel.\n\nQuick Install\n\npip install langchain\nor\nconda install langchain -c conda-forge\n\nð\x9f¤” What is this?\n\nLarge language models (LLMs) are emerging as a transformative technology, enabling developers to build applications that they previously could not. However, using these LLMs in isolation is often insufficient for creating a truly powerful app - the real power comes when you can combine them with other sources of computation or knowledge.\n\nThis library aims to assist in the development of those types of applications. Common examples of these applications include:\n\nâ\x9d“ Question Answering over specific documents\n\nDocumentation\n\nEnd-to-end Example: Question Answering over Notion Database\n\nð\x9f¬ Chatbots\n\nDocumentation\n\nEnd-to-end Example: Chat-LangChain\n\nð\x9f¤\x96 Agents\n\nDocumentation\n\nEnd-to-end Example: GPT+WolframAlpha\n\nð\x9f“\x96 Documentation\n\nPlease see here for full documentation on:\n\nGetting started (installation, setting up the environment, simple examples)\n\nHow-To examples (demos, integrations, helper functions)\n\nReference (full API docs)\n\nResources (high-level explanation of core concepts)\n\nð\x9f\x9a\x80 What can this help with?\n\nThere are six main areas that LangChain is designed to help with.\nThese are, in increasing order of complexity:\n\nð\x9f“\x83 LLMs and Prompts:\n\nThis includes prompt management, prompt optimization, a generic interface for all LLMs, and common utilities for working with LLMs.\n\nð\x9f”\x97 Chains:\n\nChains go beyond a single LLM call and involve sequences of calls (whether to an LLM or a different utility). LangChain provides a standard interface for chains, lots of integrations with other tools, and end-to-end chains for common applications.\n\nð\x9f“\x9a Data Augmented Generation:\n\nData Augmented Generation involves specific types of chains that first interact with an external data source to fetch data for use in the generation step. Examples include summarization of long pieces of text and question/answering over specific data sources.\n\nð\x9f¤\x96 Agents:\n\nAgents involve an LLM making decisions about which Actions to take, taking that Action, seeing an Observation, and repeating that until done. LangChain provides a standard interface for agents, a selection of agents to choose from, and examples of end-to-end agents.\n\nð\x9f§\xa0 Memory:\n\nMemory refers to persisting state between calls of a chain/agent. LangChain provides a standard interface for memory, a collection of memory implementations, and examples of chains/agents that use memory.\n\nð\x9f§\x90 Evaluation:\n\n[BETA] Generative models are notoriously hard to evaluate with traditional metrics. One new way of evaluating them is using language models themselves to do the evaluation. LangChain provides some prompts/chains for assisting in this.\n\nFor more information on these concepts, please see our full documentation.\n\nð\x9f\x81 Contributing\n\nAs an open-source project in a rapidly developing field, we are extremely open to contributions, whether it be in the form of a new feature, improved infrastructure, or better documentation.\n\nFor detailed information on how to contribute, see here.", metadata={'source': '../../../../../README.md'})]
```
</CodeOutputBlock>
## Retain Elements
Under the hood, Unstructured creates different "elements" for different chunks of text. By default we combine those together, but you can easily keep that separation by specifying `mode="elements"`.
```python
loader = UnstructuredMarkdownLoader(markdown_path, mode="elements")
```
```python
data = loader.load()
```
```python
data[0]
```
<CodeOutputBlock lang="python">
```
Document(page_content='ð\x9f¦\x9cï¸\x8fð\x9f”\x97 LangChain', metadata={'source': '../../../../../README.md', 'page_number': 1, 'category': 'Title'})
```
</CodeOutputBlock>

File diff suppressed because one or more lines are too long

View file

@ -1,57 +0,0 @@
The default recommended text splitter is the RecursiveCharacterTextSplitter. This text splitter takes a list of characters. It tries to create chunks based on splitting on the first character, but if any chunks are too large it then moves onto the next character, and so forth. By default the characters it tries to split on are `["\n\n", "\n", " ", ""]`
In addition to controlling which characters you can split on, you can also control a few other things:
- `length_function`: how the length of chunks is calculated. Defaults to just counting number of characters, but it's pretty common to pass a token counter here.
- `chunk_size`: the maximum size of your chunks (as measured by the length function).
- `chunk_overlap`: the maximum overlap between chunks. It can be nice to have some overlap to maintain some continuity between chunks (eg do a sliding window).
- `add_start_index`: whether to include the starting position of each chunk within the original document in the metadata.
```python
# This is a long document we can split up.
with open('../../state_of_the_union.txt') as f:
state_of_the_union = f.read()
```
```python
from langchain.text_splitter import RecursiveCharacterTextSplitter
```
```python
text_splitter = RecursiveCharacterTextSplitter(
# Set a really small chunk size, just to show.
chunk_size = 100,
chunk_overlap = 20,
length_function = len,
add_start_index = True,
)
```
```python
texts = text_splitter.create_documents([state_of_the_union])
print(texts[0])
print(texts[1])
```
<CodeOutputBlock lang="python">
```
page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and' metadata={'start_index': 0}
page_content='of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.' metadata={'start_index': 82}
```
</CodeOutputBlock>
## Other transformations:
### Filter redundant docs, translate docs, extract metadata, and more
We can do perform a number of transformations on docs which are not simply splitting the text. With the
`EmbeddingsRedundantFilter` we can identify similar documents and filter out redundancies. With integrations like
[doctran](https://github.com/psychic-api/doctran/tree/main) we can do things like translate documents from one language
to another, extract desired properties and add them to metadata, and convert conversational dialogue into a Q/A format
set of documents.

View file

@ -1,60 +0,0 @@
```python
# This is a long document we can split up.
with open('../../../state_of_the_union.txt') as f:
state_of_the_union = f.read()
```
```python
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(
separator = "\n\n",
chunk_size = 1000,
chunk_overlap = 200,
length_function = len,
)
```
```python
texts = text_splitter.create_documents([state_of_the_union])
print(texts[0])
```
<CodeOutputBlock lang="python">
```
page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n\nLast year COVID-19 kept us apart. This year we are finally together again. \n\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n\nWith a duty to one another to the American people to the Constitution. \n\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \n\nSix days ago, Russias Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n\nHe met the Ukrainian people. \n\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.' lookup_str='' metadata={} lookup_index=0
```
</CodeOutputBlock>
Here's an example of passing metadata along with the documents, notice that it is split along with the documents.
```python
metadatas = [{"document": 1}, {"document": 2}]
documents = text_splitter.create_documents([state_of_the_union, state_of_the_union], metadatas=metadatas)
print(documents[0])
```
<CodeOutputBlock lang="python">
```
page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n\nLast year COVID-19 kept us apart. This year we are finally together again. \n\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n\nWith a duty to one another to the American people to the Constitution. \n\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \n\nSix days ago, Russias Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n\nHe met the Ukrainian people. \n\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.' lookup_str='' metadata={'document': 1} lookup_index=0
```
</CodeOutputBlock>
```python
text_splitter.split_text(state_of_the_union)[0]
```
<CodeOutputBlock lang="python">
```
'Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n\nLast year COVID-19 kept us apart. This year we are finally together again. \n\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n\nWith a duty to one another to the American people to the Constitution. \n\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \n\nSix days ago, Russias Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n\nHe met the Ukrainian people. \n\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.'
```
</CodeOutputBlock>

View file

@ -1,312 +0,0 @@
```python
from langchain.text_splitter import (
RecursiveCharacterTextSplitter,
Language,
)
```
```python
# Full list of support languages
[e.value for e in Language]
```
<CodeOutputBlock lang="python">
```
['cpp',
'go',
'java',
'js',
'php',
'proto',
'python',
'rst',
'ruby',
'rust',
'scala',
'swift',
'markdown',
'latex',
'html',
'sol',]
```
</CodeOutputBlock>
```python
# You can also see the separators used for a given language
RecursiveCharacterTextSplitter.get_separators_for_language(Language.PYTHON)
```
<CodeOutputBlock lang="python">
```
['\nclass ', '\ndef ', '\n\tdef ', '\n\n', '\n', ' ', '']
```
</CodeOutputBlock>
## Python
Here's an example using the PythonTextSplitter
```python
PYTHON_CODE = """
def hello_world():
print("Hello, World!")
# Call the function
hello_world()
"""
python_splitter = RecursiveCharacterTextSplitter.from_language(
language=Language.PYTHON, chunk_size=50, chunk_overlap=0
)
python_docs = python_splitter.create_documents([PYTHON_CODE])
python_docs
```
<CodeOutputBlock lang="python">
```
[Document(page_content='def hello_world():\n print("Hello, World!")', metadata={}),
Document(page_content='# Call the function\nhello_world()', metadata={})]
```
</CodeOutputBlock>
## JS
Here's an example using the JS text splitter
```python
JS_CODE = """
function helloWorld() {
console.log("Hello, World!");
}
// Call the function
helloWorld();
"""
js_splitter = RecursiveCharacterTextSplitter.from_language(
language=Language.JS, chunk_size=60, chunk_overlap=0
)
js_docs = js_splitter.create_documents([JS_CODE])
js_docs
```
<CodeOutputBlock lang="python">
```
[Document(page_content='function helloWorld() {\n console.log("Hello, World!");\n}', metadata={}),
Document(page_content='// Call the function\nhelloWorld();', metadata={})]
```
</CodeOutputBlock>
## Markdown
Here's an example using the Markdown text splitter.
````python
markdown_text = """
# 🦜️🔗 LangChain
⚡ Building applications with LLMs through composability ⚡
## Quick Install
```bash
# Hopefully this code block isn't split
pip install langchain
```
As an open source project in a rapidly developing field, we are extremely open to contributions.
"""
````
```python
md_splitter = RecursiveCharacterTextSplitter.from_language(
language=Language.MARKDOWN, chunk_size=60, chunk_overlap=0
)
md_docs = md_splitter.create_documents([markdown_text])
md_docs
```
<CodeOutputBlock lang="python">
```
[Document(page_content='# 🦜️🔗 LangChain', metadata={}),
Document(page_content='⚡ Building applications with LLMs through composability ⚡', metadata={}),
Document(page_content='## Quick Install', metadata={}),
Document(page_content="```bash\n# Hopefully this code block isn't split", metadata={}),
Document(page_content='pip install langchain', metadata={}),
Document(page_content='```', metadata={}),
Document(page_content='As an open source project in a rapidly developing field, we', metadata={}),
Document(page_content='are extremely open to contributions.', metadata={})]
```
</CodeOutputBlock>
## Latex
Here's an example on Latex text
```python
latex_text = """
\documentclass{article}
\begin{document}
\maketitle
\section{Introduction}
Large language models (LLMs) are a type of machine learning model that can be trained on vast amounts of text data to generate human-like language. In recent years, LLMs have made significant advances in a variety of natural language processing tasks, including language translation, text generation, and sentiment analysis.
\subsection{History of LLMs}
The earliest LLMs were developed in the 1980s and 1990s, but they were limited by the amount of data that could be processed and the computational power available at the time. In the past decade, however, advances in hardware and software have made it possible to train LLMs on massive datasets, leading to significant improvements in performance.
\subsection{Applications of LLMs}
LLMs have many applications in industry, including chatbots, content creation, and virtual assistants. They can also be used in academia for research in linguistics, psychology, and computational linguistics.
\end{document}
"""
```
```python
latex_splitter = RecursiveCharacterTextSplitter.from_language(
language=Language.MARKDOWN, chunk_size=60, chunk_overlap=0
)
latex_docs = latex_splitter.create_documents([latex_text])
latex_docs
```
<CodeOutputBlock lang="python">
```
[Document(page_content='\\documentclass{article}\n\n\x08egin{document}\n\n\\maketitle', metadata={}),
Document(page_content='\\section{Introduction}', metadata={}),
Document(page_content='Large language models (LLMs) are a type of machine learning', metadata={}),
Document(page_content='model that can be trained on vast amounts of text data to', metadata={}),
Document(page_content='generate human-like language. In recent years, LLMs have', metadata={}),
Document(page_content='made significant advances in a variety of natural language', metadata={}),
Document(page_content='processing tasks, including language translation, text', metadata={}),
Document(page_content='generation, and sentiment analysis.', metadata={}),
Document(page_content='\\subsection{History of LLMs}', metadata={}),
Document(page_content='The earliest LLMs were developed in the 1980s and 1990s,', metadata={}),
Document(page_content='but they were limited by the amount of data that could be', metadata={}),
Document(page_content='processed and the computational power available at the', metadata={}),
Document(page_content='time. In the past decade, however, advances in hardware and', metadata={}),
Document(page_content='software have made it possible to train LLMs on massive', metadata={}),
Document(page_content='datasets, leading to significant improvements in', metadata={}),
Document(page_content='performance.', metadata={}),
Document(page_content='\\subsection{Applications of LLMs}', metadata={}),
Document(page_content='LLMs have many applications in industry, including', metadata={}),
Document(page_content='chatbots, content creation, and virtual assistants. They', metadata={}),
Document(page_content='can also be used in academia for research in linguistics,', metadata={}),
Document(page_content='psychology, and computational linguistics.', metadata={}),
Document(page_content='\\end{document}', metadata={})]
```
</CodeOutputBlock>
## HTML
Here's an example using an HTML text splitter
```python
html_text = """
<!DOCTYPE html>
<html>
<head>
<title>🦜️🔗 LangChain</title>
<style>
body {
font-family: Arial, sans-serif;
}
h1 {
color: darkblue;
}
</style>
</head>
<body>
<div>
<h1>🦜️🔗 LangChain</h1>
<p>⚡ Building applications with LLMs through composability ⚡</p>
</div>
<div>
As an open source project in a rapidly developing field, we are extremely open to contributions.
</div>
</body>
</html>
"""
```
```python
html_splitter = RecursiveCharacterTextSplitter.from_language(
language=Language.HTML, chunk_size=60, chunk_overlap=0
)
html_docs = html_splitter.create_documents([html_text])
html_docs
```
<CodeOutputBlock lang="python">
```
[Document(page_content='<!DOCTYPE html>\n<html>', metadata={}),
Document(page_content='<head>\n <title>🦜️🔗 LangChain</title>', metadata={}),
Document(page_content='<style>\n body {\n font-family: Aria', metadata={}),
Document(page_content='l, sans-serif;\n }\n h1 {', metadata={}),
Document(page_content='color: darkblue;\n }\n </style>\n </head', metadata={}),
Document(page_content='>', metadata={}),
Document(page_content='<body>', metadata={}),
Document(page_content='<div>\n <h1>🦜️🔗 LangChain</h1>', metadata={}),
Document(page_content='<p>⚡ Building applications with LLMs through composability ⚡', metadata={}),
Document(page_content='</p>\n </div>', metadata={}),
Document(page_content='<div>\n As an open source project in a rapidly dev', metadata={}),
Document(page_content='eloping field, we are extremely open to contributions.', metadata={}),
Document(page_content='</div>\n </body>\n</html>', metadata={})]
```
</CodeOutputBlock>
## Solidity
Here's an example using the Solidity text splitter
```python
SOL_CODE = """
pragma solidity ^0.8.20;
contract HelloWorld {
function add(uint a, uint b) pure public returns(uint) {
return a + b;
}
}
"""
sol_splitter = RecursiveCharacterTextSplitter.from_language(
language=Language.SOL, chunk_size=128, chunk_overlap=0
)
sol_docs = sol_splitter.create_documents([SOL_CODE])
sol_docs
```
<CodeOutputBlock>
```
[
Document(page_content='pragma solidity ^0.8.20;', metadata={}),
Document(page_content='contract HelloWorld {\n function add(uint a, uint b) pure public returns(uint) {\n return a + b;\n }\n}', metadata={})
]
```
</CodeOutputBlock>

View file

@ -1,50 +0,0 @@
```python
# This is a long document we can split up.
with open('../../../state_of_the_union.txt') as f:
state_of_the_union = f.read()
```
```python
from langchain.text_splitter import RecursiveCharacterTextSplitter
```
```python
text_splitter = RecursiveCharacterTextSplitter(
# Set a really small chunk size, just to show.
chunk_size = 100,
chunk_overlap = 20,
length_function = len,
)
```
```python
texts = text_splitter.create_documents([state_of_the_union])
print(texts[0])
print(texts[1])
```
<CodeOutputBlock lang="python">
```
page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and' lookup_str='' metadata={} lookup_index=0
page_content='of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.' lookup_str='' metadata={} lookup_index=0
```
</CodeOutputBlock>
```python
text_splitter.split_text(state_of_the_union)[:2]
```
<CodeOutputBlock lang="python">
```
['Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and',
'of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.']
```
</CodeOutputBlock>

View file

@ -1,261 +0,0 @@
```python
# Helper function for printing docs
def pretty_print_docs(docs):
print(f"\n{'-' * 100}\n".join([f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]))
```
## Using a vanilla vector store retriever
Let's start by initializing a simple vector store retriever and storing the 2023 State of the Union speech (in chunks). We can see that given an example question our retriever returns one or two relevant docs and a few irrelevant docs. And even the relevant docs have a lot of irrelevant information in them.
```python
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS
documents = TextLoader('../../../state_of_the_union.txt').load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
retriever = FAISS.from_documents(texts, OpenAIEmbeddings()).as_retriever()
docs = retriever.get_relevant_documents("What did the president say about Ketanji Brown Jackson")
pretty_print_docs(docs)
```
<CodeOutputBlock lang="python">
```
Document 1:
Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections.
Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.
One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.
And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.
----------------------------------------------------------------------------------------------------
Document 2:
A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.
And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system.
We can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling.
Weve set up joint patrols with Mexico and Guatemala to catch more human traffickers.
Were putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster.
Were securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.
----------------------------------------------------------------------------------------------------
Document 3:
And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong.
As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential.
While it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice.
And soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things.
So tonight Im offering a Unity Agenda for the Nation. Four big things we can do together.
First, beat the opioid epidemic.
----------------------------------------------------------------------------------------------------
Document 4:
Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers.
And as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up.
That ends on my watch.
Medicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect.
Well also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees.
Lets pass the Paycheck Fairness Act and paid leave.
Raise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty.
Lets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.
```
</CodeOutputBlock>
## Adding contextual compression with an `LLMChainExtractor`
Now let's wrap our base retriever with a `ContextualCompressionRetriever`. We'll add an `LLMChainExtractor`, which will iterate over the initially returned documents and extract from each only the content that is relevant to the query.
```python
from langchain.llms import OpenAI
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
llm = OpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=retriever)
compressed_docs = compression_retriever.get_relevant_documents("What did the president say about Ketanji Jackson Brown")
pretty_print_docs(compressed_docs)
```
<CodeOutputBlock lang="python">
```
Document 1:
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.
And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence."
----------------------------------------------------------------------------------------------------
Document 2:
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
```
</CodeOutputBlock>
## More built-in compressors: filters
### `LLMChainFilter`
The `LLMChainFilter` is slightly simpler but more robust compressor that uses an LLM chain to decide which of the initially retrieved documents to filter out and which ones to return, without manipulating the document contents.
```python
from langchain.retrievers.document_compressors import LLMChainFilter
_filter = LLMChainFilter.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(base_compressor=_filter, base_retriever=retriever)
compressed_docs = compression_retriever.get_relevant_documents("What did the president say about Ketanji Jackson Brown")
pretty_print_docs(compressed_docs)
```
<CodeOutputBlock lang="python">
```
Document 1:
Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections.
Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.
One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.
And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.
```
</CodeOutputBlock>
### `EmbeddingsFilter`
Making an extra LLM call over each retrieved document is expensive and slow. The `EmbeddingsFilter` provides a cheaper and faster option by embedding the documents and query and only returning those documents which have sufficiently similar embeddings to the query.
```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers.document_compressors import EmbeddingsFilter
embeddings = OpenAIEmbeddings()
embeddings_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)
compression_retriever = ContextualCompressionRetriever(base_compressor=embeddings_filter, base_retriever=retriever)
compressed_docs = compression_retriever.get_relevant_documents("What did the president say about Ketanji Jackson Brown")
pretty_print_docs(compressed_docs)
```
<CodeOutputBlock lang="python">
```
Document 1:
Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections.
Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.
One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.
And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.
----------------------------------------------------------------------------------------------------
Document 2:
A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.
And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system.
We can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling.
Weve set up joint patrols with Mexico and Guatemala to catch more human traffickers.
Were putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster.
Were securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.
----------------------------------------------------------------------------------------------------
Document 3:
And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong.
As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential.
While it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice.
And soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things.
So tonight Im offering a Unity Agenda for the Nation. Four big things we can do together.
First, beat the opioid epidemic.
```
</CodeOutputBlock>
# Stringing compressors and document transformers together
Using the `DocumentCompressorPipeline` we can also easily combine multiple compressors in sequence. Along with compressors we can add `BaseDocumentTransformer`s to our pipeline, which don't perform any contextual compression but simply perform some transformation on a set of documents. For example `TextSplitter`s can be used as document transformers to split documents into smaller pieces, and the `EmbeddingsRedundantFilter` can be used to filter out redundant documents based on embedding similarity between documents.
Below we create a compressor pipeline by first splitting our docs into smaller chunks, then removing redundant documents, and then filtering based on relevance to the query.
```python
from langchain.document_transformers import EmbeddingsRedundantFilter
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.text_splitter import CharacterTextSplitter
splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0, separator=". ")
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
relevant_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)
pipeline_compressor = DocumentCompressorPipeline(
transformers=[splitter, redundant_filter, relevant_filter]
)
```
```python
compression_retriever = ContextualCompressionRetriever(base_compressor=pipeline_compressor, base_retriever=retriever)
compressed_docs = compression_retriever.get_relevant_documents("What did the president say about Ketanji Jackson Brown")
pretty_print_docs(compressed_docs)
```
<CodeOutputBlock lang="python">
```
Document 1:
One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.
And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson
----------------------------------------------------------------------------------------------------
Document 2:
As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential.
While it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year
----------------------------------------------------------------------------------------------------
Document 3:
A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder
```
</CodeOutputBlock>

View file

@ -1,254 +0,0 @@
The public API of the `BaseRetriever` class in LangChain is as follows:
```python
from abc import ABC, abstractmethod
from typing import Any, List
from langchain.schema import Document
from langchain.callbacks.manager import Callbacks
class BaseRetriever(ABC):
...
def get_relevant_documents(
self, query: str, *, callbacks: Callbacks = None, **kwargs: Any
) -> List[Document]:
"""Retrieve documents relevant to a query.
Args:
query: string to find relevant documents for
callbacks: Callback manager or list of callbacks
Returns:
List of relevant documents
"""
...
async def aget_relevant_documents(
self, query: str, *, callbacks: Callbacks = None, **kwargs: Any
) -> List[Document]:
"""Asynchronously get documents relevant to a query.
Args:
query: string to find relevant documents for
callbacks: Callback manager or list of callbacks
Returns:
List of relevant documents
"""
...
```
It's that simple! You can call `get_relevant_documents` or the async `get_relevant_documents` methods to retrieve documents relevant to a query, where "relevance" is defined by
the specific retriever object you are calling.
Of course, we also help construct what we think useful Retrievers are. The main type of Retriever that we focus on is a Vectorstore retriever. We will focus on that for the rest of this guide.
In order to understand what a vectorstore retriever is, it's important to understand what a Vectorstore is. So let's look at that.
By default, LangChain uses [Chroma](/docs/ecosystem/integrations/chroma.html) as the vectorstore to index and search embeddings. To walk through this tutorial, we'll first need to install `chromadb`.
```
pip install chromadb
```
This example showcases question answering over documents.
We have chosen this as the example for getting started because it nicely combines a lot of different elements (Text splitters, embeddings, vectorstores) and then also shows how to use them in a chain.
Question answering over documents consists of four steps:
1. Create an index
2. Create a Retriever from that index
3. Create a question answering chain
4. Ask questions!
Each of the steps has multiple sub steps and potential configurations. In this notebook we will primarily focus on (1). We will start by showing the one-liner for doing so, but then break down what is actually going on.
First, let's import some common classes we'll use no matter what.
```python
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
```
Next in the generic setup, let's specify the document loader we want to use. You can download the `state_of_the_union.txt` file [here](https://github.com/hwchase17/langchain/blob/master/docs/extras/modules/state_of_the_union.txt)
```python
from langchain.document_loaders import TextLoader
loader = TextLoader('../state_of_the_union.txt', encoding='utf8')
```
## One Line Index Creation
To get started as quickly as possible, we can use the `VectorstoreIndexCreator`.
```python
from langchain.indexes import VectorstoreIndexCreator
```
```python
index = VectorstoreIndexCreator().from_loaders([loader])
```
<CodeOutputBlock lang="python">
```
Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.
```
</CodeOutputBlock>
Now that the index is created, we can use it to ask questions of the data! Note that under the hood this is actually doing a few steps as well, which we will cover later in this guide.
```python
query = "What did the president say about Ketanji Brown Jackson"
index.query(query)
```
<CodeOutputBlock lang="python">
```
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
```
</CodeOutputBlock>
```python
query = "What did the president say about Ketanji Brown Jackson"
index.query_with_sources(query)
```
<CodeOutputBlock lang="python">
```
{'question': 'What did the president say about Ketanji Brown Jackson',
'answer': " The president said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson, one of the nation's top legal minds, to continue Justice Breyer's legacy of excellence, and that she has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.\n",
'sources': '../state_of_the_union.txt'}
```
</CodeOutputBlock>
What is returned from the `VectorstoreIndexCreator` is `VectorStoreIndexWrapper`, which provides these nice `query` and `query_with_sources` functionality. If we just wanted to access the vectorstore directly, we can also do that.
```python
index.vectorstore
```
<CodeOutputBlock lang="python">
```
<langchain.vectorstores.chroma.Chroma at 0x119aa5940>
```
</CodeOutputBlock>
If we then want to access the VectorstoreRetriever, we can do that with:
```python
index.vectorstore.as_retriever()
```
<CodeOutputBlock lang="python">
```
VectorStoreRetriever(vectorstore=<langchain.vectorstores.chroma.Chroma object at 0x119aa5940>, search_kwargs={})
```
</CodeOutputBlock>
## Walkthrough
Okay, so what's actually going on? How is this index getting created?
A lot of the magic is being hid in this `VectorstoreIndexCreator`. What is this doing?
There are three main steps going on after the documents are loaded:
1. Splitting documents into chunks
2. Creating embeddings for each document
3. Storing documents and embeddings in a vectorstore
Let's walk through this in code
```python
documents = loader.load()
```
Next, we will split the documents into chunks.
```python
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
```
We will then select which embeddings we want to use.
```python
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()
```
We now create the vectorstore to use as the index.
```python
from langchain.vectorstores import Chroma
db = Chroma.from_documents(texts, embeddings)
```
<CodeOutputBlock lang="python">
```
Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.
```
</CodeOutputBlock>
So that's creating the index. Then, we expose this index in a retriever interface.
```python
retriever = db.as_retriever()
```
Then, as before, we create a chain and use it to answer questions!
```python
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=retriever)
```
```python
query = "What did the president say about Ketanji Brown Jackson"
qa.run(query)
```
<CodeOutputBlock lang="python">
```
" The President said that Judge Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He said she is a consensus builder and has received a broad range of support from organizations such as the Fraternal Order of Police and former judges appointed by Democrats and Republicans."
```
</CodeOutputBlock>
`VectorstoreIndexCreator` is just a wrapper around all this logic. It is configurable in the text splitter it uses, the embeddings it uses, and the vectorstore it uses. For example, you can configure it as below:
```python
index_creator = VectorstoreIndexCreator(
vectorstore_cls=Chroma,
embedding=OpenAIEmbeddings(),
text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
)
```
Hopefully this highlights what is going on under the hood of `VectorstoreIndexCreator`. While we think it's important to have a simple way to create indexes, we also think it's important to understand what's going on under the hood.

View file

@ -1,161 +0,0 @@
# Implement a Custom Retriever
In this walkthrough, you will implement a simple custom retriever in LangChain using a simple dot product distance lookup.
All retrievers inherit from the `BaseRetriever` class and override the following abstract methods:
```python
from abc import ABC, abstractmethod
from typing import Any, List
from langchain.schema import Document
from langchain.callbacks.manager import (
AsyncCallbackManagerForRetrieverRun,
CallbackManagerForRetrieverRun,
)
class BaseRetriever(ABC):
@abstractmethod
def _get_relevant_documents(
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> List[Document]:
"""Get documents relevant to a query.
Args:
query: string to find relevant documents for
run_manager: The callbacks handler to use
Returns:
List of relevant documents
"""
@abstractmethod
async def _aget_relevant_documents(
self,
query: str,
*,
run_manager: AsyncCallbackManagerForRetrieverRun,
) -> List[Document]:
"""Asynchronously get documents relevant to a query.
Args:
query: string to find relevant documents for
run_manager: The callbacks handler to use
Returns:
List of relevant documents
"""
```
The `_get_relevant_documents` and async `_get_relevant_documents` methods can be implemented however you see fit. The `run_manager` is useful if your retriever calls other traceable LangChain primitives like LLMs, chains, or tools.
Below, implement an example that fetches the most similar documents from a list of documents using a numpy array of embeddings.
```python
from typing import Any, List, Optional
import numpy as np
from langchain.callbacks.manager import (
AsyncCallbackManagerForRetrieverRun,
CallbackManagerForRetrieverRun,
)
from langchain.embeddings import OpenAIEmbeddings
from langchain.embeddings.base import Embeddings
from langchain.schema import BaseRetriever, Document
class NumpyRetriever(BaseRetriever):
"""Retrieves documents from a numpy array."""
def __init__(
self,
texts: List[str],
vectors: np.ndarray,
embeddings: Optional[Embeddings] = None,
num_to_return: int = 1,
) -> None:
super().__init__()
self.embeddings = embeddings or OpenAIEmbeddings()
self.texts = texts
self.vectors = vectors
self.num_to_return = num_to_return
@classmethod
def from_texts(
cls,
texts: List[str],
embeddings: Optional[Embeddings] = None,
**kwargs: Any,
) -> "NumpyRetriever":
embeddings = embeddings or OpenAIEmbeddings()
vectors = np.array(embeddings.embed_documents(texts))
return cls(texts, vectors, embeddings)
def _get_relevant_documents_from_query_vector(
self, vector_query: np.ndarray
) -> List[Document]:
dot_product = np.dot(self.vectors, vector_query)
# Get the indices of the min 5 documents
indices = np.argpartition(
dot_product, -min(self.num_to_return, len(self.vectors))
)[-self.num_to_return :]
# Sort indices by distance
indices = indices[np.argsort(dot_product[indices])]
return [
Document(
page_content=self.texts[idx],
metadata={"index": idx},
)
for idx in indices
]
def _get_relevant_documents(
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> List[Document]:
"""Get documents relevant to a query.
Args:
query: string to find relevant documents for
run_manager: The callbacks handler to use
Returns:
List of relevant documents
"""
vector_query = np.array(self.embeddings.embed_query(query))
return self._get_relevant_documents_from_query_vector(vector_query)
async def _aget_relevant_documents(
self,
query: str,
*,
run_manager: AsyncCallbackManagerForRetrieverRun,
) -> List[Document]:
"""Asynchronously get documents relevant to a query.
Args:
query: string to find relevant documents for
run_manager: The callbacks handler to use
Returns:
List of relevant documents
"""
query_emb = await self.embeddings.aembed_query(query)
return self._get_relevant_documents_from_query_vector(np.array(query_emb))
```
The retriever can be instantiated through the class method `from_texts`. It embeds the texts and stores them in a numpy array. To look up documents, it embeds the query and finds the most similar documents using a simple dot product distance.
Once the retriever is implemented, you can use it like any other retriever in LangChain.
```python
retriever = NumpyRetriever.from_texts(texts= ["hello world", "goodbye world"])
```
You can then use the retriever to get relevant documents.
```python
retriever.get_relevant_documents("Hi there!")
# [Document(page_content='hello world', metadata={'index': 0})]
```
```python
retriever.get_relevant_documents("Bye!")
# [Document(page_content='goodbye world', metadata={'index': 1})]
```

View file

@ -1,124 +0,0 @@
```python
import faiss
from datetime import datetime, timedelta
from langchain.docstore import InMemoryDocstore
from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers import TimeWeightedVectorStoreRetriever
from langchain.schema import Document
from langchain.vectorstores import FAISS
```
## Low Decay Rate
A low `decay rate` (in this, to be extreme, we will set close to 0) means memories will be "remembered" for longer. A `decay rate` of 0 means memories never be forgotten, making this retriever equivalent to the vector lookup.
```python
# Define your embedding model
embeddings_model = OpenAIEmbeddings()
# Initialize the vectorstore as empty
embedding_size = 1536
index = faiss.IndexFlatL2(embedding_size)
vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})
retriever = TimeWeightedVectorStoreRetriever(vectorstore=vectorstore, decay_rate=.0000000000000000000000001, k=1)
```
```python
yesterday = datetime.now() - timedelta(days=1)
retriever.add_documents([Document(page_content="hello world", metadata={"last_accessed_at": yesterday})])
retriever.add_documents([Document(page_content="hello foo")])
```
<CodeOutputBlock lang="python">
```
['d7f85756-2371-4bdf-9140-052780a0f9b3']
```
</CodeOutputBlock>
```python
# "Hello World" is returned first because it is most salient, and the decay rate is close to 0., meaning it's still recent enough
retriever.get_relevant_documents("hello world")
```
<CodeOutputBlock lang="python">
```
[Document(page_content='hello world', metadata={'last_accessed_at': datetime.datetime(2023, 5, 13, 21, 0, 27, 678341), 'created_at': datetime.datetime(2023, 5, 13, 21, 0, 27, 279596), 'buffer_idx': 0})]
```
</CodeOutputBlock>
## High Decay Rate
With a high `decay rate` (e.g., several 9's), the `recency score` quickly goes to 0! If you set this all the way to 1, `recency` is 0 for all objects, once again making this equivalent to a vector lookup.
```python
# Define your embedding model
embeddings_model = OpenAIEmbeddings()
# Initialize the vectorstore as empty
embedding_size = 1536
index = faiss.IndexFlatL2(embedding_size)
vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})
retriever = TimeWeightedVectorStoreRetriever(vectorstore=vectorstore, decay_rate=.999, k=1)
```
```python
yesterday = datetime.now() - timedelta(days=1)
retriever.add_documents([Document(page_content="hello world", metadata={"last_accessed_at": yesterday})])
retriever.add_documents([Document(page_content="hello foo")])
```
<CodeOutputBlock lang="python">
```
['40011466-5bbe-4101-bfd1-e22e7f505de2']
```
</CodeOutputBlock>
```python
# "Hello Foo" is returned first because "hello world" is mostly forgotten
retriever.get_relevant_documents("hello world")
```
<CodeOutputBlock lang="python">
```
[Document(page_content='hello foo', metadata={'last_accessed_at': datetime.datetime(2023, 4, 16, 22, 9, 2, 494798), 'created_at': datetime.datetime(2023, 4, 16, 22, 9, 2, 178722), 'buffer_idx': 1})]
```
</CodeOutputBlock>
## Virtual Time
Using some utils in LangChain, you can mock out the time component
```python
from langchain.utils import mock_now
import datetime
```
```python
# Notice the last access time is that date time
with mock_now(datetime.datetime(2011, 2, 3, 10, 11)):
print(retriever.get_relevant_documents("hello world"))
```
<CodeOutputBlock lang="python">
```
[Document(page_content='hello world', metadata={'last_accessed_at': MockDateTime(2011, 2, 3, 10, 11), 'created_at': datetime.datetime(2023, 5, 13, 21, 0, 27, 279596), 'buffer_idx': 0})]
```
</CodeOutputBlock>

View file

@ -1,88 +0,0 @@
```python
from langchain.document_loaders import TextLoader
loader = TextLoader('../../../state_of_the_union.txt')
```
```python
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(texts, embeddings)
```
<CodeOutputBlock lang="python">
```
Exiting: Cleaning up .chroma directory
```
</CodeOutputBlock>
```python
retriever = db.as_retriever()
```
```python
docs = retriever.get_relevant_documents("what did he say about ketanji brown jackson")
```
## Maximum Marginal Relevance Retrieval
By default, the vectorstore retriever uses similarity search. If the underlying vectorstore support maximum marginal relevance search, you can specify that as the search type.
```python
retriever = db.as_retriever(search_type="mmr")
```
```python
docs = retriever.get_relevant_documents("what did he say about ketanji brown jackson")
```
## Similarity Score Threshold Retrieval
You can also a retrieval method that sets a similarity score threshold and only returns documents with a score above that threshold
```python
retriever = db.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": .5})
```
```python
docs = retriever.get_relevant_documents("what did he say about ketanji brown jackson")
```
## Specifying top k
You can also specify search kwargs like `k` to use when doing retrieval.
```python
retriever = db.as_retriever(search_kwargs={"k": 1})
```
```python
docs = retriever.get_relevant_documents("what did he say about ketanji brown jackson")
```
```python
len(docs)
```
<CodeOutputBlock lang="python">
```
1
```
</CodeOutputBlock>

View file

@ -1,201 +0,0 @@
## Get started
We'll use a Pinecone vector store in this example.
First we'll want to create a `Pinecone` VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.
To use Pinecone, you to have `pinecone` package installed and you must have an API key and an Environment. Here are the [installation instructions](https://docs.pinecone.io/docs/quickstart).
NOTE: The self-query retriever requires you to have `lark` package installed.
```python
# !pip install lark pinecone-client
```
```python
import os
import pinecone
pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment=os.environ["PINECONE_ENV"])
```
```python
from langchain.schema import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
embeddings = OpenAIEmbeddings()
# create new index
pinecone.create_index("langchain-self-retriever-demo", dimension=1536)
```
```python
docs = [
Document(page_content="A bunch of scientists bring back dinosaurs and mayhem breaks loose", metadata={"year": 1993, "rating": 7.7, "genre": ["action", "science fiction"]}),
Document(page_content="Leo DiCaprio gets lost in a dream within a dream within a dream within a ...", metadata={"year": 2010, "director": "Christopher Nolan", "rating": 8.2}),
Document(page_content="A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea", metadata={"year": 2006, "director": "Satoshi Kon", "rating": 8.6}),
Document(page_content="A bunch of normal-sized women are supremely wholesome and some men pine after them", metadata={"year": 2019, "director": "Greta Gerwig", "rating": 8.3}),
Document(page_content="Toys come alive and have a blast doing so", metadata={"year": 1995, "genre": "animated"}),
Document(page_content="Three men walk into the Zone, three men walk out of the Zone", metadata={"year": 1979, "rating": 9.9, "director": "Andrei Tarkovsky", "genre": ["science fiction", "thriller"], "rating": 9.9})
]
vectorstore = Pinecone.from_documents(
docs, embeddings, index_name="langchain-self-retriever-demo"
)
```
## Creating our self-querying retriever
Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents.
```python
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
metadata_field_info=[
AttributeInfo(
name="genre",
description="The genre of the movie",
type="string or list[string]",
),
AttributeInfo(
name="year",
description="The year the movie was released",
type="integer",
),
AttributeInfo(
name="director",
description="The name of the movie director",
type="string",
),
AttributeInfo(
name="rating",
description="A 1-10 rating for the movie",
type="float"
),
]
document_content_description = "Brief summary of a movie"
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(llm, vectorstore, document_content_description, metadata_field_info, verbose=True)
```
## Testing it out
And now we can try actually using our retriever!
```python
# This example only specifies a relevant query
retriever.get_relevant_documents("What are some movies about dinosaurs")
```
<CodeOutputBlock lang="python">
```
query='dinosaur' filter=None
[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'genre': ['action', 'science fiction'], 'rating': 7.7, 'year': 1993.0}),
Document(page_content='Toys come alive and have a blast doing so', metadata={'genre': 'animated', 'year': 1995.0}),
Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006.0}),
Document(page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...', metadata={'director': 'Christopher Nolan', 'rating': 8.2, 'year': 2010.0})]
```
</CodeOutputBlock>
```python
# This example only specifies a filter
retriever.get_relevant_documents("I want to watch a movie rated higher than 8.5")
```
<CodeOutputBlock lang="python">
```
query=' ' filter=Comparison(comparator=<Comparator.GT: 'gt'>, attribute='rating', value=8.5)
[Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006.0}),
Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'director': 'Andrei Tarkovsky', 'genre': ['science fiction', 'thriller'], 'rating': 9.9, 'year': 1979.0})]
```
</CodeOutputBlock>
```python
# This example specifies a query and a filter
retriever.get_relevant_documents("Has Greta Gerwig directed any movies about women")
```
<CodeOutputBlock lang="python">
```
query='women' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='director', value='Greta Gerwig')
[Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'director': 'Greta Gerwig', 'rating': 8.3, 'year': 2019.0})]
```
</CodeOutputBlock>
```python
# This example specifies a composite filter
retriever.get_relevant_documents("What's a highly rated (above 8.5) science fiction film?")
```
<CodeOutputBlock lang="python">
```
query=' ' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='science fiction'), Comparison(comparator=<Comparator.GT: 'gt'>, attribute='rating', value=8.5)])
[Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'director': 'Andrei Tarkovsky', 'genre': ['science fiction', 'thriller'], 'rating': 9.9, 'year': 1979.0})]
```
</CodeOutputBlock>
```python
# This example specifies a query and composite filter
retriever.get_relevant_documents("What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated")
```
<CodeOutputBlock lang="python">
```
query='toys' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.GT: 'gt'>, attribute='year', value=1990.0), Comparison(comparator=<Comparator.LT: 'lt'>, attribute='year', value=2005.0), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='animated')])
[Document(page_content='Toys come alive and have a blast doing so', metadata={'genre': 'animated', 'year': 1995.0})]
```
</CodeOutputBlock>
## Filter k
We can also use the self query retriever to specify `k`: the number of documents to fetch.
We can do this by passing `enable_limit=True` to the constructor.
```python
retriever = SelfQueryRetriever.from_llm(
llm,
vectorstore,
document_content_description,
metadata_field_info,
enable_limit=True,
verbose=True
)
```
```python
# This example only specifies a relevant query
retriever.get_relevant_documents("What are two movies about dinosaurs")
```

View file

@ -1,73 +0,0 @@
### Setup
To start we'll need to install the OpenAI Python package:
```bash
pip install openai
```
Accessing the API requires an API key, which you can get by creating an account and heading [here](https://platform.openai.com/account/api-keys). Once we have a key we'll want to set it as an environment variable by running:
```bash
export OPENAI_API_KEY="..."
```
If you'd prefer not to set an environment variable you can pass the key in directly via the `openai_api_key` named parameter when initiating the OpenAI LLM class:
```python
from langchain.embeddings import OpenAIEmbeddings
embeddings_model = OpenAIEmbeddings(openai_api_key="...")
```
otherwise you can initialize without any params:
```python
from langchain.embeddings import OpenAIEmbeddings
embeddings_model = OpenAIEmbeddings()
```
### `embed_documents`
#### Embed list of texts
```python
embeddings = embeddings_model.embed_documents(
[
"Hi there!",
"Oh, hello!",
"What's your name?",
"My friends call me World",
"Hello World!"
]
)
len(embeddings), len(embeddings[0])
```
<CodeOutputBlock language="python">
```
(5, 1536)
```
</CodeOutputBlock>
### `embed_query`
#### Embed single query
Embed a single piece of text for the purpose of comparing to other embedded pieces of texts.
```python
embedded_query = embeddings_model.embed_query("What was the name mentioned in the conversation?")
embedded_query[:5]
```
<CodeOutputBlock language="python">
```
[0.0053587136790156364,
-0.0004999046213924885,
0.038883671164512634,
-0.003001077566295862,
-0.00900818221271038]
```
</CodeOutputBlock>

View file

@ -1,89 +0,0 @@
Langchain supports async operation on vector stores. All the methods might be called using their async counterparts, with the prefix `a`, meaning `async`.
`Qdrant` is a vector store, which supports all the async operations, thus it will be used in this walkthrough.
```bash
pip install qdrant-client
```
```python
from langchain.vectorstores import Qdrant
```
### Create a vector store asynchronously
```python
db = await Qdrant.afrom_documents(documents, embeddings, "http://localhost:6333")
```
### Similarity search
```python
query = "What did the president say about Ketanji Brown Jackson"
docs = await db.asimilarity_search(query)
print(docs[0].page_content)
```
<CodeOutputBlock lang="python">
```
Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections.
Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.
One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.
And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.
```
</CodeOutputBlock>
### Similarity search by vector
```python
embedding_vector = embeddings.embed_query(query)
docs = await db.asimilarity_search_by_vector(embedding_vector)
```
## Maximum marginal relevance search (MMR)
Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents. It is also supported in async API.
```python
query = "What did the president say about Ketanji Brown Jackson"
found_docs = await qdrant.amax_marginal_relevance_search(query, k=2, fetch_k=10)
for i, doc in enumerate(found_docs):
print(f"{i + 1}.", doc.page_content, "\n")
```
<CodeOutputBlock lang="python">
```
1. Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections.
Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.
One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.
And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.
2. We cant change how divided weve been. But we can change how we move forward—on COVID-19 and other issues we must face together.
I recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera.
They were responding to a 9-1-1 call when a man shot and killed them with a stolen gun.
Officer Mora was 27 years old.
Officer Rivera was 22.
Both Dominican Americans whod grown up on the same streets they later chose to patrol as police officers.
I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves.
Ive worked on these issues a long time.
I know what works: Investing in crime preventionand community police officers wholl walk the beat, wholl know the neighborhood, and who can restore trust and safety.
```
</CodeOutputBlock>

View file

@ -1,168 +0,0 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
There are many great vector store options, here are a few that are free, open-source, and run entirely on your local machine. Review all integrations for many great hosted offerings.
<Tabs>
<TabItem value="chroma" label="Chroma" default>
This walkthrough uses the `chroma` vector database, which runs on your local machine as a library.
```bash
pip install chromadb
```
We want to use OpenAIEmbeddings so we have to get the OpenAI API Key.
```python
import os
import getpass
os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')
```
```python
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
# Load the document, split it into chunks, embed each chunk and load it into the vector store.
raw_documents = TextLoader('../../../state_of_the_union.txt').load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
db = Chroma.from_documents(documents, OpenAIEmbeddings())
```
</TabItem>
<TabItem value="faiss" label="FAISS">
This walkthrough uses the `FAISS` vector database, which makes use of the Facebook AI Similarity Search (FAISS) library.
```bash
pip install faiss-cpu
```
We want to use OpenAIEmbeddings so we have to get the OpenAI API Key.
```python
import os
import getpass
os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')
```
```python
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
# Load the document, split it into chunks, embed each chunk and load it into the vector store.
raw_documents = TextLoader('../../../state_of_the_union.txt').load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
db = FAISS.from_documents(documents, OpenAIEmbeddings())
```
</TabItem>
<TabItem value="lance" label="Lance">
This notebook shows how to use functionality related to the LanceDB vector database based on the Lance data format.
```bash
pip install lancedb
```
We want to use OpenAIEmbeddings so we have to get the OpenAI API Key.
```python
import os
import getpass
os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')
```
```python
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import LanceDB
import lancedb
db = lancedb.connect("/tmp/lancedb")
table = db.create_table(
"my_table",
data=[
{
"vector": embeddings.embed_query("Hello World"),
"text": "Hello World",
"id": "1",
}
],
mode="overwrite",
)
# Load the document, split it into chunks, embed each chunk and load it into the vector store.
raw_documents = TextLoader('../../../state_of_the_union.txt').load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
db = LanceDB.from_documents(documents, OpenAIEmbeddings(), connection=table)
```
</TabItem>
</Tabs>
### Similarity search
```python
query = "What did the president say about Ketanji Brown Jackson"
docs = db.similarity_search(query)
print(docs[0].page_content)
```
<CodeOutputBlock lang="python">
```
Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections.
Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.
One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.
And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.
```
</CodeOutputBlock>
### Similarity search by vector
It is also possible to do a search for documents similar to a given embedding vector using `similarity_search_by_vector` which accepts an embedding vector as a parameter instead of a string.
```python
embedding_vector = OpenAIEmbeddings().embed_query(query)
docs = db.similarity_search_by_vector(embedding_vector)
print(docs[0].page_content)
```
The query is the same, and so the result is also the same.
<CodeOutputBlock lang="python">
```
Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections.
Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.
One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.
And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.
```
</CodeOutputBlock>

View file

@ -1,23 +0,0 @@
```python
from langchain.memory import ChatMessageHistory
history = ChatMessageHistory()
history.add_user_message("hi!")
history.add_ai_message("whats up?")
```
```python
history.messages
```
<CodeOutputBlock lang="python">
```
[HumanMessage(content='hi!', additional_kwargs={}),
AIMessage(content='whats up?', additional_kwargs={})]
```
</CodeOutputBlock>

View file

@ -1,173 +0,0 @@
Let's take a look at how to use ConversationBufferMemory in chains.
ConversationBufferMemory is an extremely simple form of memory that just keeps a list of chat messages in a buffer
and passes those into the prompt template.
```python
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory()
memory.chat_memory.add_user_message("hi!")
memory.chat_memory.add_ai_message("whats up?")
```
When using memory in a chain, there are a few key concepts to understand.
Note that here we cover general concepts that are useful for most types of memory.
Each individual memory type may very well have its own parameters and concepts that are necessary to understand.
### What variables get returned from memory
Before going into the chain, various variables are read from memory.
This have specific names which need to align with the variables the chain expects.
You can see what these variables are by calling `memory.load_memory_variables({})`.
Note that the empty dictionary that we pass in is just a placeholder for real variables.
If the memory type you are using is dependent upon the input variables, you may need to pass some in.
```python
memory.load_memory_variables({})
```
<CodeOutputBlock lang="python">
```
{'history': "Human: hi!\nAI: whats up?"}
```
</CodeOutputBlock>
In this case, you can see that `load_memory_variables` returns a single key, `history`.
This means that your chain (and likely your prompt) should expect and input named `history`.
You can usually control this variable through parameters on the memory class.
For example, if you want the memory variables to be returned in the key `chat_history` you can do:
```python
memory = ConversationBufferMemory(memory_key="chat_history")
memory.chat_memory.add_user_message("hi!")
memory.chat_memory.add_ai_message("whats up?")
```
<CodeOutputBlock lang="python">
```
{'chat_history': "Human: hi!\nAI: whats up?"}
```
</CodeOutputBlock>
The parameter name to control these keys may vary per memory type, but it's important to understand that (1) this is controllable, (2) how to control it.
### Whether memory is a string or a list of messages
One of the most common types of memory involves returning a list of chat messages.
These can either be returned as a single string, all concatenated together (useful when they will be passed in LLMs)
or a list of ChatMessages (useful when passed into ChatModels).
By default, they are returned as a single string.
In order to return as a list of messages, you can set `return_messages=True`
```python
memory = ConversationBufferMemory(return_messages=True)
memory.chat_memory.add_user_message("hi!")
memory.chat_memory.add_ai_message("whats up?")
```
<CodeOutputBlock lang="python">
```
{'history': [HumanMessage(content='hi!', additional_kwargs={}, example=False),
AIMessage(content='whats up?', additional_kwargs={}, example=False)]}
```
</CodeOutputBlock>
### What keys are saved to memory
Often times chains take in or return multiple input/output keys.
In these cases, how can we know which keys we want to save to the chat message history?
This is generally controllable by `input_key` and `output_key` parameters on the memory types.
These default to None - and if there is only one input/output key it is known to just use that.
However, if there are multiple input/output keys then you MUST specify the name of which one to use
### End to end example
Finally, let's take a look at using this in a chain.
We'll use an LLMChain, and show working with both an LLM and a ChatModel.
#### Using an LLM
```python
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
llm = OpenAI(temperature=0)
# Notice that "chat_history" is present in the prompt template
template = """You are a nice chatbot having a conversation with a human.
Previous conversation:
{chat_history}
New human question: {question}
Response:"""
prompt = PromptTemplate.from_template(template)
# Notice that we need to align the `memory_key`
memory = ConversationBufferMemory(memory_key="chat_history")
conversation = LLMChain(
llm=llm,
prompt=prompt,
verbose=True,
memory=memory
)
```
```python
# Notice that we just pass in the `question` variables - `chat_history` gets populated by memory
conversation({"question": "hi"})
```
#### Using a ChatModel
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts import (
ChatPromptTemplate,
MessagesPlaceholder,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
llm = ChatOpenAI()
prompt = ChatPromptTemplate(
messages=[
SystemMessagePromptTemplate.from_template(
"You are a nice chatbot having a conversation with a human."
),
# The `variable_name` here is what must align with memory
MessagesPlaceholder(variable_name="chat_history"),
HumanMessagePromptTemplate.from_template("{question}")
]
)
# Notice that we `return_messages=True` to fit into the MessagesPlaceholder
# Notice that `"chat_history"` aligns with the MessagesPlaceholder name.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
conversation = LLMChain(
llm=llm,
prompt=prompt,
verbose=True,
memory=memory
)
```
```python
# Notice that we just pass in the `question` variables - `chat_history` gets populated by memory
conversation({"question": "hi"})
```

View file

@ -1,157 +0,0 @@
```python
from langchain.memory import ConversationBufferMemory
```
```python
memory = ConversationBufferMemory()
memory.save_context({"input": "hi"}, {"output": "whats up"})
```
```python
memory.load_memory_variables({})
```
<CodeOutputBlock lang="python">
```
{'history': 'Human: hi\nAI: whats up'}
```
</CodeOutputBlock>
We can also get the history as a list of messages (this is useful if you are using this with a chat model).
```python
memory = ConversationBufferMemory(return_messages=True)
memory.save_context({"input": "hi"}, {"output": "whats up"})
```
```python
memory.load_memory_variables({})
```
<CodeOutputBlock lang="python">
```
{'history': [HumanMessage(content='hi', additional_kwargs={}),
AIMessage(content='whats up', additional_kwargs={})]}
```
</CodeOutputBlock>
## Using in a chain
Finally, let's take a look at using this in a chain (setting `verbose=True` so we can see the prompt).
```python
from langchain.llms import OpenAI
from langchain.chains import ConversationChain
llm = OpenAI(temperature=0)
conversation = ConversationChain(
llm=llm,
verbose=True,
memory=ConversationBufferMemory()
)
```
```python
conversation.predict(input="Hi there!")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
Human: Hi there!
AI:
> Finished chain.
" Hi there! It's nice to meet you. How can I help you today?"
```
</CodeOutputBlock>
```python
conversation.predict(input="I'm doing well! Just having a conversation with an AI.")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
Human: Hi there!
AI: Hi there! It's nice to meet you. How can I help you today?
Human: I'm doing well! Just having a conversation with an AI.
AI:
> Finished chain.
" That's great! It's always nice to have a conversation with someone new. What would you like to talk about?"
```
</CodeOutputBlock>
```python
conversation.predict(input="Tell me about yourself.")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
Human: Hi there!
AI: Hi there! It's nice to meet you. How can I help you today?
Human: I'm doing well! Just having a conversation with an AI.
AI: That's great! It's always nice to have a conversation with someone new. What would you like to talk about?
Human: Tell me about yourself.
AI:
> Finished chain.
" Sure! I'm an AI created to help people with their everyday tasks. I'm programmed to understand natural language and provide helpful information. I'm also constantly learning and updating my knowledge base so I can provide more accurate and helpful answers."
```
</CodeOutputBlock>
And that's it for the getting started! There are plenty of different types of memory, check out our examples to see them all

View file

@ -1,185 +0,0 @@
```python
from langchain.memory import ConversationBufferWindowMemory
```
```python
memory = ConversationBufferWindowMemory( k=1)
memory.save_context({"input": "hi"}, {"output": "whats up"})
memory.save_context({"input": "not much you"}, {"output": "not much"})
```
```python
memory.load_memory_variables({})
```
<CodeOutputBlock lang="python">
```
{'history': 'Human: not much you\nAI: not much'}
```
</CodeOutputBlock>
We can also get the history as a list of messages (this is useful if you are using this with a chat model).
```python
memory = ConversationBufferWindowMemory( k=1, return_messages=True)
memory.save_context({"input": "hi"}, {"output": "whats up"})
memory.save_context({"input": "not much you"}, {"output": "not much"})
```
```python
memory.load_memory_variables({})
```
<CodeOutputBlock lang="python">
```
{'history': [HumanMessage(content='not much you', additional_kwargs={}),
AIMessage(content='not much', additional_kwargs={})]}
```
</CodeOutputBlock>
## Using in a chain
Let's walk through an example, again setting `verbose=True` so we can see the prompt.
```python
from langchain.llms import OpenAI
from langchain.chains import ConversationChain
conversation_with_summary = ConversationChain(
llm=OpenAI(temperature=0),
# We set a low k=2, to only keep the last 2 interactions in memory
memory=ConversationBufferWindowMemory(k=2),
verbose=True
)
conversation_with_summary.predict(input="Hi, what's up?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
Human: Hi, what's up?
AI:
> Finished chain.
" Hi there! I'm doing great. I'm currently helping a customer with a technical issue. How about you?"
```
</CodeOutputBlock>
```python
conversation_with_summary.predict(input="What's their issues?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
Human: Hi, what's up?
AI: Hi there! I'm doing great. I'm currently helping a customer with a technical issue. How about you?
Human: What's their issues?
AI:
> Finished chain.
" The customer is having trouble connecting to their Wi-Fi network. I'm helping them troubleshoot the issue and get them connected."
```
</CodeOutputBlock>
```python
conversation_with_summary.predict(input="Is it going well?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
Human: Hi, what's up?
AI: Hi there! I'm doing great. I'm currently helping a customer with a technical issue. How about you?
Human: What's their issues?
AI: The customer is having trouble connecting to their Wi-Fi network. I'm helping them troubleshoot the issue and get them connected.
Human: Is it going well?
AI:
> Finished chain.
" Yes, it's going well so far. We've already identified the problem and are now working on a solution."
```
</CodeOutputBlock>
```python
# Notice here that the first interaction does not appear.
conversation_with_summary.predict(input="What's the solution?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
Human: What's their issues?
AI: The customer is having trouble connecting to their Wi-Fi network. I'm helping them troubleshoot the issue and get them connected.
Human: Is it going well?
AI: Yes, it's going well so far. We've already identified the problem and are now working on a solution.
Human: What's the solution?
AI:
> Finished chain.
" The solution is to reset the router and reconfigure the settings. We're currently in the process of doing that."
```
</CodeOutputBlock>

View file

@ -1,418 +0,0 @@
```python
from langchain.llms import OpenAI
from langchain.memory import ConversationEntityMemory
llm = OpenAI(temperature=0)
```
```python
memory = ConversationEntityMemory(llm=llm)
_input = {"input": "Deven & Sam are working on a hackathon project"}
memory.load_memory_variables(_input)
memory.save_context(
_input,
{"output": " That sounds like a great project! What kind of project are they working on?"}
)
```
```python
memory.load_memory_variables({"input": 'who is Sam'})
```
<CodeOutputBlock lang="python">
```
{'history': 'Human: Deven & Sam are working on a hackathon project\nAI: That sounds like a great project! What kind of project are they working on?',
'entities': {'Sam': 'Sam is working on a hackathon project with Deven.'}}
```
</CodeOutputBlock>
```python
memory = ConversationEntityMemory(llm=llm, return_messages=True)
_input = {"input": "Deven & Sam are working on a hackathon project"}
memory.load_memory_variables(_input)
memory.save_context(
_input,
{"output": " That sounds like a great project! What kind of project are they working on?"}
)
```
```python
memory.load_memory_variables({"input": 'who is Sam'})
```
<CodeOutputBlock lang="python">
```
{'history': [HumanMessage(content='Deven & Sam are working on a hackathon project', additional_kwargs={}),
AIMessage(content=' That sounds like a great project! What kind of project are they working on?', additional_kwargs={})],
'entities': {'Sam': 'Sam is working on a hackathon project with Deven.'}}
```
</CodeOutputBlock>
## Using in a chain
Let's now use it in a chain!
```python
from langchain.chains import ConversationChain
from langchain.memory import ConversationEntityMemory
from langchain.memory.prompt import ENTITY_MEMORY_CONVERSATION_TEMPLATE
from pydantic import BaseModel
from typing import List, Dict, Any
```
```python
conversation = ConversationChain(
llm=llm,
verbose=True,
prompt=ENTITY_MEMORY_CONVERSATION_TEMPLATE,
memory=ConversationEntityMemory(llm=llm)
)
```
```python
conversation.predict(input="Deven & Sam are working on a hackathon project")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
You are an assistant to a human, powered by a large language model trained by OpenAI.
You are designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, you are able to generate human-like text based on the input you receive, allowing you to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
You are constantly learning and improving, and your capabilities are constantly evolving. You are able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. You have access to some personalized information provided by the human in the Context section below. Additionally, you are able to generate your own text based on the input you receive, allowing you to engage in discussions and provide explanations and descriptions on a wide range of topics.
Overall, you are a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether the human needs help with a specific question or just wants to have a conversation about a particular topic, you are here to assist.
Context:
{'Deven': 'Deven is working on a hackathon project with Sam.', 'Sam': 'Sam is working on a hackathon project with Deven.'}
Current conversation:
Last line:
Human: Deven & Sam are working on a hackathon project
You:
> Finished chain.
' That sounds like a great project! What kind of project are they working on?'
```
</CodeOutputBlock>
```python
conversation.memory.entity_store.store
```
<CodeOutputBlock lang="python">
```
{'Deven': 'Deven is working on a hackathon project with Sam, which they are entering into a hackathon.',
'Sam': 'Sam is working on a hackathon project with Deven.'}
```
</CodeOutputBlock>
```python
conversation.predict(input="They are trying to add more complex memory structures to Langchain")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
You are an assistant to a human, powered by a large language model trained by OpenAI.
You are designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, you are able to generate human-like text based on the input you receive, allowing you to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
You are constantly learning and improving, and your capabilities are constantly evolving. You are able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. You have access to some personalized information provided by the human in the Context section below. Additionally, you are able to generate your own text based on the input you receive, allowing you to engage in discussions and provide explanations and descriptions on a wide range of topics.
Overall, you are a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether the human needs help with a specific question or just wants to have a conversation about a particular topic, you are here to assist.
Context:
{'Deven': 'Deven is working on a hackathon project with Sam, which they are entering into a hackathon.', 'Sam': 'Sam is working on a hackathon project with Deven.', 'Langchain': ''}
Current conversation:
Human: Deven & Sam are working on a hackathon project
AI: That sounds like a great project! What kind of project are they working on?
Last line:
Human: They are trying to add more complex memory structures to Langchain
You:
> Finished chain.
' That sounds like an interesting project! What kind of memory structures are they trying to add?'
```
</CodeOutputBlock>
```python
conversation.predict(input="They are adding in a key-value store for entities mentioned so far in the conversation.")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
You are an assistant to a human, powered by a large language model trained by OpenAI.
You are designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, you are able to generate human-like text based on the input you receive, allowing you to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
You are constantly learning and improving, and your capabilities are constantly evolving. You are able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. You have access to some personalized information provided by the human in the Context section below. Additionally, you are able to generate your own text based on the input you receive, allowing you to engage in discussions and provide explanations and descriptions on a wide range of topics.
Overall, you are a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether the human needs help with a specific question or just wants to have a conversation about a particular topic, you are here to assist.
Context:
{'Deven': 'Deven is working on a hackathon project with Sam, which they are entering into a hackathon. They are trying to add more complex memory structures to Langchain.', 'Sam': 'Sam is working on a hackathon project with Deven, trying to add more complex memory structures to Langchain.', 'Langchain': 'Langchain is a project that is trying to add more complex memory structures.', 'Key-Value Store': ''}
Current conversation:
Human: Deven & Sam are working on a hackathon project
AI: That sounds like a great project! What kind of project are they working on?
Human: They are trying to add more complex memory structures to Langchain
AI: That sounds like an interesting project! What kind of memory structures are they trying to add?
Last line:
Human: They are adding in a key-value store for entities mentioned so far in the conversation.
You:
> Finished chain.
' That sounds like a great idea! How will the key-value store help with the project?'
```
</CodeOutputBlock>
```python
conversation.predict(input="What do you know about Deven & Sam?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
You are an assistant to a human, powered by a large language model trained by OpenAI.
You are designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, you are able to generate human-like text based on the input you receive, allowing you to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
You are constantly learning and improving, and your capabilities are constantly evolving. You are able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. You have access to some personalized information provided by the human in the Context section below. Additionally, you are able to generate your own text based on the input you receive, allowing you to engage in discussions and provide explanations and descriptions on a wide range of topics.
Overall, you are a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether the human needs help with a specific question or just wants to have a conversation about a particular topic, you are here to assist.
Context:
{'Deven': 'Deven is working on a hackathon project with Sam, which they are entering into a hackathon. They are trying to add more complex memory structures to Langchain, including a key-value store for entities mentioned so far in the conversation.', 'Sam': 'Sam is working on a hackathon project with Deven, trying to add more complex memory structures to Langchain, including a key-value store for entities mentioned so far in the conversation.'}
Current conversation:
Human: Deven & Sam are working on a hackathon project
AI: That sounds like a great project! What kind of project are they working on?
Human: They are trying to add more complex memory structures to Langchain
AI: That sounds like an interesting project! What kind of memory structures are they trying to add?
Human: They are adding in a key-value store for entities mentioned so far in the conversation.
AI: That sounds like a great idea! How will the key-value store help with the project?
Last line:
Human: What do you know about Deven & Sam?
You:
> Finished chain.
' Deven and Sam are working on a hackathon project together, trying to add more complex memory structures to Langchain, including a key-value store for entities mentioned so far in the conversation. They seem to be working hard on this project and have a great idea for how the key-value store can help.'
```
</CodeOutputBlock>
## Inspecting the memory store
We can also inspect the memory store directly. In the following examples, we look at it directly, and then go through some examples of adding information and watch how it changes.
```python
from pprint import pprint
pprint(conversation.memory.entity_store.store)
```
<CodeOutputBlock lang="python">
```
{'Daimon': 'Daimon is a company founded by Sam, a successful entrepreneur.',
'Deven': 'Deven is working on a hackathon project with Sam, which they are '
'entering into a hackathon. They are trying to add more complex '
'memory structures to Langchain, including a key-value store for '
'entities mentioned so far in the conversation, and seem to be '
'working hard on this project with a great idea for how the '
'key-value store can help.',
'Key-Value Store': 'A key-value store is being added to the project to store '
'entities mentioned in the conversation.',
'Langchain': 'Langchain is a project that is trying to add more complex '
'memory structures, including a key-value store for entities '
'mentioned so far in the conversation.',
'Sam': 'Sam is working on a hackathon project with Deven, trying to add more '
'complex memory structures to Langchain, including a key-value store '
'for entities mentioned so far in the conversation. They seem to have '
'a great idea for how the key-value store can help, and Sam is also '
'the founder of a company called Daimon.'}
```
</CodeOutputBlock>
```python
conversation.predict(input="Sam is the founder of a company called Daimon.")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
You are an assistant to a human, powered by a large language model trained by OpenAI.
You are designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, you are able to generate human-like text based on the input you receive, allowing you to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
You are constantly learning and improving, and your capabilities are constantly evolving. You are able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. You have access to some personalized information provided by the human in the Context section below. Additionally, you are able to generate your own text based on the input you receive, allowing you to engage in discussions and provide explanations and descriptions on a wide range of topics.
Overall, you are a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether the human needs help with a specific question or just wants to have a conversation about a particular topic, you are here to assist.
Context:
{'Daimon': 'Daimon is a company founded by Sam, a successful entrepreneur.', 'Sam': 'Sam is working on a hackathon project with Deven, trying to add more complex memory structures to Langchain, including a key-value store for entities mentioned so far in the conversation. They seem to have a great idea for how the key-value store can help, and Sam is also the founder of a company called Daimon.'}
Current conversation:
Human: They are adding in a key-value store for entities mentioned so far in the conversation.
AI: That sounds like a great idea! How will the key-value store help with the project?
Human: What do you know about Deven & Sam?
AI: Deven and Sam are working on a hackathon project together, trying to add more complex memory structures to Langchain, including a key-value store for entities mentioned so far in the conversation. They seem to be working hard on this project and have a great idea for how the key-value store can help.
Human: Sam is the founder of a company called Daimon.
AI:
That's impressive! It sounds like Sam is a very successful entrepreneur. What kind of company is Daimon?
Last line:
Human: Sam is the founder of a company called Daimon.
You:
> Finished chain.
" That's impressive! It sounds like Sam is a very successful entrepreneur. What kind of company is Daimon?"
```
</CodeOutputBlock>
```python
from pprint import pprint
pprint(conversation.memory.entity_store.store)
```
<CodeOutputBlock lang="python">
```
{'Daimon': 'Daimon is a company founded by Sam, a successful entrepreneur, who '
'is working on a hackathon project with Deven to add more complex '
'memory structures to Langchain.',
'Deven': 'Deven is working on a hackathon project with Sam, which they are '
'entering into a hackathon. They are trying to add more complex '
'memory structures to Langchain, including a key-value store for '
'entities mentioned so far in the conversation, and seem to be '
'working hard on this project with a great idea for how the '
'key-value store can help.',
'Key-Value Store': 'A key-value store is being added to the project to store '
'entities mentioned in the conversation.',
'Langchain': 'Langchain is a project that is trying to add more complex '
'memory structures, including a key-value store for entities '
'mentioned so far in the conversation.',
'Sam': 'Sam is working on a hackathon project with Deven, trying to add more '
'complex memory structures to Langchain, including a key-value store '
'for entities mentioned so far in the conversation. They seem to have '
'a great idea for how the key-value store can help, and Sam is also '
'the founder of a successful company called Daimon.'}
```
</CodeOutputBlock>
```python
conversation.predict(input="What do you know about Sam?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
You are an assistant to a human, powered by a large language model trained by OpenAI.
You are designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, you are able to generate human-like text based on the input you receive, allowing you to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
You are constantly learning and improving, and your capabilities are constantly evolving. You are able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. You have access to some personalized information provided by the human in the Context section below. Additionally, you are able to generate your own text based on the input you receive, allowing you to engage in discussions and provide explanations and descriptions on a wide range of topics.
Overall, you are a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether the human needs help with a specific question or just wants to have a conversation about a particular topic, you are here to assist.
Context:
{'Deven': 'Deven is working on a hackathon project with Sam, which they are entering into a hackathon. They are trying to add more complex memory structures to Langchain, including a key-value store for entities mentioned so far in the conversation, and seem to be working hard on this project with a great idea for how the key-value store can help.', 'Sam': 'Sam is working on a hackathon project with Deven, trying to add more complex memory structures to Langchain, including a key-value store for entities mentioned so far in the conversation. They seem to have a great idea for how the key-value store can help, and Sam is also the founder of a successful company called Daimon.', 'Langchain': 'Langchain is a project that is trying to add more complex memory structures, including a key-value store for entities mentioned so far in the conversation.', 'Daimon': 'Daimon is a company founded by Sam, a successful entrepreneur, who is working on a hackathon project with Deven to add more complex memory structures to Langchain.'}
Current conversation:
Human: What do you know about Deven & Sam?
AI: Deven and Sam are working on a hackathon project together, trying to add more complex memory structures to Langchain, including a key-value store for entities mentioned so far in the conversation. They seem to be working hard on this project and have a great idea for how the key-value store can help.
Human: Sam is the founder of a company called Daimon.
AI:
That's impressive! It sounds like Sam is a very successful entrepreneur. What kind of company is Daimon?
Human: Sam is the founder of a company called Daimon.
AI: That's impressive! It sounds like Sam is a very successful entrepreneur. What kind of company is Daimon?
Last line:
Human: What do you know about Sam?
You:
> Finished chain.
' Sam is the founder of a successful company called Daimon. He is also working on a hackathon project with Deven to add more complex memory structures to Langchain. They seem to have a great idea for how the key-value store can help.'
```
</CodeOutputBlock>

View file

@ -1,193 +0,0 @@
```python
from langchain.memory import ConversationSummaryMemory, ChatMessageHistory
from langchain.llms import OpenAI
```
```python
memory = ConversationSummaryMemory(llm=OpenAI(temperature=0))
memory.save_context({"input": "hi"}, {"output": "whats up"})
```
```python
memory.load_memory_variables({})
```
<CodeOutputBlock lang="python">
```
{'history': '\nThe human greets the AI, to which the AI responds.'}
```
</CodeOutputBlock>
We can also get the history as a list of messages (this is useful if you are using this with a chat model).
```python
memory = ConversationSummaryMemory(llm=OpenAI(temperature=0), return_messages=True)
memory.save_context({"input": "hi"}, {"output": "whats up"})
```
```python
memory.load_memory_variables({})
```
<CodeOutputBlock lang="python">
```
{'history': [SystemMessage(content='\nThe human greets the AI, to which the AI responds.', additional_kwargs={})]}
```
</CodeOutputBlock>
We can also utilize the `predict_new_summary` method directly.
```python
messages = memory.chat_memory.messages
previous_summary = ""
memory.predict_new_summary(messages, previous_summary)
```
<CodeOutputBlock lang="python">
```
'\nThe human greets the AI, to which the AI responds.'
```
</CodeOutputBlock>
## Initializing with messages
If you have messages outside this class, you can easily initialize the class with ChatMessageHistory. During loading, a summary will be calculated.
```python
history = ChatMessageHistory()
history.add_user_message("hi")
history.add_ai_message("hi there!")
```
```python
memory = ConversationSummaryMemory.from_messages(llm=OpenAI(temperature=0), chat_memory=history, return_messages=True)
```
```python
memory.buffer
```
<CodeOutputBlock lang="python">
```
'\nThe human greets the AI, to which the AI responds with a friendly greeting.'
```
</CodeOutputBlock>
## Using in a chain
Let's walk through an example of using this in a chain, again setting `verbose=True` so we can see the prompt.
```python
from langchain.llms import OpenAI
from langchain.chains import ConversationChain
llm = OpenAI(temperature=0)
conversation_with_summary = ConversationChain(
llm=llm,
memory=ConversationSummaryMemory(llm=OpenAI()),
verbose=True
)
conversation_with_summary.predict(input="Hi, what's up?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
Human: Hi, what's up?
AI:
> Finished chain.
" Hi there! I'm doing great. I'm currently helping a customer with a technical issue. How about you?"
```
</CodeOutputBlock>
```python
conversation_with_summary.predict(input="Tell me more about it!")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
The human greeted the AI and asked how it was doing. The AI replied that it was doing great and was currently helping a customer with a technical issue.
Human: Tell me more about it!
AI:
> Finished chain.
" Sure! The customer is having trouble with their computer not connecting to the internet. I'm helping them troubleshoot the issue and figure out what the problem is. So far, we've tried resetting the router and checking the network settings, but the issue still persists. We're currently looking into other possible solutions."
```
</CodeOutputBlock>
```python
conversation_with_summary.predict(input="Very cool -- what is the scope of the project?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
The human greeted the AI and asked how it was doing. The AI replied that it was doing great and was currently helping a customer with a technical issue where their computer was not connecting to the internet. The AI was troubleshooting the issue and had already tried resetting the router and checking the network settings, but the issue still persisted and they were looking into other possible solutions.
Human: Very cool -- what is the scope of the project?
AI:
> Finished chain.
" The scope of the project is to troubleshoot the customer's computer issue and find a solution that will allow them to connect to the internet. We are currently exploring different possibilities and have already tried resetting the router and checking the network settings, but the issue still persists."
```
</CodeOutputBlock>

View file

@ -1,229 +0,0 @@
```python
from datetime import datetime
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.memory import VectorStoreRetrieverMemory
from langchain.chains import ConversationChain
from langchain.prompts import PromptTemplate
```
### Initialize your VectorStore
Depending on the store you choose, this step may look different. Consult the relevant VectorStore documentation for more details.
```python
import faiss
from langchain.docstore import InMemoryDocstore
from langchain.vectorstores import FAISS
embedding_size = 1536 # Dimensions of the OpenAIEmbeddings
index = faiss.IndexFlatL2(embedding_size)
embedding_fn = OpenAIEmbeddings().embed_query
vectorstore = FAISS(embedding_fn, index, InMemoryDocstore({}), {})
```
### Create your the VectorStoreRetrieverMemory
The memory object is instantiated from any VectorStoreRetriever.
```python
# In actual usage, you would set `k` to be a higher value, but we use k=1 to show that
# the vector lookup still returns the semantically relevant information
retriever = vectorstore.as_retriever(search_kwargs=dict(k=1))
memory = VectorStoreRetrieverMemory(retriever=retriever)
# When added to an agent, the memory object can save pertinent information from conversations or used tools
memory.save_context({"input": "My favorite food is pizza"}, {"output": "that's good to know"})
memory.save_context({"input": "My favorite sport is soccer"}, {"output": "..."})
memory.save_context({"input": "I don't the Celtics"}, {"output": "ok"}) #
```
```python
# Notice the first result returned is the memory pertaining to tax help, which the language model deems more semantically relevant
# to a 1099 than the other documents, despite them both containing numbers.
print(memory.load_memory_variables({"prompt": "what sport should i watch?"})["history"])
```
<CodeOutputBlock lang="python">
```
input: My favorite sport is soccer
output: ...
```
</CodeOutputBlock>
## Using in a chain
Let's walk through an example, again setting `verbose=True` so we can see the prompt.
```python
llm = OpenAI(temperature=0) # Can be any valid LLM
_DEFAULT_TEMPLATE = """The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Relevant pieces of previous conversation:
{history}
(You do not need to use these pieces of information if not relevant)
Current conversation:
Human: {input}
AI:"""
PROMPT = PromptTemplate(
input_variables=["history", "input"], template=_DEFAULT_TEMPLATE
)
conversation_with_summary = ConversationChain(
llm=llm,
prompt=PROMPT,
# We set a very low max_token_limit for the purposes of testing.
memory=memory,
verbose=True
)
conversation_with_summary.predict(input="Hi, my name is Perry, what's up?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Relevant pieces of previous conversation:
input: My favorite food is pizza
output: that's good to know
(You do not need to use these pieces of information if not relevant)
Current conversation:
Human: Hi, my name is Perry, what's up?
AI:
> Finished chain.
" Hi Perry, I'm doing well. How about you?"
```
</CodeOutputBlock>
```python
# Here, the basketball related content is surfaced
conversation_with_summary.predict(input="what's my favorite sport?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Relevant pieces of previous conversation:
input: My favorite sport is soccer
output: ...
(You do not need to use these pieces of information if not relevant)
Current conversation:
Human: what's my favorite sport?
AI:
> Finished chain.
' You told me earlier that your favorite sport is soccer.'
```
</CodeOutputBlock>
```python
# Even though the language model is stateless, since relevant memory is fetched, it can "reason" about the time.
# Timestamping memories and data is useful in general to let the agent determine temporal relevance
conversation_with_summary.predict(input="Whats my favorite food")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Relevant pieces of previous conversation:
input: My favorite food is pizza
output: that's good to know
(You do not need to use these pieces of information if not relevant)
Current conversation:
Human: Whats my favorite food
AI:
> Finished chain.
' You said your favorite food is pizza.'
```
</CodeOutputBlock>
```python
# The memories from the conversation are automatically stored,
# since this query best matches the introduction chat above,
# the agent is able to 'remember' the user's name.
conversation_with_summary.predict(input="What's my name?")
```
<CodeOutputBlock lang="python">
```
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
Relevant pieces of previous conversation:
input: Hi, my name is Perry, what's up?
response: Hi Perry, I'm doing well. How about you?
(You do not need to use these pieces of information if not relevant)
Current conversation:
Human: What's my name?
AI:
> Finished chain.
' Your name is Perry.'
```
</CodeOutputBlock>

View file

@ -1,120 +0,0 @@
### Setup
To start we'll need to install the OpenAI Python package:
```bash
pip install openai
```
Accessing the API requires an API key, which you can get by creating an account and heading [here](https://platform.openai.com/account/api-keys). Once we have a key we'll want to set it as an environment variable by running:
```bash
export OPENAI_API_KEY="..."
```
If you'd prefer not to set an environment variable you can pass the key in directly via the `openai_api_key` named parameter when initiating the OpenAI LLM class:
```python
from langchain.chat_models import ChatOpenAI
chat = ChatOpenAI(openai_api_key="...")
```
otherwise you can initialize without any params:
```python
from langchain.chat_models import ChatOpenAI
chat = ChatOpenAI()
```
### Messages
The chat model interface is based around messages rather than raw text.
The types of messages currently supported in LangChain are `AIMessage`, `HumanMessage`, `SystemMessage`, and `ChatMessage` -- `ChatMessage` takes in an arbitrary role parameter. Most of the time, you'll just be dealing with `HumanMessage`, `AIMessage`, and `SystemMessage`
### `__call__`
#### Messages in -> message out
You can get chat completions by passing one or more messages to the chat model. The response will be a message.
```python
from langchain.schema import (
AIMessage,
HumanMessage,
SystemMessage
)
chat([HumanMessage(content="Translate this sentence from English to French: I love programming.")])
```
<CodeOutputBlock lang="python">
```
AIMessage(content="J'aime programmer.", additional_kwargs={})
```
</CodeOutputBlock>
OpenAI's chat model supports multiple messages as input. See [here](https://platform.openai.com/docs/guides/chat/chat-vs-completions) for more information. Here is an example of sending a system and user message to the chat model:
```python
messages = [
SystemMessage(content="You are a helpful assistant that translates English to French."),
HumanMessage(content="I love programming.")
]
chat(messages)
```
<CodeOutputBlock lang="python">
```
AIMessage(content="J'aime programmer.", additional_kwargs={})
```
</CodeOutputBlock>
### `generate`
#### Batch calls, richer outputs
You can go one step further and generate completions for multiple sets of messages using `generate`. This returns an `LLMResult` with an additional `message` parameter.
```python
batch_messages = [
[
SystemMessage(content="You are a helpful assistant that translates English to French."),
HumanMessage(content="I love programming.")
],
[
SystemMessage(content="You are a helpful assistant that translates English to French."),
HumanMessage(content="I love artificial intelligence.")
],
]
result = chat.generate(batch_messages)
result
```
<CodeOutputBlock lang="python">
```
LLMResult(generations=[[ChatGeneration(text="J'aime programmer.", generation_info=None, message=AIMessage(content="J'aime programmer.", additional_kwargs={}))], [ChatGeneration(text="J'aime l'intelligence artificielle.", generation_info=None, message=AIMessage(content="J'aime l'intelligence artificielle.", additional_kwargs={}))]], llm_output={'token_usage': {'prompt_tokens': 57, 'completion_tokens': 20, 'total_tokens': 77}})
```
</CodeOutputBlock>
You can recover things like token usage from this LLMResult
```python
result.llm_output
```
<CodeOutputBlock lang="python">
```
{'token_usage': {'prompt_tokens': 57,
'completion_tokens': 20,
'total_tokens': 77}}
```
</CodeOutputBlock>

View file

@ -1,97 +0,0 @@
```python
import langchain
from langchain.chat_models import ChatOpenAI
llm = ChatOpenAI()
```
## In Memory Cache
```python
from langchain.cache import InMemoryCache
langchain.llm_cache = InMemoryCache()
# The first time, it is not yet in cache, so it should take longer
llm.predict("Tell me a joke")
```
<CodeOutputBlock lang="python">
```
CPU times: user 35.9 ms, sys: 28.6 ms, total: 64.6 ms
Wall time: 4.83 s
"\n\nWhy couldn't the bicycle stand up by itself? It was...two tired!"
```
</CodeOutputBlock>
```python
# The second time it is, so it goes faster
llm.predict("Tell me a joke")
```
<CodeOutputBlock lang="python">
```
CPU times: user 238 µs, sys: 143 µs, total: 381 µs
Wall time: 1.76 ms
'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'
```
</CodeOutputBlock>
## SQLite Cache
```bash
rm .langchain.db
```
```python
# We can do the same thing with a SQLite cache
from langchain.cache import SQLiteCache
langchain.llm_cache = SQLiteCache(database_path=".langchain.db")
```
```python
# The first time, it is not yet in cache, so it should take longer
llm.predict("Tell me a joke")
```
<CodeOutputBlock lang="python">
```
CPU times: user 17 ms, sys: 9.76 ms, total: 26.7 ms
Wall time: 825 ms
'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'
```
</CodeOutputBlock>
```python
# The second time it is, so it goes faster
llm.predict("Tell me a joke")
```
<CodeOutputBlock lang="python">
```
CPU times: user 2.46 ms, sys: 1.23 ms, total: 3.7 ms
Wall time: 2.67 ms
'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'
```
</CodeOutputBlock>

View file

@ -1,16 +0,0 @@
```python
chain = LLMChain(llm=chat, prompt=chat_prompt)
```
```python
chain.run(input_language="English", output_language="French", text="I love programming.")
```
<CodeOutputBlock lang="python">
```
"J'adore la programmation."
```
</CodeOutputBlock>

View file

@ -1,47 +0,0 @@
You can make use of templating by using a `MessagePromptTemplate`. You can build a `ChatPromptTemplate` from one or more `MessagePromptTemplates`. You can use `ChatPromptTemplate`'s `format_prompt` -- this returns a `PromptValue`, which you can convert to a string or Message object, depending on whether you want to use the formatted value as input to an llm or chat model.
For convenience, there is a `from_template` method exposed on the template. If you were to use this template, this is what it would look like:
```python
from langchain import PromptTemplate
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
AIMessagePromptTemplate,
HumanMessagePromptTemplate,
)
template="You are a helpful assistant that translates {input_language} to {output_language}."
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template="{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
```
```python
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
# get a chat completion from the formatted messages
chat(chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.").to_messages())
```
<CodeOutputBlock lang="python">
```
AIMessage(content="J'adore la programmation.", additional_kwargs={})
```
</CodeOutputBlock>
If you wanted to construct the MessagePromptTemplate more directly, you could create a PromptTemplate outside and then pass it in, eg:
```python
prompt=PromptTemplate(
template="You are a helpful assistant that translates {input_language} to {output_language}.",
input_variables=["input_language", "output_language"],
)
system_message_prompt = SystemMessagePromptTemplate(prompt=prompt)
```

View file

@ -1,59 +0,0 @@
```python
from langchain.chat_models import ChatOpenAI
from langchain.schema import (
HumanMessage,
)
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
chat = ChatOpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0)
resp = chat([HumanMessage(content="Write me a song about sparkling water.")])
```
<CodeOutputBlock lang="python">
```
Verse 1:
Bubbles rising to the top
A refreshing drink that never stops
Clear and crisp, it's pure delight
A taste that's sure to excite
Chorus:
Sparkling water, oh so fine
A drink that's always on my mind
With every sip, I feel alive
Sparkling water, you're my vibe
Verse 2:
No sugar, no calories, just pure bliss
A drink that's hard to resist
It's the perfect way to quench my thirst
A drink that always comes first
Chorus:
Sparkling water, oh so fine
A drink that's always on my mind
With every sip, I feel alive
Sparkling water, you're my vibe
Bridge:
From the mountains to the sea
Sparkling water, you're the key
To a healthy life, a happy soul
A drink that makes me feel whole
Chorus:
Sparkling water, oh so fine
A drink that's always on my mind
With every sip, I feel alive
Sparkling water, you're my vibe
Outro:
Sparkling water, you're the one
A drink that's always so much fun
I'll never let you go, my friend
Sparkling
```
</CodeOutputBlock>

View file

@ -1,108 +0,0 @@
### Setup
To start we'll need to install the OpenAI Python package:
```bash
pip install openai
```
Accessing the API requires an API key, which you can get by creating an account and heading [here](https://platform.openai.com/account/api-keys). Once we have a key we'll want to set it as an environment variable by running:
```bash
export OPENAI_API_KEY="..."
```
If you'd prefer not to set an environment variable you can pass the key in directly via the `openai_api_key` named parameter when initiating the OpenAI LLM class:
```python
from langchain.llms import OpenAI
llm = OpenAI(openai_api_key="...")
```
otherwise you can initialize without any params:
```python
from langchain.llms import OpenAI
llm = OpenAI()
```
### `__call__`: string in -> string out
The simplest way to use an LLM is a callable: pass in a string, get a string completion.
```python
llm("Tell me a joke")
```
<CodeOutputBlock lang="python">
```
'Why did the chicken cross the road?\n\nTo get to the other side.'
```
</CodeOutputBlock>
### `generate`: batch calls, richer outputs
`generate` lets you can call the model with a list of strings, getting back a more complete response than just the text. This complete response can includes things like multiple top responses and other LLM provider-specific information:
```python
llm_result = llm.generate(["Tell me a joke", "Tell me a poem"]*15)
```
```python
len(llm_result.generations)
```
<CodeOutputBlock lang="python">
```
30
```
</CodeOutputBlock>
```python
llm_result.generations[0]
```
<CodeOutputBlock lang="python">
```
[Generation(text='\n\nWhy did the chicken cross the road?\n\nTo get to the other side!'),
Generation(text='\n\nWhy did the chicken cross the road?\n\nTo get to the other side.')]
```
</CodeOutputBlock>
```python
llm_result.generations[-1]
```
<CodeOutputBlock lang="python">
```
[Generation(text="\n\nWhat if love neverspeech\n\nWhat if love never ended\n\nWhat if love was only a feeling\n\nI'll never know this love\n\nIt's not a feeling\n\nBut it's what we have for each other\n\nWe just know that love is something strong\n\nAnd we can't help but be happy\n\nWe just feel what love is for us\n\nAnd we love each other with all our heart\n\nWe just don't know how\n\nHow it will go\n\nBut we know that love is something strong\n\nAnd we'll always have each other\n\nIn our lives."),
Generation(text='\n\nOnce upon a time\n\nThere was a love so pure and true\n\nIt lasted for centuries\n\nAnd never became stale or dry\n\nIt was moving and alive\n\nAnd the heart of the love-ick\n\nIs still beating strong and true.')]
```
</CodeOutputBlock>
You can also access provider specific information that is returned. This information is NOT standardized across providers.
```python
llm_result.llm_output
```
<CodeOutputBlock lang="python">
```
{'token_usage': {'completion_tokens': 3903,
'total_tokens': 4023,
'prompt_tokens': 120}}
```
</CodeOutputBlock>

View file

@ -1,177 +0,0 @@
```python
import langchain
from langchain.llms import OpenAI
# To make the caching really obvious, lets use a slower model.
llm = OpenAI(model_name="text-davinci-002", n=2, best_of=2)
```
## In Memory Cache
```python
from langchain.cache import InMemoryCache
langchain.llm_cache = InMemoryCache()
# The first time, it is not yet in cache, so it should take longer
llm.predict("Tell me a joke")
```
<CodeOutputBlock lang="python">
```
CPU times: user 35.9 ms, sys: 28.6 ms, total: 64.6 ms
Wall time: 4.83 s
"\n\nWhy couldn't the bicycle stand up by itself? It was...two tired!"
```
</CodeOutputBlock>
```python
# The second time it is, so it goes faster
llm.predict("Tell me a joke")
```
<CodeOutputBlock lang="python">
```
CPU times: user 238 µs, sys: 143 µs, total: 381 µs
Wall time: 1.76 ms
'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'
```
</CodeOutputBlock>
## SQLite Cache
```bash
rm .langchain.db
```
```python
# We can do the same thing with a SQLite cache
from langchain.cache import SQLiteCache
langchain.llm_cache = SQLiteCache(database_path=".langchain.db")
```
```python
# The first time, it is not yet in cache, so it should take longer
llm.predict("Tell me a joke")
```
<CodeOutputBlock lang="python">
```
CPU times: user 17 ms, sys: 9.76 ms, total: 26.7 ms
Wall time: 825 ms
'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'
```
</CodeOutputBlock>
```python
# The second time it is, so it goes faster
llm.predict("Tell me a joke")
```
<CodeOutputBlock lang="python">
```
CPU times: user 2.46 ms, sys: 1.23 ms, total: 3.7 ms
Wall time: 2.67 ms
'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'
```
</CodeOutputBlock>
## Optional Caching in Chains
You can also turn off caching for particular nodes in chains. Note that because of certain interfaces, its often easier to construct the chain first, and then edit the LLM afterwards.
As an example, we will load a summarizer map-reduce chain. We will cache results for the map-step, but then not freeze it for the combine step.
```python
llm = OpenAI(model_name="text-davinci-002")
no_cache_llm = OpenAI(model_name="text-davinci-002", cache=False)
```
```python
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
text_splitter = CharacterTextSplitter()
```
```python
with open('../../../state_of_the_union.txt') as f:
state_of_the_union = f.read()
texts = text_splitter.split_text(state_of_the_union)
```
```python
from langchain.docstore.document import Document
docs = [Document(page_content=t) for t in texts[:3]]
from langchain.chains.summarize import load_summarize_chain
```
```python
chain = load_summarize_chain(llm, chain_type="map_reduce", reduce_llm=no_cache_llm)
```
```python
chain.run(docs)
```
<CodeOutputBlock lang="python">
```
CPU times: user 452 ms, sys: 60.3 ms, total: 512 ms
Wall time: 5.09 s
'\n\nPresident Biden is discussing the American Rescue Plan and the Bipartisan Infrastructure Law, which will create jobs and help Americans. He also talks about his vision for America, which includes investing in education and infrastructure. In response to Russian aggression in Ukraine, the United States is joining with European allies to impose sanctions and isolate Russia. American forces are being mobilized to protect NATO countries in the event that Putin decides to keep moving west. The Ukrainians are bravely fighting back, but the next few weeks will be hard for them. Putin will pay a high price for his actions in the long run. Americans should not be alarmed, as the United States is taking action to protect its interests and allies.'
```
</CodeOutputBlock>
When we run it again, we see that it runs substantially faster but the final answer is different. This is due to caching at the map steps, but not at the reduce step.
```python
chain.run(docs)
```
<CodeOutputBlock lang="python">
```
CPU times: user 11.5 ms, sys: 4.33 ms, total: 15.8 ms
Wall time: 1.04 s
'\n\nPresident Biden is discussing the American Rescue Plan and the Bipartisan Infrastructure Law, which will create jobs and help Americans. He also talks about his vision for America, which includes investing in education and infrastructure.'
```
</CodeOutputBlock>
```bash
rm .langchain.db sqlite.db
```

View file

@ -1,70 +0,0 @@
Currently, we support streaming for a broad range of LLM implementations, including but not limited to `OpenAI`, `ChatOpenAI`, `ChatAnthropic`, `Hugging Face Text Generation Inference`, and `Replicate`. This feature has been expanded to accommodate most of the models. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using `StreamingStdOutCallbackHandler`.
```python
from langchain.llms import OpenAI
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
llm = OpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0)
resp = llm("Write me a song about sparkling water.")
```
<CodeOutputBlock lang="python">
```
Verse 1
I'm sippin' on sparkling water,
It's so refreshing and light,
It's the perfect way to quench my thirst
On a hot summer night.
Chorus
Sparkling water, sparkling water,
It's the best way to stay hydrated,
It's so crisp and so clean,
It's the perfect way to stay refreshed.
Verse 2
I'm sippin' on sparkling water,
It's so bubbly and bright,
It's the perfect way to cool me down
On a hot summer night.
Chorus
Sparkling water, sparkling water,
It's the best way to stay hydrated,
It's so crisp and so clean,
It's the perfect way to stay refreshed.
Verse 3
I'm sippin' on sparkling water,
It's so light and so clear,
It's the perfect way to keep me cool
On a hot summer night.
Chorus
Sparkling water, sparkling water,
It's the best way to stay hydrated,
It's so crisp and so clean,
It's the perfect way to stay refreshed.
```
</CodeOutputBlock>
We still have access to the end `LLMResult` if using `generate`. However, `token_usage` is not currently supported for streaming.
```python
llm.generate(["Tell me a joke."])
```
<CodeOutputBlock lang="python">
```
Q: What did the fish say when it hit the wall?
A: Dam!
LLMResult(generations=[[Generation(text='\n\nQ: What did the fish say when it hit the wall?\nA: Dam!', generation_info={'finish_reason': 'stop', 'logprobs': None})]], llm_output={'token_usage': {}, 'model_name': 'text-davinci-003'})
```
</CodeOutputBlock>

View file

@ -1,46 +0,0 @@
```python
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
output_parser = CommaSeparatedListOutputParser()
```
```python
format_instructions = output_parser.get_format_instructions()
prompt = PromptTemplate(
template="List five {subject}.\n{format_instructions}",
input_variables=["subject"],
partial_variables={"format_instructions": format_instructions}
)
```
```python
model = OpenAI(temperature=0)
```
```python
_input = prompt.format(subject="ice cream flavors")
output = model(_input)
```
```python
output_parser.parse(output)
```
<CodeOutputBlock lang="python">
```
['Vanilla',
'Chocolate',
'Strawberry',
'Mint Chocolate Chip',
'Cookies and Cream']
```
</CodeOutputBlock>

View file

@ -1,76 +0,0 @@
---
sidebar_position: 2
---
Below we go over the main type of output parser, the `PydanticOutputParser`.
```python
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator
from typing import List
```
```python
model_name = 'text-davinci-003'
temperature = 0.0
model = OpenAI(model_name=model_name, temperature=temperature)
```
```python
# Define your desired data structure.
class Joke(BaseModel):
setup: str = Field(description="question to set up a joke")
punchline: str = Field(description="answer to resolve the joke")
# You can add custom validation logic easily with Pydantic.
@validator('setup')
def question_ends_with_question_mark(cls, field):
if field[-1] != '?':
raise ValueError("Badly formed question!")
return field
```
```python
# Set up a parser + inject instructions into the prompt template.
parser = PydanticOutputParser(pydantic_object=Joke)
```
```python
prompt = PromptTemplate(
template="Answer the user query.\n{format_instructions}\n{query}\n",
input_variables=["query"],
partial_variables={"format_instructions": parser.get_format_instructions()}
)
```
```python
# And a query intended to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."
_input = prompt.format_prompt(query=joke_query)
```
```python
output = model(_input.to_string())
```
```python
parser.parse(output)
```
<CodeOutputBlock lang="python">
```
Joke(setup='Why did the chicken cross the road?', punchline='To get to the other side!')
```
</CodeOutputBlock>

View file

@ -1,112 +0,0 @@
For this example, we'll use the above Pydantic output parser. Here's what happens if we pass it a result that does not comply with the schema:
```python
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator
from typing import List
```
```python
class Actor(BaseModel):
name: str = Field(description="name of an actor")
film_names: List[str] = Field(description="list of names of films they starred in")
actor_query = "Generate the filmography for a random actor."
parser = PydanticOutputParser(pydantic_object=Actor)
```
```python
misformatted = "{'name': 'Tom Hanks', 'film_names': ['Forrest Gump']}"
```
```python
parser.parse(misformatted)
```
<CodeOutputBlock lang="python">
```
---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
File ~/workplace/langchain/langchain/output_parsers/pydantic.py:23, in PydanticOutputParser.parse(self, text)
22 json_str = match.group()
---> 23 json_object = json.loads(json_str)
24 return self.pydantic_object.parse_obj(json_object)
File ~/.pyenv/versions/3.9.1/lib/python3.9/json/__init__.py:346, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
343 if (cls is None and object_hook is None and
344 parse_int is None and parse_float is None and
345 parse_constant is None and object_pairs_hook is None and not kw):
--> 346 return _default_decoder.decode(s)
347 if cls is None:
File ~/.pyenv/versions/3.9.1/lib/python3.9/json/decoder.py:337, in JSONDecoder.decode(self, s, _w)
333 """Return the Python representation of ``s`` (a ``str`` instance
334 containing a JSON document).
335
336 """
--> 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
338 end = _w(s, end).end()
File ~/.pyenv/versions/3.9.1/lib/python3.9/json/decoder.py:353, in JSONDecoder.raw_decode(self, s, idx)
352 try:
--> 353 obj, end = self.scan_once(s, idx)
354 except StopIteration as err:
JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
During handling of the above exception, another exception occurred:
OutputParserException Traceback (most recent call last)
Cell In[6], line 1
----> 1 parser.parse(misformatted)
File ~/workplace/langchain/langchain/output_parsers/pydantic.py:29, in PydanticOutputParser.parse(self, text)
27 name = self.pydantic_object.__name__
28 msg = f"Failed to parse {name} from completion {text}. Got: {e}"
---> 29 raise OutputParserException(msg)
OutputParserException: Failed to parse Actor from completion {'name': 'Tom Hanks', 'film_names': ['Forrest Gump']}. Got: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
```
</CodeOutputBlock>
Now we can construct and use a `OutputFixingParser`. This output parser takes as an argument another output parser but also an LLM with which to try to correct any formatting mistakes.
```python
from langchain.output_parsers import OutputFixingParser
new_parser = OutputFixingParser.from_llm(parser=parser, llm=ChatOpenAI())
```
```python
new_parser.parse(misformatted)
```
<CodeOutputBlock lang="python">
```
Actor(name='Tom Hanks', film_names=['Forrest Gump'])
```
</CodeOutputBlock>

View file

@ -1,93 +0,0 @@
```python
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
```
Here we define the response schema we want to receive.
```python
response_schemas = [
ResponseSchema(name="answer", description="answer to the user's question"),
ResponseSchema(name="source", description="source used to answer the user's question, should be a website.")
]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
```
We now get a string that contains instructions for how the response should be formatted, and we then insert that into our prompt.
```python
format_instructions = output_parser.get_format_instructions()
prompt = PromptTemplate(
template="answer the users question as best as possible.\n{format_instructions}\n{question}",
input_variables=["question"],
partial_variables={"format_instructions": format_instructions}
)
```
We can now use this to format a prompt to send to the language model, and then parse the returned result.
```python
model = OpenAI(temperature=0)
```
```python
_input = prompt.format_prompt(question="what's the capital of france?")
output = model(_input.to_string())
```
```python
output_parser.parse(output)
```
<CodeOutputBlock lang="python">
```
{'answer': 'Paris',
'source': 'https://www.worldatlas.com/articles/what-is-the-capital-of-france.html'}
```
</CodeOutputBlock>
And here's an example of using this in a chat model
```python
chat_model = ChatOpenAI(temperature=0)
```
```python
prompt = ChatPromptTemplate(
messages=[
HumanMessagePromptTemplate.from_template("answer the users question as best as possible.\n{format_instructions}\n{question}")
],
input_variables=["question"],
partial_variables={"format_instructions": format_instructions}
)
```
```python
_input = prompt.format_prompt(question="what's the capital of france?")
output = chat_model(_input.to_messages())
```
```python
output_parser.parse(output.content)
```
<CodeOutputBlock lang="python">
```
{'answer': 'Paris', 'source': 'https://en.wikipedia.org/wiki/Paris'}
```
</CodeOutputBlock>

View file

@ -1,10 +0,0 @@
```python
class BaseExampleSelector(ABC):
"""Interface for selecting examples to include in prompts."""
@abstractmethod
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
"""Select which examples to use based on the inputs."""
```
The only method it needs to expose is a ``select_examples`` method. This takes in the input variables and then returns a list of examples. It is up to each specific implementation as to how those examples are selected. Let's take a look at some below.

View file

@ -1,130 +0,0 @@
```python
from langchain.prompts import PromptTemplate
from langchain.prompts import FewShotPromptTemplate
from langchain.prompts.example_selector import LengthBasedExampleSelector
# These are a lot of examples of a pretend task of creating antonyms.
examples = [
{"input": "happy", "output": "sad"},
{"input": "tall", "output": "short"},
{"input": "energetic", "output": "lethargic"},
{"input": "sunny", "output": "gloomy"},
{"input": "windy", "output": "calm"},
example_prompt = PromptTemplate(
input_variables=["input", "output"],
template="Input: {input}\nOutput: {output}",
)
example_selector = LengthBasedExampleSelector(
# These are the examples it has available to choose from.
examples=examples,
# This is the PromptTemplate being used to format the examples.
example_prompt=example_prompt,
# This is the maximum length that the formatted examples should be.
# Length is measured by the get_text_length function below.
max_length=25,
# This is the function used to get the length of a string, which is used
# to determine which examples to include. It is commented out because
# it is provided as a default value if none is specified.
# get_text_length: Callable[[str], int] = lambda x: len(re.split("\n| ", x))
)
dynamic_prompt = FewShotPromptTemplate(
# We provide an ExampleSelector instead of examples.
example_selector=example_selector,
example_prompt=example_prompt,
prefix="Give the antonym of every input",
suffix="Input: {adjective}\nOutput:",
input_variables=["adjective"],
)
```
```python
# An example with small input, so it selects all examples.
print(dynamic_prompt.format(adjective="big"))
```
<CodeOutputBlock lang="python">
```
Give the antonym of every input
Input: happy
Output: sad
Input: tall
Output: short
Input: energetic
Output: lethargic
Input: sunny
Output: gloomy
Input: windy
Output: calm
Input: big
Output:
```
</CodeOutputBlock>
```python
# An example with long input, so it selects only one example.
long_string = "big and huge and massive and large and gigantic and tall and much much much much much bigger than everything else"
print(dynamic_prompt.format(adjective=long_string))
```
<CodeOutputBlock lang="python">
```
Give the antonym of every input
Input: happy
Output: sad
Input: big and huge and massive and large and gigantic and tall and much much much much much bigger than everything else
Output:
```
</CodeOutputBlock>
```python
# You can add an example to an example selector as well.
new_example = {"input": "big", "output": "small"}
dynamic_prompt.example_selector.add_example(new_example)
print(dynamic_prompt.format(adjective="enthusiastic"))
```
<CodeOutputBlock lang="python">
```
Give the antonym of every input
Input: happy
Output: sad
Input: tall
Output: short
Input: energetic
Output: lethargic
Input: sunny
Output: gloomy
Input: windy
Output: calm
Input: big
Output: small
Input: enthusiastic
Output:
```
</CodeOutputBlock>

View file

@ -1,112 +0,0 @@
```python
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
example_prompt = PromptTemplate(
input_variables=["input", "output"],
template="Input: {input}\nOutput: {output}",
)
# These are a lot of examples of a pretend task of creating antonyms.
examples = [
{"input": "happy", "output": "sad"},
{"input": "tall", "output": "short"},
{"input": "energetic", "output": "lethargic"},
{"input": "sunny", "output": "gloomy"},
{"input": "windy", "output": "calm"},
]
```
```python
example_selector = SemanticSimilarityExampleSelector.from_examples(
# This is the list of examples available to select from.
examples,
# This is the embedding class used to produce embeddings which are used to measure semantic similarity.
OpenAIEmbeddings(),
# This is the VectorStore class that is used to store the embeddings and do a similarity search over.
Chroma,
# This is the number of examples to produce.
k=1
)
similar_prompt = FewShotPromptTemplate(
# We provide an ExampleSelector instead of examples.
example_selector=example_selector,
example_prompt=example_prompt,
prefix="Give the antonym of every input",
suffix="Input: {adjective}\nOutput:",
input_variables=["adjective"],
)
```
<CodeOutputBlock lang="python">
```
Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.
```
</CodeOutputBlock>
```python
# Input is a feeling, so should select the happy/sad example
print(similar_prompt.format(adjective="worried"))
```
<CodeOutputBlock lang="python">
```
Give the antonym of every input
Input: happy
Output: sad
Input: worried
Output:
```
</CodeOutputBlock>
```python
# Input is a measurement, so should select the tall/short example
print(similar_prompt.format(adjective="fat"))
```
<CodeOutputBlock lang="python">
```
Give the antonym of every input
Input: happy
Output: sad
Input: fat
Output:
```
</CodeOutputBlock>
```python
# You can add new examples to the SemanticSimilarityExampleSelector as well
similar_prompt.example_selector.add_example({"input": "enthusiastic", "output": "apathetic"})
print(similar_prompt.format(adjective="joyful"))
```
<CodeOutputBlock lang="python">
```
Give the antonym of every input
Input: happy
Output: sad
Input: joyful
Output:
```
</CodeOutputBlock>

View file

@ -1,257 +0,0 @@
### Use Case
In this tutorial, we'll configure few shot examples for self-ask with search.
## Using an example set
### Create the example set
To get started, create a list of few shot examples. Each example should be a dictionary with the keys being the input variables and the values being the values for those input variables.
```python
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts.prompt import PromptTemplate
examples = [
{
"question": "Who lived longer, Muhammad Ali or Alan Turing?",
"answer":
"""
Are follow up questions needed here: Yes.
Follow up: How old was Muhammad Ali when he died?
Intermediate answer: Muhammad Ali was 74 years old when he died.
Follow up: How old was Alan Turing when he died?
Intermediate answer: Alan Turing was 41 years old when he died.
So the final answer is: Muhammad Ali
"""
},
{
"question": "When was the founder of craigslist born?",
"answer":
"""
Are follow up questions needed here: Yes.
Follow up: Who was the founder of craigslist?
Intermediate answer: Craigslist was founded by Craig Newmark.
Follow up: When was Craig Newmark born?
Intermediate answer: Craig Newmark was born on December 6, 1952.
So the final answer is: December 6, 1952
"""
},
{
"question": "Who was the maternal grandfather of George Washington?",
"answer":
"""
Are follow up questions needed here: Yes.
Follow up: Who was the mother of George Washington?
Intermediate answer: The mother of George Washington was Mary Ball Washington.
Follow up: Who was the father of Mary Ball Washington?
Intermediate answer: The father of Mary Ball Washington was Joseph Ball.
So the final answer is: Joseph Ball
"""
},
{
"question": "Are both the directors of Jaws and Casino Royale from the same country?",
"answer":
"""
Are follow up questions needed here: Yes.
Follow up: Who is the director of Jaws?
Intermediate Answer: The director of Jaws is Steven Spielberg.
Follow up: Where is Steven Spielberg from?
Intermediate Answer: The United States.
Follow up: Who is the director of Casino Royale?
Intermediate Answer: The director of Casino Royale is Martin Campbell.
Follow up: Where is Martin Campbell from?
Intermediate Answer: New Zealand.
So the final answer is: No
"""
}
]
```
### Create a formatter for the few shot examples
Configure a formatter that will format the few shot examples into a string. This formatter should be a `PromptTemplate` object.
```python
example_prompt = PromptTemplate(input_variables=["question", "answer"], template="Question: {question}\n{answer}")
print(example_prompt.format(**examples[0]))
```
<CodeOutputBlock lang="python">
```
Question: Who lived longer, Muhammad Ali or Alan Turing?
Are follow up questions needed here: Yes.
Follow up: How old was Muhammad Ali when he died?
Intermediate answer: Muhammad Ali was 74 years old when he died.
Follow up: How old was Alan Turing when he died?
Intermediate answer: Alan Turing was 41 years old when he died.
So the final answer is: Muhammad Ali
```
</CodeOutputBlock>
### Feed examples and formatter to `FewShotPromptTemplate`
Finally, create a `FewShotPromptTemplate` object. This object takes in the few shot examples and the formatter for the few shot examples.
```python
prompt = FewShotPromptTemplate(
examples=examples,
example_prompt=example_prompt,
suffix="Question: {input}",
input_variables=["input"]
)
print(prompt.format(input="Who was the father of Mary Ball Washington?"))
```
<CodeOutputBlock lang="python">
```
Question: Who lived longer, Muhammad Ali or Alan Turing?
Are follow up questions needed here: Yes.
Follow up: How old was Muhammad Ali when he died?
Intermediate answer: Muhammad Ali was 74 years old when he died.
Follow up: How old was Alan Turing when he died?
Intermediate answer: Alan Turing was 41 years old when he died.
So the final answer is: Muhammad Ali
Question: When was the founder of craigslist born?
Are follow up questions needed here: Yes.
Follow up: Who was the founder of craigslist?
Intermediate answer: Craigslist was founded by Craig Newmark.
Follow up: When was Craig Newmark born?
Intermediate answer: Craig Newmark was born on December 6, 1952.
So the final answer is: December 6, 1952
Question: Who was the maternal grandfather of George Washington?
Are follow up questions needed here: Yes.
Follow up: Who was the mother of George Washington?
Intermediate answer: The mother of George Washington was Mary Ball Washington.
Follow up: Who was the father of Mary Ball Washington?
Intermediate answer: The father of Mary Ball Washington was Joseph Ball.
So the final answer is: Joseph Ball
Question: Are both the directors of Jaws and Casino Royale from the same country?
Are follow up questions needed here: Yes.
Follow up: Who is the director of Jaws?
Intermediate Answer: The director of Jaws is Steven Spielberg.
Follow up: Where is Steven Spielberg from?
Intermediate Answer: The United States.
Follow up: Who is the director of Casino Royale?
Intermediate Answer: The director of Casino Royale is Martin Campbell.
Follow up: Where is Martin Campbell from?
Intermediate Answer: New Zealand.
So the final answer is: No
Question: Who was the father of Mary Ball Washington?
```
</CodeOutputBlock>
## Using an example selector
### Feed examples into `ExampleSelector`
We will reuse the example set and the formatter from the previous section. However, instead of feeding the examples directly into the `FewShotPromptTemplate` object, we will feed them into an `ExampleSelector` object.
In this tutorial, we will use the `SemanticSimilarityExampleSelector` class. This class selects few shot examples based on their similarity to the input. It uses an embedding model to compute the similarity between the input and the few shot examples, as well as a vector store to perform the nearest neighbor search.
```python
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
example_selector = SemanticSimilarityExampleSelector.from_examples(
# This is the list of examples available to select from.
examples,
# This is the embedding class used to produce embeddings which are used to measure semantic similarity.
OpenAIEmbeddings(),
# This is the VectorStore class that is used to store the embeddings and do a similarity search over.
Chroma,
# This is the number of examples to produce.
k=1
)
# Select the most similar example to the input.
question = "Who was the father of Mary Ball Washington?"
selected_examples = example_selector.select_examples({"question": question})
print(f"Examples most similar to the input: {question}")
for example in selected_examples:
print("\n")
for k, v in example.items():
print(f"{k}: {v}")
```
<CodeOutputBlock lang="python">
```
Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.
Examples most similar to the input: Who was the father of Mary Ball Washington?
question: Who was the maternal grandfather of George Washington?
answer:
Are follow up questions needed here: Yes.
Follow up: Who was the mother of George Washington?
Intermediate answer: The mother of George Washington was Mary Ball Washington.
Follow up: Who was the father of Mary Ball Washington?
Intermediate answer: The father of Mary Ball Washington was Joseph Ball.
So the final answer is: Joseph Ball
```
</CodeOutputBlock>
### Feed example selector into `FewShotPromptTemplate`
Finally, create a `FewShotPromptTemplate` object. This object takes in the example selector and the formatter for the few shot examples.
```python
prompt = FewShotPromptTemplate(
example_selector=example_selector,
example_prompt=example_prompt,
suffix="Question: {input}",
input_variables=["input"]
)
print(prompt.format(input="Who was the father of Mary Ball Washington?"))
```
<CodeOutputBlock lang="python">
```
Question: Who was the maternal grandfather of George Washington?
Are follow up questions needed here: Yes.
Follow up: Who was the mother of George Washington?
Intermediate answer: The mother of George Washington was Mary Ball Washington.
Follow up: Who was the father of Mary Ball Washington?
Intermediate answer: The father of Mary Ball Washington was Joseph Ball.
So the final answer is: Joseph Ball
Question: Who was the father of Mary Ball Washington?
```
</CodeOutputBlock>

View file

@ -1,140 +0,0 @@
Here's the simplest example:
```python
from langchain import PromptTemplate
template = """\
You are a naming consultant for new companies.
What is a good name for a company that makes {product}?
"""
prompt = PromptTemplate.from_template(template)
prompt.format(product="colorful socks")
```
<CodeOutputBlock lang="python">
```
You are a naming consultant for new companies.
What is a good name for a company that makes colorful socks?
```
</CodeOutputBlock>
## Create a prompt template
You can create simple hardcoded prompts using the `PromptTemplate` class. Prompt templates can take any number of input variables, and can be formatted to generate a prompt.
```python
from langchain import PromptTemplate
# An example prompt with no input variables
no_input_prompt = PromptTemplate(input_variables=[], template="Tell me a joke.")
no_input_prompt.format()
# -> "Tell me a joke."
# An example prompt with one input variable
one_input_prompt = PromptTemplate(input_variables=["adjective"], template="Tell me a {adjective} joke.")
one_input_prompt.format(adjective="funny")
# -> "Tell me a funny joke."
# An example prompt with multiple input variables
multiple_input_prompt = PromptTemplate(
input_variables=["adjective", "content"],
template="Tell me a {adjective} joke about {content}."
)
multiple_input_prompt.format(adjective="funny", content="chickens")
# -> "Tell me a funny joke about chickens."
```
If you do not wish to specify `input_variables` manually, you can also create a `PromptTemplate` using `from_template` class method. `langchain` will automatically infer the `input_variables` based on the `template` passed.
```python
template = "Tell me a {adjective} joke about {content}."
prompt_template = PromptTemplate.from_template(template)
prompt_template.input_variables
# -> ['adjective', 'content']
prompt_template.format(adjective="funny", content="chickens")
# -> Tell me a funny joke about chickens.
```
You can create custom prompt templates that format the prompt in any way you want. For more information, see [Custom Prompt Templates](./custom_prompt_template.html).
<!-- TODO(shreya): Add link to Jinja -->
## Chat prompt template
[Chat Models](../models/chat) take a list of chat messages as input - this list commonly referred to as a `prompt`.
These chat messages differ from raw string (which you would pass into a [LLM](/docs/modules/model_io/models/llms) model) in that every message is associated with a `role`.
For example, in OpenAI [Chat Completion API](https://platform.openai.com/docs/guides/chat/introduction), a chat message can be associated with the AI, human or system role. The model is supposed to follow instruction from system chat message more closely.
LangChain provides several prompt templates to make constructing and working with prompts easily. You are encouraged to use these chat related prompt templates instead of `PromptTemplate` when querying chat models to fully exploit the potential of underlying chat model.
```python
from langchain.prompts import (
ChatPromptTemplate,
PromptTemplate,
SystemMessagePromptTemplate,
AIMessagePromptTemplate,
HumanMessagePromptTemplate,
)
from langchain.schema import (
AIMessage,
HumanMessage,
SystemMessage
)
```
To create a message template associated with a role, you use `MessagePromptTemplate`.
For convenience, there is a `from_template` method exposed on the template. If you were to use this template, this is what it would look like:
```python
template="You are a helpful assistant that translates {input_language} to {output_language}."
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template="{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
```
If you wanted to construct the `MessagePromptTemplate` more directly, you could create a PromptTemplate outside and then pass it in, eg:
```python
prompt=PromptTemplate(
template="You are a helpful assistant that translates {input_language} to {output_language}.",
input_variables=["input_language", "output_language"],
)
system_message_prompt_2 = SystemMessagePromptTemplate(prompt=prompt)
assert system_message_prompt == system_message_prompt_2
```
After that, you can build a `ChatPromptTemplate` from one or more `MessagePromptTemplates`. You can use `ChatPromptTemplate`'s `format_prompt` -- this returns a `PromptValue`, which you can convert to a string or Message object, depending on whether you want to use the formatted value as input to an llm or chat model.
```python
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
# get a chat completion from the formatted messages
chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.").to_messages()
```
<CodeOutputBlock lang="python">
```
[SystemMessage(content='You are a helpful assistant that translates English to French.', additional_kwargs={}),
HumanMessage(content='I love programming.', additional_kwargs={})]
```
</CodeOutputBlock>

View file

@ -1,92 +0,0 @@
## Partial With Strings
One common use case for wanting to partial a prompt template is if you get some of the variables before others. For example, suppose you have a prompt template that requires two variables, `foo` and `baz`. If you get the `foo` value early on in the chain, but the `baz` value later, it can be annoying to wait until you have both variables in the same place to pass them to the prompt template. Instead, you can partial the prompt template with the `foo` value, and then pass the partialed prompt template along and just use that. Below is an example of doing this:
```python
from langchain.prompts import PromptTemplate
```
```python
prompt = PromptTemplate(template="{foo}{bar}", input_variables=["foo", "bar"])
partial_prompt = prompt.partial(foo="foo");
print(partial_prompt.format(bar="baz"))
```
<CodeOutputBlock lang="python">
```
foobaz
```
</CodeOutputBlock>
You can also just initialize the prompt with the partialed variables.
```python
prompt = PromptTemplate(template="{foo}{bar}", input_variables=["bar"], partial_variables={"foo": "foo"})
print(prompt.format(bar="baz"))
```
<CodeOutputBlock lang="python">
```
foobaz
```
</CodeOutputBlock>
## Partial With Functions
The other common use is to partial with a function. The use case for this is when you have a variable you know that you always want to fetch in a common way. A prime example of this is with date or time. Imagine you have a prompt which you always want to have the current date. You can't hard code it in the prompt, and passing it along with the other input variables is a bit annoying. In this case, it's very handy to be able to partial the prompt with a function that always returns the current date.
```python
from datetime import datetime
def _get_datetime():
now = datetime.now()
return now.strftime("%m/%d/%Y, %H:%M:%S")
```
```python
prompt = PromptTemplate(
template="Tell me a {adjective} joke about the day {date}",
input_variables=["adjective", "date"]
);
partial_prompt = prompt.partial(date=_get_datetime)
print(partial_prompt.format(adjective="funny"))
```
<CodeOutputBlock lang="python">
```
Tell me a funny joke about the day 02/27/2023, 22:15:16
```
</CodeOutputBlock>
You can also just initialize the prompt with the partialed variables, which often makes more sense in this workflow.
```python
prompt = PromptTemplate(
template="Tell me a {adjective} joke about the day {date}",
input_variables=["adjective"],
partial_variables={"date": _get_datetime}
);
print(prompt.format(adjective="funny"))
```
<CodeOutputBlock lang="python">
```
Tell me a funny joke about the day 02/27/2023, 22:15:16
```
</CodeOutputBlock>

View file

@ -1,88 +0,0 @@
```python
from langchain.prompts.pipeline import PipelinePromptTemplate
from langchain.prompts.prompt import PromptTemplate
```
```python
full_template = """{introduction}
{example}
{start}"""
full_prompt = PromptTemplate.from_template(full_template)
```
```python
introduction_template = """You are impersonating {person}."""
introduction_prompt = PromptTemplate.from_template(introduction_template)
```
```python
example_template = """Here's an example of an interaction:
Q: {example_q}
A: {example_a}"""
example_prompt = PromptTemplate.from_template(example_template)
```
```python
start_template = """Now, do this for real!
Q: {input}
A:"""
start_prompt = PromptTemplate.from_template(start_template)
```
```python
input_prompts = [
("introduction", introduction_prompt),
("example", example_prompt),
("start", start_prompt)
]
pipeline_prompt = PipelinePromptTemplate(final_prompt=full_prompt, pipeline_prompts=input_prompts)
```
```python
pipeline_prompt.input_variables
```
<CodeOutputBlock lang="python">
```
['example_a', 'person', 'example_q', 'input']
```
</CodeOutputBlock>
```python
print(pipeline_prompt.format(
person="Elon Musk",
example_q="What's your favorite car?",
example_a="Tesla",
input="What's your favorite social media site?"
))
```
<CodeOutputBlock lang="python">
```
You are impersonating Elon Musk.
Here's an example of an interaction:
Q: What's your favorite car?
A: Tesla
Now, do this for real!
Q: What's your favorite social media site?
A:
```
</CodeOutputBlock>