add everyting for docs

This commit is contained in:
ishaan-jaff 2023-07-29 07:00:13 -07:00
parent de45a738ee
commit 0fe8799f94
1015 changed files with 185353 additions and 0 deletions

View file

@ -0,0 +1,107 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "56ac1584",
"metadata": {},
"source": [
"# EverNote\n",
"\n",
">[EverNote](https://evernote.com/) is intended for archiving and creating notes in which photos, audio and saved web content can be embedded. Notes are stored in virtual \"notebooks\" and can be tagged, annotated, edited, searched, and exported.\n",
"\n",
"This notebook shows how to load an `Evernote` [export](https://help.evernote.com/hc/en-us/articles/209005557-Export-notes-and-notebooks-as-ENEX-or-HTML) file (.enex) from disk.\n",
"\n",
"A document will be created for each note in the export."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "1a53ece0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# lxml and html2text are required to parse EverNote notes\n",
"# !pip install lxml\n",
"# !pip install html2text"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "88df766f",
"metadata": {
"pycharm": {
"name": "#%%\n"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='testing this\\n\\nwhat happens?\\n\\nto the world?**Jan - March 2022**', metadata={'source': 'example_data/testing.enex'})]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.document_loaders import EverNoteLoader\n",
"\n",
"# By default all notes are combined into a single Document\n",
"loader = EverNoteLoader(\"example_data/testing.enex\")\n",
"loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "97a58fde",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='testing this\\n\\nwhat happens?\\n\\nto the world?', metadata={'title': 'testing', 'created': time.struct_time(tm_year=2023, tm_mon=2, tm_mday=9, tm_hour=3, tm_min=47, tm_sec=46, tm_wday=3, tm_yday=40, tm_isdst=-1), 'updated': time.struct_time(tm_year=2023, tm_mon=2, tm_mday=9, tm_hour=3, tm_min=53, tm_sec=28, tm_wday=3, tm_yday=40, tm_isdst=-1), 'note-attributes.author': 'Harrison Chase', 'source': 'example_data/testing.enex'}),\n",
" Document(page_content='**Jan - March 2022**', metadata={'title': 'Summer Training Program', 'created': time.struct_time(tm_year=2022, tm_mon=12, tm_mday=27, tm_hour=1, tm_min=59, tm_sec=48, tm_wday=1, tm_yday=361, tm_isdst=-1), 'note-attributes.author': 'Mike McGarry', 'note-attributes.source': 'mobile.iphone', 'source': 'example_data/testing.enex'})]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# It's likely more useful to return a Document for each note\n",
"loader = EverNoteLoader(\"example_data/testing.enex\", load_single_document=False)\n",
"loader.load()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}