add everything for docs

2025-04-25 18:54:30 +00:00 · 2023-07-29 07:00:13 -07:00 · 2023-07-29 07:00:13 -07:00 · d400bccb15
commit d400bccb15
parent 36eaaa7d36
1015 changed files with 185353 additions and 0 deletions
--- a/docs/extras/guides/evaluation/string/custom.ipynb
+++ b/docs/extras/guides/evaluation/string/custom.ipynb
@@ -0,0 +1,208 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "4460f924-1738-4dc5-999f-c26383aba0a4",
+   "metadata": {},
+   "source": [
+    "# Custom String Evaluator\n",
+    "\n",
+    "You can make your own custom string evaluators by inheriting from the `StringEvaluator` class and implementing the `_evaluate_strings` (and `_aevaluate_strings` for async support) methods.\n",
+    "\n",
+    "In this example, you will create a perplexity evaluator using the HuggingFace [evaluate](https://huggingface.co/docs/evaluate/index) library.\n",
+    "[Perplexity](https://en.wikipedia.org/wiki/Perplexity) is a measure of how well the generated text would be predicted by the model used to compute the metric; a lower score means the model finds the text more predictable."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "90ec5942-4b14-47b1-baff-9dd2a9f17a4e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# %pip install evaluate > /dev/null"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "54fdba68-0ae7-4102-a45b-dabab86c97ac",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from typing import Any, Optional\n",
+    "\n",
+    "from langchain.evaluation import StringEvaluator\n",
+    "from evaluate import load\n",
+    "\n",
+    "\n",
+    "class PerplexityEvaluator(StringEvaluator):\n",
+    "    \"\"\"Evaluate the perplexity of a predicted string.\"\"\"\n",
+    "\n",
+    "    def __init__(self, model_id: str = \"gpt2\"):\n",
+    "        self.model_id = model_id\n",
+    "        self.metric_fn = load(\n",
+    "            \"perplexity\", module_type=\"metric\", model_id=self.model_id, pad_token=0\n",
+    "        )\n",
+    "\n",
+    "    def _evaluate_strings(\n",
+    "        self,\n",
+    "        *,\n",
+    "        prediction: str,\n",
+    "        reference: Optional[str] = None,\n",
+    "        input: Optional[str] = None,\n",
+    "        **kwargs: Any,\n",
+    "    ) -> dict:\n",
+    "        results = self.metric_fn.compute(\n",
+    "            predictions=[prediction], model_id=self.model_id\n",
+    "        )\n",
+    "        ppl = results[\"perplexities\"][0]\n",
+    "        return {\"score\": ppl}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "52767568-8075-4f77-93c9-80e1a7e5cba3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "evaluator = PerplexityEvaluator()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "697ee0c0-d1ae-4a55-a542-a0f8e602c28a",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using pad_token, but it is not set yet.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+      "To disable this warning, you can either:\n",
+      "\t- Avoid using `tokenizers` before the fork if possible\n",
+      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "467109d44654486e8b415288a319fc2c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'score': 190.3675537109375}"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "evaluator.evaluate_strings(prediction=\"The rains in Spain fall mainly on the plain.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "5089d9d1-eae6-4d47-b4f6-479e5d887d74",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using pad_token, but it is not set yet.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d3266f6f06d746e1bb03ce4aca07d9b9",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'score': 1982.0709228515625}"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# The perplexity is much higher because the term 'LangChain' was introduced after GPT-2 was released and never appears in a context like this one.\n",
+    "evaluator.evaluate_strings(prediction=\"The rains in Spain fall mainly on LangChain.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5eaa178f-6ba3-47ae-b3dc-1b196af6d213",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}