From 1eb1ac0f416abfdf66d15b18b375e8d12beabcb8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 26 Aug 2025 15:38:46 +0200
Subject: [PATCH 001/124] chore(ui-deps): bump @testing-library/jest-dom from
6.6.3 to 6.8.0 in /llama_stack/ui (#3243)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Bumps [@testing-library/jest-dom](https://github.com/testing-library/jest-dom) from 6.6.3 to 6.8.0.

Release notes

Sourced from @testing-library/jest-dom's releases.

- v6.8.0 (2025-08-20): Features
- v6.7.0 (2025-08-13): Features
- v6.6.4 (2025-07-26): Performance Improvements

Commits

[About Dependabot compatibility scores](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.
[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)
---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
llama_stack/ui/package-lock.json | 32 +++++---------------------------
llama_stack/ui/package.json | 2 +-
2 files changed, 6 insertions(+), 28 deletions(-)
diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json
index 58888e586..98a1e4fe5 100644
--- a/llama_stack/ui/package-lock.json
+++ b/llama_stack/ui/package-lock.json
@@ -36,7 +36,7 @@
"@eslint/eslintrc": "^3",
"@tailwindcss/postcss": "^4",
"@testing-library/dom": "^10.4.1",
- "@testing-library/jest-dom": "^6.6.3",
+ "@testing-library/jest-dom": "^6.8.0",
"@testing-library/react": "^16.3.0",
"@types/jest": "^29.5.14",
"@types/node": "^20",
@@ -3597,18 +3597,17 @@
}
},
"node_modules/@testing-library/jest-dom": {
- "version": "6.6.3",
- "resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.6.3.tgz",
- "integrity": "sha512-IteBhl4XqYNkM54f4ejhLRJiZNqcSCoXUOG2CPK7qbD322KjQozM4kHQOfkG2oln9b9HTYqs+Sae8vBATubxxA==",
+ "version": "6.8.0",
+ "resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.8.0.tgz",
+ "integrity": "sha512-WgXcWzVM6idy5JaftTVC8Vs83NKRmGJz4Hqs4oyOuO2J4r/y79vvKZsb+CaGyCSEbUPI6OsewfPd0G1A0/TUZQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"@adobe/css-tools": "^4.4.0",
"aria-query": "^5.0.0",
- "chalk": "^3.0.0",
"css.escape": "^1.5.1",
"dom-accessibility-api": "^0.6.3",
- "lodash": "^4.17.21",
+ "picocolors": "^1.1.1",
"redent": "^3.0.0"
},
"engines": {
@@ -3617,20 +3616,6 @@
"yarn": ">=1"
}
},
- "node_modules/@testing-library/jest-dom/node_modules/chalk": {
- "version": "3.0.0",
- "resolved": "https://registry.npmjs.org/chalk/-/chalk-3.0.0.tgz",
- "integrity": "sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==",
- "dev": true,
- "license": "MIT",
- "dependencies": {
- "ansi-styles": "^4.1.0",
- "supports-color": "^7.1.0"
- },
- "engines": {
- "node": ">=8"
- }
- },
"node_modules/@testing-library/jest-dom/node_modules/dom-accessibility-api": {
"version": "0.6.3",
"resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.6.3.tgz",
@@ -10066,13 +10051,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
- "node_modules/lodash": {
- "version": "4.17.21",
- "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
- "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==",
- "dev": true,
- "license": "MIT"
- },
"node_modules/lodash.merge": {
"version": "4.6.2",
"resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz",
diff --git a/llama_stack/ui/package.json b/llama_stack/ui/package.json
index 4e29e8a5c..7a17d93dd 100644
--- a/llama_stack/ui/package.json
+++ b/llama_stack/ui/package.json
@@ -41,7 +41,7 @@
"@eslint/eslintrc": "^3",
"@tailwindcss/postcss": "^4",
"@testing-library/dom": "^10.4.1",
- "@testing-library/jest-dom": "^6.6.3",
+ "@testing-library/jest-dom": "^6.8.0",
"@testing-library/react": "^16.3.0",
"@types/jest": "^29.5.14",
"@types/node": "^20",
From 7ca82338890e3000659d0bd177339d8d3b822bf3 Mon Sep 17 00:00:00 2001
From: Derek Higgins
Date: Tue, 26 Aug 2025 17:17:00 +0100
Subject: [PATCH 002/124] feat(testing): remove SQLite dependency from
inference recorder (#3254)
Recording files use a predictable naming format, making the SQLite index
redundant. The binary SQLite file was causing frequent git conflicts.
Simplify by calculating file paths directly from request hashes.
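
For illustration, a minimal sketch of the direct-lookup idea (helper names and the
exact hash normalization are illustrative, not the recorder's internals): the request
is serialized deterministically, hashed, and the first 12 hex characters of the hash
become the JSON recording file name, so no index database is needed.

```python
import hashlib
import json
from pathlib import Path


def response_path_for(responses_dir: Path, request: dict) -> Path:
    # Serialize the request deterministically and hash it; the recorder's
    # actual normalization may differ, this only shows the idea.
    request_hash = hashlib.sha256(json.dumps(request, sort_keys=True).encode()).hexdigest()
    # The file name is derived directly from the hash, so the same request
    # always maps to the same recording file.
    return responses_dir / f"{request_hash[:12]}.json"


# Example: locate a recording without consulting any index.
path = response_path_for(
    Path("tests/integration/recordings/responses"),
    {"endpoint": "/v1/chat/completions", "model": "llama3.2:3b-instruct-fp16"},
)
print(path, path.exists())
```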
Signed-off-by: Derek Higgins
---
llama_stack/testing/inference_recorder.py | 43 +-----------------
tests/integration/recordings/index.sqlite | Bin 57344 -> 0 bytes
.../distribution/test_inference_recordings.py | 16 +------
3 files changed, 2 insertions(+), 57 deletions(-)
delete mode 100644 tests/integration/recordings/index.sqlite
diff --git a/llama_stack/testing/inference_recorder.py b/llama_stack/testing/inference_recorder.py
index 4a6958399..8fa5f5f2e 100644
--- a/llama_stack/testing/inference_recorder.py
+++ b/llama_stack/testing/inference_recorder.py
@@ -9,7 +9,6 @@ from __future__ import annotations # for forward references
import hashlib
import json
import os
-import sqlite3
from collections.abc import Generator
from contextlib import contextmanager
from enum import StrEnum
@@ -125,28 +124,13 @@ class ResponseStorage:
def __init__(self, test_dir: Path):
self.test_dir = test_dir
self.responses_dir = self.test_dir / "responses"
- self.db_path = self.test_dir / "index.sqlite"
self._ensure_directories()
- self._init_database()
def _ensure_directories(self):
self.test_dir.mkdir(parents=True, exist_ok=True)
self.responses_dir.mkdir(exist_ok=True)
- def _init_database(self):
- with sqlite3.connect(self.db_path) as conn:
- conn.execute("""
- CREATE TABLE IF NOT EXISTS recordings (
- request_hash TEXT PRIMARY KEY,
- response_file TEXT,
- endpoint TEXT,
- model TEXT,
- timestamp TEXT,
- is_streaming BOOLEAN
- )
- """)
-
def store_recording(self, request_hash: str, request: dict[str, Any], response: dict[str, Any]):
"""Store a request/response pair."""
# Generate unique response filename
@@ -169,34 +153,9 @@ class ResponseStorage:
f.write("\n")
f.flush()
- # Update SQLite index
- with sqlite3.connect(self.db_path) as conn:
- conn.execute(
- """
- INSERT OR REPLACE INTO recordings
- (request_hash, response_file, endpoint, model, timestamp, is_streaming)
- VALUES (?, ?, ?, ?, datetime('now'), ?)
- """,
- (
- request_hash,
- response_file,
- request.get("endpoint", ""),
- request.get("model", ""),
- response.get("is_streaming", False),
- ),
- )
-
def find_recording(self, request_hash: str) -> dict[str, Any] | None:
"""Find a recorded response by request hash."""
- with sqlite3.connect(self.db_path) as conn:
- result = conn.execute(
- "SELECT response_file FROM recordings WHERE request_hash = ?", (request_hash,)
- ).fetchone()
-
- if not result:
- return None
-
- response_file = result[0]
+ response_file = f"{request_hash[:12]}.json"
response_path = self.responses_dir / response_file
if not response_path.exists():
diff --git a/tests/integration/recordings/index.sqlite b/tests/integration/recordings/index.sqlite
deleted file mode 100644
index 0c88416f1e7c84196c1dd80877c3ff4bcd8322da..0000000000000000000000000000000000000000
GIT binary patch
(binary delta for the deleted tests/integration/recordings/index.sqlite omitted)
Date: Tue, 26 Aug 2025 11:34:08 -0700
Subject: [PATCH 003/124] feat: Add example notebook for Langchain + LLAMAStack
integration (#3228)
# What does this PR do?
Add LLAMAStack + Langchain integration example notebook
## Test Plan
Ran in Jupyter notebook, works end to end.
(Used Claude mainly for documentation and coding/debugging help)
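
The core pattern the notebook demonstrates is pointing LangChain's `ChatOpenAI` at
Llama Stack's OpenAI-compatible endpoint (minimal sketch below, taken from the
notebook; the Together API key is a placeholder):

```python
import os

from langchain_openai import ChatOpenAI

# Llama Stack exposes an OpenAI-compatible API on the local server.
os.environ["OPENAI_API_KEY"] = "dummy"
os.environ["OPENAI_BASE_URL"] = "http://0.0.0.0:8321/v1/openai/v1"

# Route requests through Llama Stack to the Together AI provider.
llm = ChatOpenAI(
    model="together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    default_headers={"X-LlamaStack-Provider-Data": '{"together_api_key": "***"}'},
)

print(llm.invoke("Write a two-sentence poem about llamas.").content)
```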
---
.../langchain/Llama_Stack_LangChain.ipynb | 946 ++++++++++++++++++
1 file changed, 946 insertions(+)
create mode 100644 docs/notebooks/langchain/Llama_Stack_LangChain.ipynb
diff --git a/docs/notebooks/langchain/Llama_Stack_LangChain.ipynb b/docs/notebooks/langchain/Llama_Stack_LangChain.ipynb
new file mode 100644
index 000000000..ed918ff50
--- /dev/null
+++ b/docs/notebooks/langchain/Llama_Stack_LangChain.ipynb
@@ -0,0 +1,946 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "1ztegmwm4sp",
+ "metadata": {},
+ "source": [
+ "## LlamaStack + LangChain Integration Tutorial\n",
+ "\n",
+ "This notebook demonstrates how to integrate **LlamaStack** with **LangChain** to build a complete RAG (Retrieval-Augmented Generation) system.\n",
+ "\n",
+ "### Overview\n",
+ "\n",
+ "- **LlamaStack**: Provides the infrastructure for running LLMs and vector databases\n",
+ "- **LangChain**: Provides the framework for chaining operations and prompt templates\n",
+ "- **Integration**: Uses LlamaStack's OpenAI-compatible API with LangChain\n",
+ "\n",
+ "### What You'll See\n",
+ "\n",
+ "1. Setting up LlamaStack server with Together AI provider\n",
+ "2. Creating and managing vector databases\n",
+ "3. Building RAG chains with LangChain + LLAMAStack\n",
+ "4. Querying the chain for relevant information\n",
+ "\n",
+ "### Prerequisites\n",
+ "\n",
+ "- Together AI API key\n",
+ "\n",
+ "---\n",
+ "\n",
+ "### 1. Installation and Setup"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2ktr5ls2cas",
+ "metadata": {},
+ "source": [
+ "#### Install Required Dependencies\n",
+ "\n",
+ "First, we install all the necessary packages for LangChain and FastAPI integration."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "5b6a6a17-b931-4bea-8273-0d6e5563637a",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: fastapi in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.115.14)\n",
+ "Requirement already satisfied: uvicorn in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.29.0)\n",
+ "Requirement already satisfied: langchain>=0.2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.3.27)\n",
+ "Requirement already satisfied: langchain-openai in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.3.30)\n",
+ "Requirement already satisfied: langchain-community in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.3.27)\n",
+ "Requirement already satisfied: langchain-text-splitters in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.3.9)\n",
+ "Requirement already satisfied: faiss-cpu in /Users/swapna942/miniconda3/lib/python3.12/site-packages (1.11.0)\n",
+ "Requirement already satisfied: starlette<0.47.0,>=0.40.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from fastapi) (0.46.2)\n",
+ "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from fastapi) (2.11.7)\n",
+ "Requirement already satisfied: typing-extensions>=4.8.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from fastapi) (4.14.1)\n",
+ "Requirement already satisfied: annotated-types>=0.6.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (0.7.0)\n",
+ "Requirement already satisfied: pydantic-core==2.33.2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (2.33.2)\n",
+ "Requirement already satisfied: typing-inspection>=0.4.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (0.4.1)\n",
+ "Requirement already satisfied: anyio<5,>=3.6.2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from starlette<0.47.0,>=0.40.0->fastapi) (4.10.0)\n",
+ "Requirement already satisfied: idna>=2.8 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from anyio<5,>=3.6.2->starlette<0.47.0,>=0.40.0->fastapi) (3.10)\n",
+ "Requirement already satisfied: sniffio>=1.1 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from anyio<5,>=3.6.2->starlette<0.47.0,>=0.40.0->fastapi) (1.3.1)\n",
+ "Requirement already satisfied: click>=7.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from uvicorn) (8.2.1)\n",
+ "Requirement already satisfied: h11>=0.8 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from uvicorn) (0.16.0)\n",
+ "Requirement already satisfied: langchain-core<1.0.0,>=0.3.72 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain>=0.2) (0.3.74)\n",
+ "Requirement already satisfied: langsmith>=0.1.17 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain>=0.2) (0.4.14)\n",
+ "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain>=0.2) (2.0.41)\n",
+ "Requirement already satisfied: requests<3,>=2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain>=0.2) (2.32.4)\n",
+ "Requirement already satisfied: PyYAML>=5.3 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain>=0.2) (6.0.2)\n",
+ "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-core<1.0.0,>=0.3.72->langchain>=0.2) (9.1.2)\n",
+ "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-core<1.0.0,>=0.3.72->langchain>=0.2) (1.33)\n",
+ "Requirement already satisfied: packaging>=23.2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-core<1.0.0,>=0.3.72->langchain>=0.2) (24.2)\n",
+ "Requirement already satisfied: jsonpointer>=1.9 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<1.0.0,>=0.3.72->langchain>=0.2) (2.1)\n",
+ "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from requests<3,>=2->langchain>=0.2) (3.3.2)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from requests<3,>=2->langchain>=0.2) (2.5.0)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from requests<3,>=2->langchain>=0.2) (2025.8.3)\n",
+ "Requirement already satisfied: openai<2.0.0,>=1.99.9 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-openai) (1.100.2)\n",
+ "Requirement already satisfied: tiktoken<1,>=0.7 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-openai) (0.9.0)\n",
+ "Requirement already satisfied: distro<2,>=1.7.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from openai<2.0.0,>=1.99.9->langchain-openai) (1.9.0)\n",
+ "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from openai<2.0.0,>=1.99.9->langchain-openai) (0.28.1)\n",
+ "Requirement already satisfied: jiter<1,>=0.4.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from openai<2.0.0,>=1.99.9->langchain-openai) (0.10.0)\n",
+ "Requirement already satisfied: tqdm>4 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from openai<2.0.0,>=1.99.9->langchain-openai) (4.67.1)\n",
+ "Requirement already satisfied: httpcore==1.* in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.99.9->langchain-openai) (1.0.9)\n",
+ "Requirement already satisfied: regex>=2022.1.18 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from tiktoken<1,>=0.7->langchain-openai) (2024.11.6)\n",
+ "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-community) (3.12.13)\n",
+ "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-community) (0.6.7)\n",
+ "Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-community) (2.10.1)\n",
+ "Requirement already satisfied: httpx-sse<1.0.0,>=0.4.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-community) (0.4.1)\n",
+ "Requirement already satisfied: numpy>=1.26.2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-community) (2.3.1)\n",
+ "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (2.6.1)\n",
+ "Requirement already satisfied: aiosignal>=1.1.2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.4.0)\n",
+ "Requirement already satisfied: attrs>=17.3.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (25.3.0)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.7.0)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (6.6.3)\n",
+ "Requirement already satisfied: propcache>=0.2.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (0.3.2)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.17.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.20.1)\n",
+ "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (3.26.1)\n",
+ "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (0.9.0)\n",
+ "Requirement already satisfied: python-dotenv>=0.21.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from pydantic-settings<3.0.0,>=2.4.0->langchain-community) (1.1.1)\n",
+ "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community) (1.1.0)\n",
+ "Requirement already satisfied: orjson>=3.9.14 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langsmith>=0.1.17->langchain>=0.2) (3.10.18)\n",
+ "Requirement already satisfied: requests-toolbelt>=1.0.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langsmith>=0.1.17->langchain>=0.2) (1.0.0)\n",
+ "Requirement already satisfied: zstandard>=0.23.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langsmith>=0.1.17->langchain>=0.2) (0.23.0)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install fastapi uvicorn \"langchain>=0.2\" langchain-openai \\\n",
+ " langchain-community langchain-text-splitters \\\n",
+ " faiss-cpu"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "wmt9jvqzh7n",
+ "metadata": {},
+ "source": [
+ "### 2. LlamaStack Server Setup\n",
+ "\n",
+ "#### Build and Start LlamaStack Server\n",
+ "\n",
+ "This section sets up the LlamaStack server with:\n",
+ "- **Together AI** as the inference provider\n",
+ "- **FAISS** as the vector database\n",
+ "- **Sentence Transformers** for embeddings\n",
+ "\n",
+ "The server runs on `localhost:8321` and provides OpenAI-compatible endpoints."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "dd2dacf3-ec8b-4cc7-8ff4-b5b6ea4a6e9e",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: uv in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.7.20)\n",
+ "Environment '/Users/swapna942/llama-stack/.venv' already exists, re-using it.\n",
+ "Virtual environment /Users/swapna942/llama-stack/.venv is already active\n",
+ "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 86ms\u001b[0m\u001b[0m\n",
+ "Installing pip dependencies\n",
+ "\u001b[2K\u001b[2mResolved \u001b[1m178 packages\u001b[0m \u001b[2min 462ms\u001b[0m\u001b[0m \u001b[0m\n",
+ "\u001b[2mUninstalled \u001b[1m2 packages\u001b[0m \u001b[2min 28ms\u001b[0m\u001b[0m\n",
+ "\u001b[2K\u001b[2mInstalled \u001b[1m2 packages\u001b[0m \u001b[2min 5ms\u001b[0m\u001b[0m \u001b[0m\n",
+ " \u001b[31m-\u001b[39m \u001b[1mprotobuf\u001b[0m\u001b[2m==5.29.5\u001b[0m\n",
+ " \u001b[32m+\u001b[39m \u001b[1mprotobuf\u001b[0m\u001b[2m==5.29.4\u001b[0m\n",
+ " \u001b[31m-\u001b[39m \u001b[1mruff\u001b[0m\u001b[2m==0.12.5\u001b[0m\n",
+ " \u001b[32m+\u001b[39m \u001b[1mruff\u001b[0m\u001b[2m==0.9.10\u001b[0m\n",
+ "Installing special provider module: torch torchvision --index-url https://download.pytorch.org/whl/cpu\n",
+ "\u001b[2mAudited \u001b[1m2 packages\u001b[0m \u001b[2min 5ms\u001b[0m\u001b[0m\n",
+ "Installing special provider module: sentence-transformers --no-deps\n",
+ "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 9ms\u001b[0m\u001b[0m\n",
+ "\u001b[32mBuild Successful!\u001b[0m\n",
+ "\u001b[34mYou can find the newly-built distribution here: /Users/swapna942/.llama/distributions/starter/starter-run.yaml\u001b[0m\n",
+ "\u001b[32mYou can run the new Llama Stack distro via: \u001b[34mllama stack run /Users/swapna942/.llama/distributions/starter/starter-run.yaml --image-type venv\u001b[0m\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import subprocess\n",
+ "import time\n",
+ "\n",
+ "!pip install uv\n",
+ "\n",
+ "if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
+ " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
+ "\n",
+ "# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
+ "!uv run --with llama-stack llama stack build --distro starter --image-type venv\n",
+ "\n",
+ "\n",
+ "def run_llama_stack_server_background():\n",
+ " log_file = open(\"llama_stack_server.log\", \"w\")\n",
+ " process = subprocess.Popen(\n",
+ " \"uv run --with llama-stack llama stack run /Users/swapna942/.llama/distributions/starter/starter-run.yaml --image-type venv\",\n",
+ " shell=True,\n",
+ " stdout=log_file,\n",
+ " stderr=log_file,\n",
+ " text=True,\n",
+ " )\n",
+ "\n",
+ " print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
+ " return process\n",
+ "\n",
+ "\n",
+ "def wait_for_server_to_start():\n",
+ " import requests\n",
+ " from requests.exceptions import ConnectionError\n",
+ "\n",
+ " url = \"http://0.0.0.0:8321/v1/health\"\n",
+ " max_retries = 30\n",
+ " retry_interval = 1\n",
+ "\n",
+ " print(\"Waiting for server to start\", end=\"\")\n",
+ " for _ in range(max_retries):\n",
+ " try:\n",
+ " response = requests.get(url)\n",
+ " if response.status_code == 200:\n",
+ " print(\"\\nServer is ready!\")\n",
+ " return True\n",
+ " except ConnectionError:\n",
+ " print(\".\", end=\"\", flush=True)\n",
+ " time.sleep(retry_interval)\n",
+ "\n",
+ " print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
+ " return False\n",
+ "\n",
+ "\n",
+ "# use this helper if needed to kill the server\n",
+ "def kill_llama_stack_server():\n",
+ " # Kill any existing llama stack server processes\n",
+ " os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "28bd8dbd-4576-4e76-813f-21ab94db44a2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Starting Llama Stack server with PID: 99016\n",
+ "Waiting for server to start....\n",
+ "Server is ready!\n"
+ ]
+ }
+ ],
+ "source": [
+ "server_process = run_llama_stack_server_background()\n",
+ "assert wait_for_server_to_start()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "gr9cdcg4r7n",
+ "metadata": {},
+ "source": [
+ "#### Install LlamaStack Client\n",
+ "\n",
+ "Install the client library to interact with the LlamaStack server."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "487d2dbc-d071-400e-b4f0-dcee58f8dc95",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: llama_stack_client in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (0.2.17)\n",
+ "Requirement already satisfied: anyio<5,>=3.5.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (4.9.0)\n",
+ "Requirement already satisfied: click in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (8.2.1)\n",
+ "Requirement already satisfied: distro<2,>=1.7.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (1.9.0)\n",
+ "Requirement already satisfied: fire in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (0.7.0)\n",
+ "Requirement already satisfied: httpx<1,>=0.23.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (0.28.1)\n",
+ "Requirement already satisfied: pandas in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (2.3.1)\n",
+ "Requirement already satisfied: prompt-toolkit in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (3.0.51)\n",
+ "Requirement already satisfied: pyaml in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (25.7.0)\n",
+ "Requirement already satisfied: pydantic<3,>=1.9.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (2.11.7)\n",
+ "Requirement already satisfied: requests in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (2.32.4)\n",
+ "Requirement already satisfied: rich in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (14.1.0)\n",
+ "Requirement already satisfied: sniffio in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (1.3.1)\n",
+ "Requirement already satisfied: termcolor in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (3.1.0)\n",
+ "Requirement already satisfied: tqdm in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (4.67.1)\n",
+ "Requirement already satisfied: typing-extensions<5,>=4.7 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (4.14.1)\n",
+ "Requirement already satisfied: idna>=2.8 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from anyio<5,>=3.5.0->llama_stack_client) (3.10)\n",
+ "Requirement already satisfied: certifi in /opt/homebrew/opt/certifi/lib/python3.13/site-packages (from httpx<1,>=0.23.0->llama_stack_client) (2025.8.3)\n",
+ "Requirement already satisfied: httpcore==1.* in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from httpx<1,>=0.23.0->llama_stack_client) (1.0.9)\n",
+ "Requirement already satisfied: h11>=0.16 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama_stack_client) (0.16.0)\n",
+ "Requirement already satisfied: annotated-types>=0.6.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pydantic<3,>=1.9.0->llama_stack_client) (0.7.0)\n",
+ "Requirement already satisfied: pydantic-core==2.33.2 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pydantic<3,>=1.9.0->llama_stack_client) (2.33.2)\n",
+ "Requirement already satisfied: typing-inspection>=0.4.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pydantic<3,>=1.9.0->llama_stack_client) (0.4.1)\n",
+ "Requirement already satisfied: numpy>=1.26.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pandas->llama_stack_client) (2.3.2)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pandas->llama_stack_client) (2.9.0.post0)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pandas->llama_stack_client) (2025.2)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pandas->llama_stack_client) (2025.2)\n",
+ "Requirement already satisfied: six>=1.5 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from python-dateutil>=2.8.2->pandas->llama_stack_client) (1.17.0)\n",
+ "Requirement already satisfied: wcwidth in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from prompt-toolkit->llama_stack_client) (0.2.13)\n",
+ "Requirement already satisfied: PyYAML in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pyaml->llama_stack_client) (6.0.2)\n",
+ "Requirement already satisfied: charset_normalizer<4,>=2 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from requests->llama_stack_client) (3.4.2)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from requests->llama_stack_client) (2.5.0)\n",
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from rich->llama_stack_client) (4.0.0)\n",
+ "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from rich->llama_stack_client) (2.19.2)\n",
+ "Requirement already satisfied: mdurl~=0.1 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from markdown-it-py>=2.2.0->rich->llama_stack_client) (0.1.2)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import sys\n",
+ "\n",
+ "# Install directly to the current Python environment\n",
+ "subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"llama_stack_client\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0j5hag7l9x89",
+ "metadata": {},
+ "source": [
+ "### 3. Initialize LlamaStack Client\n",
+ "\n",
+ "Create a client connection to the LlamaStack server with API keys for different providers:\n",
+ "\n",
+ "- **OpenAI API Key**: For OpenAI models\n",
+ "- **Gemini API Key**: For Google's Gemini models \n",
+ "- **Together API Key**: For Together AI models\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "ab4eff97-4565-4c73-b1b3-0020a4c7e2a5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from llama_stack_client import LlamaStackClient\n",
+ "\n",
+ "client = LlamaStackClient(\n",
+ " base_url=\"http://0.0.0.0:8321\",\n",
+ " provider_data={\"openai_api_key\": \"****\", \"gemini_api_key\": \"****\", \"together_api_key\": \"****\"},\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "vwhexjy1e8o",
+ "metadata": {},
+ "source": [
+ "#### Explore Available Models and Safety Features\n",
+ "\n",
+ "Check what models and safety shields are available through your LlamaStack instance."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "880443ef-ac3c-48b1-a80a-7dab5b25ac61",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/models \"HTTP/1.1 200 OK\"\n",
+ "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/shields \"HTTP/1.1 200 OK\"\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Available models:\n",
+ "- all-minilm\n",
+ "- ollama/all-minilm:l6-v2\n",
+ "- ollama/llama-guard3:1b\n",
+ "- ollama/llama-guard3:8b\n",
+ "- ollama/llama3.2:3b-instruct-fp16\n",
+ "- ollama/nomic-embed-text\n",
+ "- fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct\n",
+ "- fireworks/accounts/fireworks/models/llama-v3p1-70b-instruct\n",
+ "- fireworks/accounts/fireworks/models/llama-v3p1-405b-instruct\n",
+ "- fireworks/accounts/fireworks/models/llama-v3p2-3b-instruct\n",
+ "- fireworks/accounts/fireworks/models/llama-v3p2-11b-vision-instruct\n",
+ "- fireworks/accounts/fireworks/models/llama-v3p2-90b-vision-instruct\n",
+ "- fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct\n",
+ "- fireworks/accounts/fireworks/models/llama4-scout-instruct-basic\n",
+ "- fireworks/accounts/fireworks/models/llama4-maverick-instruct-basic\n",
+ "- fireworks/nomic-ai/nomic-embed-text-v1.5\n",
+ "- fireworks/accounts/fireworks/models/llama-guard-3-8b\n",
+ "- fireworks/accounts/fireworks/models/llama-guard-3-11b-vision\n",
+ "- together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n",
+ "- together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n",
+ "- together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n",
+ "- together/meta-llama/Llama-3.2-3B-Instruct-Turbo\n",
+ "- together/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo\n",
+ "- together/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\n",
+ "- together/meta-llama/Llama-3.3-70B-Instruct-Turbo\n",
+ "- together/togethercomputer/m2-bert-80M-8k-retrieval\n",
+ "- together/togethercomputer/m2-bert-80M-32k-retrieval\n",
+ "- together/meta-llama/Llama-4-Scout-17B-16E-Instruct\n",
+ "- together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n",
+ "- together/meta-llama/Llama-Guard-3-8B\n",
+ "- together/meta-llama/Llama-Guard-3-11B-Vision-Turbo\n",
+ "- bedrock/meta.llama3-1-8b-instruct-v1:0\n",
+ "- bedrock/meta.llama3-1-70b-instruct-v1:0\n",
+ "- bedrock/meta.llama3-1-405b-instruct-v1:0\n",
+ "- openai/gpt-3.5-turbo-0125\n",
+ "- openai/gpt-3.5-turbo\n",
+ "- openai/gpt-3.5-turbo-instruct\n",
+ "- openai/gpt-4\n",
+ "- openai/gpt-4-turbo\n",
+ "- openai/gpt-4o\n",
+ "- openai/gpt-4o-2024-08-06\n",
+ "- openai/gpt-4o-mini\n",
+ "- openai/gpt-4o-audio-preview\n",
+ "- openai/chatgpt-4o-latest\n",
+ "- openai/o1\n",
+ "- openai/o1-mini\n",
+ "- openai/o3-mini\n",
+ "- openai/o4-mini\n",
+ "- openai/text-embedding-3-small\n",
+ "- openai/text-embedding-3-large\n",
+ "- anthropic/claude-3-5-sonnet-latest\n",
+ "- anthropic/claude-3-7-sonnet-latest\n",
+ "- anthropic/claude-3-5-haiku-latest\n",
+ "- anthropic/voyage-3\n",
+ "- anthropic/voyage-3-lite\n",
+ "- anthropic/voyage-code-3\n",
+ "- gemini/gemini-1.5-flash\n",
+ "- gemini/gemini-1.5-pro\n",
+ "- gemini/gemini-2.0-flash\n",
+ "- gemini/gemini-2.0-flash-lite\n",
+ "- gemini/gemini-2.5-flash\n",
+ "- gemini/gemini-2.5-flash-lite\n",
+ "- gemini/gemini-2.5-pro\n",
+ "- gemini/text-embedding-004\n",
+ "- groq/llama3-8b-8192\n",
+ "- groq/llama-3.1-8b-instant\n",
+ "- groq/llama3-70b-8192\n",
+ "- groq/llama-3.3-70b-versatile\n",
+ "- groq/llama-3.2-3b-preview\n",
+ "- groq/meta-llama/llama-4-scout-17b-16e-instruct\n",
+ "- groq/meta-llama/llama-4-maverick-17b-128e-instruct\n",
+ "- sambanova/Meta-Llama-3.1-8B-Instruct\n",
+ "- sambanova/Meta-Llama-3.3-70B-Instruct\n",
+ "- sambanova/Llama-4-Maverick-17B-128E-Instruct\n",
+ "- sentence-transformers/all-MiniLM-L6-v2\n",
+ "----\n",
+ "Available shields (safety models):\n",
+ "code-scanner\n",
+ "llama-guard\n",
+ "----\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Available models:\")\n",
+ "for m in client.models.list():\n",
+ " print(f\"- {m.identifier}\")\n",
+ "\n",
+ "print(\"----\")\n",
+ "print(\"Available shields (safety models):\")\n",
+ "for s in client.shields.list():\n",
+ " print(s.identifier)\n",
+ "print(\"----\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "gojp7at31ht",
+ "metadata": {},
+ "source": [
+ "### 4. Vector Database Setup\n",
+ "\n",
+ "#### Register a Vector Database\n",
+ "\n",
+ "Create a FAISS vector database for storing document embeddings:\n",
+ "\n",
+ "- **Vector DB ID**: Unique identifier for the database\n",
+ "- **Provider**: FAISS (Facebook AI Similarity Search)\n",
+ "- **Embedding Model**: Sentence Transformers model for text embeddings\n",
+ "- **Dimensions**: 384-dimensional embeddings"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "a16e2885-ae70-4fa6-9778-2433fa4dbfff",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-dbs \"HTTP/1.1 200 OK\"\n",
+ "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/vector-dbs \"HTTP/1.1 200 OK\"\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Registered new vector DB: VectorDBRegisterResponse(embedding_dimension=384, embedding_model='sentence-transformers/all-MiniLM-L6-v2', identifier='acme_docs', provider_id='faiss', type='vector_db', provider_resource_id='acme_docs_v2', owner=None, source='via_register_api', vector_db_name=None)\n",
+ "Existing vector DBs: [VectorDBListResponseItem(embedding_dimension=384, embedding_model='sentence-transformers/all-MiniLM-L6-v2', identifier='acme_docs', provider_id='faiss', type='vector_db', provider_resource_id='acme_docs_v2', vector_db_name=None)]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Register a new clean vector database\n",
+ "vector_db = client.vector_dbs.register(\n",
+ " vector_db_id=\"acme_docs\", # Use a new unique name\n",
+ " provider_id=\"faiss\",\n",
+ " provider_vector_db_id=\"acme_docs_v2\",\n",
+ " embedding_model=\"sentence-transformers/all-MiniLM-L6-v2\",\n",
+ " embedding_dimension=384,\n",
+ ")\n",
+ "print(\"Registered new vector DB:\", vector_db)\n",
+ "\n",
+ "# List all registered vector databases\n",
+ "dbs = client.vector_dbs.list()\n",
+ "print(\"Existing vector DBs:\", dbs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "pcgjqzfr3eo",
+ "metadata": {},
+ "source": [
+ "#### Prepare Sample Documents\n",
+ "\n",
+ "Create LLAMA Stack Chunks for FAISS vector store"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5a0a6619-c9fb-4938-8ff3-f84304eed91e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from llama_stack_client.types.vector_io_insert_params import Chunk\n",
+ "\n",
+ "docs = [\n",
+ " (\"Acme ships globally in 3-5 business days.\", {\"title\": \"Shipping Policy\"}),\n",
+ " (\"Returns are accepted within 30 days of purchase.\", {\"title\": \"Returns Policy\"}),\n",
+ " (\"Support is available 24/7 via chat and email.\", {\"title\": \"Support\"}),\n",
+ "]\n",
+ "\n",
+ "# Convert to Chunk objects\n",
+ "chunks = []\n",
+ "for _, (content, metadata) in enumerate(docs):\n",
+ " # Transform metadata to required format with document_id from title\n",
+ " metadata = {\"document_id\": metadata[\"title\"]}\n",
+ " chunk = Chunk(\n",
+ " content=content, # Required[InterleavedContent]\n",
+ " metadata=metadata, # Required[Dict]\n",
+ " )\n",
+ " chunks.append(chunk)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6bg3sm2ko5g",
+ "metadata": {},
+ "source": [
+ "#### Insert Documents into Vector Database\n",
+ "\n",
+ "Store the prepared documents in the FAISS vector database. This process:\n",
+ "1. Generates embeddings for each document\n",
+ "2. Stores embeddings with metadata\n",
+ "3. Enables semantic search capabilities"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "0e8740d8-b809-44b9-915f-1e0200e3c3f1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-io/insert \"HTTP/1.1 200 OK\"\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Documents inserted: None\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Insert chunks into FAISS vector store\n",
+ "\n",
+ "response = client.vector_io.insert(vector_db_id=\"acme_docs\", chunks=chunks)\n",
+ "print(\"Documents inserted:\", response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9061tmi1zpq",
+ "metadata": {},
+ "source": [
+ "#### Test Vector Search\n",
+ "\n",
+ "Query the vector database to verify it's working correctly. This performs semantic search to find relevant documents based on the query."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "4a5e010c-eeeb-4020-a957-74d6d1cba342",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-io/query \"HTTP/1.1 200 OK\"\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "metadata : {'document_id': 'Shipping Policy'}\n",
+ "content : Acme ships globally in 3–5 business days.\n",
+ "metadata : {'document_id': 'Shipping Policy'}\n",
+ "content : Acme ships globally in 3–5 business days.\n",
+ "metadata : {'document_id': 'Returns Policy'}\n",
+ "content : Returns are accepted within 30 days of purchase.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Query chunks from FAISS vector store\n",
+ "\n",
+ "query_chunk_response = client.vector_io.query(\n",
+ " vector_db_id=\"acme_docs\",\n",
+ " query=\"How long does Acme take to ship orders?\",\n",
+ ")\n",
+ "for chunk in query_chunk_response.chunks:\n",
+ " print(\"metadata\", \":\", chunk.metadata)\n",
+ " print(\"content\", \":\", chunk.content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "usne6mbspms",
+ "metadata": {},
+ "source": [
+ "### 5. LangChain Integration\n",
+ "\n",
+ "#### Configure LangChain with LlamaStack\n",
+ "\n",
+ "Set up LangChain to use LlamaStack's OpenAI-compatible API:\n",
+ "\n",
+ "- **Base URL**: Points to LlamaStack's OpenAI endpoint\n",
+ "- **Headers**: Include Together AI API key for model access\n",
+ "- **Model**: Use Meta Llama 3.1 8B model via Together AI"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "c378bd10-09c2-417c-bdfc-1e0a2dd19084",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "from langchain_openai import ChatOpenAI\n",
+ "\n",
+ "# Point LangChain to Llamastack Server\n",
+ "os.environ[\"OPENAI_API_KEY\"] = \"dummy\"\n",
+ "os.environ[\"OPENAI_BASE_URL\"] = \"http://0.0.0.0:8321/v1/openai/v1\"\n",
+ "\n",
+ "# LLM from Llamastack together model\n",
+ "llm = ChatOpenAI(\n",
+ " model=\"together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\",\n",
+ " default_headers={\"X-LlamaStack-Provider-Data\": '{\"together_api_key\": \"***\"}'},\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5a4ddpcuk3l",
+ "metadata": {},
+ "source": [
+ "#### Test LLM Connection\n",
+ "\n",
+ "Verify that LangChain can successfully communicate with the LlamaStack server."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "f88ffb5a-657b-4916-9375-c6ddc156c25e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "AIMessage(content=\"In the Andes, a gentle soul resides, \\nA llama's soft eyes, with kindness abide.\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 50, 'total_tokens': 72, 'completion_tokens_details': None, 'prompt_tokens_details': None, 'cached_tokens': 0}, 'model_name': 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo', 'system_fingerprint': None, 'id': 'o86Jy3i-2j9zxn-972d7b27f8f22aaa', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--4797f8b9-a5f6-4730-aece-80c1fd88ac55-0', usage_metadata={'input_tokens': 50, 'output_tokens': 22, 'total_tokens': 72, 'input_token_details': {}, 'output_token_details': {}})"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Test llm with simple message\n",
+ "messages = [\n",
+ " {\"role\": \"system\", \"content\": \"You are a friendly assistant.\"},\n",
+ " {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"},\n",
+ "]\n",
+ "llm.invoke(messages)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0xh0jg6a0l4a",
+ "metadata": {},
+ "source": [
+ "### 6. Building the RAG Chain\n",
+ "\n",
+ "#### Create a Complete RAG Pipeline\n",
+ "\n",
+ "Build a LangChain pipeline that combines:\n",
+ "\n",
+ "1. **Vector Search**: Query LlamaStack's vector database\n",
+ "2. **Context Assembly**: Format retrieved documents\n",
+ "3. **Prompt Template**: Structure the input for the LLM\n",
+ "4. **LLM Generation**: Generate answers using context\n",
+ "5. **Output Parsing**: Extract the final response\n",
+ "\n",
+ "**Chain Flow**: `Query → Vector Search → Context + Question → LLM → Response`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9684427d-dcc7-4544-9af5-8b110d014c42",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# LangChain for prompt template and chaining + LLAMA Stack Client Vector DB and LLM chat completion\n",
+ "from langchain_core.output_parsers import StrOutputParser\n",
+ "from langchain_core.prompts import ChatPromptTemplate\n",
+ "from langchain_core.runnables import RunnableLambda, RunnablePassthrough\n",
+ "\n",
+ "\n",
+ "def join_docs(docs):\n",
+ " return \"\\n\\n\".join([f\"[{d.metadata.get('document_id')}] {d.content}\" for d in docs.chunks])\n",
+ "\n",
+ "\n",
+ "PROMPT = ChatPromptTemplate.from_messages(\n",
+ " [\n",
+ " (\"system\", \"You are a helpful assistant. Use the following context to answer.\"),\n",
+ " (\"user\", \"Question: {question}\\n\\nContext:\\n{context}\"),\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "vector_step = RunnableLambda(\n",
+ " lambda x: client.vector_io.query(\n",
+ " vector_db_id=\"acme_docs\",\n",
+ " query=x,\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "chain = (\n",
+ " {\"context\": vector_step | RunnableLambda(join_docs), \"question\": RunnablePassthrough()}\n",
+ " | PROMPT\n",
+ " | llm\n",
+ " | StrOutputParser()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0onu6rhphlra",
+ "metadata": {},
+ "source": [
+ "### 7. Testing the RAG System\n",
+ "\n",
+ "#### Example 1: Shipping Query"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "03322188-9509-446a-a4a8-ce3bb83ec87c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-io/query \"HTTP/1.1 200 OK\"\n",
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "❓ How long does shipping take?\n",
+ "💡 According to the Shipping Policy, shipping from Acme takes 3-5 business days.\n"
+ ]
+ }
+ ],
+ "source": [
+ "query = \"How long does shipping take?\"\n",
+ "response = chain.invoke(query)\n",
+ "print(\"❓\", query)\n",
+ "print(\"💡\", response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b7krhqj88ku",
+ "metadata": {},
+ "source": [
+ "#### Example 2: Returns Policy Query"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "61995550-bb0b-46a8-a5d0-023207475d60",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-io/query \"HTTP/1.1 200 OK\"\n",
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "❓ Can I return a product after 40 days?\n",
+ "💡 Based on the provided returns policy, it appears that returns are only accepted within 30 days of purchase. Since you're asking about returning a product after 40 days, it would not be within the specified 30-day return window.\n",
+ "\n",
+ "Unfortunately, it seems that you would not be eligible for a return in this case. However, I would recommend reaching out to the support team via chat or email to confirm their policy and see if there are any exceptions or alternative solutions available.\n"
+ ]
+ }
+ ],
+ "source": [
+ "query = \"Can I return a product after 40 days?\"\n",
+ "response = chain.invoke(query)\n",
+ "print(\"❓\", query)\n",
+ "print(\"💡\", response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "h4w24fadvjs",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "We have successfully built a RAG system that combines:\n",
+ "\n",
+ "- **LlamaStack** for infrastructure (LLM serving + vector database)\n",
+ "- **LangChain** for orchestration (prompts + chains)\n",
+ "- **Together AI** for high-quality language models\n",
+ "\n",
+ "### Key Benefits\n",
+ "\n",
+ "1. **Unified Infrastructure**: Single server for LLMs and vector databases\n",
+ "2. **OpenAI Compatibility**: Easy integration with existing LangChain code\n",
+ "3. **Multi-Provider Support**: Switch between different LLM providers\n",
+ "4. **Production Ready**: Built-in safety shields and monitoring\n",
+ "\n",
+ "### Next Steps\n",
+ "\n",
+ "- Add more sophisticated document processing\n",
+ "- Implement conversation memory\n",
+ "- Add safety filtering and monitoring\n",
+ "- Scale to larger document collections\n",
+ "- Integrate with web frameworks like FastAPI or Streamlit\n",
+ "\n",
+ "---\n",
+ "\n",
+ "##### 🔧 Cleanup\n",
+ "\n",
+ "Don't forget to stop the LlamaStack server when you're done:\n",
+ "\n",
+ "```python\n",
+ "kill_llama_stack_server()\n",
+ "```"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
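The notebook installs `fastapi` and `uvicorn` in its first cell but stops short of the web-framework integration it lists under Next Steps. Below is a minimal sketch of that step, reusing the `chain` object assembled above; the module name, import path, route, and request model are illustrative assumptions, not part of the notebook or this patch.

```python
# serve_rag.py -- hypothetical wrapper around the notebook's RAG chain
from fastapi import FastAPI
from pydantic import BaseModel

# Assumption: the notebook's chain-building cell has been factored into rag_chain.py
from rag_chain import chain

app = FastAPI(title="Acme policy RAG")


class AskRequest(BaseModel):
    question: str


@app.post("/ask")
def ask(req: AskRequest) -> dict[str, str]:
    # chain.invoke() runs: vector search -> context assembly -> prompt -> LLM -> parsed string
    answer = chain.invoke(req.question)
    return {"question": req.question, "answer": answer}


# Run with: uvicorn serve_rag:app --port 8000
```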
From 00bd9a61ed6d67c728dfe9cfcdf9b592ec1be7fb Mon Sep 17 00:00:00 2001
From: Matthew Farrellee
Date: Tue, 26 Aug 2025 15:58:44 -0400
Subject: [PATCH 004/124] revert: Add example notebook for Langchain +
 LLAMAStack integration (#3228) (#3259)
---
.../langchain/Llama_Stack_LangChain.ipynb | 946 ------------------
1 file changed, 946 deletions(-)
delete mode 100644 docs/notebooks/langchain/Llama_Stack_LangChain.ipynb
diff --git a/docs/notebooks/langchain/Llama_Stack_LangChain.ipynb b/docs/notebooks/langchain/Llama_Stack_LangChain.ipynb
deleted file mode 100644
index ed918ff50..000000000
--- a/docs/notebooks/langchain/Llama_Stack_LangChain.ipynb
+++ /dev/null
@@ -1,946 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "1ztegmwm4sp",
- "metadata": {},
- "source": [
- "## LlamaStack + LangChain Integration Tutorial\n",
- "\n",
- "This notebook demonstrates how to integrate **LlamaStack** with **LangChain** to build a complete RAG (Retrieval-Augmented Generation) system.\n",
- "\n",
- "### Overview\n",
- "\n",
- "- **LlamaStack**: Provides the infrastructure for running LLMs and vector databases\n",
- "- **LangChain**: Provides the framework for chaining operations and prompt templates\n",
- "- **Integration**: Uses LlamaStack's OpenAI-compatible API with LangChain\n",
- "\n",
- "### What You'll See\n",
- "\n",
- "1. Setting up LlamaStack server with Together AI provider\n",
- "2. Creating and managing vector databases\n",
- "3. Building RAG chains with LangChain + LLAMAStack\n",
- "4. Querying the chain for relevant information\n",
- "\n",
- "### Prerequisites\n",
- "\n",
- "- Together AI API key\n",
- "\n",
- "---\n",
- "\n",
- "### 1. Installation and Setup"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "2ktr5ls2cas",
- "metadata": {},
- "source": [
- "#### Install Required Dependencies\n",
- "\n",
- "First, we install all the necessary packages for LangChain and FastAPI integration."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "id": "5b6a6a17-b931-4bea-8273-0d6e5563637a",
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: fastapi in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.115.14)\n",
- "Requirement already satisfied: uvicorn in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.29.0)\n",
- "Requirement already satisfied: langchain>=0.2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.3.27)\n",
- "Requirement already satisfied: langchain-openai in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.3.30)\n",
- "Requirement already satisfied: langchain-community in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.3.27)\n",
- "Requirement already satisfied: langchain-text-splitters in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.3.9)\n",
- "Requirement already satisfied: faiss-cpu in /Users/swapna942/miniconda3/lib/python3.12/site-packages (1.11.0)\n",
- "Requirement already satisfied: starlette<0.47.0,>=0.40.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from fastapi) (0.46.2)\n",
- "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from fastapi) (2.11.7)\n",
- "Requirement already satisfied: typing-extensions>=4.8.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from fastapi) (4.14.1)\n",
- "Requirement already satisfied: annotated-types>=0.6.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (0.7.0)\n",
- "Requirement already satisfied: pydantic-core==2.33.2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (2.33.2)\n",
- "Requirement already satisfied: typing-inspection>=0.4.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (0.4.1)\n",
- "Requirement already satisfied: anyio<5,>=3.6.2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from starlette<0.47.0,>=0.40.0->fastapi) (4.10.0)\n",
- "Requirement already satisfied: idna>=2.8 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from anyio<5,>=3.6.2->starlette<0.47.0,>=0.40.0->fastapi) (3.10)\n",
- "Requirement already satisfied: sniffio>=1.1 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from anyio<5,>=3.6.2->starlette<0.47.0,>=0.40.0->fastapi) (1.3.1)\n",
- "Requirement already satisfied: click>=7.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from uvicorn) (8.2.1)\n",
- "Requirement already satisfied: h11>=0.8 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from uvicorn) (0.16.0)\n",
- "Requirement already satisfied: langchain-core<1.0.0,>=0.3.72 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain>=0.2) (0.3.74)\n",
- "Requirement already satisfied: langsmith>=0.1.17 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain>=0.2) (0.4.14)\n",
- "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain>=0.2) (2.0.41)\n",
- "Requirement already satisfied: requests<3,>=2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain>=0.2) (2.32.4)\n",
- "Requirement already satisfied: PyYAML>=5.3 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain>=0.2) (6.0.2)\n",
- "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-core<1.0.0,>=0.3.72->langchain>=0.2) (9.1.2)\n",
- "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-core<1.0.0,>=0.3.72->langchain>=0.2) (1.33)\n",
- "Requirement already satisfied: packaging>=23.2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-core<1.0.0,>=0.3.72->langchain>=0.2) (24.2)\n",
- "Requirement already satisfied: jsonpointer>=1.9 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<1.0.0,>=0.3.72->langchain>=0.2) (2.1)\n",
- "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from requests<3,>=2->langchain>=0.2) (3.3.2)\n",
- "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from requests<3,>=2->langchain>=0.2) (2.5.0)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from requests<3,>=2->langchain>=0.2) (2025.8.3)\n",
- "Requirement already satisfied: openai<2.0.0,>=1.99.9 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-openai) (1.100.2)\n",
- "Requirement already satisfied: tiktoken<1,>=0.7 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-openai) (0.9.0)\n",
- "Requirement already satisfied: distro<2,>=1.7.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from openai<2.0.0,>=1.99.9->langchain-openai) (1.9.0)\n",
- "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from openai<2.0.0,>=1.99.9->langchain-openai) (0.28.1)\n",
- "Requirement already satisfied: jiter<1,>=0.4.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from openai<2.0.0,>=1.99.9->langchain-openai) (0.10.0)\n",
- "Requirement already satisfied: tqdm>4 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from openai<2.0.0,>=1.99.9->langchain-openai) (4.67.1)\n",
- "Requirement already satisfied: httpcore==1.* in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.99.9->langchain-openai) (1.0.9)\n",
- "Requirement already satisfied: regex>=2022.1.18 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from tiktoken<1,>=0.7->langchain-openai) (2024.11.6)\n",
- "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-community) (3.12.13)\n",
- "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-community) (0.6.7)\n",
- "Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-community) (2.10.1)\n",
- "Requirement already satisfied: httpx-sse<1.0.0,>=0.4.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-community) (0.4.1)\n",
- "Requirement already satisfied: numpy>=1.26.2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langchain-community) (2.3.1)\n",
- "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (2.6.1)\n",
- "Requirement already satisfied: aiosignal>=1.1.2 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.4.0)\n",
- "Requirement already satisfied: attrs>=17.3.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (25.3.0)\n",
- "Requirement already satisfied: frozenlist>=1.1.1 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.7.0)\n",
- "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (6.6.3)\n",
- "Requirement already satisfied: propcache>=0.2.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (0.3.2)\n",
- "Requirement already satisfied: yarl<2.0,>=1.17.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.20.1)\n",
- "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (3.26.1)\n",
- "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (0.9.0)\n",
- "Requirement already satisfied: python-dotenv>=0.21.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from pydantic-settings<3.0.0,>=2.4.0->langchain-community) (1.1.1)\n",
- "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community) (1.1.0)\n",
- "Requirement already satisfied: orjson>=3.9.14 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langsmith>=0.1.17->langchain>=0.2) (3.10.18)\n",
- "Requirement already satisfied: requests-toolbelt>=1.0.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langsmith>=0.1.17->langchain>=0.2) (1.0.0)\n",
- "Requirement already satisfied: zstandard>=0.23.0 in /Users/swapna942/miniconda3/lib/python3.12/site-packages (from langsmith>=0.1.17->langchain>=0.2) (0.23.0)\n"
- ]
- }
- ],
- "source": [
- "!pip install fastapi uvicorn \"langchain>=0.2\" langchain-openai \\\n",
- " langchain-community langchain-text-splitters \\\n",
- " faiss-cpu"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "wmt9jvqzh7n",
- "metadata": {},
- "source": [
- "### 2. LlamaStack Server Setup\n",
- "\n",
- "#### Build and Start LlamaStack Server\n",
- "\n",
- "This section sets up the LlamaStack server with:\n",
- "- **Together AI** as the inference provider\n",
- "- **FAISS** as the vector database\n",
- "- **Sentence Transformers** for embeddings\n",
- "\n",
- "The server runs on `localhost:8321` and provides OpenAI-compatible endpoints."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "dd2dacf3-ec8b-4cc7-8ff4-b5b6ea4a6e9e",
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: uv in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.7.20)\n",
- "Environment '/Users/swapna942/llama-stack/.venv' already exists, re-using it.\n",
- "Virtual environment /Users/swapna942/llama-stack/.venv is already active\n",
- "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 86ms\u001b[0m\u001b[0m\n",
- "Installing pip dependencies\n",
- "\u001b[2K\u001b[2mResolved \u001b[1m178 packages\u001b[0m \u001b[2min 462ms\u001b[0m\u001b[0m \u001b[0m\n",
- "\u001b[2mUninstalled \u001b[1m2 packages\u001b[0m \u001b[2min 28ms\u001b[0m\u001b[0m\n",
- "\u001b[2K\u001b[2mInstalled \u001b[1m2 packages\u001b[0m \u001b[2min 5ms\u001b[0m\u001b[0m \u001b[0m\n",
- " \u001b[31m-\u001b[39m \u001b[1mprotobuf\u001b[0m\u001b[2m==5.29.5\u001b[0m\n",
- " \u001b[32m+\u001b[39m \u001b[1mprotobuf\u001b[0m\u001b[2m==5.29.4\u001b[0m\n",
- " \u001b[31m-\u001b[39m \u001b[1mruff\u001b[0m\u001b[2m==0.12.5\u001b[0m\n",
- " \u001b[32m+\u001b[39m \u001b[1mruff\u001b[0m\u001b[2m==0.9.10\u001b[0m\n",
- "Installing special provider module: torch torchvision --index-url https://download.pytorch.org/whl/cpu\n",
- "\u001b[2mAudited \u001b[1m2 packages\u001b[0m \u001b[2min 5ms\u001b[0m\u001b[0m\n",
- "Installing special provider module: sentence-transformers --no-deps\n",
- "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 9ms\u001b[0m\u001b[0m\n",
- "\u001b[32mBuild Successful!\u001b[0m\n",
- "\u001b[34mYou can find the newly-built distribution here: /Users/swapna942/.llama/distributions/starter/starter-run.yaml\u001b[0m\n",
- "\u001b[32mYou can run the new Llama Stack distro via: \u001b[34mllama stack run /Users/swapna942/.llama/distributions/starter/starter-run.yaml --image-type venv\u001b[0m\u001b[0m\n"
- ]
- }
- ],
- "source": [
- "import os\n",
- "import subprocess\n",
- "import time\n",
- "\n",
- "!pip install uv\n",
- "\n",
- "if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
- " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
- "\n",
- "# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
- "!uv run --with llama-stack llama stack build --distro starter --image-type venv\n",
- "\n",
- "\n",
- "def run_llama_stack_server_background():\n",
- " log_file = open(\"llama_stack_server.log\", \"w\")\n",
- " process = subprocess.Popen(\n",
- " \"uv run --with llama-stack llama stack run /Users/swapna942/.llama/distributions/starter/starter-run.yaml --image-type venv\",\n",
- " shell=True,\n",
- " stdout=log_file,\n",
- " stderr=log_file,\n",
- " text=True,\n",
- " )\n",
- "\n",
- " print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
- " return process\n",
- "\n",
- "\n",
- "def wait_for_server_to_start():\n",
- " import requests\n",
- " from requests.exceptions import ConnectionError\n",
- "\n",
- " url = \"http://0.0.0.0:8321/v1/health\"\n",
- " max_retries = 30\n",
- " retry_interval = 1\n",
- "\n",
- " print(\"Waiting for server to start\", end=\"\")\n",
- " for _ in range(max_retries):\n",
- " try:\n",
- " response = requests.get(url)\n",
- " if response.status_code == 200:\n",
- " print(\"\\nServer is ready!\")\n",
- " return True\n",
- " except ConnectionError:\n",
- " print(\".\", end=\"\", flush=True)\n",
- " time.sleep(retry_interval)\n",
- "\n",
- " print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
- " return False\n",
- "\n",
- "\n",
- "# use this helper if needed to kill the server\n",
- "def kill_llama_stack_server():\n",
- " # Kill any existing llama stack server processes\n",
- " os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "28bd8dbd-4576-4e76-813f-21ab94db44a2",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Starting Llama Stack server with PID: 99016\n",
- "Waiting for server to start....\n",
- "Server is ready!\n"
- ]
- }
- ],
- "source": [
- "server_process = run_llama_stack_server_background()\n",
- "assert wait_for_server_to_start()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "gr9cdcg4r7n",
- "metadata": {},
- "source": [
- "#### Install LlamaStack Client\n",
- "\n",
- "Install the client library to interact with the LlamaStack server."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "487d2dbc-d071-400e-b4f0-dcee58f8dc95",
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: llama_stack_client in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (0.2.17)\n",
- "Requirement already satisfied: anyio<5,>=3.5.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (4.9.0)\n",
- "Requirement already satisfied: click in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (8.2.1)\n",
- "Requirement already satisfied: distro<2,>=1.7.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (1.9.0)\n",
- "Requirement already satisfied: fire in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (0.7.0)\n",
- "Requirement already satisfied: httpx<1,>=0.23.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (0.28.1)\n",
- "Requirement already satisfied: pandas in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (2.3.1)\n",
- "Requirement already satisfied: prompt-toolkit in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (3.0.51)\n",
- "Requirement already satisfied: pyaml in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (25.7.0)\n",
- "Requirement already satisfied: pydantic<3,>=1.9.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (2.11.7)\n",
- "Requirement already satisfied: requests in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (2.32.4)\n",
- "Requirement already satisfied: rich in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (14.1.0)\n",
- "Requirement already satisfied: sniffio in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (1.3.1)\n",
- "Requirement already satisfied: termcolor in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (3.1.0)\n",
- "Requirement already satisfied: tqdm in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (4.67.1)\n",
- "Requirement already satisfied: typing-extensions<5,>=4.7 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from llama_stack_client) (4.14.1)\n",
- "Requirement already satisfied: idna>=2.8 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from anyio<5,>=3.5.0->llama_stack_client) (3.10)\n",
- "Requirement already satisfied: certifi in /opt/homebrew/opt/certifi/lib/python3.13/site-packages (from httpx<1,>=0.23.0->llama_stack_client) (2025.8.3)\n",
- "Requirement already satisfied: httpcore==1.* in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from httpx<1,>=0.23.0->llama_stack_client) (1.0.9)\n",
- "Requirement already satisfied: h11>=0.16 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama_stack_client) (0.16.0)\n",
- "Requirement already satisfied: annotated-types>=0.6.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pydantic<3,>=1.9.0->llama_stack_client) (0.7.0)\n",
- "Requirement already satisfied: pydantic-core==2.33.2 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pydantic<3,>=1.9.0->llama_stack_client) (2.33.2)\n",
- "Requirement already satisfied: typing-inspection>=0.4.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pydantic<3,>=1.9.0->llama_stack_client) (0.4.1)\n",
- "Requirement already satisfied: numpy>=1.26.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pandas->llama_stack_client) (2.3.2)\n",
- "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pandas->llama_stack_client) (2.9.0.post0)\n",
- "Requirement already satisfied: pytz>=2020.1 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pandas->llama_stack_client) (2025.2)\n",
- "Requirement already satisfied: tzdata>=2022.7 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pandas->llama_stack_client) (2025.2)\n",
- "Requirement already satisfied: six>=1.5 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from python-dateutil>=2.8.2->pandas->llama_stack_client) (1.17.0)\n",
- "Requirement already satisfied: wcwidth in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from prompt-toolkit->llama_stack_client) (0.2.13)\n",
- "Requirement already satisfied: PyYAML in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from pyaml->llama_stack_client) (6.0.2)\n",
- "Requirement already satisfied: charset_normalizer<4,>=2 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from requests->llama_stack_client) (3.4.2)\n",
- "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from requests->llama_stack_client) (2.5.0)\n",
- "Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from rich->llama_stack_client) (4.0.0)\n",
- "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from rich->llama_stack_client) (2.19.2)\n",
- "Requirement already satisfied: mdurl~=0.1 in /opt/homebrew/Cellar/jupyterlab/4.4.5/libexec/lib/python3.13/site-packages (from markdown-it-py>=2.2.0->rich->llama_stack_client) (0.1.2)\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "0"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import sys\n",
- "\n",
- "# Install directly to the current Python environment\n",
- "subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"llama_stack_client\"])"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "0j5hag7l9x89",
- "metadata": {},
- "source": [
- "### 3. Initialize LlamaStack Client\n",
- "\n",
- "Create a client connection to the LlamaStack server with API keys for different providers:\n",
- "\n",
- "- **OpenAI API Key**: For OpenAI models\n",
- "- **Gemini API Key**: For Google's Gemini models \n",
- "- **Together API Key**: For Together AI models\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "ab4eff97-4565-4c73-b1b3-0020a4c7e2a5",
- "metadata": {},
- "outputs": [],
- "source": [
- "from llama_stack_client import LlamaStackClient\n",
- "\n",
- "client = LlamaStackClient(\n",
- " base_url=\"http://0.0.0.0:8321\",\n",
- " provider_data={\"openai_api_key\": \"****\", \"gemini_api_key\": \"****\", \"together_api_key\": \"****\"},\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "vwhexjy1e8o",
- "metadata": {},
- "source": [
- "#### Explore Available Models and Safety Features\n",
- "\n",
- "Check what models and safety shields are available through your LlamaStack instance."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "id": "880443ef-ac3c-48b1-a80a-7dab5b25ac61",
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/models \"HTTP/1.1 200 OK\"\n",
- "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/shields \"HTTP/1.1 200 OK\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Available models:\n",
- "- all-minilm\n",
- "- ollama/all-minilm:l6-v2\n",
- "- ollama/llama-guard3:1b\n",
- "- ollama/llama-guard3:8b\n",
- "- ollama/llama3.2:3b-instruct-fp16\n",
- "- ollama/nomic-embed-text\n",
- "- fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct\n",
- "- fireworks/accounts/fireworks/models/llama-v3p1-70b-instruct\n",
- "- fireworks/accounts/fireworks/models/llama-v3p1-405b-instruct\n",
- "- fireworks/accounts/fireworks/models/llama-v3p2-3b-instruct\n",
- "- fireworks/accounts/fireworks/models/llama-v3p2-11b-vision-instruct\n",
- "- fireworks/accounts/fireworks/models/llama-v3p2-90b-vision-instruct\n",
- "- fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct\n",
- "- fireworks/accounts/fireworks/models/llama4-scout-instruct-basic\n",
- "- fireworks/accounts/fireworks/models/llama4-maverick-instruct-basic\n",
- "- fireworks/nomic-ai/nomic-embed-text-v1.5\n",
- "- fireworks/accounts/fireworks/models/llama-guard-3-8b\n",
- "- fireworks/accounts/fireworks/models/llama-guard-3-11b-vision\n",
- "- together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\n",
- "- together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\n",
- "- together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\n",
- "- together/meta-llama/Llama-3.2-3B-Instruct-Turbo\n",
- "- together/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo\n",
- "- together/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\n",
- "- together/meta-llama/Llama-3.3-70B-Instruct-Turbo\n",
- "- together/togethercomputer/m2-bert-80M-8k-retrieval\n",
- "- together/togethercomputer/m2-bert-80M-32k-retrieval\n",
- "- together/meta-llama/Llama-4-Scout-17B-16E-Instruct\n",
- "- together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n",
- "- together/meta-llama/Llama-Guard-3-8B\n",
- "- together/meta-llama/Llama-Guard-3-11B-Vision-Turbo\n",
- "- bedrock/meta.llama3-1-8b-instruct-v1:0\n",
- "- bedrock/meta.llama3-1-70b-instruct-v1:0\n",
- "- bedrock/meta.llama3-1-405b-instruct-v1:0\n",
- "- openai/gpt-3.5-turbo-0125\n",
- "- openai/gpt-3.5-turbo\n",
- "- openai/gpt-3.5-turbo-instruct\n",
- "- openai/gpt-4\n",
- "- openai/gpt-4-turbo\n",
- "- openai/gpt-4o\n",
- "- openai/gpt-4o-2024-08-06\n",
- "- openai/gpt-4o-mini\n",
- "- openai/gpt-4o-audio-preview\n",
- "- openai/chatgpt-4o-latest\n",
- "- openai/o1\n",
- "- openai/o1-mini\n",
- "- openai/o3-mini\n",
- "- openai/o4-mini\n",
- "- openai/text-embedding-3-small\n",
- "- openai/text-embedding-3-large\n",
- "- anthropic/claude-3-5-sonnet-latest\n",
- "- anthropic/claude-3-7-sonnet-latest\n",
- "- anthropic/claude-3-5-haiku-latest\n",
- "- anthropic/voyage-3\n",
- "- anthropic/voyage-3-lite\n",
- "- anthropic/voyage-code-3\n",
- "- gemini/gemini-1.5-flash\n",
- "- gemini/gemini-1.5-pro\n",
- "- gemini/gemini-2.0-flash\n",
- "- gemini/gemini-2.0-flash-lite\n",
- "- gemini/gemini-2.5-flash\n",
- "- gemini/gemini-2.5-flash-lite\n",
- "- gemini/gemini-2.5-pro\n",
- "- gemini/text-embedding-004\n",
- "- groq/llama3-8b-8192\n",
- "- groq/llama-3.1-8b-instant\n",
- "- groq/llama3-70b-8192\n",
- "- groq/llama-3.3-70b-versatile\n",
- "- groq/llama-3.2-3b-preview\n",
- "- groq/meta-llama/llama-4-scout-17b-16e-instruct\n",
- "- groq/meta-llama/llama-4-maverick-17b-128e-instruct\n",
- "- sambanova/Meta-Llama-3.1-8B-Instruct\n",
- "- sambanova/Meta-Llama-3.3-70B-Instruct\n",
- "- sambanova/Llama-4-Maverick-17B-128E-Instruct\n",
- "- sentence-transformers/all-MiniLM-L6-v2\n",
- "----\n",
- "Available shields (safety models):\n",
- "code-scanner\n",
- "llama-guard\n",
- "----\n"
- ]
- }
- ],
- "source": [
- "print(\"Available models:\")\n",
- "for m in client.models.list():\n",
- " print(f\"- {m.identifier}\")\n",
- "\n",
- "print(\"----\")\n",
- "print(\"Available shields (safety models):\")\n",
- "for s in client.shields.list():\n",
- " print(s.identifier)\n",
- "print(\"----\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "gojp7at31ht",
- "metadata": {},
- "source": [
- "### 4. Vector Database Setup\n",
- "\n",
- "#### Register a Vector Database\n",
- "\n",
- "Create a FAISS vector database for storing document embeddings:\n",
- "\n",
- "- **Vector DB ID**: Unique identifier for the database\n",
- "- **Provider**: FAISS (Facebook AI Similarity Search)\n",
- "- **Embedding Model**: Sentence Transformers model for text embeddings\n",
- "- **Dimensions**: 384-dimensional embeddings"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "a16e2885-ae70-4fa6-9778-2433fa4dbfff",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-dbs \"HTTP/1.1 200 OK\"\n",
- "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/vector-dbs \"HTTP/1.1 200 OK\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Registered new vector DB: VectorDBRegisterResponse(embedding_dimension=384, embedding_model='sentence-transformers/all-MiniLM-L6-v2', identifier='acme_docs', provider_id='faiss', type='vector_db', provider_resource_id='acme_docs_v2', owner=None, source='via_register_api', vector_db_name=None)\n",
- "Existing vector DBs: [VectorDBListResponseItem(embedding_dimension=384, embedding_model='sentence-transformers/all-MiniLM-L6-v2', identifier='acme_docs', provider_id='faiss', type='vector_db', provider_resource_id='acme_docs_v2', vector_db_name=None)]\n"
- ]
- }
- ],
- "source": [
- "# Register a new clean vector database\n",
- "vector_db = client.vector_dbs.register(\n",
- " vector_db_id=\"acme_docs\", # Use a new unique name\n",
- " provider_id=\"faiss\",\n",
- " provider_vector_db_id=\"acme_docs_v2\",\n",
- " embedding_model=\"sentence-transformers/all-MiniLM-L6-v2\",\n",
- " embedding_dimension=384,\n",
- ")\n",
- "print(\"Registered new vector DB:\", vector_db)\n",
- "\n",
- "# List all registered vector databases\n",
- "dbs = client.vector_dbs.list()\n",
- "print(\"Existing vector DBs:\", dbs)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "pcgjqzfr3eo",
- "metadata": {},
- "source": [
- "#### Prepare Sample Documents\n",
- "\n",
- "Create LLAMA Stack Chunks for FAISS vector store"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "5a0a6619-c9fb-4938-8ff3-f84304eed91e",
- "metadata": {},
- "outputs": [],
- "source": [
- "from llama_stack_client.types.vector_io_insert_params import Chunk\n",
- "\n",
- "docs = [\n",
- " (\"Acme ships globally in 3-5 business days.\", {\"title\": \"Shipping Policy\"}),\n",
- " (\"Returns are accepted within 30 days of purchase.\", {\"title\": \"Returns Policy\"}),\n",
- " (\"Support is available 24/7 via chat and email.\", {\"title\": \"Support\"}),\n",
- "]\n",
- "\n",
- "# Convert to Chunk objects\n",
- "chunks = []\n",
- "for _, (content, metadata) in enumerate(docs):\n",
- " # Transform metadata to required format with document_id from title\n",
- " metadata = {\"document_id\": metadata[\"title\"]}\n",
- " chunk = Chunk(\n",
- " content=content, # Required[InterleavedContent]\n",
- " metadata=metadata, # Required[Dict]\n",
- " )\n",
- " chunks.append(chunk)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "6bg3sm2ko5g",
- "metadata": {},
- "source": [
- "#### Insert Documents into Vector Database\n",
- "\n",
- "Store the prepared documents in the FAISS vector database. This process:\n",
- "1. Generates embeddings for each document\n",
- "2. Stores embeddings with metadata\n",
- "3. Enables semantic search capabilities"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "0e8740d8-b809-44b9-915f-1e0200e3c3f1",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-io/insert \"HTTP/1.1 200 OK\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Documents inserted: None\n"
- ]
- }
- ],
- "source": [
- "# Insert chunks into FAISS vector store\n",
- "\n",
- "response = client.vector_io.insert(vector_db_id=\"acme_docs\", chunks=chunks)\n",
- "print(\"Documents inserted:\", response)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "9061tmi1zpq",
- "metadata": {},
- "source": [
- "#### Test Vector Search\n",
- "\n",
- "Query the vector database to verify it's working correctly. This performs semantic search to find relevant documents based on the query."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "id": "4a5e010c-eeeb-4020-a957-74d6d1cba342",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-io/query \"HTTP/1.1 200 OK\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "metadata : {'document_id': 'Shipping Policy'}\n",
- "content : Acme ships globally in 3–5 business days.\n",
- "metadata : {'document_id': 'Shipping Policy'}\n",
- "content : Acme ships globally in 3–5 business days.\n",
- "metadata : {'document_id': 'Returns Policy'}\n",
- "content : Returns are accepted within 30 days of purchase.\n"
- ]
- }
- ],
- "source": [
- "# Query chunks from FAISS vector store\n",
- "\n",
- "query_chunk_response = client.vector_io.query(\n",
- " vector_db_id=\"acme_docs\",\n",
- " query=\"How long does Acme take to ship orders?\",\n",
- ")\n",
- "for chunk in query_chunk_response.chunks:\n",
- " print(\"metadata\", \":\", chunk.metadata)\n",
- " print(\"content\", \":\", chunk.content)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "usne6mbspms",
- "metadata": {},
- "source": [
- "### 5. LangChain Integration\n",
- "\n",
- "#### Configure LangChain with LlamaStack\n",
- "\n",
- "Set up LangChain to use LlamaStack's OpenAI-compatible API:\n",
- "\n",
- "- **Base URL**: Points to LlamaStack's OpenAI endpoint\n",
- "- **Headers**: Include Together AI API key for model access\n",
- "- **Model**: Use Meta Llama 3.1 8B model via Together AI"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "c378bd10-09c2-417c-bdfc-1e0a2dd19084",
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "\n",
- "from langchain_openai import ChatOpenAI\n",
- "\n",
- "# Point LangChain to Llamastack Server\n",
- "os.environ[\"OPENAI_API_KEY\"] = \"dummy\"\n",
- "os.environ[\"OPENAI_BASE_URL\"] = \"http://0.0.0.0:8321/v1/openai/v1\"\n",
- "\n",
- "# LLM from Llamastack together model\n",
- "llm = ChatOpenAI(\n",
- " model=\"together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\",\n",
- " default_headers={\"X-LlamaStack-Provider-Data\": '{\"together_api_key\": \"***\"}'},\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "5a4ddpcuk3l",
- "metadata": {},
- "source": [
- "#### Test LLM Connection\n",
- "\n",
- "Verify that LangChain can successfully communicate with the LlamaStack server."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "id": "f88ffb5a-657b-4916-9375-c6ddc156c25e",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "AIMessage(content=\"In the Andes, a gentle soul resides, \\nA llama's soft eyes, with kindness abide.\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 50, 'total_tokens': 72, 'completion_tokens_details': None, 'prompt_tokens_details': None, 'cached_tokens': 0}, 'model_name': 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo', 'system_fingerprint': None, 'id': 'o86Jy3i-2j9zxn-972d7b27f8f22aaa', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--4797f8b9-a5f6-4730-aece-80c1fd88ac55-0', usage_metadata={'input_tokens': 50, 'output_tokens': 22, 'total_tokens': 72, 'input_token_details': {}, 'output_token_details': {}})"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Test llm with simple message\n",
- "messages = [\n",
- " {\"role\": \"system\", \"content\": \"You are a friendly assistant.\"},\n",
- " {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"},\n",
- "]\n",
- "llm.invoke(messages)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "0xh0jg6a0l4a",
- "metadata": {},
- "source": [
- "### 6. Building the RAG Chain\n",
- "\n",
- "#### Create a Complete RAG Pipeline\n",
- "\n",
- "Build a LangChain pipeline that combines:\n",
- "\n",
- "1. **Vector Search**: Query LlamaStack's vector database\n",
- "2. **Context Assembly**: Format retrieved documents\n",
- "3. **Prompt Template**: Structure the input for the LLM\n",
- "4. **LLM Generation**: Generate answers using context\n",
- "5. **Output Parsing**: Extract the final response\n",
- "\n",
- "**Chain Flow**: `Query → Vector Search → Context + Question → LLM → Response`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "9684427d-dcc7-4544-9af5-8b110d014c42",
- "metadata": {},
- "outputs": [],
- "source": [
- "# LangChain for prompt template and chaining + LLAMA Stack Client Vector DB and LLM chat completion\n",
- "from langchain_core.output_parsers import StrOutputParser\n",
- "from langchain_core.prompts import ChatPromptTemplate\n",
- "from langchain_core.runnables import RunnableLambda, RunnablePassthrough\n",
- "\n",
- "\n",
- "def join_docs(docs):\n",
- " return \"\\n\\n\".join([f\"[{d.metadata.get('document_id')}] {d.content}\" for d in docs.chunks])\n",
- "\n",
- "\n",
- "PROMPT = ChatPromptTemplate.from_messages(\n",
- " [\n",
- " (\"system\", \"You are a helpful assistant. Use the following context to answer.\"),\n",
- " (\"user\", \"Question: {question}\\n\\nContext:\\n{context}\"),\n",
- " ]\n",
- ")\n",
- "\n",
- "vector_step = RunnableLambda(\n",
- " lambda x: client.vector_io.query(\n",
- " vector_db_id=\"acme_docs\",\n",
- " query=x,\n",
- " )\n",
- ")\n",
- "\n",
- "chain = (\n",
- " {\"context\": vector_step | RunnableLambda(join_docs), \"question\": RunnablePassthrough()}\n",
- " | PROMPT\n",
- " | llm\n",
- " | StrOutputParser()\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "0onu6rhphlra",
- "metadata": {},
- "source": [
- "### 7. Testing the RAG System\n",
- "\n",
- "#### Example 1: Shipping Query"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "id": "03322188-9509-446a-a4a8-ce3bb83ec87c",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-io/query \"HTTP/1.1 200 OK\"\n",
- "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "❓ How long does shipping take?\n",
- "💡 According to the Shipping Policy, shipping from Acme takes 3-5 business days.\n"
- ]
- }
- ],
- "source": [
- "query = \"How long does shipping take?\"\n",
- "response = chain.invoke(query)\n",
- "print(\"❓\", query)\n",
- "print(\"💡\", response)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "b7krhqj88ku",
- "metadata": {},
- "source": [
- "#### Example 2: Returns Policy Query"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "id": "61995550-bb0b-46a8-a5d0-023207475d60",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-io/query \"HTTP/1.1 200 OK\"\n",
- "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "❓ Can I return a product after 40 days?\n",
- "💡 Based on the provided returns policy, it appears that returns are only accepted within 30 days of purchase. Since you're asking about returning a product after 40 days, it would not be within the specified 30-day return window.\n",
- "\n",
- "Unfortunately, it seems that you would not be eligible for a return in this case. However, I would recommend reaching out to the support team via chat or email to confirm their policy and see if there are any exceptions or alternative solutions available.\n"
- ]
- }
- ],
- "source": [
- "query = \"Can I return a product after 40 days?\"\n",
- "response = chain.invoke(query)\n",
- "print(\"❓\", query)\n",
- "print(\"💡\", response)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "h4w24fadvjs",
- "metadata": {},
- "source": [
- "---\n",
- "We have successfully built a RAG system that combines:\n",
- "\n",
- "- **LlamaStack** for infrastructure (LLM serving + vector database)\n",
- "- **LangChain** for orchestration (prompts + chains)\n",
- "- **Together AI** for high-quality language models\n",
- "\n",
- "### Key Benefits\n",
- "\n",
- "1. **Unified Infrastructure**: Single server for LLMs and vector databases\n",
- "2. **OpenAI Compatibility**: Easy integration with existing LangChain code\n",
- "3. **Multi-Provider Support**: Switch between different LLM providers\n",
- "4. **Production Ready**: Built-in safety shields and monitoring\n",
- "\n",
- "### Next Steps\n",
- "\n",
- "- Add more sophisticated document processing\n",
- "- Implement conversation memory\n",
- "- Add safety filtering and monitoring\n",
- "- Scale to larger document collections\n",
- "- Integrate with web frameworks like FastAPI or Streamlit\n",
- "\n",
- "---\n",
- "\n",
- "##### 🔧 Cleanup\n",
- "\n",
- "Don't forget to stop the LlamaStack server when you're done:\n",
- "\n",
- "```python\n",
- "kill_llama_stack_server()\n",
- "```"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.13.5"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
From 9fa69b0337b8a88d2d3324092ffacf454d383188 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Tue, 26 Aug 2025 14:06:36 -0700
Subject: [PATCH 005/124] feat(distro): no huggingface provider for starter
(#3258)
The `trl` dependency pulls in `accelerate`, which in turn pulls in NVIDIA
dependencies for torch. We cannot have that in the starter distro, so the
huggingface provider no longer offers CPU-only post-training; the starter
and ci-tests distributions use `inline::torchtune-cpu` instead.
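For reference, a simplified sketch of the dependency split that the `build.py` hunk below extends (the helper name `split_deps` is illustrative; the real logic lives inside `get_provider_dependencies`):

```python
# Requirement strings that carry pip flags need their own install invocation,
# so they are routed to the "special" list; everything else installs normally.
SPECIAL_FLAGS = ["--no-deps", "--index-url", "--extra-index-url"]


def split_deps(deps: list[str]) -> tuple[list[str], list[str]]:
    normal_deps: list[str] = []
    special_deps: list[str] = []
    for package in deps:
        if any(flag in package for flag in SPECIAL_FLAGS):
            special_deps.append(package)
        else:
            normal_deps.append(package)
    return normal_deps, special_deps


# split_deps(["numpy", "torch torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"])
# -> (["numpy"], ["torch torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"])
```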
---
docs/source/providers/post_training/index.md | 1 -
llama_stack/core/build.py | 2 +-
llama_stack/distributions/ci-tests/build.yaml | 2 +-
llama_stack/distributions/ci-tests/run.yaml | 9 ++--
.../distributions/starter-gpu/build.yaml | 2 +-
.../distributions/starter-gpu/run.yaml | 9 ++--
.../distributions/starter-gpu/starter_gpu.py | 2 +-
llama_stack/distributions/starter/build.yaml | 2 +-
llama_stack/distributions/starter/run.yaml | 9 ++--
llama_stack/distributions/starter/starter.py | 2 +-
llama_stack/providers/registry/inference.py | 3 +-
.../providers/registry/post_training.py | 47 ++++++-------------
12 files changed, 35 insertions(+), 55 deletions(-)
diff --git a/docs/source/providers/post_training/index.md b/docs/source/providers/post_training/index.md
index 5ada6f9aa..e69f2a45a 100644
--- a/docs/source/providers/post_training/index.md
+++ b/docs/source/providers/post_training/index.md
@@ -9,7 +9,6 @@ This section contains documentation for all available providers for the **post_t
```{toctree}
:maxdepth: 1
-inline_huggingface-cpu
inline_huggingface-gpu
inline_torchtune-cpu
inline_torchtune-gpu
diff --git a/llama_stack/core/build.py b/llama_stack/core/build.py
index fa1fe632b..2ceb9e9be 100644
--- a/llama_stack/core/build.py
+++ b/llama_stack/core/build.py
@@ -80,7 +80,7 @@ def get_provider_dependencies(
normal_deps = []
special_deps = []
for package in deps:
- if "--no-deps" in package or "--index-url" in package:
+ if any(f in package for f in ["--no-deps", "--index-url", "--extra-index-url"]):
special_deps.append(package)
else:
normal_deps.append(package)
diff --git a/llama_stack/distributions/ci-tests/build.yaml b/llama_stack/distributions/ci-tests/build.yaml
index b4701cb81..8e6c0bf67 100644
--- a/llama_stack/distributions/ci-tests/build.yaml
+++ b/llama_stack/distributions/ci-tests/build.yaml
@@ -34,7 +34,7 @@ distribution_spec:
telemetry:
- provider_type: inline::meta-reference
post_training:
- - provider_type: inline::huggingface-cpu
+ - provider_type: inline::torchtune-cpu
eval:
- provider_type: inline::meta-reference
datasetio:
diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml
index 3acdd20f9..7523df581 100644
--- a/llama_stack/distributions/ci-tests/run.yaml
+++ b/llama_stack/distributions/ci-tests/run.yaml
@@ -156,13 +156,10 @@ providers:
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
post_training:
- - provider_id: huggingface-cpu
- provider_type: inline::huggingface-cpu
+ - provider_id: torchtune-cpu
+ provider_type: inline::torchtune-cpu
config:
- checkpoint_format: huggingface
- distributed_backend: null
- device: cpu
- dpo_output_dir: ~/.llama/distributions/ci-tests/dpo_output
+ checkpoint_format: meta
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/distributions/starter-gpu/build.yaml b/llama_stack/distributions/starter-gpu/build.yaml
index ae0680cdc..ff7c58e6f 100644
--- a/llama_stack/distributions/starter-gpu/build.yaml
+++ b/llama_stack/distributions/starter-gpu/build.yaml
@@ -35,7 +35,7 @@ distribution_spec:
telemetry:
- provider_type: inline::meta-reference
post_training:
- - provider_type: inline::torchtune-gpu
+ - provider_type: inline::huggingface-gpu
eval:
- provider_type: inline::meta-reference
datasetio:
diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml
index 81c802317..8aed61519 100644
--- a/llama_stack/distributions/starter-gpu/run.yaml
+++ b/llama_stack/distributions/starter-gpu/run.yaml
@@ -156,10 +156,13 @@ providers:
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/trace_store.db
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
post_training:
- - provider_id: torchtune-gpu
- provider_type: inline::torchtune-gpu
+ - provider_id: huggingface-gpu
+ provider_type: inline::huggingface-gpu
config:
- checkpoint_format: meta
+ checkpoint_format: huggingface
+ distributed_backend: null
+ device: cpu
+ dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/distributions/starter-gpu/starter_gpu.py b/llama_stack/distributions/starter-gpu/starter_gpu.py
index 893df6c17..245334749 100644
--- a/llama_stack/distributions/starter-gpu/starter_gpu.py
+++ b/llama_stack/distributions/starter-gpu/starter_gpu.py
@@ -17,6 +17,6 @@ def get_distribution_template() -> DistributionTemplate:
template.description = "Quick start template for running Llama Stack with several popular providers. This distribution is intended for GPU-enabled environments."
template.providers["post_training"] = [
- BuildProvider(provider_type="inline::torchtune-gpu"),
+ BuildProvider(provider_type="inline::huggingface-gpu"),
]
return template
diff --git a/llama_stack/distributions/starter/build.yaml b/llama_stack/distributions/starter/build.yaml
index 3df0eb129..e84e528da 100644
--- a/llama_stack/distributions/starter/build.yaml
+++ b/llama_stack/distributions/starter/build.yaml
@@ -35,7 +35,7 @@ distribution_spec:
telemetry:
- provider_type: inline::meta-reference
post_training:
- - provider_type: inline::huggingface-cpu
+ - provider_type: inline::torchtune-cpu
eval:
- provider_type: inline::meta-reference
datasetio:
diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml
index 7e1d46a61..a3962b8aa 100644
--- a/llama_stack/distributions/starter/run.yaml
+++ b/llama_stack/distributions/starter/run.yaml
@@ -156,13 +156,10 @@ providers:
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
post_training:
- - provider_id: huggingface-cpu
- provider_type: inline::huggingface-cpu
+ - provider_id: torchtune-cpu
+ provider_type: inline::torchtune-cpu
config:
- checkpoint_format: huggingface
- distributed_backend: null
- device: cpu
- dpo_output_dir: ~/.llama/distributions/starter/dpo_output
+ checkpoint_format: meta
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/distributions/starter/starter.py b/llama_stack/distributions/starter/starter.py
index f49da0bb7..a4bbc6371 100644
--- a/llama_stack/distributions/starter/starter.py
+++ b/llama_stack/distributions/starter/starter.py
@@ -120,7 +120,7 @@ def get_distribution_template() -> DistributionTemplate:
],
"agents": [BuildProvider(provider_type="inline::meta-reference")],
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
- "post_training": [BuildProvider(provider_type="inline::huggingface-cpu")],
+ "post_training": [BuildProvider(provider_type="inline::torchtune-cpu")],
"eval": [BuildProvider(provider_type="inline::meta-reference")],
"datasetio": [
BuildProvider(provider_type="remote::huggingface"),
diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py
index 1801cdcad..82b771a28 100644
--- a/llama_stack/providers/registry/inference.py
+++ b/llama_stack/providers/registry/inference.py
@@ -40,8 +40,9 @@ def available_providers() -> list[ProviderSpec]:
InlineProviderSpec(
api=Api.inference,
provider_type="inline::sentence-transformers",
+ # CrossEncoder depends on torchao.quantization
pip_packages=[
- "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
+ "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
"sentence-transformers --no-deps",
],
module="llama_stack.providers.inline.inference.sentence_transformers",
diff --git a/llama_stack/providers/registry/post_training.py b/llama_stack/providers/registry/post_training.py
index 4443f4df1..67238e3fc 100644
--- a/llama_stack/providers/registry/post_training.py
+++ b/llama_stack/providers/registry/post_training.py
@@ -13,7 +13,7 @@ from llama_stack.providers.datatypes import AdapterSpec, Api, InlineProviderSpec
# The CPU version is used for distributions that don't have GPU support -- they result in smaller container images.
torchtune_def = dict(
api=Api.post_training,
- pip_packages=["torchtune==0.5.0", "torchao==0.8.0", "numpy"],
+ pip_packages=["numpy"],
module="llama_stack.providers.inline.post_training.torchtune",
config_class="llama_stack.providers.inline.post_training.torchtune.TorchtunePostTrainingConfig",
api_dependencies=[
@@ -23,56 +23,39 @@ torchtune_def = dict(
description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.",
)
-huggingface_def = dict(
- api=Api.post_training,
- pip_packages=["trl", "transformers", "peft", "datasets"],
- module="llama_stack.providers.inline.post_training.huggingface",
- config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
- api_dependencies=[
- Api.datasetio,
- Api.datasets,
- ],
- description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
-)
-
def available_providers() -> list[ProviderSpec]:
return [
InlineProviderSpec(
- **{
+ **{ # type: ignore
**torchtune_def,
"provider_type": "inline::torchtune-cpu",
"pip_packages": (
cast(list[str], torchtune_def["pip_packages"])
- + ["torch torchtune==0.5.0 torchao==0.8.0 --index-url https://download.pytorch.org/whl/cpu"]
+ + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"]
),
},
),
InlineProviderSpec(
- **{
- **huggingface_def,
- "provider_type": "inline::huggingface-cpu",
- "pip_packages": (
- cast(list[str], huggingface_def["pip_packages"])
- + ["torch --index-url https://download.pytorch.org/whl/cpu"]
- ),
- },
- ),
- InlineProviderSpec(
- **{
+ **{ # type: ignore
**torchtune_def,
"provider_type": "inline::torchtune-gpu",
"pip_packages": (
- cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune==0.5.0 torchao==0.8.0"]
+ cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune>=0.5.0 torchao>=0.12.0"]
),
},
),
InlineProviderSpec(
- **{
- **huggingface_def,
- "provider_type": "inline::huggingface-gpu",
- "pip_packages": (cast(list[str], huggingface_def["pip_packages"]) + ["torch"]),
- },
+ api=Api.post_training,
+ provider_type="inline::huggingface-gpu",
+ pip_packages=["trl", "transformers", "peft", "datasets", "torch"],
+ module="llama_stack.providers.inline.post_training.huggingface",
+ config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
+ api_dependencies=[
+ Api.datasetio,
+ Api.datasets,
+ ],
+ description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
),
remote_provider_spec(
api=Api.post_training,
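The provider entries above build each torchtune variant by unpacking the shared `torchtune_def` dict and overriding individual keys. A minimal sketch of that merge behavior, with abbreviated names and illustrative values only (not the actual registry module):

```python
# Illustrative sketch of the dict-merge pattern used by the torchtune
# provider specs above; not the actual llama_stack registry code.
base = dict(
    provider_type="inline::torchtune",
    pip_packages=["numpy"],
)

cpu_variant = {
    **base,
    "provider_type": "inline::torchtune-cpu",
    # keys listed after **base win, so pip_packages is replaced with an
    # extended list instead of mutating the shared base definition
    "pip_packages": base["pip_packages"]
    + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"],
}

print(cpu_variant["pip_packages"])
# ['numpy', 'torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu']
```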
From 963305c84da587124937c71d0d7727d46525e7ec Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Tue, 26 Aug 2025 22:02:47 +0000
Subject: [PATCH 006/124] build: Bump version to 0.2.19
---
llama_stack/ui/package-lock.json | 8 ++--
llama_stack/ui/package.json | 2 +-
pyproject.toml | 6 +--
uv.lock | 68 +++++++++++++++++++++-----------
4 files changed, 54 insertions(+), 30 deletions(-)
diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json
index 98a1e4fe5..2da25615c 100644
--- a/llama_stack/ui/package-lock.json
+++ b/llama_stack/ui/package-lock.json
@@ -18,7 +18,7 @@
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"framer-motion": "^11.18.2",
- "llama-stack-client": "^0.2.18",
+ "llama-stack-client": "^0.2.19",
"lucide-react": "^0.510.0",
"next": "15.3.3",
"next-auth": "^4.24.11",
@@ -10006,9 +10006,9 @@
"license": "MIT"
},
"node_modules/llama-stack-client": {
- "version": "0.2.18",
- "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.18.tgz",
- "integrity": "sha512-k+xQOz/TIU0cINP4Aih8q6xs7f/6qs0fLDMXTTKQr5C0F1jtCjRiwsas7bTsDfpKfYhg/7Xy/wPw/uZgi6aIVg==",
+ "version": "0.2.19",
+ "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.19.tgz",
+ "integrity": "sha512-sDuAhUdEGlERZ3jlMUzPXcQTgMv/pGbDrPX0ifbE5S+gr7Q+7ohuQYrIXe+hXgIipFjq+y4b2c5laZ76tmAyEA==",
"license": "MIT",
"dependencies": {
"@types/node": "^18.11.18",
diff --git a/llama_stack/ui/package.json b/llama_stack/ui/package.json
index 7a17d93dd..31c836057 100644
--- a/llama_stack/ui/package.json
+++ b/llama_stack/ui/package.json
@@ -23,7 +23,7 @@
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"framer-motion": "^11.18.2",
- "llama-stack-client": "^0.2.18",
+ "llama-stack-client": "^0.2.19",
"lucide-react": "^0.510.0",
"next": "15.3.3",
"next-auth": "^4.24.11",
diff --git a/pyproject.toml b/pyproject.toml
index 6c76da895..dd8529546 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ required-version = ">=0.7.0"
[project]
name = "llama_stack"
-version = "0.2.18"
+version = "0.2.19"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack"
readme = "README.md"
@@ -31,7 +31,7 @@ dependencies = [
"huggingface-hub>=0.34.0,<1.0",
"jinja2>=3.1.6",
"jsonschema",
- "llama-stack-client>=0.2.18",
+ "llama-stack-client>=0.2.19",
"llama-api-client>=0.1.2",
"openai>=1.99.6,<1.100.0",
"prompt-toolkit",
@@ -56,7 +56,7 @@ dependencies = [
ui = [
"streamlit",
"pandas",
- "llama-stack-client>=0.2.18",
+ "llama-stack-client>=0.2.19",
"streamlit-option-menu",
]
diff --git a/uv.lock b/uv.lock
index 385c75bea..0626caba6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1128,6 +1128,9 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/4f/72/dcbc6dbf838549b7b0c2c18c1365d2580eb7456939e4b608c3ab213fce78/geventhttpclient-2.3.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9ac30c38d86d888b42bb2ab2738ab9881199609e9fa9a153eb0c66fc9188c6cb", size = 71984, upload-time = "2025-06-11T13:17:09.126Z" },
{ url = "https://files.pythonhosted.org/packages/4c/f9/74aa8c556364ad39b238919c954a0da01a6154ad5e85a1d1ab5f9f5ac186/geventhttpclient-2.3.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b802000a4fad80fa57e895009671d6e8af56777e3adf0d8aee0807e96188fd9", size = 52631, upload-time = "2025-06-11T13:17:10.061Z" },
{ url = "https://files.pythonhosted.org/packages/11/1a/bc4b70cba8b46be8b2c6ca5b8067c4f086f8c90915eb68086ab40ff6243d/geventhttpclient-2.3.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:461e4d9f4caee481788ec95ac64e0a4a087c1964ddbfae9b6f2dc51715ba706c", size = 51991, upload-time = "2025-06-11T13:17:11.049Z" },
+ { url = "https://files.pythonhosted.org/packages/03/3f/5ce6e003b3b24f7caf3207285831afd1a4f857ce98ac45e1fb7a6815bd58/geventhttpclient-2.3.4-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b7e41687c74e8fbe6a665458bbaea0c5a75342a95e2583738364a73bcbf1671b", size = 114982, upload-time = "2025-08-24T12:16:50.76Z" },
+ { url = "https://files.pythonhosted.org/packages/60/16/6f9dad141b7c6dd7ee831fbcd72dd02535c57bc1ec3c3282f07e72c31344/geventhttpclient-2.3.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3ea5da20f4023cf40207ce15f5f4028377ffffdba3adfb60b4c8f34925fce79", size = 115654, upload-time = "2025-08-24T12:16:52.072Z" },
+ { url = "https://files.pythonhosted.org/packages/ba/52/9b516a2ff423d8bd64c319e1950a165ceebb552781c5a88c1e94e93e8713/geventhttpclient-2.3.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:91f19a8a6899c27867dbdace9500f337d3e891a610708e86078915f1d779bf53", size = 121672, upload-time = "2025-08-24T12:16:53.361Z" },
{ url = "https://files.pythonhosted.org/packages/b0/f5/8d0f1e998f6d933c251b51ef92d11f7eb5211e3cd579018973a2b455f7c5/geventhttpclient-2.3.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41f2dcc0805551ea9d49f9392c3b9296505a89b9387417b148655d0d8251b36e", size = 119012, upload-time = "2025-06-11T13:17:11.956Z" },
{ url = "https://files.pythonhosted.org/packages/ea/0e/59e4ab506b3c19fc72e88ca344d150a9028a00c400b1099637100bec26fc/geventhttpclient-2.3.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:62f3a29bf242ecca6360d497304900683fd8f42cbf1de8d0546c871819251dad", size = 124565, upload-time = "2025-06-11T13:17:12.896Z" },
{ url = "https://files.pythonhosted.org/packages/39/5d/dcbd34dfcda0c016b4970bd583cb260cc5ebfc35b33d0ec9ccdb2293587a/geventhttpclient-2.3.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8714a3f2c093aeda3ffdb14c03571d349cb3ed1b8b461d9f321890659f4a5dbf", size = 115573, upload-time = "2025-06-11T13:17:13.937Z" },
@@ -1141,6 +1144,9 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ff/ad/132fddde6e2dca46d6a86316962437acd2bfaeb264db4e0fae83c529eb04/geventhttpclient-2.3.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:be64c5583884c407fc748dedbcb083475d5b138afb23c6bc0836cbad228402cc", size = 71967, upload-time = "2025-06-11T13:17:22.121Z" },
{ url = "https://files.pythonhosted.org/packages/f4/34/5e77d9a31d93409a8519cf573843288565272ae5a016be9c9293f56c50a1/geventhttpclient-2.3.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:15b2567137734183efda18e4d6245b18772e648b6a25adea0eba8b3a8b0d17e8", size = 52632, upload-time = "2025-06-11T13:17:23.016Z" },
{ url = "https://files.pythonhosted.org/packages/47/d2/cf0dbc333304700e68cee9347f654b56e8b0f93a341b8b0d027ee96800d6/geventhttpclient-2.3.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a4bca1151b8cd207eef6d5cb3c720c562b2aa7293cf113a68874e235cfa19c31", size = 51980, upload-time = "2025-06-11T13:17:23.933Z" },
+ { url = "https://files.pythonhosted.org/packages/27/6e/049e685fc43e2e966c83f24b3187f6a6736103f0fc51118140f4ca1793d4/geventhttpclient-2.3.4-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8a681433e2f3d4b326d8b36b3e05b787b2c6dd2a5660a4a12527622278bf02ed", size = 114998, upload-time = "2025-08-24T12:16:54.72Z" },
+ { url = "https://files.pythonhosted.org/packages/24/13/1d08cf0400bf0fe0bb21e70f3f5fab2130aecef962b4362b7a1eba3cd738/geventhttpclient-2.3.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:736aa8e9609e4da40aeff0dbc02fea69021a034f4ed1e99bf93fc2ca83027b64", size = 115690, upload-time = "2025-08-24T12:16:56.328Z" },
+ { url = "https://files.pythonhosted.org/packages/fd/bc/15d22882983cac573859d274783c5b0a95881e553fc312e7b646be432668/geventhttpclient-2.3.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9d477ae1f5d42e1ee6abbe520a2e9c7f369781c3b8ca111d1f5283c1453bc825", size = 121681, upload-time = "2025-08-24T12:16:58.344Z" },
{ url = "https://files.pythonhosted.org/packages/ec/5b/c0c30ccd9d06c603add3f2d6abd68bd98430ee9730dc5478815759cf07f7/geventhttpclient-2.3.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b50d9daded5d36193d67e2fc30e59752262fcbbdc86e8222c7df6b93af0346a", size = 118987, upload-time = "2025-06-11T13:17:24.97Z" },
{ url = "https://files.pythonhosted.org/packages/4f/56/095a46af86476372064128162eccbd2ba4a7721503759890d32ea701d5fd/geventhttpclient-2.3.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fe705e7656bc6982a463a4ed7f9b1db8c78c08323f1d45d0d1d77063efa0ce96", size = 124519, upload-time = "2025-06-11T13:17:25.933Z" },
{ url = "https://files.pythonhosted.org/packages/ae/12/7c9ba94b58f7954a83d33183152ce6bf5bda10c08ebe47d79a314cd33e29/geventhttpclient-2.3.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:69668589359db4cbb9efa327dda5735d1e74145e6f0a9ffa50236d15cf904053", size = 115574, upload-time = "2025-06-11T13:17:27.331Z" },
@@ -1151,6 +1157,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ca/36/9065bb51f261950c42eddf8718e01a9ff344d8082e31317a8b6677be9bd6/geventhttpclient-2.3.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8d1d0db89c1c8f3282eac9a22fda2b4082e1ed62a2107f70e3f1de1872c7919f", size = 112245, upload-time = "2025-06-11T13:17:32.331Z" },
{ url = "https://files.pythonhosted.org/packages/21/7e/08a615bec095c288f997951e42e48b262d43c6081bef33cfbfad96ab9658/geventhttpclient-2.3.4-cp313-cp313-win32.whl", hash = "sha256:4e492b9ab880f98f8a9cc143b96ea72e860946eae8ad5fb2837cede2a8f45154", size = 48360, upload-time = "2025-06-11T13:17:33.349Z" },
{ url = "https://files.pythonhosted.org/packages/ec/19/ef3cb21e7e95b14cfcd21e3ba7fe3d696e171682dfa43ab8c0a727cac601/geventhttpclient-2.3.4-cp313-cp313-win_amd64.whl", hash = "sha256:72575c5b502bf26ececccb905e4e028bb922f542946be701923e726acf305eb6", size = 48956, upload-time = "2025-06-11T13:17:34.956Z" },
+ { url = "https://files.pythonhosted.org/packages/06/45/c41697c7d0cae17075ba535fb901985c2873461a9012e536de679525e28d/geventhttpclient-2.3.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:503db5dd0aa94d899c853b37e1853390c48c7035132f39a0bab44cbf95d29101", size = 71999, upload-time = "2025-08-24T12:17:00.419Z" },
+ { url = "https://files.pythonhosted.org/packages/5d/f7/1d953cafecf8f1681691977d9da9b647d2e02996c2431fb9b718cfdd3013/geventhttpclient-2.3.4-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:389d3f83316220cfa2010f41401c140215a58ddba548222e7122b2161e25e391", size = 52656, upload-time = "2025-08-24T12:17:01.337Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/ca/4bd19040905e911dd8771a4ab74630eadc9ee9072b01ab504332dada2619/geventhttpclient-2.3.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20c65d404fa42c95f6682831465467dff317004e53602c01f01fbd5ba1e56628", size = 51978, upload-time = "2025-08-24T12:17:02.282Z" },
+ { url = "https://files.pythonhosted.org/packages/11/01/c457257ee41236347dac027e63289fa3f92f164779458bd244b376122bf6/geventhttpclient-2.3.4-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2574ee47ff6f379e9ef124e2355b23060b81629f1866013aa975ba35df0ed60b", size = 115033, upload-time = "2025-08-24T12:17:03.272Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/c1/ef3ddc24b402eb3caa19dacbcd08d7129302a53d9b9109c84af1ea74e31a/geventhttpclient-2.3.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fecf1b735591fb21ea124a374c207104a491ad0d772709845a10d5faa07fa833", size = 115762, upload-time = "2025-08-24T12:17:04.288Z" },
+ { url = "https://files.pythonhosted.org/packages/a9/97/8dca246262e9a1ebd639120151db00e34b7d10f60bdbca8481878b91801a/geventhttpclient-2.3.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:44e9ba810c28f9635e5c4c9cf98fc6470bad5a3620d8045d08693f7489493a3c", size = 121757, upload-time = "2025-08-24T12:17:05.273Z" },
+ { url = "https://files.pythonhosted.org/packages/10/7b/41bff3cbdeff3d06d45df3c61fa39cd25e60fa9d21c709ec6aeb58e9b58f/geventhttpclient-2.3.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:501d5c69adecd5eaee3c22302006f6c16aa114139640873b72732aa17dab9ee7", size = 111747, upload-time = "2025-08-24T12:17:06.585Z" },
+ { url = "https://files.pythonhosted.org/packages/64/e6/3732132fda94082ec8793e3ae0d4d7fff6c1cb8e358e9664d1589499f4b1/geventhttpclient-2.3.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:709f557138fb84ed32703d42da68f786459dab77ff2c23524538f2e26878d154", size = 118487, upload-time = "2025-08-24T12:17:07.816Z" },
+ { url = "https://files.pythonhosted.org/packages/93/29/d48d119dee6c42e066330860186df56a80d4e76d2821a6c706ead49006d7/geventhttpclient-2.3.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b8b86815a30e026c6677b89a5a21ba5fd7b69accf8f0e9b83bac123e4e9f3b31", size = 112198, upload-time = "2025-08-24T12:17:08.867Z" },
+ { url = "https://files.pythonhosted.org/packages/56/48/556adff8de1bd3469b58394f441733bb3c76cb22c2600cf2ee753e73d47f/geventhttpclient-2.3.4-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:4371b1b1afc072ad2b0ff5a8929d73ffd86d582908d3e9e8d7911dc027b1b3a6", size = 72354, upload-time = "2025-08-24T12:17:10.671Z" },
+ { url = "https://files.pythonhosted.org/packages/7c/77/f1b32a91350382978cde0ddfee4089b94e006eb0f3e7297196d9d5451217/geventhttpclient-2.3.4-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:6409fcda1f40d66eab48afc218b4c41e45a95c173738d10c50bc69c7de4261b9", size = 52835, upload-time = "2025-08-24T12:17:12.164Z" },
+ { url = "https://files.pythonhosted.org/packages/d3/06/124f95556e0d5b4c417ec01fc30d91a3e4fe4524a44d2f629a1b1a721984/geventhttpclient-2.3.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:142870c2efb6bd0a593dcd75b83defb58aeb72ceaec4c23186785790bd44a311", size = 52165, upload-time = "2025-08-24T12:17:13.465Z" },
+ { url = "https://files.pythonhosted.org/packages/76/9c/0850256e4461b0a90f2cf5c8156ea8f97e93a826aa76d7be70c9c6d4ba0f/geventhttpclient-2.3.4-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3a74f7b926badb3b1d47ea987779cb83523a406e89203070b58b20cf95d6f535", size = 117929, upload-time = "2025-08-24T12:17:14.477Z" },
+ { url = "https://files.pythonhosted.org/packages/ca/55/3b54d0c0859efac95ba2649aeb9079a3523cdd7e691549ead2862907dc7d/geventhttpclient-2.3.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2a8cde016e5ea6eb289c039b6af8dcef6c3ee77f5d753e57b48fe2555cdeacca", size = 119584, upload-time = "2025-08-24T12:17:15.709Z" },
+ { url = "https://files.pythonhosted.org/packages/84/df/84ce132a0eb2b6d4f86e68a828e3118419cb0411cae101e4bad256c3f321/geventhttpclient-2.3.4-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5aa16f2939a508667093b18e47919376f7db9a9acbe858343173c5a58e347869", size = 125388, upload-time = "2025-08-24T12:17:16.915Z" },
+ { url = "https://files.pythonhosted.org/packages/e8/4f/8156b9f6e25e4f18a60149bd2925f56f1ed7a1f8d520acb5a803536adadd/geventhttpclient-2.3.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ffe87eb7f1956357c2144a56814b5ffc927cbb8932f143a0351c78b93129ebbc", size = 115214, upload-time = "2025-08-24T12:17:17.945Z" },
+ { url = "https://files.pythonhosted.org/packages/f6/5a/b01657605c16ac4555b70339628a33fc7ca41ace58da167637ef72ad0a8e/geventhttpclient-2.3.4-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:5ee758e37215da9519cea53105b2a078d8bc0a32603eef2a1f9ab551e3767dee", size = 121862, upload-time = "2025-08-24T12:17:18.97Z" },
+ { url = "https://files.pythonhosted.org/packages/84/ca/c4e36a9b1bcce9958d8886aa4f7b262c8e9a7c43a284f2d79abfc9ba715d/geventhttpclient-2.3.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:416cc70adb3d34759e782d2e120b4432752399b85ac9758932ecd12274a104c3", size = 114999, upload-time = "2025-08-24T12:17:19.978Z" },
]
[[package]]
@@ -1743,7 +1767,7 @@ wheels = [
[[package]]
name = "llama-stack"
-version = "0.2.18"
+version = "0.2.19"
source = { editable = "." }
dependencies = [
{ name = "aiohttp" },
@@ -1881,8 +1905,8 @@ requires-dist = [
{ name = "jinja2", specifier = ">=3.1.6" },
{ name = "jsonschema" },
{ name = "llama-api-client", specifier = ">=0.1.2" },
- { name = "llama-stack-client", specifier = ">=0.2.18" },
- { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.18" },
+ { name = "llama-stack-client", specifier = ">=0.2.19" },
+ { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.19" },
{ name = "openai", specifier = ">=1.99.6,<1.100.0" },
{ name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
{ name = "opentelemetry-sdk", specifier = ">=1.30.0" },
@@ -1989,7 +2013,7 @@ unit = [
[[package]]
name = "llama-stack-client"
-version = "0.2.18"
+version = "0.2.19"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -2008,9 +2032,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/69/da/5e5a745495f8a2b8ef24fc4d01fe9031aa2277c36447cb22192ec8c8cc1e/llama_stack_client-0.2.18.tar.gz", hash = "sha256:860c885c9e549445178ac55cc9422e6e2a91215ac7aff5aaccfb42f3ce07e79e", size = 277284, upload-time = "2025-08-19T22:12:09.106Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/14/e4/72683c10188ae93e97551ab6eeac725e46f13ec215618532505a7d91bf2b/llama_stack_client-0.2.19.tar.gz", hash = "sha256:6c857e528b83af7821120002ebe4d3db072fd9f7bf867a152a34c70fe606833f", size = 318325, upload-time = "2025-08-26T21:54:20.592Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/0a/e4/e97f8fdd8a07aa1efc7f7e37b5657d84357b664bf70dd1885a437edc0699/llama_stack_client-0.2.18-py3-none-any.whl", hash = "sha256:90f827d5476f7fc15fd993f1863af6a6e72bd064646bf6a99435eb43a1327f70", size = 367586, upload-time = "2025-08-19T22:12:07.899Z" },
+ { url = "https://files.pythonhosted.org/packages/51/51/c8dde9fae58193a539eac700502876d8edde8be354c2784ff7b707a47432/llama_stack_client-0.2.19-py3-none-any.whl", hash = "sha256:478565a54541ca03ca9f8fe2019f4136f93ab6afe9591bdd44bc6dde6ddddbd9", size = 369905, upload-time = "2025-08-26T21:54:18.929Z" },
]
[[package]]
@@ -4713,9 +4737,9 @@ dependencies = [
{ name = "typing-extensions", marker = "sys_platform == 'darwin'" },
]
wheels = [
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:a47b7986bee3f61ad217d8a8ce24605809ab425baf349f97de758815edd2ef54" },
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:fbe2e149c5174ef90d29a5f84a554dfaf28e003cb4f61fa2c8c024c17ec7ca58" },
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:057efd30a6778d2ee5e2374cd63a63f63311aa6f33321e627c655df60abdd390" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl" },
]
[[package]]
@@ -4738,19 +4762,19 @@ dependencies = [
{ name = "typing-extensions", marker = "sys_platform != 'darwin'" },
]
wheels = [
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-linux_s390x.whl", hash = "sha256:0e34e276722ab7dd0dffa9e12fe2135a9b34a0e300c456ed7ad6430229404eb5" },
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:610f600c102386e581327d5efc18c0d6edecb9820b4140d26163354a99cd800d" },
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:cb9a8ba8137ab24e36bf1742cb79a1294bd374db570f09fc15a5e1318160db4e" },
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:2be20b2c05a0cce10430cc25f32b689259640d273232b2de357c35729132256d" },
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:99fc421a5d234580e45957a7b02effbf3e1c884a5dd077afc85352c77bf41434" },
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-linux_s390x.whl", hash = "sha256:8b5882276633cf91fe3d2d7246c743b94d44a7e660b27f1308007fdb1bb89f7d" },
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a5064b5e23772c8d164068cc7c12e01a75faf7b948ecd95a0d4007d7487e5f25" },
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8f81dedb4c6076ec325acc3b47525f9c550e5284a18eae1d9061c543f7b6e7de" },
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:e1ee1b2346ade3ea90306dfbec7e8ff17bc220d344109d189ae09078333b0856" },
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:64c187345509f2b1bb334feed4666e2c781ca381874bde589182f81247e61f88" },
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:af81283ac671f434b1b25c95ba295f270e72db1fad48831eb5e4748ff9840041" },
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:a9dbb6f64f63258bc811e2c0c99640a81e5af93c531ad96e95c5ec777ea46dab" },
- { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:6d93a7165419bc4b2b907e859ccab0dea5deeab261448ae9a5ec5431f14c0e64" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-linux_s390x.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_amd64.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_arm64.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-linux_s390x.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_amd64.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_arm64.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl" },
+ { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-win_amd64.whl" },
]
[[package]]
From cec00c54762565f7ac09a826ae88c0c0d714894f Mon Sep 17 00:00:00 2001
From: Charlie Doern
Date: Tue, 26 Aug 2025 21:21:15 -0400
Subject: [PATCH 007/124] docs: fix post_training docs (#3262)
# What does this PR do?
The post-training docs are missing references to the more in-depth
`huggingface.md` and `torchtune.md`, which explain how to actually use
the providers. These files do show up in search, though.
Add references to these files to the `inline_huggingface.md` and
`inline_torchtune.md` files currently pointed to by `index.md`.
Signed-off-by: Charlie Doern
---
docs/source/advanced_apis/post_training/inline_huggingface.md | 3 +++
docs/source/advanced_apis/post_training/inline_torchtune.md | 1 +
2 files changed, 4 insertions(+)
diff --git a/docs/source/advanced_apis/post_training/inline_huggingface.md b/docs/source/advanced_apis/post_training/inline_huggingface.md
index 4d2201c99..6536b4f8c 100644
--- a/docs/source/advanced_apis/post_training/inline_huggingface.md
+++ b/docs/source/advanced_apis/post_training/inline_huggingface.md
@@ -35,3 +35,6 @@ device: cpu
```
+[Find more detailed information here!](huggingface.md)
+
+
diff --git a/docs/source/advanced_apis/post_training/inline_torchtune.md b/docs/source/advanced_apis/post_training/inline_torchtune.md
index 6684c99ac..617975b0d 100644
--- a/docs/source/advanced_apis/post_training/inline_torchtune.md
+++ b/docs/source/advanced_apis/post_training/inline_torchtune.md
@@ -22,3 +22,4 @@ checkpoint_format: meta
```
+[Find more detailed information here!](torchtune.md)
From d73955a41e246d4d394ad31454d7c54599d2f812 Mon Sep 17 00:00:00 2001
From: raghotham
Date: Wed, 27 Aug 2025 12:04:25 -0700
Subject: [PATCH 008/124] chore: remove absolute paths (#3263)
# What does this PR do?
Found these issues while moving the docs to GitHub Pages.
## Test Plan
uv run --group docs sphinx-autobuild docs/source docs/build/html
--write-all
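Each change replaces an absolute `https://llama-stack.readthedocs.io/...` URL with a relative link to the corresponding source file, e.g. `https://llama-stack.readthedocs.io/en/latest/concepts/index.html` becomes `../../concepts/index.md`; the exact relative depth depends on the referencing file, as shown in the diff below.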
---
docs/source/advanced_apis/evaluation_concepts.md | 2 +-
docs/source/building_applications/playground/index.md | 2 +-
docs/source/building_applications/responses_vs_agents.md | 8 ++++----
docs/source/concepts/distributions.md | 2 +-
docs/source/distributions/importing_as_library.md | 2 +-
docs/source/distributions/k8s/apply.sh | 6 +++---
docs/source/distributions/ondevice_distro/android_sdk.md | 2 +-
.../self_hosted_distro/meta-reference-gpu.md | 4 ++--
docs/source/references/evals_reference/index.md | 2 +-
.../distributions/meta-reference-gpu/doc_template.md | 4 ++--
10 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/docs/source/advanced_apis/evaluation_concepts.md b/docs/source/advanced_apis/evaluation_concepts.md
index c26ec8f5e..52ad53ece 100644
--- a/docs/source/advanced_apis/evaluation_concepts.md
+++ b/docs/source/advanced_apis/evaluation_concepts.md
@@ -33,7 +33,7 @@ The list of open-benchmarks we currently support:
- [MMMU](https://arxiv.org/abs/2311.16502) (A Massive Multi-discipline Multimodal Understanding and Reasoning Benchmark for Expert AGI)]: Benchmark designed to evaluate multimodal models.
-You can follow this [contributing guide](https://llama-stack.readthedocs.io/en/latest/references/evals_reference/index.html#open-benchmark-contributing-guide) to add more open-benchmarks to Llama Stack
+You can follow this [contributing guide](../references/evals_reference/index.md#open-benchmark-contributing-guide) to add more open-benchmarks to Llama Stack
#### Run evaluation on open-benchmarks via CLI
diff --git a/docs/source/building_applications/playground/index.md b/docs/source/building_applications/playground/index.md
index fd2b92434..2390c422f 100644
--- a/docs/source/building_applications/playground/index.md
+++ b/docs/source/building_applications/playground/index.md
@@ -88,7 +88,7 @@ Interactive pages for users to play with and explore Llama Stack API capabilitie
- **API Resources**: Inspect Llama Stack API resources
- This page allows you to inspect Llama Stack API resources (`models`, `datasets`, `memory_banks`, `benchmarks`, `shields`).
- Under the hood, it uses Llama Stack's `//list` API to get information about each resources.
- - Please visit [Core Concepts](https://llama-stack.readthedocs.io/en/latest/concepts/index.html) for more details about the resources.
+ - Please visit [Core Concepts](../../concepts/index.md) for more details about the resources.
### Starting the Llama Stack Playground
diff --git a/docs/source/building_applications/responses_vs_agents.md b/docs/source/building_applications/responses_vs_agents.md
index 5abe951d6..63ff69e4f 100644
--- a/docs/source/building_applications/responses_vs_agents.md
+++ b/docs/source/building_applications/responses_vs_agents.md
@@ -3,7 +3,7 @@
Llama Stack (LLS) provides two different APIs for building AI applications with tool calling capabilities: the **Agents API** and the **OpenAI Responses API**. While both enable AI systems to use tools, and maintain full conversation history, they serve different use cases and have distinct characteristics.
```{note}
-For simple and basic inferencing, you may want to use the [Chat Completions API](https://llama-stack.readthedocs.io/en/latest/providers/index.html#chat-completions) directly, before progressing to Agents or Responses API.
+ **Note:** For simple and basic inferencing, you may want to use the [Chat Completions API](../providers/openai.md#chat-completions) directly, before progressing to Agents or Responses API.
```
## Overview
@@ -173,7 +173,7 @@ Both APIs demonstrate distinct strengths that make them valuable on their own fo
## For More Information
-- **LLS Agents API**: For detailed information on creating and managing agents, see the [Agents documentation](https://llama-stack.readthedocs.io/en/latest/building_applications/agent.html)
+- **LLS Agents API**: For detailed information on creating and managing agents, see the [Agents documentation](agent.md)
- **OpenAI Responses API**: For information on using the OpenAI-compatible responses API, see the [OpenAI API documentation](https://platform.openai.com/docs/api-reference/responses)
-- **Chat Completions API**: For the default backend API used by Agents, see the [Chat Completions providers documentation](https://llama-stack.readthedocs.io/en/latest/providers/index.html#chat-completions)
-- **Agent Execution Loop**: For understanding how agents process turns and steps in their execution, see the [Agent Execution Loop documentation](https://llama-stack.readthedocs.io/en/latest/building_applications/agent_execution_loop.html)
+- **Chat Completions API**: For the default backend API used by Agents, see the [Chat Completions providers documentation](../providers/openai.md#chat-completions)
+- **Agent Execution Loop**: For understanding how agents process turns and steps in their execution, see the [Agent Execution Loop documentation](agent_execution_loop.md)
diff --git a/docs/source/concepts/distributions.md b/docs/source/concepts/distributions.md
index c3be12d93..8c63914d1 100644
--- a/docs/source/concepts/distributions.md
+++ b/docs/source/concepts/distributions.md
@@ -6,4 +6,4 @@ While there is a lot of flexibility to mix-and-match providers, often users will
**Locally Hosted Distro**: You may want to run Llama Stack on your own hardware. Typically though, you still need to use Inference via an external service. You can use providers like HuggingFace TGI, Fireworks, Together, etc. for this purpose. Or you may have access to GPUs and can run a [vLLM](https://github.com/vllm-project/vllm) or [NVIDIA NIM](https://build.nvidia.com/nim?filters=nimType%3Anim_type_run_anywhere&q=llama) instance. If you "just" have a regular desktop machine, you can use [Ollama](https://ollama.com/) for inference. To provide convenient quick access to these options, we provide a number of such pre-configured locally-hosted Distros.
-**On-device Distro**: To run Llama Stack directly on an edge device (mobile phone or a tablet), we provide Distros for [iOS](https://llama-stack.readthedocs.io/en/latest/distributions/ondevice_distro/ios_sdk.html) and [Android](https://llama-stack.readthedocs.io/en/latest/distributions/ondevice_distro/android_sdk.html)
+**On-device Distro**: To run Llama Stack directly on an edge device (mobile phone or a tablet), we provide Distros for [iOS](../distributions/ondevice_distro/ios_sdk.md) and [Android](../distributions/ondevice_distro/android_sdk.md)
diff --git a/docs/source/distributions/importing_as_library.md b/docs/source/distributions/importing_as_library.md
index b9b4b065a..9993be227 100644
--- a/docs/source/distributions/importing_as_library.md
+++ b/docs/source/distributions/importing_as_library.md
@@ -27,7 +27,7 @@ Then, you can access the APIs like `models` and `inference` on the client and ca
response = client.models.list()
```
-If you've created a [custom distribution](https://llama-stack.readthedocs.io/en/latest/distributions/building_distro.html), you can also use the run.yaml configuration file directly:
+If you've created a [custom distribution](building_distro.md), you can also use the run.yaml configuration file directly:
```python
client = LlamaStackAsLibraryClient(config_path)
diff --git a/docs/source/distributions/k8s/apply.sh b/docs/source/distributions/k8s/apply.sh
index 3356da53e..1b5b26863 100755
--- a/docs/source/distributions/k8s/apply.sh
+++ b/docs/source/distributions/k8s/apply.sh
@@ -22,17 +22,17 @@ else
fi
if [ -z "${GITHUB_CLIENT_ID:-}" ]; then
- echo "ERROR: GITHUB_CLIENT_ID not set. You need it for Github login to work. Refer to https://llama-stack.readthedocs.io/en/latest/deploying/index.html#kubernetes-deployment-guide"
+ echo "ERROR: GITHUB_CLIENT_ID not set. You need it for Github login to work. See the Kubernetes Deployment Guide in the Llama Stack documentation."
exit 1
fi
if [ -z "${GITHUB_CLIENT_SECRET:-}" ]; then
- echo "ERROR: GITHUB_CLIENT_SECRET not set. You need it for Github login to work. Refer to https://llama-stack.readthedocs.io/en/latest/deploying/index.html#kubernetes-deployment-guide"
+ echo "ERROR: GITHUB_CLIENT_SECRET not set. You need it for Github login to work. See the Kubernetes Deployment Guide in the Llama Stack documentation."
exit 1
fi
if [ -z "${LLAMA_STACK_UI_URL:-}" ]; then
- echo "ERROR: LLAMA_STACK_UI_URL not set. Should be set to the external URL of the UI (excluding port). You need it for Github login to work. Refer to https://llama-stack.readthedocs.io/en/latest/deploying/index.html#kubernetes-deployment-guide"
+ echo "ERROR: LLAMA_STACK_UI_URL not set. Should be set to the external URL of the UI (excluding port). You need it for Github login to work. See the Kubernetes Deployment Guide in the Llama Stack documentation."
exit 1
fi
diff --git a/docs/source/distributions/ondevice_distro/android_sdk.md b/docs/source/distributions/ondevice_distro/android_sdk.md
index 9d16d07d7..ad86fa5f3 100644
--- a/docs/source/distributions/ondevice_distro/android_sdk.md
+++ b/docs/source/distributions/ondevice_distro/android_sdk.md
@@ -66,7 +66,7 @@ llama stack run starter --port 5050
Ensure the Llama Stack server version is the same as the Kotlin SDK Library for maximum compatibility.
-Other inference providers: [Table](https://llama-stack.readthedocs.io/en/latest/index.html#supported-llama-stack-implementations)
+Other inference providers: [Table](../../index.md#supported-llama-stack-implementations)
How to set remote localhost in Demo App: [Settings](https://github.com/meta-llama/llama-stack-client-kotlin/tree/latest-release/examples/android_app#settings)
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
index 7e50a4161..84b85b91c 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -2,7 +2,7 @@
orphan: true
---
-# Meta Reference Distribution
+# Meta Reference GPU Distribution
```{toctree}
:maxdepth: 2
@@ -41,7 +41,7 @@ The following environment variables can be configured:
## Prerequisite: Downloading Models
-Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](../../references/llama_cli_reference/download_models.md) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
```
$ llama model list --downloaded
diff --git a/docs/source/references/evals_reference/index.md b/docs/source/references/evals_reference/index.md
index 054a0b809..9a5ed2f1b 100644
--- a/docs/source/references/evals_reference/index.md
+++ b/docs/source/references/evals_reference/index.md
@@ -202,7 +202,7 @@ pprint(response)
Llama Stack offers a library of scoring functions and the `/scoring` API, allowing you to run evaluations on your pre-annotated AI application datasets.
-In this example, we will work with an example RAG dataset you have built previously, label with an annotation, and use LLM-As-Judge with custom judge prompt for scoring. Please checkout our [Llama Stack Playground](https://llama-stack.readthedocs.io/en/latest/playground/index.html) for an interactive interface to upload datasets and run scorings.
+In this example, we will work with an example RAG dataset you have built previously, label with an annotation, and use LLM-As-Judge with custom judge prompt for scoring. Please checkout our [Llama Stack Playground](../../building_applications/playground/index.md) for an interactive interface to upload datasets and run scorings.
```python
judge_model_id = "meta-llama/Llama-3.1-405B-Instruct-FP8"
diff --git a/llama_stack/distributions/meta-reference-gpu/doc_template.md b/llama_stack/distributions/meta-reference-gpu/doc_template.md
index ff45c3826..602d053c4 100644
--- a/llama_stack/distributions/meta-reference-gpu/doc_template.md
+++ b/llama_stack/distributions/meta-reference-gpu/doc_template.md
@@ -1,7 +1,7 @@
---
orphan: true
---
-# Meta Reference Distribution
+# Meta Reference GPU Distribution
```{toctree}
:maxdepth: 2
@@ -29,7 +29,7 @@ The following environment variables can be configured:
## Prerequisite: Downloading Models
-Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](../../references/llama_cli_reference/download_models.md) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
```
$ llama model list --downloaded
From 1a9fa3c0b88a60aece2cbbcaa9c98dc635becc48 Mon Sep 17 00:00:00 2001
From: Kelly Brown <86735520+kelbrown20@users.noreply.github.com>
Date: Thu, 28 Aug 2025 06:26:47 -0400
Subject: [PATCH 009/124] docs: Contributor guidelines for creating Internal or
External providers (#3111)
**Description:**
Adds information and guidelines on when contributors should create an
in-tree vs. an out-of-tree provider.
I'm still learning a bit about this subject, so I'm very open to
feedback on this PR.
Will also add this section to the API Providers section of the docs.
---
docs/source/contributing/new_api_provider.md | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/docs/source/contributing/new_api_provider.md b/docs/source/contributing/new_api_provider.md
index 6f8f59a47..9a7a62a38 100644
--- a/docs/source/contributing/new_api_provider.md
+++ b/docs/source/contributing/new_api_provider.md
@@ -14,6 +14,13 @@ Here are some example PRs to help you get started:
- [Nvidia Inference Implementation](https://github.com/meta-llama/llama-stack/pull/355)
- [Model context protocol Tool Runtime](https://github.com/meta-llama/llama-stack/pull/665)
+## Guidelines for creating Internal or External Providers
+
+|**Type** |Internal (In-tree) |External (out-of-tree)
+|---------|-------------------|---------------------|
+|**Description** |A provider that is directly in the Llama Stack code|A provider that is outside of the Llama stack core codebase but is still accessible and usable by Llama Stack.
+|**Benefits** |Ability to interact with the provider with minimal additional configurations or installations| Contributors do not have to add directly to the code to create providers accessible on Llama Stack. Keep provider-specific code separate from the core Llama Stack code.
+
## Inference Provider Patterns
When implementing Inference providers for OpenAI-compatible APIs, Llama Stack provides several mixin classes to simplify development and ensure consistent behavior across providers.
From 75fad445a6c62808779da08d9a374c5dccf9ee72 Mon Sep 17 00:00:00 2001
From: Francisco Arceo
Date: Thu, 28 Aug 2025 05:03:31 -0600
Subject: [PATCH 010/124] feat(UI): Implementing File Upload and VectorDB
Creation/Configuration in Playground (#3266)
---
.../chat-playground/chunk-processor.test.tsx | 610 +++++++++++
.../ui/app/chat-playground/page.test.tsx | 217 +++-
llama_stack/ui/app/chat-playground/page.tsx | 963 +++++++++++++++---
.../ui/components/chat-playground/chat.tsx | 11 +-
.../chat-playground/conversations.tsx | 11 +-
.../chat-playground/message-input.tsx | 48 +-
.../chat-playground/vector-db-creator.tsx | 243 +++++
llama_stack/ui/lib/message-content-utils.ts | 51 +
8 files changed, 1953 insertions(+), 201 deletions(-)
create mode 100644 llama_stack/ui/app/chat-playground/chunk-processor.test.tsx
create mode 100644 llama_stack/ui/components/chat-playground/vector-db-creator.tsx
create mode 100644 llama_stack/ui/lib/message-content-utils.ts
diff --git a/llama_stack/ui/app/chat-playground/chunk-processor.test.tsx b/llama_stack/ui/app/chat-playground/chunk-processor.test.tsx
new file mode 100644
index 000000000..70e8b3afa
--- /dev/null
+++ b/llama_stack/ui/app/chat-playground/chunk-processor.test.tsx
@@ -0,0 +1,610 @@
+import { describe, test, expect } from "@jest/globals";
+
+// Extract the exact processChunk function implementation for testing
+function createProcessChunk() {
+ return (chunk: unknown): { text: string | null; isToolCall: boolean } => {
+ const chunkObj = chunk as Record<string, unknown>;
+
+ // Helper function to check if content contains function call JSON
+ const containsToolCall = (content: string): boolean => {
+ return (
+ content.includes('"type": "function"') ||
+ content.includes('"name": "knowledge_search"') ||
+ content.includes('"parameters":') ||
+ !!content.match(/\{"type":\s*"function".*?\}/)
+ );
+ };
+
+ // Check if this chunk contains a tool call (function call)
+ let isToolCall = false;
+
+ // Check direct chunk content if it's a string
+ if (typeof chunk === "string") {
+ isToolCall = containsToolCall(chunk);
+ }
+
+ // Check delta structures
+ if (
+ chunkObj?.delta &&
+ typeof chunkObj.delta === "object" &&
+ chunkObj.delta !== null
+ ) {
+ const delta = chunkObj.delta as Record<string, unknown>;
+ if ("tool_calls" in delta) {
+ isToolCall = true;
+ }
+ if (typeof delta.text === "string") {
+ if (containsToolCall(delta.text)) {
+ isToolCall = true;
+ }
+ }
+ }
+
+ // Check event structures
+ if (
+ chunkObj?.event &&
+ typeof chunkObj.event === "object" &&
+ chunkObj.event !== null
+ ) {
+ const event = chunkObj.event as Record<string, unknown>;
+
+ // Check event payload
+ if (
+ event?.payload &&
+ typeof event.payload === "object" &&
+ event.payload !== null
+ ) {
+ const payload = event.payload as Record<string, unknown>;
+ if (typeof payload.content === "string") {
+ if (containsToolCall(payload.content)) {
+ isToolCall = true;
+ }
+ }
+
+ // Check payload delta
+ if (
+ payload?.delta &&
+ typeof payload.delta === "object" &&
+ payload.delta !== null
+ ) {
+ const delta = payload.delta as Record<string, unknown>;
+ if (typeof delta.text === "string") {
+ if (containsToolCall(delta.text)) {
+ isToolCall = true;
+ }
+ }
+ }
+ }
+
+ // Check event delta
+ if (
+ event?.delta &&
+ typeof event.delta === "object" &&
+ event.delta !== null
+ ) {
+ const delta = event.delta as Record<string, unknown>;
+ if (typeof delta.text === "string") {
+ if (containsToolCall(delta.text)) {
+ isToolCall = true;
+ }
+ }
+ if (typeof delta.content === "string") {
+ if (containsToolCall(delta.content)) {
+ isToolCall = true;
+ }
+ }
+ }
+ }
+
+ // if it's a tool call, skip it (don't display in chat)
+ if (isToolCall) {
+ return { text: null, isToolCall: true };
+ }
+
+ // Extract text content from various chunk formats
+ let text: string | null = null;
+
+ // Helper function to extract clean text content, filtering out function calls
+ const extractCleanText = (content: string): string | null => {
+ if (containsToolCall(content)) {
+ try {
+ // Try to parse and extract non-function call parts
+ const jsonMatch = content.match(
+ /\{"type":\s*"function"[^}]*\}[^}]*\}/
+ );
+ if (jsonMatch) {
+ const jsonPart = jsonMatch[0];
+ const parsedJson = JSON.parse(jsonPart);
+
+ // If it's a function call, extract text after JSON
+ if (parsedJson.type === "function") {
+ const textAfterJson = content
+ .substring(content.indexOf(jsonPart) + jsonPart.length)
+ .trim();
+ return textAfterJson || null;
+ }
+ }
+ // If we can't parse it properly, skip the whole thing
+ return null;
+ } catch {
+ return null;
+ }
+ }
+ return content;
+ };
+
+ // Try direct delta text
+ if (
+ chunkObj?.delta &&
+ typeof chunkObj.delta === "object" &&
+ chunkObj.delta !== null
+ ) {
+ const delta = chunkObj.delta as Record<string, unknown>;
+ if (typeof delta.text === "string") {
+ text = extractCleanText(delta.text);
+ }
+ }
+
+ // Try event structures
+ if (
+ !text &&
+ chunkObj?.event &&
+ typeof chunkObj.event === "object" &&
+ chunkObj.event !== null
+ ) {
+ const event = chunkObj.event as Record<string, unknown>;
+
+ // Try event payload content
+ if (
+ event?.payload &&
+ typeof event.payload === "object" &&
+ event.payload !== null
+ ) {
+ const payload = event.payload as Record<string, unknown>;
+
+ // Try direct payload content
+ if (typeof payload.content === "string") {
+ text = extractCleanText(payload.content);
+ }
+
+ // Try turn_complete event structure: payload.turn.output_message.content
+ if (
+ !text &&
+ payload?.turn &&
+ typeof payload.turn === "object" &&
+ payload.turn !== null
+ ) {
+ const turn = payload.turn as Record<string, unknown>;
+ if (
+ turn?.output_message &&
+ typeof turn.output_message === "object" &&
+ turn.output_message !== null
+ ) {
+ const outputMessage = turn.output_message as Record<
+ string,
+ unknown
+ >;
+ if (typeof outputMessage.content === "string") {
+ text = extractCleanText(outputMessage.content);
+ }
+ }
+
+ // Fallback to model_response in steps if no output_message
+ if (
+ !text &&
+ turn?.steps &&
+ Array.isArray(turn.steps) &&
+ turn.steps.length > 0
+ ) {
+ for (const step of turn.steps) {
+ if (step && typeof step === "object" && step !== null) {
+ const stepObj = step as Record<string, unknown>;
+ if (
+ stepObj?.model_response &&
+ typeof stepObj.model_response === "object" &&
+ stepObj.model_response !== null
+ ) {
+ const modelResponse = stepObj.model_response as Record<
+ string,
+ unknown
+ >;
+ if (typeof modelResponse.content === "string") {
+ text = extractCleanText(modelResponse.content);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Try payload delta
+ if (
+ !text &&
+ payload?.delta &&
+ typeof payload.delta === "object" &&
+ payload.delta !== null
+ ) {
+ const delta = payload.delta as Record<string, unknown>;
+ if (typeof delta.text === "string") {
+ text = extractCleanText(delta.text);
+ }
+ }
+ }
+
+ // Try event delta
+ if (
+ !text &&
+ event?.delta &&
+ typeof event.delta === "object" &&
+ event.delta !== null
+ ) {
+ const delta = event.delta as Record<string, unknown>;
+ if (typeof delta.text === "string") {
+ text = extractCleanText(delta.text);
+ }
+ if (!text && typeof delta.content === "string") {
+ text = extractCleanText(delta.content);
+ }
+ }
+ }
+
+ // Try choices structure (ChatML format)
+ if (
+ !text &&
+ chunkObj?.choices &&
+ Array.isArray(chunkObj.choices) &&
+ chunkObj.choices.length > 0
+ ) {
+ const choice = chunkObj.choices[0] as Record<string, unknown>;
+ if (
+ choice?.delta &&
+ typeof choice.delta === "object" &&
+ choice.delta !== null
+ ) {
+ const delta = choice.delta as Record<string, unknown>;
+ if (typeof delta.content === "string") {
+ text = extractCleanText(delta.content);
+ }
+ }
+ }
+
+ // Try direct string content
+ if (!text && typeof chunk === "string") {
+ text = extractCleanText(chunk);
+ }
+
+ return { text, isToolCall: false };
+ };
+}
+
+describe("Chunk Processor", () => {
+ const processChunk = createProcessChunk();
+
+ describe("Real Event Structures", () => {
+ test("handles turn_complete event with cancellation policy response", () => {
+ const chunk = {
+ event: {
+ payload: {
+ event_type: "turn_complete",
+ turn: {
+ turn_id: "50a2d6b7-49ed-4d1e-b1c2-6d68b3f726db",
+ session_id: "e7f62b8e-518c-4450-82df-e65fe49f27a3",
+ input_messages: [
+ {
+ role: "user",
+ content: "nice, what's the cancellation policy?",
+ context: null,
+ },
+ ],
+ steps: [
+ {
+ turn_id: "50a2d6b7-49ed-4d1e-b1c2-6d68b3f726db",
+ step_id: "54074310-af42-414c-9ffe-fba5b2ead0ad",
+ started_at: "2025-08-27T18:15:25.870703Z",
+ completed_at: "2025-08-27T18:15:51.288993Z",
+ step_type: "inference",
+ model_response: {
+ role: "assistant",
+ content:
+ "According to the search results, the cancellation policy for Red Hat Summit is as follows:\n\n* Cancellations must be received by 5 PM EDT on April 18, 2025 for a 50% refund of the registration fee.\n* No refunds will be given for cancellations received after 5 PM EDT on April 18, 2025.\n* Cancellation of travel reservations and hotel reservations are the responsibility of the registrant.",
+ stop_reason: "end_of_turn",
+ tool_calls: [],
+ },
+ },
+ ],
+ output_message: {
+ role: "assistant",
+ content:
+ "According to the search results, the cancellation policy for Red Hat Summit is as follows:\n\n* Cancellations must be received by 5 PM EDT on April 18, 2025 for a 50% refund of the registration fee.\n* No refunds will be given for cancellations received after 5 PM EDT on April 18, 2025.\n* Cancellation of travel reservations and hotel reservations are the responsibility of the registrant.",
+ stop_reason: "end_of_turn",
+ tool_calls: [],
+ },
+ output_attachments: [],
+ started_at: "2025-08-27T18:15:25.868548Z",
+ completed_at: "2025-08-27T18:15:51.289262Z",
+ },
+ },
+ },
+ };
+
+ const result = processChunk(chunk);
+ expect(result.isToolCall).toBe(false);
+ expect(result.text).toContain(
+ "According to the search results, the cancellation policy for Red Hat Summit is as follows:"
+ );
+ expect(result.text).toContain("5 PM EDT on April 18, 2025");
+ });
+
+ test("handles turn_complete event with address response", () => {
+ const chunk = {
+ event: {
+ payload: {
+ event_type: "turn_complete",
+ turn: {
+ turn_id: "2f4a1520-8ecc-4cb7-bb7b-886939e042b0",
+ session_id: "e7f62b8e-518c-4450-82df-e65fe49f27a3",
+ input_messages: [
+ {
+ role: "user",
+ content: "what's francisco's address",
+ context: null,
+ },
+ ],
+ steps: [
+ {
+ turn_id: "2f4a1520-8ecc-4cb7-bb7b-886939e042b0",
+ step_id: "c13dd277-1acb-4419-8fbf-d5e2f45392ea",
+ started_at: "2025-08-27T18:14:52.558761Z",
+ completed_at: "2025-08-27T18:15:11.306032Z",
+ step_type: "inference",
+ model_response: {
+ role: "assistant",
+ content:
+ "Francisco Arceo's address is:\n\nRed Hat\nUnited States\n17 Primrose Ln \nBasking Ridge New Jersey 07920",
+ stop_reason: "end_of_turn",
+ tool_calls: [],
+ },
+ },
+ ],
+ output_message: {
+ role: "assistant",
+ content:
+ "Francisco Arceo's address is:\n\nRed Hat\nUnited States\n17 Primrose Ln \nBasking Ridge New Jersey 07920",
+ stop_reason: "end_of_turn",
+ tool_calls: [],
+ },
+ output_attachments: [],
+ started_at: "2025-08-27T18:14:52.553707Z",
+ completed_at: "2025-08-27T18:15:11.306729Z",
+ },
+ },
+ },
+ };
+
+ const result = processChunk(chunk);
+ expect(result.isToolCall).toBe(false);
+ expect(result.text).toContain("Francisco Arceo's address is:");
+ expect(result.text).toContain("17 Primrose Ln");
+ expect(result.text).toContain("Basking Ridge New Jersey 07920");
+ });
+
+ test("handles turn_complete event with ticket cost response", () => {
+ const chunk = {
+ event: {
+ payload: {
+ event_type: "turn_complete",
+ turn: {
+ turn_id: "7ef244a3-efee-42ca-a9c8-942865251002",
+ session_id: "e7f62b8e-518c-4450-82df-e65fe49f27a3",
+ input_messages: [
+ {
+ role: "user",
+ content: "what was the ticket cost for summit?",
+ context: null,
+ },
+ ],
+ steps: [
+ {
+ turn_id: "7ef244a3-efee-42ca-a9c8-942865251002",
+ step_id: "7651dda0-315a-472d-b1c1-3c2725f55bc5",
+ started_at: "2025-08-27T18:14:21.710611Z",
+ completed_at: "2025-08-27T18:14:39.706452Z",
+ step_type: "inference",
+ model_response: {
+ role: "assistant",
+ content:
+ "The ticket cost for the Red Hat Summit was $999.00 for a conference pass.",
+ stop_reason: "end_of_turn",
+ tool_calls: [],
+ },
+ },
+ ],
+ output_message: {
+ role: "assistant",
+ content:
+ "The ticket cost for the Red Hat Summit was $999.00 for a conference pass.",
+ stop_reason: "end_of_turn",
+ tool_calls: [],
+ },
+ output_attachments: [],
+ started_at: "2025-08-27T18:14:21.705289Z",
+ completed_at: "2025-08-27T18:14:39.706752Z",
+ },
+ },
+ },
+ };
+
+ const result = processChunk(chunk);
+ expect(result.isToolCall).toBe(false);
+ expect(result.text).toBe(
+ "The ticket cost for the Red Hat Summit was $999.00 for a conference pass."
+ );
+ });
+ });
+
+ describe("Function Call Detection", () => {
+ test("detects function calls in direct string chunks", () => {
+ const chunk =
+ '{"type": "function", "name": "knowledge_search", "parameters": {"query": "test"}}';
+ const result = processChunk(chunk);
+ expect(result.isToolCall).toBe(true);
+ expect(result.text).toBe(null);
+ });
+
+ test("detects function calls in event payload content", () => {
+ const chunk = {
+ event: {
+ payload: {
+ content:
+ '{"type": "function", "name": "knowledge_search", "parameters": {"query": "test"}}',
+ },
+ },
+ };
+ const result = processChunk(chunk);
+ expect(result.isToolCall).toBe(true);
+ expect(result.text).toBe(null);
+ });
+
+ test("detects tool_calls in delta structure", () => {
+ const chunk = {
+ delta: {
+ tool_calls: [{ function: { name: "knowledge_search" } }],
+ },
+ };
+ const result = processChunk(chunk);
+ expect(result.isToolCall).toBe(true);
+ expect(result.text).toBe(null);
+ });
+
+ test("detects function call in mixed content but skips it", () => {
+ const chunk =
+ '{"type": "function", "name": "knowledge_search", "parameters": {"query": "test"}} Based on the search results, here is your answer.';
+ const result = processChunk(chunk);
+ // This is detected as a tool call and skipped entirely - the implementation prioritizes safety
+ expect(result.isToolCall).toBe(true);
+ expect(result.text).toBe(null);
+ });
+ });
+
+ describe("Text Extraction", () => {
+ test("extracts text from direct string chunks", () => {
+ const chunk = "Hello, this is a normal response.";
+ const result = processChunk(chunk);
+ expect(result.isToolCall).toBe(false);
+ expect(result.text).toBe("Hello, this is a normal response.");
+ });
+
+ test("extracts text from delta structure", () => {
+ const chunk = {
+ delta: {
+ text: "Hello, this is a normal response.",
+ },
+ };
+ const result = processChunk(chunk);
+ expect(result.isToolCall).toBe(false);
+ expect(result.text).toBe("Hello, this is a normal response.");
+ });
+
+ test("extracts text from choices structure", () => {
+ const chunk = {
+ choices: [
+ {
+ delta: {
+ content: "Hello, this is a normal response.",
+ },
+ },
+ ],
+ };
+ const result = processChunk(chunk);
+ expect(result.isToolCall).toBe(false);
+ expect(result.text).toBe("Hello, this is a normal response.");
+ });
+
+ test("prioritizes output_message over model_response in turn structure", () => {
+ const chunk = {
+ event: {
+ payload: {
+ turn: {
+ steps: [
+ {
+ model_response: {
+ content: "Model response content.",
+ },
+ },
+ ],
+ output_message: {
+ content: "Final output message content.",
+ },
+ },
+ },
+ },
+ };
+ const result = processChunk(chunk);
+ expect(result.isToolCall).toBe(false);
+ expect(result.text).toBe("Final output message content.");
+ });
+
+ test("falls back to model_response when no output_message", () => {
+ const chunk = {
+ event: {
+ payload: {
+ turn: {
+ steps: [
+ {
+ model_response: {
+ content: "This is from the model response.",
+ },
+ },
+ ],
+ },
+ },
+ },
+ };
+ const result = processChunk(chunk);
+ expect(result.isToolCall).toBe(false);
+ expect(result.text).toBe("This is from the model response.");
+ });
+ });
+
+ describe("Edge Cases", () => {
+ test("handles empty chunks", () => {
+ const result = processChunk("");
+ expect(result.isToolCall).toBe(false);
+ expect(result.text).toBe("");
+ });
+
+ test("handles null chunks", () => {
+ const result = processChunk(null);
+ expect(result.isToolCall).toBe(false);
+ expect(result.text).toBe(null);
+ });
+
+ test("handles undefined chunks", () => {
+ const result = processChunk(undefined);
+ expect(result.isToolCall).toBe(false);
+ expect(result.text).toBe(null);
+ });
+
+ test("handles chunks with no text content", () => {
+ const chunk = {
+ event: {
+ metadata: {
+ timestamp: "2024-01-01",
+ },
+ },
+ };
+ const result = processChunk(chunk);
+ expect(result.isToolCall).toBe(false);
+ expect(result.text).toBe(null);
+ });
+
+ test("handles malformed JSON in function calls gracefully", () => {
+ const chunk =
+ '{"type": "function", "name": "knowledge_search"} incomplete json';
+ const result = processChunk(chunk);
+ expect(result.isToolCall).toBe(true);
+ expect(result.text).toBe(null);
+ });
+ });
+});
diff --git a/llama_stack/ui/app/chat-playground/page.test.tsx b/llama_stack/ui/app/chat-playground/page.test.tsx
index 54c15f95a..d9025e523 100644
--- a/llama_stack/ui/app/chat-playground/page.test.tsx
+++ b/llama_stack/ui/app/chat-playground/page.test.tsx
@@ -31,6 +31,9 @@ const mockClient = {
toolgroups: {
list: jest.fn(),
},
+ vectorDBs: {
+ list: jest.fn(),
+ },
};
jest.mock("@/hooks/use-auth-client", () => ({
@@ -164,7 +167,7 @@ describe("ChatPlaygroundPage", () => {
session_name: "Test Session",
started_at: new Date().toISOString(),
turns: [],
- }); // No turns by default
+ });
mockClient.agents.retrieve.mockResolvedValue({
agent_id: "test-agent",
agent_config: {
@@ -417,7 +420,6 @@ describe("ChatPlaygroundPage", () => {
});
await waitFor(() => {
- // first agent should be auto-selected
expect(mockClient.agents.session.create).toHaveBeenCalledWith(
"agent_123",
{ session_name: "Default Session" }
@@ -464,7 +466,7 @@ describe("ChatPlaygroundPage", () => {
});
});
- test("hides delete button when only one agent exists", async () => {
+ test("shows delete button even when only one agent exists", async () => {
mockClient.agents.list.mockResolvedValue({
data: [mockAgents[0]],
});
@@ -474,9 +476,7 @@ describe("ChatPlaygroundPage", () => {
});
await waitFor(() => {
- expect(
- screen.queryByTitle("Delete current agent")
- ).not.toBeInTheDocument();
+ expect(screen.getByTitle("Delete current agent")).toBeInTheDocument();
});
});
@@ -505,7 +505,7 @@ describe("ChatPlaygroundPage", () => {
await waitFor(() => {
expect(mockClient.agents.delete).toHaveBeenCalledWith("agent_123");
expect(global.confirm).toHaveBeenCalledWith(
- "Are you sure you want to delete this agent? This action cannot be undone and will delete all associated sessions."
+ "Are you sure you want to delete this agent? This action cannot be undone and will delete the agent and all its sessions."
);
});
@@ -584,4 +584,207 @@ describe("ChatPlaygroundPage", () => {
consoleSpy.mockRestore();
});
});
+
+ describe("RAG File Upload", () => {
+ let mockFileReader: {
+ readAsDataURL: jest.Mock;
+ readAsText: jest.Mock;
+ result: string | null;
+ onload: (() => void) | null;
+ onerror: (() => void) | null;
+ };
+ let mockRAGTool: {
+ insert: jest.Mock;
+ };
+
+ beforeEach(() => {
+ mockFileReader = {
+ readAsDataURL: jest.fn(),
+ readAsText: jest.fn(),
+ result: null,
+ onload: null,
+ onerror: null,
+ };
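+      // swap the global FileReader for the mock so tests never perform real file reads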
+ global.FileReader = jest.fn(() => mockFileReader);
+
+ mockRAGTool = {
+ insert: jest.fn().mockResolvedValue({}),
+ };
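+      // expose the mocked RAG tool on the shared client so upload code can call client.toolRuntime.ragTool.insert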
+ mockClient.toolRuntime = {
+ ragTool: mockRAGTool,
+ };
+ });
+
+ afterEach(() => {
+ jest.clearAllMocks();
+ });
+
+ test("handles text file upload", async () => {
+ new File(["Hello, world!"], "test.txt", {
+ type: "text/plain",
+ });
+
+ mockClient.agents.retrieve.mockResolvedValue({
+ agent_id: "test-agent",
+ agent_config: {
+ toolgroups: [
+ {
+ name: "builtin::rag/knowledge_search",
+ args: { vector_db_ids: ["test-vector-db"] },
+ },
+ ],
+ },
+ });
+
+ await act(async () => {
+        render(<ChatPlaygroundPage />);
+ });
+
+ await waitFor(() => {
+ expect(screen.getByTestId("chat-component")).toBeInTheDocument();
+ });
+
+ const chatComponent = screen.getByTestId("chat-component");
+ chatComponent.getAttribute("data-onragfileupload");
+
+      // simplified test: no upload is triggered here, so the RAG tool should not have been called
+ expect(mockRAGTool.insert).not.toHaveBeenCalled();
+ });
+
+ test("handles PDF file upload with FileReader", async () => {
+ new File([new ArrayBuffer(1000)], "test.pdf", {
+ type: "application/pdf",
+ });
+
+ const mockDataURL = "data:application/pdf;base64,JVBERi0xLjQK";
+ mockFileReader.result = mockDataURL;
+
+ mockClient.agents.retrieve.mockResolvedValue({
+ agent_id: "test-agent",
+ agent_config: {
+ toolgroups: [
+ {
+ name: "builtin::rag/knowledge_search",
+ args: { vector_db_ids: ["test-vector-db"] },
+ },
+ ],
+ },
+ });
+
+ await act(async () => {
+        render(<ChatPlaygroundPage />);
+ });
+
+ await waitFor(() => {
+ expect(screen.getByTestId("chat-component")).toBeInTheDocument();
+ });
+
+ expect(global.FileReader).toBeDefined();
+ });
+
+ test("handles different file types correctly", () => {
+ const getContentType = (filename: string): string => {
+ const ext = filename.toLowerCase().split(".").pop();
+ switch (ext) {
+ case "pdf":
+ return "application/pdf";
+ case "txt":
+ return "text/plain";
+ case "md":
+ return "text/markdown";
+ case "html":
+ return "text/html";
+ case "csv":
+ return "text/csv";
+ case "json":
+ return "application/json";
+ case "docx":
+ return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
+ case "doc":
+ return "application/msword";
+ default:
+ return "application/octet-stream";
+ }
+ };
+
+ expect(getContentType("test.pdf")).toBe("application/pdf");
+ expect(getContentType("test.txt")).toBe("text/plain");
+ expect(getContentType("test.md")).toBe("text/markdown");
+ expect(getContentType("test.html")).toBe("text/html");
+ expect(getContentType("test.csv")).toBe("text/csv");
+ expect(getContentType("test.json")).toBe("application/json");
+ expect(getContentType("test.docx")).toBe(
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+ );
+ expect(getContentType("test.doc")).toBe("application/msword");
+ expect(getContentType("test.unknown")).toBe("application/octet-stream");
+ });
+
+ test("determines text vs binary file types correctly", () => {
+ const isTextFile = (mimeType: string): boolean => {
+ return (
+ mimeType.startsWith("text/") ||
+ mimeType === "application/json" ||
+ mimeType === "text/markdown" ||
+ mimeType === "text/html" ||
+ mimeType === "text/csv"
+ );
+ };
+
+ expect(isTextFile("text/plain")).toBe(true);
+ expect(isTextFile("text/markdown")).toBe(true);
+ expect(isTextFile("text/html")).toBe(true);
+ expect(isTextFile("text/csv")).toBe(true);
+ expect(isTextFile("application/json")).toBe(true);
+
+ expect(isTextFile("application/pdf")).toBe(false);
+ expect(isTextFile("application/msword")).toBe(false);
+ expect(
+ isTextFile(
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+ )
+ ).toBe(false);
+ expect(isTextFile("application/octet-stream")).toBe(false);
+ });
+
+ test("handles FileReader error gracefully", async () => {
+ const pdfFile = new File([new ArrayBuffer(1000)], "test.pdf", {
+ type: "application/pdf",
+ });
+
+ mockFileReader.onerror = jest.fn();
+ const mockError = new Error("FileReader failed");
+
+ const fileReaderPromise = new Promise((resolve, reject) => {
+ const reader = new FileReader();
+ reader.onload = () => resolve(reader.result as string);
+ reader.onerror = () => reject(reader.error || mockError);
+ reader.readAsDataURL(pdfFile);
+
+ setTimeout(() => {
+ reader.onerror?.(new ProgressEvent("error"));
+ }, 0);
+ });
+
+ await expect(fileReaderPromise).rejects.toBeDefined();
+ });
+
+ test("handles large file upload with FileReader approach", () => {
+      // create a 10 MB file
+ const largeFile = new File(
+ [new ArrayBuffer(10 * 1024 * 1024)],
+ "large.pdf",
+ {
+ type: "application/pdf",
+ }
+ );
+
+ expect(largeFile.size).toBe(10 * 1024 * 1024); // 10MB
+
+ expect(global.FileReader).toBeDefined();
+
+ const reader = new FileReader();
+ expect(reader.readAsDataURL).toBeDefined();
+ });
+ });
});
diff --git a/llama_stack/ui/app/chat-playground/page.tsx b/llama_stack/ui/app/chat-playground/page.tsx
index f26791a41..0417f7083 100644
--- a/llama_stack/ui/app/chat-playground/page.tsx
+++ b/llama_stack/ui/app/chat-playground/page.tsx
@@ -15,6 +15,7 @@ import { Input } from "@/components/ui/input";
import { Trash2 } from "lucide-react";
import { Chat } from "@/components/chat-playground/chat";
import { type Message } from "@/components/chat-playground/chat-message";
+import { VectorDBCreator } from "@/components/chat-playground/vector-db-creator";
import { useAuthClient } from "@/hooks/use-auth-client";
import type { Model } from "llama-stack-client/resources/models";
import type { TurnCreateParams } from "llama-stack-client/resources/agents/turn";
@@ -22,6 +23,10 @@ import {
SessionUtils,
type ChatSession,
} from "@/components/chat-playground/conversations";
+import {
+ cleanMessageContent,
+ extractCleanText,
+} from "@/lib/message-content-utils";
export default function ChatPlaygroundPage() {
const [currentSession, setCurrentSession] = useState<ChatSession | null>(
null
@@ -65,6 +70,20 @@ export default function ChatPlaygroundPage() {
provider_resource_id?: string;
}>
>([]);
+ const [showCreateVectorDB, setShowCreateVectorDB] = useState(false);
+ const [availableVectorDBs, setAvailableVectorDBs] = useState<
+ Array<{
+ identifier: string;
+ vector_db_name?: string;
+ embedding_model: string;
+ }>
+ >([]);
+ const [uploadNotification, setUploadNotification] = useState<{
+ show: boolean;
+ message: string;
+ type: "success" | "error" | "loading";
+ }>({ show: false, message: "", type: "success" });
+ const [selectedVectorDBs, setSelectedVectorDBs] = useState<string[]>([]);
const client = useAuthClient();
const abortControllerRef = useRef<AbortController | null>(null);
@@ -73,26 +92,22 @@ export default function ChatPlaygroundPage() {
const loadAgentConfig = useCallback(
async (agentId: string) => {
try {
- console.log("Loading agent config for:", agentId);
-
// try to load from cache first
const cachedConfig = SessionUtils.loadAgentConfig(agentId);
if (cachedConfig) {
- console.log("✅ Loaded agent config from cache:", cachedConfig);
setSelectedAgentConfig({
toolgroups: cachedConfig.toolgroups,
});
return;
}
- console.log("📡 Fetching agent config from API...");
const agentDetails = await client.agents.retrieve(agentId);
- console.log("Agent details retrieved:", agentDetails);
- console.log("Agent config:", agentDetails.agent_config);
- console.log("Agent toolgroups:", agentDetails.agent_config?.toolgroups);
- // cache the config
- SessionUtils.saveAgentConfig(agentId, agentDetails.agent_config);
+ // cache config
+ SessionUtils.saveAgentConfig(agentId, {
+ ...agentDetails.agent_config,
+ toolgroups: agentDetails.agent_config?.toolgroups,
+ });
setSelectedAgentConfig({
toolgroups: agentDetails.agent_config?.toolgroups,
@@ -116,7 +131,7 @@ export default function ChatPlaygroundPage() {
id: response.session_id,
name: "Default Session",
messages: [],
- selectedModel: selectedModel, // Use current selected model
+ selectedModel: selectedModel, // use current selected model
systemMessage: "You are a helpful assistant.",
agentId,
createdAt: Date.now(),
@@ -124,10 +139,6 @@ export default function ChatPlaygroundPage() {
};
setCurrentSession(defaultSession);
- console.log(
- `💾 Saving default session ID for agent ${agentId}:`,
- defaultSession.id
- );
SessionUtils.saveCurrentSessionId(defaultSession.id, agentId);
// cache entire session data
SessionUtils.saveSessionData(agentId, defaultSession);
@@ -152,7 +163,6 @@ export default function ChatPlaygroundPage() {
const messages: Message[] = [];
for (const turn of session.turns) {
- // add user messages
if (turn.input_messages && Array.isArray(turn.input_messages)) {
for (const input of turn.input_messages) {
if (input.role === "user" && input.content) {
@@ -169,15 +179,18 @@ export default function ChatPlaygroundPage() {
}
}
- // add assistant message from output_message
if (turn.output_message && turn.output_message.content) {
+ console.log("Raw message content:", turn.output_message.content);
+ console.log("Content type:", typeof turn.output_message.content);
+
+ const cleanContent = cleanMessageContent(
+ turn.output_message.content
+ );
+
messages.push({
id: `${turn.turn_id}-assistant-${messages.length}`,
role: "assistant",
- content:
- typeof turn.output_message.content === "string"
- ? turn.output_message.content
- : JSON.stringify(turn.output_message.content),
+ content: cleanContent,
createdAt: new Date(
turn.completed_at || turn.started_at || Date.now()
),
@@ -197,27 +210,22 @@ export default function ChatPlaygroundPage() {
const loadAgentSessions = useCallback(
async (agentId: string) => {
try {
- console.log("Loading sessions for agent:", agentId);
const response = await client.agents.session.list(agentId);
- console.log("Available sessions:", response.data);
if (
response.data &&
Array.isArray(response.data) &&
response.data.length > 0
) {
- // check for a previously saved session ID for this specific agent
+ // check for saved session ID for this agent
const savedSessionId = SessionUtils.loadCurrentSessionId(agentId);
- console.log(`Saved session ID for agent ${agentId}:`, savedSessionId);
-
- // try to load cached session data first
+ // try to load cached agent session data first
if (savedSessionId) {
const cachedSession = SessionUtils.loadSessionData(
agentId,
savedSessionId
);
if (cachedSession) {
- console.log("✅ Loaded session from cache:", cachedSession.id);
setCurrentSession(cachedSession);
SessionUtils.saveCurrentSessionId(cachedSession.id, agentId);
return;
@@ -238,7 +246,8 @@ export default function ChatPlaygroundPage() {
// try to find saved session id in available sessions
if (savedSessionId) {
const foundSession = response.data.find(
- (s: { session_id: string }) => s.session_id === savedSessionId
+ (s: { [key: string]: unknown }) =>
+ (s as { session_id: string }).session_id === savedSessionId
);
console.log("Found saved session in list:", foundSession);
if (foundSession) {
@@ -269,7 +278,7 @@ export default function ChatPlaygroundPage() {
id: sessionToLoad.session_id,
name: sessionToLoad.session_name || "Session",
messages,
- selectedModel: selectedModel || "", // Preserve current model or use empty
+ selectedModel: selectedModel || "",
systemMessage: "You are a helpful assistant.",
agentId,
createdAt: sessionToLoad.started_at
@@ -330,7 +339,8 @@ export default function ChatPlaygroundPage() {
// if we have a saved agent ID, find it in the available agents
if (savedAgentId) {
const foundAgent = agentList.data.find(
- (a: { agent_id: string }) => a.agent_id === savedAgentId
+ (a: { [key: string]: unknown }) =>
+ (a as { agent_id: string }).agent_id === savedAgentId
);
if (foundAgent) {
agentToSelect = foundAgent as typeof agentToSelect;
@@ -353,14 +363,10 @@ export default function ChatPlaygroundPage() {
fetchAgents();
- // fetch available toolgroups
const fetchToolgroups = async () => {
try {
- console.log("Fetching toolgroups...");
const toolgroups = await client.toolgroups.list();
- console.log("Toolgroups response:", toolgroups);
- // The client returns data directly, not wrapped in .data
const toolGroupsArray = Array.isArray(toolgroups)
? toolgroups
: toolgroups &&
@@ -381,7 +387,6 @@ export default function ChatPlaygroundPage() {
if (toolGroupsArray && Array.isArray(toolGroupsArray)) {
setAvailableToolgroups(toolGroupsArray);
- console.log("Set toolgroups:", toolGroupsArray);
} else {
console.error("Invalid toolgroups data format:", toolgroups);
}
@@ -398,6 +403,24 @@ export default function ChatPlaygroundPage() {
};
fetchToolgroups();
+
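+    // load available vector DBs so the agent-creation form can offer them for RAG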
+ const fetchVectorDBs = async () => {
+ try {
+ const vectorDBs = await client.vectorDBs.list();
+
+ const vectorDBsArray = Array.isArray(vectorDBs) ? vectorDBs : [];
+
+ if (vectorDBsArray && Array.isArray(vectorDBsArray)) {
+ setAvailableVectorDBs(vectorDBsArray);
+ } else {
+ console.error("Invalid vector DBs data format:", vectorDBs);
+ }
+ } catch (error) {
+ console.error("Error fetching vector DBs:", error);
+ }
+ };
+
+ fetchVectorDBs();
}, [client, loadAgentSessions, loadAgentConfig]);
const createNewAgent = useCallback(
@@ -405,24 +428,35 @@ export default function ChatPlaygroundPage() {
name: string,
instructions: string,
model: string,
- toolgroups: string[] = []
+ toolgroups: string[] = [],
+ vectorDBs: string[] = []
) => {
try {
- console.log("Creating agent with toolgroups:", toolgroups);
+ const processedToolgroups = toolgroups.map(toolgroup => {
+ if (toolgroup === "builtin::rag" && vectorDBs.length > 0) {
+ return {
+ name: "builtin::rag/knowledge_search",
+ args: {
+ vector_db_ids: vectorDBs,
+ },
+ };
+ }
+ return toolgroup;
+ });
+
const agentConfig = {
model,
instructions,
name: name || undefined,
enable_session_persistence: true,
- toolgroups: toolgroups.length > 0 ? toolgroups : undefined,
+ toolgroups:
+ processedToolgroups.length > 0 ? processedToolgroups : undefined,
};
- console.log("Agent config being sent:", agentConfig);
const response = await client.agents.create({
agent_config: agentConfig,
});
- // refresh agents list
const agentList = await client.agents.list();
setAgents(
(agentList.data as Array<{
@@ -436,7 +470,6 @@ export default function ChatPlaygroundPage() {
}>) || []
);
- // set the new agent as selected
setSelectedAgentId(response.agent_id);
await loadAgentConfig(response.agent_id);
await loadAgentSessions(response.agent_id);
@@ -450,24 +483,47 @@ export default function ChatPlaygroundPage() {
[client, loadAgentSessions, loadAgentConfig]
);
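+  // refresh the vector DB list once the creator modal reports a new DB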
+ const handleVectorDBCreated = useCallback(
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
+ async (_vectorDbId: string) => {
+ setShowCreateVectorDB(false);
+
+ try {
+ const vectorDBs = await client.vectorDBs.list();
+ const vectorDBsArray = Array.isArray(vectorDBs) ? vectorDBs : [];
+
+ if (vectorDBsArray && Array.isArray(vectorDBsArray)) {
+ setAvailableVectorDBs(vectorDBsArray);
+ }
+ } catch (error) {
+ console.error("Error refreshing vector DBs:", error);
+ }
+ },
+ [client]
+ );
+
const deleteAgent = useCallback(
async (agentId: string) => {
- if (agents.length <= 1) {
- return;
- }
-
if (
confirm(
- "Are you sure you want to delete this agent? This action cannot be undone and will delete all associated sessions."
+ "Are you sure you want to delete this agent? This action cannot be undone and will delete the agent and all its sessions."
)
) {
try {
- await client.agents.delete(agentId);
+ // there's a known error where the delete API returns 500 even on success
+ try {
+ await client.agents.delete(agentId);
+ console.log("Agent deleted successfully");
+ } catch (deleteError) {
+            // log the error but don't re-throw; deletion has likely succeeded despite the error
+ console.log(
+ "Agent delete API returned error (but deletion likely succeeded):",
+ deleteError
+ );
+ }
- // clear cached data for agent
SessionUtils.clearAgentCache(agentId);
- // Refresh agents list
const agentList = await client.agents.list();
setAgents(
(agentList.data as Array<{
@@ -481,10 +537,11 @@ export default function ChatPlaygroundPage() {
}>) || []
);
- // if we deleted the current agent, switch to another one
+ // if we delete current agent, switch to another
if (selectedAgentId === agentId) {
const remainingAgents = agentList.data?.filter(
- (a: { agent_id: string }) => a.agent_id !== agentId
+ (a: { [key: string]: unknown }) =>
+ (a as { agent_id: string }).agent_id !== agentId
);
if (remainingAgents && remainingAgents.length > 0) {
const newAgent = remainingAgents[0] as {
@@ -501,7 +558,7 @@ export default function ChatPlaygroundPage() {
await loadAgentConfig(newAgent.agent_id);
await loadAgentSessions(newAgent.agent_id);
} else {
- // No agents left
+ // no agents left
setSelectedAgentId("");
setCurrentSession(null);
setSelectedAgentConfig(null);
@@ -509,10 +566,76 @@ export default function ChatPlaygroundPage() {
}
} catch (error) {
console.error("Error deleting agent:", error);
+
+        // check if this is the known server bug where deletion succeeds but returns 500
+ // The error message will typically contain status codes or "Could not find agent"
+ const errorMessage =
+ error instanceof Error ? error.message : String(error);
+ const isKnownServerBug =
+ errorMessage.includes("500") ||
+ errorMessage.includes("Internal Server Error") ||
+ errorMessage.includes("Could not find agent") ||
+ errorMessage.includes("400");
+
+ if (isKnownServerBug) {
+ console.log(
+ "Agent deletion succeeded despite error, cleaning up UI"
+ );
+ SessionUtils.clearAgentCache(agentId);
+ try {
+ const agentList = await client.agents.list();
+ setAgents(
+ (agentList.data as Array<{
+ agent_id: string;
+ agent_config?: {
+ agent_name?: string;
+ name?: string;
+ instructions?: string;
+ };
+ [key: string]: unknown;
+ }>) || []
+ );
+
+ if (selectedAgentId === agentId) {
+ const remainingAgents = agentList.data?.filter(
+ (a: { [key: string]: unknown }) =>
+ (a as { agent_id: string }).agent_id !== agentId
+ );
+ if (remainingAgents && remainingAgents.length > 0) {
+ const newAgent = remainingAgents[0] as {
+ agent_id: string;
+ agent_config?: {
+ agent_name?: string;
+ name?: string;
+ instructions?: string;
+ };
+ [key: string]: unknown;
+ };
+ setSelectedAgentId(newAgent.agent_id);
+ SessionUtils.saveCurrentAgentId(newAgent.agent_id);
+ await loadAgentConfig(newAgent.agent_id);
+ await loadAgentSessions(newAgent.agent_id);
+ } else {
+ // no agents left
+ setSelectedAgentId("");
+ setCurrentSession(null);
+ setSelectedAgentConfig(null);
+ }
+ }
+ } catch (refreshError) {
+ console.error("Error refreshing agents list:", refreshError);
+ }
+ } else {
+ // show error that we don't know about to user
+ console.error("Unexpected error during agent deletion:", error);
+ if (error instanceof Error) {
+ alert(`Failed to delete agent: ${error.message}`);
+ }
+ }
}
}
},
- [agents.length, client, selectedAgentId, loadAgentConfig, loadAgentSessions]
+ [client, selectedAgentId, loadAgentConfig, loadAgentSessions]
);
const handleModelChange = useCallback((newModel: string) => {
@@ -530,10 +653,6 @@ export default function ChatPlaygroundPage() {
useEffect(() => {
if (currentSession) {
- console.log(
- `💾 Auto-saving session ID for agent ${currentSession.agentId}:`,
- currentSession.id
- );
SessionUtils.saveCurrentSessionId(
currentSession.id,
currentSession.agentId
@@ -556,8 +675,12 @@ export default function ChatPlaygroundPage() {
setModelsLoading(true);
setModelsError(null);
const modelList = await client.models.list();
+
+ // store all models (including embedding models for vector DB creation)
+ setModels(modelList);
+
+ // set default LLM model for chat
const llmModels = modelList.filter(model => model.model_type === "llm");
- setModels(llmModels);
if (llmModels.length > 0) {
handleModelChange(llmModels[0].identifier);
}
@@ -614,7 +737,7 @@ export default function ChatPlaygroundPage() {
messages: [...prev.messages, userMessage],
updatedAt: Date.now(),
};
- // Update cache with new message
+ // update cache with new message
SessionUtils.saveSessionData(prev.agentId, updatedSession);
return updatedSession;
});
@@ -653,7 +776,8 @@ export default function ChatPlaygroundPage() {
turnParams,
{
signal: abortController.signal,
- } as { signal: AbortSignal }
+ timeout: 300000, // 5-minute timeout for RAG queries
+ } as { signal: AbortSignal; timeout: number }
);
const assistantMessage: Message = {
@@ -663,42 +787,242 @@ export default function ChatPlaygroundPage() {
createdAt: new Date(),
};
- const extractDeltaText = (chunk: unknown): string | null => {
- // this is an awful way to handle different chunk formats, but i'm not sure if there's much of a better way
- if (chunk?.delta?.text && typeof chunk.delta.text === "string") {
- return chunk.delta.text;
- }
+ const processChunk = (
+ chunk: unknown
+ ): { text: string | null; isToolCall: boolean } => {
+ const chunkObj = chunk as Record<string, unknown>;
- if (
- chunk?.event?.delta?.text &&
- typeof chunk.event.delta.text === "string"
- ) {
- return chunk.event.delta.text;
- }
+ // helper to check if content contains function call JSON
+ const containsToolCall = (content: string): boolean => {
+ return (
+ content.includes('"type": "function"') ||
+ content.includes('"name": "knowledge_search"') ||
+ content.includes('"parameters":') ||
+ !!content.match(/\{"type":\s*"function".*?\}/)
+ );
+ };
- if (
- chunk?.choices?.[0]?.delta?.content &&
- typeof chunk.choices[0].delta.content === "string"
- ) {
- return chunk.choices[0].delta.content;
- }
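+        // pass 1: flag tool-call chunks across the known chunk shapes so they can be skipped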
+ let isToolCall = false;
+ let potentialContent = "";
if (typeof chunk === "string") {
- return chunk;
+ potentialContent = chunk;
+ isToolCall = containsToolCall(chunk);
}
if (
- chunk?.event?.payload?.delta?.text &&
- typeof chunk.event.payload.delta.text === "string"
+ chunkObj?.delta &&
+ typeof chunkObj.delta === "object" &&
+ chunkObj.delta !== null
) {
- return chunk.event.payload.delta.text;
+ const delta = chunkObj.delta as Record<string, unknown>;
+ if ("tool_calls" in delta) {
+ isToolCall = true;
+ }
+ if (typeof delta.text === "string") {
+ potentialContent = delta.text;
+ if (containsToolCall(delta.text)) {
+ isToolCall = true;
+ }
+ }
}
- if (process.env.NODE_ENV !== "production") {
- console.debug("Unrecognized chunk format:", chunk);
+ if (
+ chunkObj?.event &&
+ typeof chunkObj.event === "object" &&
+ chunkObj.event !== null
+ ) {
+ const event = chunkObj.event as Record<string, unknown>;
+
+ if (
+ event?.payload &&
+ typeof event.payload === "object" &&
+ event.payload !== null
+ ) {
+ const payload = event.payload as Record<string, unknown>;
+ if (typeof payload.content === "string") {
+ potentialContent = payload.content;
+ if (containsToolCall(payload.content)) {
+ isToolCall = true;
+ }
+ }
+
+ if (
+ payload?.delta &&
+ typeof payload.delta === "object" &&
+ payload.delta !== null
+ ) {
+ const delta = payload.delta as Record<string, unknown>;
+ if (typeof delta.text === "string") {
+ potentialContent = delta.text;
+ if (containsToolCall(delta.text)) {
+ isToolCall = true;
+ }
+ }
+ }
+ }
+
+ if (
+ event?.delta &&
+ typeof event.delta === "object" &&
+ event.delta !== null
+ ) {
+ const delta = event.delta as Record<string, unknown>;
+ if (typeof delta.text === "string") {
+ potentialContent = delta.text;
+ if (containsToolCall(delta.text)) {
+ isToolCall = true;
+ }
+ }
+ if (typeof delta.content === "string") {
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
+ potentialContent = delta.content;
+ if (containsToolCall(delta.content)) {
+ isToolCall = true;
+ }
+ }
+ }
}
- return null;
+ // if it's a tool call, skip it (don't display in chat)
+ if (isToolCall) {
+ return { text: null, isToolCall: true };
+ }
+
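+        // pass 2: walk the same chunk shapes again, this time extracting displayable text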
+ let text: string | null = null;
+
+ if (
+ chunkObj?.delta &&
+ typeof chunkObj.delta === "object" &&
+ chunkObj.delta !== null
+ ) {
+ const delta = chunkObj.delta as Record<string, unknown>;
+ if (typeof delta.text === "string") {
+ text = extractCleanText(delta.text);
+ }
+ }
+
+ if (
+ !text &&
+ chunkObj?.event &&
+ typeof chunkObj.event === "object" &&
+ chunkObj.event !== null
+ ) {
+ const event = chunkObj.event as Record<string, unknown>;
+
+ if (
+ event?.payload &&
+ typeof event.payload === "object" &&
+ event.payload !== null
+ ) {
+ const payload = event.payload as Record<string, unknown>;
+
+ if (typeof payload.content === "string") {
+ text = extractCleanText(payload.content);
+ }
+
+ if (
+ !text &&
+ payload?.turn &&
+ typeof payload.turn === "object" &&
+ payload.turn !== null
+ ) {
+ const turn = payload.turn as Record<string, unknown>;
+ if (
+ turn?.output_message &&
+ typeof turn.output_message === "object" &&
+ turn.output_message !== null
+ ) {
+ const outputMessage = turn.output_message as Record<
+ string,
+ unknown
+ >;
+ if (typeof outputMessage.content === "string") {
+ text = extractCleanText(outputMessage.content);
+ }
+ }
+
+ if (
+ !text &&
+ turn?.steps &&
+ Array.isArray(turn.steps) &&
+ turn.steps.length > 0
+ ) {
+ for (const step of turn.steps) {
+ if (step && typeof step === "object" && step !== null) {
+ const stepObj = step as Record<string, unknown>;
+ if (
+ stepObj?.model_response &&
+ typeof stepObj.model_response === "object" &&
+ stepObj.model_response !== null
+ ) {
+ const modelResponse = stepObj.model_response as Record<
+ string,
+ unknown
+ >;
+ if (typeof modelResponse.content === "string") {
+ text = extractCleanText(modelResponse.content);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (
+ !text &&
+ payload?.delta &&
+ typeof payload.delta === "object" &&
+ payload.delta !== null
+ ) {
+ const delta = payload.delta as Record<string, unknown>;
+ if (typeof delta.text === "string") {
+ text = extractCleanText(delta.text);
+ }
+ }
+ }
+
+ if (
+ !text &&
+ event?.delta &&
+ typeof event.delta === "object" &&
+ event.delta !== null
+ ) {
+ const delta = event.delta as Record<string, unknown>;
+ if (typeof delta.text === "string") {
+ text = extractCleanText(delta.text);
+ }
+ if (!text && typeof delta.content === "string") {
+ text = extractCleanText(delta.content);
+ }
+ }
+ }
+
+ if (
+ !text &&
+ chunkObj?.choices &&
+ Array.isArray(chunkObj.choices) &&
+ chunkObj.choices.length > 0
+ ) {
+ const choice = chunkObj.choices[0] as Record<string, unknown>;
+ if (
+ choice?.delta &&
+ typeof choice.delta === "object" &&
+ choice.delta !== null
+ ) {
+ const delta = choice.delta as Record<string, unknown>;
+ if (typeof delta.content === "string") {
+ text = extractCleanText(delta.content);
+ }
+ }
+ }
+
+ if (!text && typeof chunk === "string") {
+ text = extractCleanText(chunk);
+ }
+
+ return { text, isToolCall: false };
};
setCurrentSession(prev => {
if (!prev) return null;
@@ -713,8 +1037,34 @@ export default function ChatPlaygroundPage() {
});
let fullContent = "";
+
for await (const chunk of response) {
- const deltaText = extractDeltaText(chunk);
+ const { text: deltaText } = processChunk(chunk);
+
+ // logging for debugging function calls
+ // if (deltaText && deltaText.includes("knowledge_search")) {
+ // console.log("🔍 Function call detected in text output:", deltaText);
+ // console.log("🔍 Original chunk:", JSON.stringify(chunk, null, 2));
+ // }
+
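+        // surface turn_complete payloads whose final content still embeds a knowledge_search call (debug aid)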
+ if (chunk && typeof chunk === "object" && "event" in chunk) {
+ const event = (
+ chunk as {
+ event: {
+ payload?: {
+ event_type?: string;
+ turn?: { output_message?: { content?: string } };
+ };
+ };
+ }
+ ).event;
+ if (event?.payload?.event_type === "turn_complete") {
+ const content = event?.payload?.turn?.output_message?.content;
+ if (content && content.includes("knowledge_search")) {
+ console.log("🔍 Function call found in turn_complete:", content);
+ }
+ }
+ }
if (deltaText) {
fullContent += deltaText;
@@ -732,9 +1082,9 @@ export default function ChatPlaygroundPage() {
messages: newMessages,
updatedAt: Date.now(),
};
- // update cache with streaming content (throttled)
+ // update cache with streaming content
if (fullContent.length % 100 === 0) {
- // Only cache every 100 characters to avoid spam
+ // Only cache every 100 characters
SessionUtils.saveSessionData(prev.agentId, updatedSession);
}
return updatedSession;
@@ -809,8 +1159,180 @@ export default function ChatPlaygroundPage() {
setError(null);
};
+ const handleRAGFileUpload = async (file: File) => {
+ if (!selectedAgentConfig?.toolgroups || !selectedAgentId) {
+ setError("No agent selected or agent has no RAG tools configured");
+ return;
+ }
+
+ // find RAG toolgroups that have vector_db_ids configured
+ const ragToolgroups = selectedAgentConfig.toolgroups.filter(toolgroup => {
+ if (typeof toolgroup === "object" && toolgroup.name?.includes("rag")) {
+ return toolgroup.args && "vector_db_ids" in toolgroup.args;
+ }
+ return false;
+ });
+
+ if (ragToolgroups.length === 0) {
+ setError("Current agent has no vector databases configured for RAG");
+ return;
+ }
+
+ try {
+ setError(null);
+ console.log("Uploading file using RAG tool...");
+
+ setUploadNotification({
+ show: true,
+ message: `📄 Uploading and indexing "${file.name}"...`,
+ type: "loading",
+ });
+
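+      // collect every vector DB id referenced by the agent's RAG toolgroups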
+ const vectorDbIds = ragToolgroups.flatMap(toolgroup => {
+ if (
+ typeof toolgroup === "object" &&
+ toolgroup.args &&
+ "vector_db_ids" in toolgroup.args
+ ) {
+ return toolgroup.args.vector_db_ids as string[];
+ }
+ return [];
+ });
+
+ // determine mime type from file extension - this should be in the Llama Stack Client IMO
+ const getContentType = (filename: string): string => {
+ const ext = filename.toLowerCase().split(".").pop();
+ switch (ext) {
+ case "pdf":
+ return "application/pdf";
+ case "txt":
+ return "text/plain";
+ case "md":
+ return "text/markdown";
+ case "html":
+ return "text/html";
+ case "csv":
+ return "text/csv";
+ case "json":
+ return "application/json";
+ case "docx":
+ return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
+ case "doc":
+ return "application/msword";
+ default:
+ return "application/octet-stream";
+ }
+ };
+
+ const mimeType = getContentType(file.name);
+ let fileContent: string;
+
+ // handle text files vs binary files differently
+ const isTextFile =
+ mimeType.startsWith("text/") ||
+ mimeType === "application/json" ||
+ mimeType === "text/markdown" ||
+ mimeType === "text/html" ||
+ mimeType === "text/csv";
+
+ if (isTextFile) {
+ fileContent = await file.text();
+ } else {
+ // for PDFs and other binary files, create a data URL
+ // use FileReader for efficient base64 conversion
+        fileContent = await new Promise<string>((resolve, reject) => {
+ const reader = new FileReader();
+ reader.onload = () => resolve(reader.result as string);
+ reader.onerror = () => reject(reader.error);
+ reader.readAsDataURL(file);
+ });
+ }
+
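+      // index the document into each vector DB configured on the agent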
+ for (const vectorDbId of vectorDbIds) {
+ await client.toolRuntime.ragTool.insert({
+ documents: [
+ {
+ content: fileContent,
+ document_id: `${file.name}-${Date.now()}`,
+ metadata: {
+ filename: file.name,
+ file_size: file.size,
+ uploaded_at: new Date().toISOString(),
+ agent_id: selectedAgentId,
+ },
+ mime_type: mimeType,
+ },
+ ],
+ vector_db_id: vectorDbId,
+ // TODO: parameterize this somewhere, probably in settings
+ chunk_size_in_tokens: 512,
+ });
+ }
+
+ console.log("✅ File successfully uploaded using RAG tool");
+
+ setUploadNotification({
+ show: true,
+ message: `📄 File "${file.name}" uploaded and indexed successfully!`,
+ type: "success",
+ });
+
+ setTimeout(() => {
+ setUploadNotification(prev => ({ ...prev, show: false }));
+ }, 4000);
+ } catch (err) {
+ console.error("Error uploading file using RAG tool:", err);
+ const errorMessage =
+ err instanceof Error
+ ? `Failed to upload file: ${err.message}`
+ : "Failed to upload file using RAG tool";
+
+ setUploadNotification({
+ show: true,
+ message: errorMessage,
+ type: "error",
+ });
+
+ setTimeout(() => {
+ setUploadNotification(prev => ({ ...prev, show: false }));
+ }, 6000);
+ }
+ };
+
return (
+ {/* Upload Notification */}
+ {uploadNotification.show && (
+
+
+ {uploadNotification.type === "loading" && (
+
+ )}
+
+ {uploadNotification.message}
+
+ {uploadNotification.type !== "loading" && (
+
+ )}
+
+
+ )}
+
{/* Header */}
@@ -822,7 +1344,6 @@ export default function ChatPlaygroundPage() {
- {selectedAgentId && agents.length > 1 && (
+ {selectedAgentId && (
)}
-
- setCurrentSession(prev =>
- prev ? { ...prev, messages, updatedAt: Date.now() } : prev
- )
- }
- />
+ {!agentsLoading && agents.length === 0 ? (
+
+
+
🦙
+
+ Create an Agent with Llama Stack
+
+
+ To get started, create your first agent. Each agent is
+ configured with specific instructions, models, and tools to
+ help you with different tasks.
+
+
+
+
+ ) : (
+
+ setCurrentSession(prev =>
+ prev ? { ...prev, messages, updatedAt: Date.now() } : prev
+ )
+ }
+ onRAGFileUpload={handleRAGFileUpload}
+ />
+ )}
@@ -1086,14 +1662,16 @@ export default function ChatPlaygroundPage() {
- {models.map(model => (
-
- {model.identifier}
-
- ))}
+ {models
+ .filter(model => model.model_type === "llm")
+ .map(model => (
+
+ {model.identifier}
+
+ ))}
@@ -1137,21 +1715,12 @@ export default function ChatPlaygroundPage() {
toolgroup.identifier
)}
onChange={e => {
- console.log(
- "Tool selection changed:",
- toolgroup.identifier,
- e.target.checked
- );
if (e.target.checked) {
setSelectedToolgroups(prev => {
const newSelection = [
...prev,
toolgroup.identifier,
];
- console.log(
- "New selected toolgroups:",
- newSelection
- );
return newSelection;
});
} else {
@@ -1159,10 +1728,6 @@ export default function ChatPlaygroundPage() {
const newSelection = prev.filter(
id => id !== toolgroup.identifier
);
- console.log(
- "New selected toolgroups:",
- newSelection
- );
return newSelection;
});
}
@@ -1194,6 +1759,80 @@ export default function ChatPlaygroundPage() {
text generation agents work without tools.
+
+ {/* Vector DB Configuration for RAG */}
+ {selectedToolgroups.includes("builtin::rag") && (
+
+
+
+
+
+ {availableVectorDBs.length} available
+
+
+
+ {availableVectorDBs.length === 0 ? (
+
+ No vector databases available. Create one to use RAG
+ tools.
+
+ ) : (
+ availableVectorDBs.map(vectorDB => (
+
+ ))
+ )}
+
+ {selectedVectorDBs.length === 0 &&
+ selectedToolgroups.includes("builtin::rag") && (
+
+ ⚠️ RAG tool selected but no vector databases chosen.
+ Create or select a vector database.
+
+ )}
+
+ )}
@@ -1204,12 +1843,14 @@ export default function ChatPlaygroundPage() {
newAgentName,
newAgentInstructions,
selectedModel,
- selectedToolgroups
+ selectedToolgroups,
+ selectedVectorDBs
);
setShowCreateAgent(false);
setNewAgentName("");
setNewAgentInstructions("You are a helpful assistant.");
setSelectedToolgroups([]);
+ setSelectedVectorDBs([]);
} catch (error) {
console.error("Failed to create agent:", error);
}
@@ -1226,6 +1867,7 @@ export default function ChatPlaygroundPage() {
setNewAgentName("");
setNewAgentInstructions("You are a helpful assistant.");
setSelectedToolgroups([]);
+ setSelectedVectorDBs([]);
}}
className="flex-1"
>
@@ -1235,6 +1877,17 @@ export default function ChatPlaygroundPage() {
)}
+
+ {/* Create Vector DB Modal */}
+ {showCreateVectorDB && (
+
+ setShowCreateVectorDB(false)}
+ />
+
+ )}
);
}
diff --git a/llama_stack/ui/components/chat-playground/chat.tsx b/llama_stack/ui/components/chat-playground/chat.tsx
index 023bf0728..3b37c4dfe 100644
--- a/llama_stack/ui/components/chat-playground/chat.tsx
+++ b/llama_stack/ui/components/chat-playground/chat.tsx
@@ -35,6 +35,7 @@ interface ChatPropsBase {
) => void;
setMessages?: (messages: Message[]) => void;
transcribeAudio?: (blob: Blob) => Promise<string>;
+ onRAGFileUpload?: (file: File) => Promise<void>;
}
interface ChatPropsWithoutSuggestions extends ChatPropsBase {
@@ -62,6 +63,7 @@ export function Chat({
onRateResponse,
setMessages,
transcribeAudio,
+ onRAGFileUpload,
}: ChatProps) {
const lastMessage = messages.at(-1);
const isEmpty = messages.length === 0;
@@ -226,16 +228,17 @@ export function Chat({
isPending={isGenerating || isTyping}
handleSubmit={handleSubmit}
>
- {({ files, setFiles }) => (
+ {() => (
{}}
stop={handleStop}
isGenerating={isGenerating}
transcribeAudio={transcribeAudio}
+ onRAGFileUpload={onRAGFileUpload}
/>
)}
diff --git a/llama_stack/ui/components/chat-playground/conversations.tsx b/llama_stack/ui/components/chat-playground/conversations.tsx
index 1a9c960fe..40045b9fe 100644
--- a/llama_stack/ui/components/chat-playground/conversations.tsx
+++ b/llama_stack/ui/components/chat-playground/conversations.tsx
@@ -14,6 +14,7 @@ import { Card } from "@/components/ui/card";
import { Trash2 } from "lucide-react";
import type { Message } from "@/components/chat-playground/chat-message";
import { useAuthClient } from "@/hooks/use-auth-client";
+import { cleanMessageContent } from "@/lib/message-content-utils";
import type {
Session,
SessionCreateParams,
@@ -219,10 +220,7 @@ export function Conversations({
messages.push({
id: `${turn.turn_id}-assistant-${messages.length}`,
role: "assistant",
- content:
- typeof turn.output_message.content === "string"
- ? turn.output_message.content
- : JSON.stringify(turn.output_message.content),
+ content: cleanMessageContent(turn.output_message.content),
createdAt: new Date(
turn.completed_at || turn.started_at || Date.now()
),
@@ -271,7 +269,7 @@ export function Conversations({
);
const deleteSession = async (sessionId: string) => {
- if (sessions.length <= 1 || !selectedAgentId) {
+ if (!selectedAgentId) {
return;
}
@@ -324,7 +322,6 @@ export function Conversations({
}
}, [currentSession]);
- // Don't render if no agent is selected
if (!selectedAgentId) {
return null;
}
@@ -357,7 +354,7 @@ export function Conversations({
+ New
- {currentSession && sessions.length > 1 && (
+ {currentSession && (