Merge branch 'meta-llama:main' into feat/litellm_sambanova_usage

Jorge Piedrahita Ortiz 2025-03-17 09:42:15 -05:00 committed by GitHub
commit 716cb09056
145 changed files with 21384 additions and 1283 deletions

View file

@@ -6,8 +6,8 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>OpenAPI specification</title>
<link href="https://fonts.googleapis.com/css?family=Montserrat:300,400,700|Roboto:300,400,700" rel="stylesheet">
<script type="module" src="https://unpkg.com/@stoplight/elements/web-components.min.js"></script>
<link rel="stylesheet" href="https://unpkg.com/@stoplight/elements/styles.min.css">
<script type="module" src="https://cdn.jsdelivr.net/npm/@stoplight/elements/web-components.min.js"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@stoplight/elements/styles.min.css">
<style>
body {
margin: 0;
@@ -2151,6 +2151,48 @@
}
}
},
"/v1/providers/{provider_id}": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ProviderInfo"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Providers"
],
"description": "",
"parameters": [
{
"name": "provider_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/tool-runtime/invoke": {
"post": {
"responses": {
@@ -2642,7 +2684,7 @@
}
}
},
"/v1/inspect/providers": {
"/v1/providers": {
"get": {
"responses": {
"200": {
@@ -4347,24 +4389,6 @@
"type": "string",
"description": "Unique identifier for the tool call this response is for"
},
"tool_name": {
"oneOf": [
{
"type": "string",
"enum": [
"brave_search",
"wolfram_alpha",
"photogen",
"code_interpreter"
],
"title": "BuiltinTool"
},
{
"type": "string"
}
],
"description": "Name of the tool that was called"
},
"content": {
"$ref": "#/components/schemas/InterleavedContent",
"description": "The response content from the tool"
@@ -4374,7 +4398,6 @@
"required": [
"role",
"call_id",
"tool_name",
"content"
],
"title": "ToolResponseMessage",
@@ -4549,7 +4572,7 @@
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricEvent"
"$ref": "#/components/schemas/MetricInResponse"
}
},
"completion_message": {
@@ -4571,46 +4594,9 @@
"title": "ChatCompletionResponse",
"description": "Response from a chat completion request."
},
"MetricEvent": {
"MetricInResponse": {
"type": "object",
"properties": {
"trace_id": {
"type": "string"
},
"span_id": {
"type": "string"
},
"timestamp": {
"type": "string",
"format": "date-time"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
}
},
"type": {
"type": "string",
"const": "metric",
"default": "metric"
},
"metric": {
"type": "string"
},
@@ -4630,15 +4616,10 @@
},
"additionalProperties": false,
"required": [
"trace_id",
"span_id",
"timestamp",
"type",
"metric",
"value",
"unit"
"value"
],
"title": "MetricEvent"
"title": "MetricInResponse"
},
"TokenLogProbs": {
"type": "object",
@@ -4715,6 +4696,12 @@
"CompletionResponse": {
"type": "object",
"properties": {
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricInResponse"
}
},
"content": {
"type": "string",
"description": "The generated completion text"
@@ -4924,7 +4911,7 @@
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricEvent"
"$ref": "#/components/schemas/MetricInResponse"
}
},
"event": {
@@ -5082,6 +5069,12 @@
"CompletionResponseStreamChunk": {
"type": "object",
"properties": {
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricInResponse"
}
},
"delta": {
"type": "string",
"description": "New content generated since last chunk. This can be one or more tokens."
@@ -7961,6 +7954,53 @@
],
"title": "InsertChunksRequest"
},
"ProviderInfo": {
"type": "object",
"properties": {
"api": {
"type": "string"
},
"provider_id": {
"type": "string"
},
"provider_type": {
"type": "string"
},
"config": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"api",
"provider_id",
"provider_type",
"config"
],
"title": "ProviderInfo"
},
"InvokeToolRequest": {
"type": "object",
"properties": {
@@ -8173,27 +8213,6 @@
],
"title": "ListModelsResponse"
},
"ProviderInfo": {
"type": "object",
"properties": {
"api": {
"type": "string"
},
"provider_id": {
"type": "string"
},
"provider_type": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"api",
"provider_id",
"provider_type"
],
"title": "ProviderInfo"
},
"ListProvidersResponse": {
"type": "object",
"properties": {
@@ -8363,6 +8382,75 @@
],
"title": "LogSeverity"
},
"MetricEvent": {
"type": "object",
"properties": {
"trace_id": {
"type": "string"
},
"span_id": {
"type": "string"
},
"timestamp": {
"type": "string",
"format": "date-time"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
}
},
"type": {
"type": "string",
"const": "metric",
"default": "metric"
},
"metric": {
"type": "string"
},
"value": {
"oneOf": [
{
"type": "integer"
},
{
"type": "number"
}
]
},
"unit": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"span_id",
"timestamp",
"type",
"metric",
"value",
"unit"
],
"title": "MetricEvent"
},
"SpanEndPayload": {
"type": "object",
"properties": {
@@ -10125,6 +10213,10 @@
{
"name": "PostTraining (Coming Soon)"
},
{
"name": "Providers",
"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
},
{
"name": "Safety"
},
@@ -10171,6 +10263,7 @@
"Inspect",
"Models",
"PostTraining (Coming Soon)",
"Providers",
"Safety",
"Scoring",
"ScoringFunctions",

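The hunks above move provider listing from `/v1/inspect/providers` to `/v1/providers` and add a `GET /v1/providers/{provider_id}` route whose `ProviderInfo` response now carries the provider's `config`. As a rough, editor-added illustration only (not part of this commit), the Python sketch below calls both routes over plain HTTP; the base URL, the `ollama` provider id, and the `data` wrapper on the list response are assumptions to adapt to your deployment.

```python
# Editor-added sketch, not from this commit: exercise the Providers routes above.
# Assumptions: a Llama Stack server on http://localhost:5001, a registered provider
# with id "ollama", and a ListProvidersResponse shaped as {"data": [ProviderInfo, ...]}.
import requests

base_url = "http://localhost:5001"

# GET /v1/providers -- list every registered provider
providers = requests.get(f"{base_url}/v1/providers").json()["data"]
for p in providers:
    print(p["api"], p["provider_id"], p["provider_type"])

# GET /v1/providers/{provider_id} -- fetch one provider, including its config map
info = requests.get(f"{base_url}/v1/providers/ollama").json()
print(info["config"])
```
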
View file

@@ -1444,6 +1444,34 @@ paths:
schema:
$ref: '#/components/schemas/InsertChunksRequest'
required: true
/v1/providers/{provider_id}:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/ProviderInfo'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Providers
description: ''
parameters:
- name: provider_id
in: path
required: true
schema:
type: string
/v1/tool-runtime/invoke:
post:
responses:
@@ -1782,7 +1810,7 @@ paths:
schema:
$ref: '#/components/schemas/RegisterModelRequest'
required: true
/v1/inspect/providers:
/v1/providers:
get:
responses:
'200':
@@ -2943,17 +2971,6 @@ components:
type: string
description: >-
Unique identifier for the tool call this response is for
tool_name:
oneOf:
- type: string
enum:
- brave_search
- wolfram_alpha
- photogen
- code_interpreter
title: BuiltinTool
- type: string
description: Name of the tool that was called
content:
$ref: '#/components/schemas/InterleavedContent'
description: The response content from the tool
@@ -2961,7 +2978,6 @@ components:
required:
- role
- call_id
- tool_name
- content
title: ToolResponseMessage
description: >-
@@ -3101,7 +3117,7 @@ components:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricEvent'
$ref: '#/components/schemas/MetricInResponse'
completion_message:
$ref: '#/components/schemas/CompletionMessage'
description: The complete response message
@@ -3116,29 +3132,9 @@ components:
- completion_message
title: ChatCompletionResponse
description: Response from a chat completion request.
MetricEvent:
MetricInResponse:
type: object
properties:
trace_id:
type: string
span_id:
type: string
timestamp:
type: string
format: date-time
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
type:
type: string
const: metric
default: metric
metric:
type: string
value:
@@ -3149,14 +3145,9 @@ components:
type: string
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- metric
- value
- unit
title: MetricEvent
title: MetricInResponse
TokenLogProbs:
type: object
properties:
@@ -3213,6 +3204,10 @@ components:
CompletionResponse:
type: object
properties:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricInResponse'
content:
type: string
description: The generated completion text
@@ -3412,7 +3407,7 @@ components:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricEvent'
$ref: '#/components/schemas/MetricInResponse'
event:
$ref: '#/components/schemas/ChatCompletionResponseEvent'
description: The event containing the new content
@@ -3531,6 +3526,10 @@ components:
CompletionResponseStreamChunk:
type: object
properties:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricInResponse'
delta:
type: string
description: >-
@@ -5438,6 +5437,32 @@ components:
- vector_db_id
- chunks
title: InsertChunksRequest
ProviderInfo:
type: object
properties:
api:
type: string
provider_id:
type: string
provider_type:
type: string
config:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- api
- provider_id
- provider_type
- config
title: ProviderInfo
InvokeToolRequest:
type: object
properties:
@@ -5573,21 +5598,6 @@ components:
required:
- data
title: ListModelsResponse
ProviderInfo:
type: object
properties:
api:
type: string
provider_id:
type: string
provider_type:
type: string
additionalProperties: false
required:
- api
- provider_id
- provider_type
title: ProviderInfo
ListProvidersResponse:
type: object
properties:
@@ -5703,6 +5713,47 @@ components:
- error
- critical
title: LogSeverity
MetricEvent:
type: object
properties:
trace_id:
type: string
span_id:
type: string
timestamp:
type: string
format: date-time
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
type:
type: string
const: metric
default: metric
metric:
type: string
value:
oneOf:
- type: integer
- type: number
unit:
type: string
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- metric
- value
- unit
title: MetricEvent
SpanEndPayload:
type: object
properties:
@@ -6820,6 +6871,9 @@ tags:
- name: Inspect
- name: Models
- name: PostTraining (Coming Soon)
- name: Providers
x-displayName: >-
Providers API for inspecting, listing, and modifying providers and their configurations.
- name: Safety
- name: Scoring
- name: ScoringFunctions
@@ -6844,6 +6898,7 @@ x-tagGroups:
- Inspect
- Models
- PostTraining (Coming Soon)
- Providers
- Safety
- Scoring
- ScoringFunctions

View file

@@ -6,8 +6,8 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>OpenAPI specification</title>
<link href="https://fonts.googleapis.com/css?family=Montserrat:300,400,700|Roboto:300,400,700" rel="stylesheet">
<script type="module" src="https://unpkg.com/@stoplight/elements/web-components.min.js"></script>
<link rel="stylesheet" href="https://unpkg.com/@stoplight/elements/styles.min.css">
<script type="module" src="https://cdn.jsdelivr.net/npm/@stoplight/elements/web-components.min.js"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@stoplight/elements/styles.min.css">
<style>
body {
margin: 0;

View file

@@ -71,4 +71,4 @@ While there is a lot of flexibility to mix-and-match providers, often users will
**Locally Hosted Distro**: You may want to run Llama Stack on your own hardware. Typically though, you still need to use Inference via an external service. You can use providers like HuggingFace TGI, Fireworks, Together, etc. for this purpose. Or you may have access to GPUs and can run a [vLLM](https://github.com/vllm-project/vllm) or [NVIDIA NIM](https://build.nvidia.com/nim?filters=nimType%3Anim_type_run_anywhere&q=llama) instance. If you "just" have a regular desktop machine, you can use [Ollama](https://ollama.com/) for inference. To provide convenient quick access to these options, we provide a number of such pre-configured locally-hosted Distros.
**On-device Distro**: Finally, you may want to run Llama Stack directly on an edge device (mobile phone or a tablet.) We provide Distros for iOS and Android (coming soon.)
**On-device Distro**: To run Llama Stack directly on an edge device (mobile phone or a tablet), we provide Distros for [iOS](https://llama-stack.readthedocs.io/en/latest/distributions/ondevice_distro/ios_sdk.html) and [Android](https://llama-stack.readthedocs.io/en/latest/distributions/ondevice_distro/android_sdk.html).

View file

@@ -8,12 +8,12 @@ Features:
- Remote Inferencing: Perform inferencing tasks remotely with Llama models hosted on a remote server (or serverless localhost).
- Simple Integration: With easy-to-use APIs, a developer can quickly integrate Llama Stack into their Android app. The difference between local and remote inferencing is minimal.
Latest Release Notes: [v0.0.58](https://github.com/meta-llama/llama-stack-client-kotlin/releases/tag/v0.0.58)
Latest Release Notes: [link](https://github.com/meta-llama/llama-stack-client-kotlin/tree/latest-release)
*Tagged releases are stable versions of the project. While we strive to maintain a stable main branch, it's not guaranteed to be free of bugs or issues.*
## Android Demo App
Check out our demo app to see how to integrate Llama Stack into your Android app: [Android Demo App](https://github.com/meta-llama/llama-stack-apps/tree/android-kotlin-app-latest/examples/android_app)
Check out our demo app to see how to integrate Llama Stack into your Android app: [Android Demo App](https://github.com/meta-llama/llama-stack-client-kotlin/tree/examples/android_app)
The key files in the app are `ExampleLlamaStackLocalInference.kt`, `ExampleLlamaStackRemoteInference.kts`, and `MainActivity.java`. Together with the surrounding business logic, they show how to use Llama Stack in both environments.
@@ -24,7 +24,7 @@ The key files in the app are `ExampleLlamaStackLocalInference.kt`, `ExampleLlama
Add the following dependency in your `build.gradle.kts` file:
```
dependencies {
implementation("com.llama.llamastack:llama-stack-client-kotlin:0.0.58")
implementation("com.llama.llamastack:llama-stack-client-kotlin:0.1.4.2")
}
```
This will download the JAR files into your Gradle cache, in a directory like `~/.gradle/caches/modules-2/files-2.1/com.llama.llamastack/`.
@@ -36,13 +36,13 @@ If you plan on doing remote inferencing this is sufficient to get started.
For local inferencing, it is required to include the ExecuTorch library into your app.
Include the ExecuTorch library by:
1. Download the `download-prebuilt-et-lib.sh` script file from the [llama-stack-client-kotlin-client-local](https://github.com/meta-llama/llama-stack-client-kotlin/blob/release/0.0.58/llama-stack-client-kotlin-client-local/download-prebuilt-et-lib.sh) directory to your local machine.
1. Download the `download-prebuilt-et-lib.sh` script file from the [llama-stack-client-kotlin-client-local](https://github.com/meta-llama/llama-stack-client-kotlin/tree/latest-release/llama-stack-client-kotlin-client-local/download-prebuilt-et-lib.sh) directory to your local machine.
2. Move the script to the top level of your Android app where the app directory resides:
<p align="center">
<img src="https://raw.githubusercontent.com/meta-llama/llama-stack-client-kotlin/refs/heads/release/0.0.58/doc/img/example_android_app_directory.png" style="width:300px">
<img src="https://github.com/meta-llama/llama-stack-client-kotlin/blob/latest-release/doc/img/example_android_app_directory.png" style="width:300px">
</p>
3. Run `sh download-prebuilt-et-lib.sh` to create an `app/libs` directory and download the `executorch.aar` in that path. This generates an ExecuTorch library for the XNNPACK delegate with commit: [0a12e33](https://github.com/pytorch/executorch/commit/0a12e33d22a3d44d1aa2af5f0d0673d45b962553).
3. Run `sh download-prebuilt-et-lib.sh` to create an `app/libs` directory and download the `executorch.aar` in that path. This generates an ExecuTorch library for the XNNPACK delegate.
4. Add the `executorch.aar` dependency in your `build.gradle.kts` file:
```
dependencies {
@@ -58,12 +58,12 @@ Breaking down the demo app, this section will show the core pieces that are used
### Setup Remote Inferencing
Start a Llama Stack server on localhost. Here is an example of how you can do this using the fireworks.ai distribution:
```
conda create -n stack-fireworks python=3.10
conda activate stack-fireworks
pip install llama-stack=0.0.58
pip install --no-cache llama-stack==0.1.4
llama stack build --template fireworks --image-type conda
export FIREWORKS_API_KEY=<SOME_KEY>
llama stack run /Users/<your_username>/.llama/distributions/llamastack-fireworks/fireworks-run.yaml --port=5050
llama stack run fireworks --port 5050
```
Ensure the Llama Stack server version is the same as the Kotlin SDK Library for maximum compatibility.
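
As an editor-added illustration (not in the original docs), one way to check this is to query the server's version before wiring up the SDK. The sketch below assumes the server started above is listening on port 5050 and exposes the Inspect API's `GET /v1/version` route.

```python
# Editor-added sketch: confirm the running server's version matches the Kotlin SDK.
# Assumes the fireworks server started above listens on localhost:5050 and that the
# Inspect API's GET /v1/version route is available.
import requests

resp = requests.get("http://localhost:5050/v1/version")
resp.raise_for_status()
print(resp.json())  # e.g. {"version": "0.1.4"} -- compare with your llama-stack-client-kotlin version
```
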
@@ -146,7 +146,7 @@ The purpose of this section is to share more details with users that would like
### Prerequisite
You must complete the following steps:
1. Clone the repo (`git clone https://github.com/meta-llama/llama-stack-client-kotlin.git -b release/0.0.58`)
1. Clone the repo (`git clone https://github.com/meta-llama/llama-stack-client-kotlin.git -b latest-release`)
2. Port the appropriate ExecuTorch libraries over into your Llama Stack Kotlin library environment.
```
cd llama-stack-client-kotlin-client-local

View file

@@ -1,9 +1,8 @@
# iOS SDK
We offer both remote and on-device use of Llama Stack in Swift via two components:
1. [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift/)
2. [LocalInferenceImpl](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline/ios/inference)
We offer both remote and on-device use of Llama Stack in Swift via a single SDK [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift/) that contains two components:
1. LlamaStackClient for remote
2. Local Inference for on-device
```{image} ../../../_static/remote_or_local.gif
:alt: Seamlessly switching between local, on-device inference and remote hosted inference
@@ -42,7 +41,7 @@ let request = Components.Schemas.CreateAgentTurnRequest(
// ...
```
Check out [iOSCalendarAssistant](https://github.com/meta-llama/llama-stack-apps/tree/main/examples/ios_calendar_assistant) for a complete app demo.
Check out [iOSCalendarAssistant](https://github.com/meta-llama/llama-stack-client-swift/tree/main/examples/ios_calendar_assistant) for a complete app demo.
## LocalInference
@@ -58,7 +57,7 @@ let inference = LocalInference(queue: runnerQueue)
let agents = LocalAgents(inference: self.inference)
```
Check out [iOSCalendarAssistantWithLocalInf](https://github.com/meta-llama/llama-stack-apps/tree/main/examples/ios_calendar_assistant) for a complete app demo.
Check out [iOSCalendarAssistantWithLocalInf](https://github.com/meta-llama/llama-stack-client-swift/tree/main/examples/ios_calendar_assistant) for a complete app demo.
### Installation
@@ -68,47 +67,6 @@ We're working on making LocalInference easier to set up. For now, you'll need t
1. Install [CMake](https://cmake.org/) for the ExecuTorch build
1. Drag `LocalInference.xcodeproj` into your project
1. Add `LocalInference` as a framework in your app target
1. Add a package dependency on https://github.com/pytorch/executorch (branch latest)
1. Add all the kernels / backends from executorch (but not executorch itself!) as frameworks in your app target:
- backend_coreml
- backend_mps
- backend_xnnpack
- kernels_custom
- kernels_optimized
- kernels_portable
- kernels_quantized
1. In "Build Settings" > "Other Linker Flags" > "Any iOS Simulator SDK", add:
```
-force_load
$(BUILT_PRODUCTS_DIR)/libkernels_optimized-simulator-release.a
-force_load
$(BUILT_PRODUCTS_DIR)/libkernels_custom-simulator-release.a
-force_load
$(BUILT_PRODUCTS_DIR)/libkernels_quantized-simulator-release.a
-force_load
$(BUILT_PRODUCTS_DIR)/libbackend_xnnpack-simulator-release.a
-force_load
$(BUILT_PRODUCTS_DIR)/libbackend_coreml-simulator-release.a
-force_load
$(BUILT_PRODUCTS_DIR)/libbackend_mps-simulator-release.a
```
1. In "Build Settings" > "Other Linker Flags" > "Any iOS SDK", add:
```
-force_load
$(BUILT_PRODUCTS_DIR)/libkernels_optimized-simulator-release.a
-force_load
$(BUILT_PRODUCTS_DIR)/libkernels_custom-simulator-release.a
-force_load
$(BUILT_PRODUCTS_DIR)/libkernels_quantized-simulator-release.a
-force_load
$(BUILT_PRODUCTS_DIR)/libbackend_xnnpack-simulator-release.a
-force_load
$(BUILT_PRODUCTS_DIR)/libbackend_coreml-simulator-release.a
-force_load
$(BUILT_PRODUCTS_DIR)/libbackend_mps-simulator-release.a
```
### Preparing a model

View file

@@ -0,0 +1,42 @@
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Passthrough Distribution
```{toctree}
:maxdepth: 2
:hidden:
self
```
The `llamastack/distribution-passthrough` distribution consists of the following provider configurations.
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::passthrough`, `inline::sentence-transformers` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `remote::wolfram-alpha`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
### Environment Variables
The following environment variables can be configured:
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `PASSTHROUGH_API_KEY`: Passthrough API Key (default: ``)
- `PASSTHROUGH_URL`: Passthrough URL (default: ``)
### Models
The following models are available by default:
- `llama3.1-8b-instruct`
- `llama3.2-11b-vision-instruct`
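
As a brief, editor-added illustration (not part of the generated page), once the distribution is running you can point the Python client at it and list the models above. The sketch assumes the server is reachable on the default port `5001`, that `llama-stack-client` is installed, and that listed models expose an `identifier` field.

```python
# Editor-added sketch: connect to a running distribution-passthrough server.
# Assumptions: server on http://localhost:5001 (LLAMA_STACK_PORT default) and the
# llama-stack-client Python package installed (pip install llama-stack-client).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")

# List the models served through the passthrough provider
for model in client.models.list():
    print(model.identifier)
```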

View file

@@ -88,11 +88,19 @@ docker run -it \
:::{dropdown} Installing the Llama Stack client CLI and SDK
You can interact with the Llama Stack server using various client SDKs. We will use the Python SDK which you can install using the following command. Note that you must be using Python 3.10 or newer:
You can interact with the Llama Stack server using various client SDKs. Note that you must be using Python 3.10 or newer. We will use the Python SDK which you can install via `conda` or `virtualenv`.
For `conda`:
```bash
yes | conda create -n stack-client python=3.10
conda activate stack-client
pip install llama-stack-client
```
For `virtualenv`:
```bash
python -m venv stack-client
source stack-client/bin/activate
pip install llama-stack-client
```
@@ -173,6 +181,13 @@ response = client.inference.chat_completion(
print(response.completion_message.content)
```
To run the above example, put the code in a file called `inference.py`, ensure your `conda` or `virtualenv` environment is active, and run the following:
```bash
pip install llama_stack
llama stack build --template ollama --image-type <conda|venv>
python inference.py
```
### 4. Your first RAG agent
Here is an example of a simple RAG (Retrieval Augmented Generation) chatbot agent which can answer questions about TorchTune documentation.
@@ -273,6 +288,13 @@ for prompt in user_prompts:
log.print()
```
To run the above example, put the code in a file called `rag.py`, ensure your `conda` or `virtualenv` environment is active, and run the following:
```bash
pip install llama_stack
llama stack build --template ollama --image-type <conda|venv>
python rag.py
```
## Next Steps
- Learn more about Llama Stack [Concepts](../concepts/index.md)

View file

@@ -38,9 +38,9 @@ We have a number of client-side SDKs available for different languages.
| **Language** | **Client SDK** | **Package** |
| :----: | :----: | :----: |
| Python | [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) | [![PyPI version](https://img.shields.io/pypi/v/llama_stack_client.svg)](https://pypi.org/project/llama_stack_client/)
| Swift | [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift) | [![Swift Package Index](https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com%2Fapi%2Fpackages%2Fmeta-llama%2Fllama-stack-client-swift%2Fbadge%3Ftype%3Dswift-versions)](https://swiftpackageindex.com/meta-llama/llama-stack-client-swift)
| Swift | [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift/tree/latest-release) | [![Swift Package Index](https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com%2Fapi%2Fpackages%2Fmeta-llama%2Fllama-stack-client-swift%2Fbadge%3Ftype%3Dswift-versions)](https://swiftpackageindex.com/meta-llama/llama-stack-client-swift)
| Node | [llama-stack-client-node](https://github.com/meta-llama/llama-stack-client-node) | [![NPM version](https://img.shields.io/npm/v/llama-stack-client.svg)](https://npmjs.org/package/llama-stack-client)
| Kotlin | [llama-stack-client-kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) | [![Maven version](https://img.shields.io/maven-central/v/com.llama.llamastack/llama-stack-client-kotlin)](https://central.sonatype.com/artifact/com.llama.llamastack/llama-stack-client-kotlin)
| Kotlin | [llama-stack-client-kotlin](https://github.com/meta-llama/llama-stack-client-kotlin/tree/latest-release) | [![Maven version](https://img.shields.io/maven-central/v/com.llama.llamastack/llama-stack-client-kotlin)](https://central.sonatype.com/artifact/com.llama.llamastack/llama-stack-client-kotlin)
## Supported Llama Stack Implementations
@@ -61,6 +61,10 @@ A number of "adapters" are available for some popular Inference and Vector Store
| Groq | Hosted |
| SambaNova | Hosted |
| PyTorch ExecuTorch | On-device iOS, Android |
| OpenAI | Hosted |
| Anthropic | Hosted |
| Gemini | Hosted |
**Vector IO API**
| **Provider** | **Environments** |