Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
This commit is contained in:
Yuan Tang 2025-01-14 20:56:52 -05:00
parent 7c726826b8
commit 3da3d26260
No known key found for this signature in database
4 changed files with 177 additions and 155 deletions

View file

@ -1,9 +1,9 @@
{ {
"hf-serverless": [ "bedrock": [
"aiohttp",
"aiosqlite", "aiosqlite",
"autoevals", "autoevals",
"blobfile", "blobfile",
"boto3",
"chardet", "chardet",
"chromadb-client", "chromadb-client",
"datasets", "datasets",
@ -11,103 +11,6 @@
"fastapi", "fastapi",
"fire", "fire",
"httpx", "httpx",
"huggingface_hub",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"together": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"together",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"vllm-gpu": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"vllm",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"remote-vllm": [
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib", "matplotlib",
"nltk", "nltk",
"numpy", "numpy",
@ -162,7 +65,7 @@
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu" "torch --index-url https://download.pytorch.org/whl/cpu"
], ],
"tgi": [ "hf-endpoint": [
"aiohttp", "aiohttp",
"aiosqlite", "aiosqlite",
"autoevals", "autoevals",
@ -196,11 +99,11 @@
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu" "torch --index-url https://download.pytorch.org/whl/cpu"
], ],
"bedrock": [ "hf-serverless": [
"aiohttp",
"aiosqlite", "aiosqlite",
"autoevals", "autoevals",
"blobfile", "blobfile",
"boto3",
"chardet", "chardet",
"chromadb-client", "chromadb-client",
"datasets", "datasets",
@ -208,6 +111,7 @@
"fastapi", "fastapi",
"fire", "fire",
"httpx", "httpx",
"huggingface_hub",
"matplotlib", "matplotlib",
"nltk", "nltk",
"numpy", "numpy",
@ -309,35 +213,6 @@
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu" "torch --index-url https://download.pytorch.org/whl/cpu"
], ],
"cerebras": [
"aiosqlite",
"blobfile",
"cerebras_cloud_sdk",
"chardet",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"ollama": [ "ollama": [
"aiohttp", "aiohttp",
"aiosqlite", "aiosqlite",
@ -372,7 +247,7 @@
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu" "torch --index-url https://download.pytorch.org/whl/cpu"
], ],
"hf-endpoint": [ "tgi": [
"aiohttp", "aiohttp",
"aiosqlite", "aiosqlite",
"autoevals", "autoevals",
@ -405,5 +280,130 @@
"uvicorn", "uvicorn",
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu" "torch --index-url https://download.pytorch.org/whl/cpu"
],
"together": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"together",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"cerebras": [
"aiosqlite",
"blobfile",
"cerebras_cloud_sdk",
"chardet",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"remote-vllm": [
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"vllm-gpu": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"vllm",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
] ]
} }

View file

@ -1,15 +1,5 @@
---
orphan: true
---
# Cerebras Distribution # Cerebras Distribution
```{toctree}
:maxdepth: 2
:hidden:
self
```
The `llamastack/distribution-cerebras` distribution consists of the following provider configurations. The `llamastack/distribution-cerebras` distribution consists of the following provider configurations.
| API | Provider(s) | | API | Provider(s) |

View file

@ -1,6 +1,5 @@
version: '2' version: '2'
image_name: remote-vllm image_name: remote-vllm
docker_image: null
conda_env: remote-vllm conda_env: remote-vllm
apis: apis:
- agents - agents
@ -8,6 +7,7 @@ apis:
- memory - memory
- safety - safety
- telemetry - telemetry
- tool_runtime
providers: providers:
inference: inference:
- provider_id: vllm-inference - provider_id: vllm-inference
@ -52,33 +52,50 @@ providers:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack} service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite} sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db} sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store: metadata_store:
namespace: null
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference provider_id: vllm-inference
provider_model_id: null
model_type: llm model_type: llm
- metadata: {} - metadata: {}
model_id: ${env.SAFETY_MODEL} model_id: ${env.SAFETY_MODEL}
provider_id: vllm-safety provider_id: vllm-safety
provider_model_id: null
model_type: llm model_type: llm
- metadata: - metadata:
embedding_dimension: 384 embedding_dimension: 384
model_id: all-MiniLM-L6-v2 model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers provider_id: sentence-transformers
provider_model_id: null
model_type: embedding model_type: embedding
shields: shields:
- params: null - shield_id: ${env.SAFETY_MODEL}
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
memory_banks: [] memory_banks: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -1,6 +1,5 @@
version: '2' version: '2'
image_name: remote-vllm image_name: remote-vllm
docker_image: null
conda_env: remote-vllm conda_env: remote-vllm
apis: apis:
- agents - agents
@ -8,6 +7,7 @@ apis:
- memory - memory
- safety - safety
- telemetry - telemetry
- tool_runtime
providers: providers:
inference: inference:
- provider_id: vllm-inference - provider_id: vllm-inference
@ -46,24 +46,39 @@ providers:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack} service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite} sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db} sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store: metadata_store:
namespace: null
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference provider_id: vllm-inference
provider_model_id: null
model_type: llm model_type: llm
- metadata: - metadata:
embedding_dimension: 384 embedding_dimension: 384
model_id: all-MiniLM-L6-v2 model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers provider_id: sentence-transformers
provider_model_id: null
model_type: embedding model_type: embedding
shields: [] shields: []
memory_banks: [] memory_banks: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []
tool_groups: []