mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-18 05:39:47 +00:00
implement embedding generation in supported inference providers (#589)
This PR adds the ability to generate embeddings in all supported inference providers. ``` pytest -v -s llama_stack/providers/tests/inference/test_embeddings.py -k "bedrock" --inference-model="amazon.titan-embed-text-v2:0" --env EMBEDDING_DIMENSION=1024 pytest -v -s -k "vllm" --inferrence-model="intfloat/e5-mistral-7b-instruct" llama_stack/providers/tests/inference/test_embeddings.py --env EMBEDDING_DIMENSION=4096 --env VLLM_URL="http://localhost:9798/v1" pytest -v -s --inference-model="nomic-ai/nomic-embed-text-v1.5" llama_stack/providers/tests/inference/test_embeddings.py -k "fireworks" --env FIREWORKS_API_KEY=<API_KEY>--env EMBEDDING_DIMENSION=128 pytest -v -s --inference-model="togethercomputer/m2-bert-80M-2k-retrieval" llama_stack/providers/tests/inference/test_embeddings.py -k "together" --env TOGETHER_API_KEY=<API_KEY>--env EMBEDDING_DIMENSION=768 pytest -v -s -k "ollama" --inference-model="all-minilm:v8" llama_stack/providers/tests/inference/test_embeddings.py --env EMBEDDING_DIMENSION=384 torchrun $CONDA_PREFIX/bin/pytest -v -s -k "meta_reference" --inference-model="sentence-transformers/all-MiniLM-L6-v2" llama_stack/providers/tests/inference/test_embeddings.py --env EMBEDDING_DIMENSION=384 ```
This commit is contained in:
parent
6a23f24ee0
commit
d362d2d740
32 changed files with 597 additions and 143 deletions
|
|
@ -39,6 +39,7 @@ def available_providers() -> List[ProviderSpec]:
|
|||
module="llama_stack.providers.inline.memory.faiss",
|
||||
config_class="llama_stack.providers.inline.memory.faiss.FaissImplConfig",
|
||||
deprecation_warning="Please use the `inline::faiss` provider instead.",
|
||||
api_dependencies=[Api.inference],
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.memory,
|
||||
|
|
@ -46,6 +47,7 @@ def available_providers() -> List[ProviderSpec]:
|
|||
pip_packages=EMBEDDING_DEPS + ["faiss-cpu"],
|
||||
module="llama_stack.providers.inline.memory.faiss",
|
||||
config_class="llama_stack.providers.inline.memory.faiss.FaissImplConfig",
|
||||
api_dependencies=[Api.inference],
|
||||
),
|
||||
remote_provider_spec(
|
||||
Api.memory,
|
||||
|
|
@ -55,6 +57,7 @@ def available_providers() -> List[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.memory.chroma",
|
||||
config_class="llama_stack.providers.remote.memory.chroma.ChromaRemoteImplConfig",
|
||||
),
|
||||
api_dependencies=[Api.inference],
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.memory,
|
||||
|
|
@ -71,6 +74,7 @@ def available_providers() -> List[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.memory.pgvector",
|
||||
config_class="llama_stack.providers.remote.memory.pgvector.PGVectorConfig",
|
||||
),
|
||||
api_dependencies=[Api.inference],
|
||||
),
|
||||
remote_provider_spec(
|
||||
Api.memory,
|
||||
|
|
@ -81,6 +85,7 @@ def available_providers() -> List[ProviderSpec]:
|
|||
config_class="llama_stack.providers.remote.memory.weaviate.WeaviateConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.memory.weaviate.WeaviateRequestProviderData",
|
||||
),
|
||||
api_dependencies=[Api.inference],
|
||||
),
|
||||
remote_provider_spec(
|
||||
api=Api.memory,
|
||||
|
|
@ -90,6 +95,7 @@ def available_providers() -> List[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.memory.sample",
|
||||
config_class="llama_stack.providers.remote.memory.sample.SampleConfig",
|
||||
),
|
||||
api_dependencies=[],
|
||||
),
|
||||
remote_provider_spec(
|
||||
Api.memory,
|
||||
|
|
@ -99,5 +105,6 @@ def available_providers() -> List[ProviderSpec]:
|
|||
module="llama_stack.providers.remote.memory.qdrant",
|
||||
config_class="llama_stack.providers.remote.memory.qdrant.QdrantConfig",
|
||||
),
|
||||
api_dependencies=[Api.inference],
|
||||
),
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue