diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py index 3f2ee91e0..3f455da3c 100644 --- a/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ -237,8 +237,8 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv model = await self.model_store.get_model(model_id) kwargs = {} - if model.metadata.get("embedding_dimensions"): - kwargs["dimensions"] = model.metadata.get("embedding_dimensions") + if model.metadata.get("embedding_dimension"): + kwargs["dimensions"] = model.metadata.get("embedding_dimension") assert all(not content_has_media(content) for content in contents), ( "Fireworks does not support media for embeddings" ) diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py index e71979eae..c90f632ff 100644 --- a/llama_stack/providers/remote/inference/fireworks/models.py +++ b/llama_stack/providers/remote/inference/fireworks/models.py @@ -56,7 +56,7 @@ MODEL_ENTRIES = [ provider_model_id="nomic-ai/nomic-embed-text-v1.5", model_type=ModelType.embedding, metadata={ - "embedding_dimensions": 768, + "embedding_dimension": 768, "context_length": 8192, }, ), diff --git a/llama_stack/providers/remote/inference/ollama/models.py b/llama_stack/providers/remote/inference/ollama/models.py index e0bf269db..be556762c 100644 --- a/llama_stack/providers/remote/inference/ollama/models.py +++ b/llama_stack/providers/remote/inference/ollama/models.py @@ -88,7 +88,7 @@ model_entries = [ aliases=["all-minilm"], model_type=ModelType.embedding, metadata={ - "embedding_dimensions": 384, + "embedding_dimension": 384, "context_length": 512, }, ), @@ -96,7 +96,7 @@ model_entries = [ provider_model_id="nomic-embed-text", model_type=ModelType.embedding, metadata={ - "embedding_dimensions": 768, + "embedding_dimension": 768, "context_length": 8192, }, ), diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py index 6ee31fa78..63d3d94b5 100644 --- a/llama_stack/providers/remote/inference/together/models.py +++ b/llama_stack/providers/remote/inference/together/models.py @@ -52,7 +52,7 @@ MODEL_ENTRIES = [ provider_model_id="togethercomputer/m2-bert-80M-8k-retrieval", model_type=ModelType.embedding, metadata={ - "embedding_dimensions": 768, + "embedding_dimension": 768, "context_length": 8192, }, ), @@ -60,7 +60,7 @@ MODEL_ENTRIES = [ provider_model_id="togethercomputer/m2-bert-80M-32k-retrieval", model_type=ModelType.embedding, metadata={ - "embedding_dimensions": 768, + "embedding_dimension": 768, "context_length": 32768, }, ), diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 4e5de3933..b37b0d305 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -375,8 +375,8 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): kwargs = {} assert model.model_type == ModelType.embedding - assert model.metadata.get("embedding_dimensions") - kwargs["dimensions"] = model.metadata.get("embedding_dimensions") + assert model.metadata.get("embedding_dimension") + kwargs["dimensions"] = model.metadata.get("embedding_dimension") assert all(not content_has_media(content) for content in contents), "VLLM does not support media for embeddings" response = self.client.embeddings.create( model=model.provider_resource_id, diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index 1ed5540d8..6f622c7d9 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -150,7 +150,7 @@ models: provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm - metadata: - embedding_dimensions: 768 + embedding_dimension: 768 context_length: 8192 model_id: nomic-ai/nomic-embed-text-v1.5 provider_id: fireworks diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index 04d55eba8..e6d21d10d 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -144,7 +144,7 @@ models: provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm - metadata: - embedding_dimensions: 768 + embedding_dimension: 768 context_length: 8192 model_id: nomic-ai/nomic-embed-text-v1.5 provider_id: fireworks diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index 837709579..9193a3ef6 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -145,14 +145,14 @@ models: provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo model_type: llm - metadata: - embedding_dimensions: 768 + embedding_dimension: 768 context_length: 8192 model_id: togethercomputer/m2-bert-80M-8k-retrieval provider_id: together provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval model_type: embedding - metadata: - embedding_dimensions: 768 + embedding_dimension: 768 context_length: 32768 model_id: togethercomputer/m2-bert-80M-32k-retrieval provider_id: together diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index 28ff36cff..32ddf7b16 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -139,14 +139,14 @@ models: provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo model_type: llm - metadata: - embedding_dimensions: 768 + embedding_dimension: 768 context_length: 8192 model_id: togethercomputer/m2-bert-80M-8k-retrieval provider_id: together provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval model_type: embedding - metadata: - embedding_dimensions: 768 + embedding_dimension: 768 context_length: 32768 model_id: togethercomputer/m2-bert-80M-32k-retrieval provider_id: together