Merge branch 'main' into meta-ref-vision-fix-2

commit 2543b54a6a
Xi Yan authored 2024-12-30 16:26:15 -08:00 · committed by GitHub
2 changed files with 23 additions and 9 deletions


@@ -43,7 +43,7 @@ Configuration for this is available at `distributions/ollama/run.yaml`.
 ### 3. Use the Llama Stack client SDK
 
-You can interact with the Llama Stack server using various client SDKs. We will use the Python SDK which you can install using:
+You can interact with the Llama Stack server using various client SDKs. We will use the Python SDK which you can install using the following command. Note that you must be using Python 3.10 or newer:
 ```bash
 pip install llama-stack-client
 ```
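For readers following along, a minimal sketch of what the installed SDK gives you, assuming a Llama Stack server already running locally (the base URL below is an assumption, not part of this diff):

```python
# Minimal sketch: connect to a local Llama Stack server and list registered models.
# Assumes the server listens at http://localhost:5000; adjust for your deployment.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

# Equivalent in spirit to the `llama-stack-client models list` CLI command
# shown in the next hunk.
for model in client.models.list():
    print(model.identifier)
```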
@@ -62,7 +62,7 @@ llama-stack-client models list
 You can test basic Llama inference completion using the CLI too.
 ```bash
-llama-stack-client
+llama-stack-client \
   inference chat-completion \
   --message "hello, what model are you?"
 ```
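The corrected multi-line CLI invocation above also has a Python SDK counterpart; a hedged sketch, where the model id is a placeholder and the exact response shape may vary across client versions:

```python
# Sketch: the same chat-completion request issued through the Python SDK.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")  # assumed local server

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.2-3B-Instruct",  # placeholder model id
    messages=[{"role": "user", "content": "hello, what model are you?"}],
)
print(response.completion_message.content)
```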


@@ -81,14 +81,28 @@ async def agents_stack(request, inference_model, safety_shield):
     inference_models = (
         inference_model if isinstance(inference_model, list) else [inference_model]
     )
-    models = [
-        ModelInput(
-            model_id=model,
-            model_type=ModelType.llm,
-            provider_id=providers["inference"][i].provider_id,
-        )
-        for i, model in enumerate(inference_models)
-    ]
+
+    # NOTE: meta-reference provider needs 1 provider per model, lookup provider_id from provider config
+    model_to_provider_id = {}
+    for provider in providers["inference"]:
+        if "model" in provider.config:
+            model_to_provider_id[provider.config["model"]] = provider.provider_id
+
+    models = []
+    for model in inference_models:
+        if model in model_to_provider_id:
+            provider_id = model_to_provider_id[model]
+        else:
+            provider_id = providers["inference"][0].provider_id
+        models.append(
+            ModelInput(
+                model_id=model,
+                model_type=ModelType.llm,
+                provider_id=provider_id,
+            )
+        )
+
     models.append(
         ModelInput(
             model_id="all-MiniLM-L6-v2",