Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 09:53:45 +00:00)
fix: Avoid model_limits KeyError (#4060)
# What does this PR do?

Avoids a `model_limits` KeyError when retrieving embedding models for Watsonx.

Closes https://github.com/llamastack/llama-stack/issues/4059

## Test Plan

Start the server with the watsonx distro:

```bash
llama stack list-deps watsonx | xargs -L1 uv pip install
uv run llama stack run watsonx
```

Run:

```python
client = LlamaStackClient(base_url=base_url)
client.models.list()
```

Check whether any embedding model is available (currently there is not a single one).
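For context, here is a minimal sketch of the failure mode and of the guarded lookup the fix switches to. The `spec_*` dictionaries are hypothetical stand-ins for entries returned by the Watsonx model catalog, not actual API responses:

```python
# Hypothetical catalog entries; some Watsonx specs ship without "model_limits".
spec_with_limits = {
    "model_id": "some-embedding-model",
    "model_limits": {"embedding_dimension": 768, "max_sequence_length": 512},
}
spec_without_limits = {"model_id": "another-embedding-model"}

# Old behavior: direct indexing raises KeyError for the second spec.
try:
    _ = spec_without_limits["model_limits"]["embedding_dimension"]
except KeyError as exc:
    print(f"KeyError: {exc}")  # -> KeyError: 'model_limits'

# New behavior: chained .get() calls fall back to 0 instead of raising.
for spec in (spec_with_limits, spec_without_limits):
    embedding_dimension = spec.get("model_limits", {}).get("embedding_dimension", 0)
    context_length = spec.get("model_limits", {}).get("max_sequence_length", 0)
    print(spec["model_id"], embedding_dimension, context_length)
```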
This commit is contained in:
parent ba50790a28
commit 07c28cd519

1 changed file with 2 additions and 6 deletions
```diff
@@ -283,8 +283,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
             # ...
             provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"
             if "embedding" in functions:
-                embedding_dimension = model_spec["model_limits"]["embedding_dimension"]
-                context_length = model_spec["model_limits"]["max_sequence_length"]
+                embedding_dimension = model_spec.get("model_limits", {}).get("embedding_dimension", 0)
+                context_length = model_spec.get("model_limits", {}).get("max_sequence_length", 0)
                 embedding_metadata = {
                     "embedding_dimension": embedding_dimension,
                     "context_length": context_length,
@@ -306,10 +306,6 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
                     metadata={},
                     model_type=ModelType.llm,
                 )
-            # In theory, I guess it is possible that a model could be both an embedding model and a text chat model.
-            # In that case, the cache will record the generator Model object, and the list which we return will have
-            # both the generator Model object and the text chat Model object. That's fine because the cache is
-            # only used for check_model_availability() anyway.
             self._model_cache[provider_resource_id] = model
             models.append(model)
         return models
```
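As a follow-up to the Test Plan, a hedged sketch of how the embedding-model check could be scripted. The `base_url` value and the `model_type` attribute on the returned entries are assumptions based on the client usage shown above:

```python
from llama_stack_client import LlamaStackClient

# base_url is an assumption; point it at the running watsonx distro server.
client = LlamaStackClient(base_url="http://localhost:8321")

models = client.models.list()
# Keep only embedding models; model_type is assumed to be exposed on each entry.
embedding_models = [m for m in models if getattr(m, "model_type", None) == "embedding"]

print(f"total models: {len(models)}, embedding models: {len(embedding_models)}")
```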