Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-03 09:53:45 +00:00
Refactor list_models method

parent 8cae4ddc2a
commit 75e50e87c8

1 changed file with 8 additions and 27 deletions
@@ -274,42 +274,23 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         models = []
         for model_spec in self._get_model_specs():
             functions = [f["id"] for f in model_spec.get("functions", [])]
-            # Format: {"embedding_dimension": 1536, "context_length": 8192}
-            # Example of an embedding model:
-            # {'model_id': 'ibm/granite-embedding-278m-multilingual',
-            # 'label': 'granite-embedding-278m-multilingual',
-            # 'model_limits': {'max_sequence_length': 512, 'embedding_dimension': 768},
-            # ...
             provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"
             if "embedding" in functions:
-                embedding_dimension = model_spec.get("model_limits", {}).get("embedding_dimension", 0)
-                context_length = model_spec.get("model_limits", {}).get("max_sequence_length", 0)
-                embedding_metadata = {
-                    "embedding_dimension": embedding_dimension,
-                    "context_length": context_length,
-                }
-                model = Model(
-                    identifier=model_spec["model_id"],
-                    provider_resource_id=provider_resource_id,
-                    provider_id=self.__provider_id__,
-                    metadata=embedding_metadata,
-                    model_type=ModelType.embedding,
-                )
-                self._model_cache[provider_resource_id] = model
-                models.append(model)
-            if "text_chat" in functions:
+                model_type = ModelType.embedding
+            elif "text_chat" in functions:
+                model_type = ModelType.llm
+            else:
+                model_type = None
+
+            if model_type is not None:
                 model = Model(
                     identifier=model_spec["model_id"],
                     provider_resource_id=provider_resource_id,
                     provider_id=self.__provider_id__,
                     metadata={},
-                    model_type=ModelType.llm,
+                    model_type=model_type,
                 )
-                # In theory, I guess it is possible that a model could be both an embedding model and a text chat model.
-                # In that case, the cache will record the generator Model object, and the list which we return will have
-                # both the generator Model object and the text chat Model object. That's fine because the cache is
-                # only used for check_model_availability() anyway.
                 self._model_cache[provider_resource_id] = model
                 models.append(model)
         return models
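For context, the refactor collapses the two separate construction paths into one: each spec is classified once (embedding, llm, or skipped), and at most one Model is built per spec. Below is a minimal standalone sketch of that loop. The ModelType and Model definitions here are simplified stand-ins for the llama_stack types, the sample specs are hypothetical, and the adapter details (pulling specs from self._get_model_specs(), populating self._model_cache) are omitted.

from dataclasses import dataclass, field
from enum import Enum


class ModelType(str, Enum):  # stand-in for llama_stack's ModelType
    llm = "llm"
    embedding = "embedding"


@dataclass
class Model:  # stand-in for llama_stack's Model resource
    identifier: str
    provider_resource_id: str
    provider_id: str
    model_type: ModelType
    metadata: dict = field(default_factory=dict)


def list_models(provider_id: str, model_specs: list[dict]) -> list[Model]:
    """Mirror of the refactored loop: classify each spec once, build at most one Model."""
    models = []
    for spec in model_specs:
        functions = [f["id"] for f in spec.get("functions", [])]
        if "embedding" in functions:
            model_type = ModelType.embedding
        elif "text_chat" in functions:
            model_type = ModelType.llm
        else:
            model_type = None  # spec supports neither function: skip it

        if model_type is not None:
            models.append(
                Model(
                    identifier=spec["model_id"],
                    provider_resource_id=f"{provider_id}/{spec['model_id']}",
                    provider_id=provider_id,
                    metadata={},
                    model_type=model_type,
                )
            )
    return models


# Hypothetical specs shaped like the watsonx model listing referenced in the deleted comments
specs = [
    {"model_id": "ibm/granite-embedding-278m-multilingual", "functions": [{"id": "embedding"}]},
    {"model_id": "meta-llama/llama-3-3-70b-instruct", "functions": [{"id": "text_chat"}]},
]
print([(m.identifier, m.model_type.value) for m in list_models("watsonx", specs)])

Note that, unlike the deleted branch, the refactored code no longer records embedding_dimension or max_sequence_length in the model metadata; every Model is built with metadata={}.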