Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-07-25 21:57:45 +00:00.
# What does this PR do?

Add an `OpenAIMixin` for use by inference providers whose remote endpoints support an OpenAI-compatible API. Its use is demonstrated by refactoring:

- OpenAIInferenceAdapter
- NVIDIAInferenceAdapter (adds embedding support)
- LlamaCompatInferenceAdapter

## Test Plan

Existing unit and integration tests.
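To make the mixin's contract concrete before the file itself: judging from the adapter below, a provider implements `get_api_key()` and `get_base_url()` and inherits the `openai_*` inference methods from the mixin. A minimal sketch only; `ExampleCompatAdapter`, its constructor, and the endpoint URL are hypothetical and not part of this PR:

```python
# Hypothetical adapter for some OpenAI-compatible endpoint. Only the two hook
# methods mirror the real adapter further down; everything else is illustrative.
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin


class ExampleCompatAdapter(OpenAIMixin):
    def __init__(self, api_key: str) -> None:
        self._api_key = api_key

    def get_api_key(self) -> str:
        # Credential used by the mixin's OpenAI client.
        return self._api_key

    def get_base_url(self) -> str:
        # Any endpoint that speaks the OpenAI API; the real adapter below
        # returns "https://api.openai.com/v1" here.
        return "https://example.invalid/v1"
```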
75 lines · 2.9 KiB · Python
```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import logging

from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

from .config import OpenAIConfig
from .models import MODEL_ENTRIES

logger = logging.getLogger(__name__)


#
# This OpenAI adapter implements Inference methods using two mixins -
#
# | Inference Method           | Implementation Source    |
# |----------------------------|--------------------------|
# | completion                 | LiteLLMOpenAIMixin       |
# | chat_completion            | LiteLLMOpenAIMixin       |
# | embedding                  | LiteLLMOpenAIMixin       |
# | batch_completion           | LiteLLMOpenAIMixin       |
# | batch_chat_completion      | LiteLLMOpenAIMixin       |
# | openai_completion          | OpenAIMixin              |
# | openai_chat_completion     | OpenAIMixin              |
# | openai_embeddings          | OpenAIMixin              |
#
class OpenAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
    """
    OpenAI Inference Adapter for Llama Stack.

    Note: The inheritance order is important here. OpenAIMixin must come before
    LiteLLMOpenAIMixin to ensure that OpenAIMixin.check_model_availability()
    is used instead of ModelRegistryHelper.check_model_availability().

    - OpenAIMixin.check_model_availability() queries the OpenAI API to check if a model exists
    - ModelRegistryHelper.check_model_availability() (inherited by LiteLLMOpenAIMixin) just returns False and shows a warning
    """

    def __init__(self, config: OpenAIConfig) -> None:
        LiteLLMOpenAIMixin.__init__(
            self,
            MODEL_ENTRIES,
            api_key_from_config=config.api_key,
            provider_data_api_key_field="openai_api_key",
        )
        self.config = config
        # we set is_openai_compat so users can use the canonical
        # openai model names like "gpt-4" or "gpt-3.5-turbo"
        # and the model name will be translated to litellm's
        # "openai/gpt-4" or "openai/gpt-3.5-turbo" transparently.
        # if we do not set this, users will be exposed to the
        # litellm specific model names, an abstraction leak.
        self.is_openai_compat = True

    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
    get_api_key = LiteLLMOpenAIMixin.get_api_key

    def get_base_url(self) -> str:
        """
        Get the OpenAI API base URL.

        Returns the standard OpenAI API base URL for direct OpenAI API calls.
        """
        return "https://api.openai.com/v1"

    async def initialize(self) -> None:
        await super().initialize()

    async def shutdown(self) -> None:
        await super().shutdown()
```
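The docstring's note about inheritance order comes down to Python's method resolution order: attribute lookup walks the MRO left to right, so the first base listed wins. A standalone sketch of that mechanism (plain Python with stand-in classes, not llama_stack imports):

```python
# Stand-in for ModelRegistryHelper, whose check_model_availability just
# warns and returns False.
class ModelRegistryHelperDemo:
    def check_model_availability(self, model: str) -> bool:
        return False


class LiteLLMOpenAIMixinDemo(ModelRegistryHelperDemo):
    pass  # inherits the registry helper's check_model_availability


# Stand-in for OpenAIMixin, whose check_model_availability queries the
# OpenAI API to see if the model exists.
class OpenAIMixinDemo:
    def check_model_availability(self, model: str) -> bool:
        return True


class AdapterDemo(OpenAIMixinDemo, LiteLLMOpenAIMixinDemo):
    pass


# The MRO lists OpenAIMixinDemo before LiteLLMOpenAIMixinDemo, so its
# check_model_availability shadows the registry helper's version.
print([c.__name__ for c in AdapterDemo.__mro__])
# ['AdapterDemo', 'OpenAIMixinDemo', 'LiteLLMOpenAIMixinDemo',
#  'ModelRegistryHelperDemo', 'object']
print(AdapterDemo().check_model_availability("gpt-4"))  # True
```

Had the adapter been declared as `class OpenAIInferenceAdapter(LiteLLMOpenAIMixin, OpenAIMixin)`, the False-returning helper version would be found first, which is exactly what the docstring warns against.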