mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-24 00:47:00 +00:00
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
Vector IO Integration Tests / test-matrix (push) Failing after 4s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 4s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 3s
Unit Tests / unit-tests (3.12) (push) Failing after 3s
Unit Tests / unit-tests (3.13) (push) Failing after 3s
Test Llama Stack Build / build (push) Failing after 3s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 1s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 3s
Test Llama Stack Build / generate-matrix (push) Successful in 3s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Test Llama Stack Build / build-single-provider (push) Failing after 4s
Python Package Build Test / build (3.12) (push) Failing after 1s
API Conformance Tests / check-schema-compatibility (push) Successful in 7s
Python Package Build Test / build (3.13) (push) Failing after 1s
Test External API and Providers / test-external (venv) (push) Failing after 4s
UI Tests / ui-tests (22) (push) Successful in 39s
Pre-commit / pre-commit (push) Successful in 1m18s
# What does this PR do?

APIs removed:
- POST /v1/batch-inference/completion
- POST /v1/batch-inference/chat-completion
- POST /v1/inference/batch-completion
- POST /v1/inference/batch-chat-completion

Notes:
- batch-completion and batch-chat-completion were only implemented for inference=inline::meta-reference
- batch-inference was not implemented
76 lines
3 KiB
Python
76 lines
3 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
from llama_stack.log import get_logger
|
|
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
|
|
|
from .config import OpenAIConfig
|
|
|
|
# Module-level logger for this adapter, registered under the "inference::openai" category.
logger = get_logger(name=__name__, category="inference::openai")
|
|
|
|
|
|
#
# This OpenAI adapter implements Inference methods using two mixins -
#
# | Inference Method           | Implementation Source    |
# |----------------------------|--------------------------|
# | completion                 | LiteLLMOpenAIMixin       |
# | chat_completion            | LiteLLMOpenAIMixin       |
# | embedding                  | LiteLLMOpenAIMixin       |
# | openai_completion          | OpenAIMixin              |
# | openai_chat_completion     | OpenAIMixin              |
# | openai_embeddings          | OpenAIMixin              |
#
|
|
class OpenAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
    """
    Llama Stack inference adapter for OpenAI, composed from two mixins.

    The base-class order is deliberate: OpenAIMixin is listed ahead of
    LiteLLMOpenAIMixin so that method resolution selects
    OpenAIMixin.check_model_availability() rather than
    ModelRegistryHelper.check_model_availability().

    - OpenAIMixin.check_model_availability() asks the OpenAI API whether a model exists
    - ModelRegistryHelper.check_model_availability() (reached through LiteLLMOpenAIMixin)
      only returns False and emits a warning
    """

    # Embedding models known to this adapter, keyed by model name, with the
    # vector dimension and context length recorded for each.
    embedding_model_metadata = {
        "text-embedding-3-small": {
            "embedding_dimension": 1536,
            "context_length": 8192,
        },
        "text-embedding-3-large": {
            "embedding_dimension": 3072,
            "context_length": 8192,
        },
    }

    def __init__(self, config: OpenAIConfig) -> None:
        """
        Set up the LiteLLM side of the adapter and remember the configuration.

        :param config: OpenAI provider configuration; its ``api_key`` is handed
            to LiteLLMOpenAIMixin and the object itself is stored on ``self.config``.
        """
        # Only the LiteLLM mixin's initializer is invoked explicitly here.
        LiteLLMOpenAIMixin.__init__(
            self,
            litellm_provider_name="openai",
            api_key_from_config=config.api_key,
            provider_data_api_key_field="openai_api_key",
        )
        self.config = config

        # Enabling is_openai_compat lets callers pass canonical OpenAI model
        # names ("gpt-4", "gpt-3.5-turbo"); they are mapped transparently to
        # litellm's "openai/gpt-4" / "openai/gpt-3.5-turbo" form. Without it,
        # litellm-specific model names would leak through the abstraction.
        self.is_openai_compat = True

    # API-key lookup (including client-supplied provider data) is handled by
    # LiteLLMOpenAIMixin's implementation, bound here explicitly.
    get_api_key = LiteLLMOpenAIMixin.get_api_key

    def get_base_url(self) -> str:
        """
        Return the OpenAI API base URL.

        The value is read from ``base_url`` on the stored configuration.
        """
        return self.config.base_url

    async def initialize(self) -> None:
        """Run the inherited initialization."""
        await super().initialize()

    async def shutdown(self) -> None:
        """Run the inherited shutdown."""
        await super().shutdown()
|