Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-12 20:12:33 +00:00)
Updating since OpenAIMixin is a Pydantic BaseModel
This commit is contained in:
parent 9a2b2e3b37
commit 0ba4cd460f
2 changed files with 3 additions and 20 deletions
llama_stack/providers/remote/inference/runpod/__init__.py

@@ -11,6 +11,6 @@ async def get_adapter_impl(config: RunpodImplConfig, _deps):
     from .runpod import RunpodInferenceAdapter
 
     assert isinstance(config, RunpodImplConfig), f"Unexpected config type: {type(config)}"
-    impl = RunpodInferenceAdapter(config)
+    impl = RunpodInferenceAdapter(config=config)
     await impl.initialize()
     return impl
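Why the call site changes: pydantic generates a keyword-only __init__ for BaseModel subclasses, so once RunpodInferenceAdapter is a Pydantic model the positional call RunpodInferenceAdapter(config) raises a TypeError. A minimal sketch of that behavior, using stand-in Demo* names and an example URL rather than the real llama-stack classes:

    from pydantic import BaseModel


    class DemoConfig(BaseModel):          # stand-in for RunpodImplConfig
        url: str


    class DemoAdapter(BaseModel):         # stand-in for RunpodInferenceAdapter
        config: DemoConfig


    cfg = DemoConfig(url="https://api.runpod.example/v1")

    adapter = DemoAdapter(config=cfg)     # keyword form: validated and assigned
    print(adapter.config.url)

    try:
        DemoAdapter(cfg)                  # positional form: rejected by pydantic
    except TypeError as err:
        print(f"TypeError: {err}")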
llama_stack/providers/remote/inference/runpod/runpod.py

@@ -7,35 +7,24 @@
 from typing import Any
 
 from llama_stack.apis.inference import (
-    Inference,
     OpenAIEmbeddingsResponse,
     OpenAIMessageParam,
     OpenAIResponseFormatParam,
 )
 from llama_stack.apis.models import Model
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import RunpodImplConfig
 
-MODEL_ENTRIES = []
-
 
-class RunpodInferenceAdapter(
-    OpenAIMixin,
-    ModelRegistryHelper,
-    Inference,
-):
+class RunpodInferenceAdapter(OpenAIMixin):
     """
     Adapter for RunPod's OpenAI-compatible API endpoints.
     Supports VLLM for serverless endpoint self-hosted or public endpoints.
     Can work with any runpod endpoints that support OpenAI-compatible API
     """
 
-    def __init__(self, config: RunpodImplConfig) -> None:
-        OpenAIMixin.__init__(self)
-        ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
-        self.config = config
+    config: RunpodImplConfig
 
     def get_api_key(self) -> str:
         """Get API key for OpenAI client."""
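The new config: RunpodImplConfig declaration replaces the handwritten __init__: pydantic now validates and assigns the field itself, and the ModelRegistryHelper/Inference bases with their manual __init__ calls drop away. The adapter keeps get_api_key() and get_base_url(), which OpenAIMixin presumably uses to construct its OpenAI-compatible client; the sketch below illustrates that hook pattern (the client-building detail is an assumption, not quoted from this diff):

    from openai import AsyncOpenAI
    from pydantic import BaseModel


    class SketchOpenAIMixin(BaseModel):
        """Illustrative stand-in for OpenAIMixin; not the real implementation."""

        def get_api_key(self) -> str:       # hook: subclass supplies credentials
            raise NotImplementedError

        def get_base_url(self) -> str:      # hook: subclass supplies the endpoint
            raise NotImplementedError

        @property
        def client(self) -> AsyncOpenAI:
            # Build the OpenAI-compatible client from the two hooks, so an
            # adapter only needs a config field plus these two methods.
            return AsyncOpenAI(api_key=self.get_api_key(), base_url=self.get_base_url())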
@@ -45,12 +34,6 @@ class RunpodInferenceAdapter(
         """Get base URL for OpenAI client."""
         return self.config.url
 
-    async def initialize(self) -> None:
-        pass
-
-    async def shutdown(self) -> None:
-        pass
-
     async def openai_chat_completion(
         self,
         model: str,
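Removing the empty initialize()/shutdown() overrides is safe only if a base class already supplies async no-op defaults, since __init__.py still awaits impl.initialize(). A sketch under that assumption, with a hypothetical ProviderBase standing in for whatever parent defines the defaults:

    import asyncio


    class ProviderBase:                       # hypothetical stand-in for the base
        async def initialize(self) -> None:   # async no-op default
            pass

        async def shutdown(self) -> None:
            pass


    class Adapter(ProviderBase):              # no overrides needed anymore
        pass


    async def main() -> None:
        impl = Adapter()
        await impl.initialize()               # inherited no-op, call site unchanged
        await impl.shutdown()


    asyncio.run(main())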