# What does this PR do?

Converts openai(_chat)_completions params to a pydantic BaseModel to reduce code duplication across all providers.

## Test Plan

CI

---

Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/llamastack/llama-stack/pull/3761).

* #3777
* __->__ #3761
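For context, the shape of the change is that providers accept one typed request object instead of a long keyword-argument list. A minimal sketch of the idea, assuming pydantic v2; the class and field set below are illustrative stand-ins, not the actual API — the real `OpenAIChatCompletionRequest` lives in `llama_stack.apis.inference` and carries the full OpenAI chat-completions parameter surface:

```python
from pydantic import BaseModel


class ChatCompletionRequestSketch(BaseModel):
    """Illustrative stand-in for OpenAIChatCompletionRequest; the real
    model defines many more OpenAI chat-completions parameters."""

    model: str
    messages: list[dict]
    stream: bool = False
    stream_options: dict | None = None


# Providers then receive a single validated object instead of dozens of kwargs:
req = ChatCompletionRequestSketch(
    model="example-model",
    messages=[{"role": "user", "content": "Hi"}],
)
```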
```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from collections.abc import AsyncIterator

from llama_stack.apis.inference import (
    OpenAIChatCompletion,
    OpenAIChatCompletionChunk,
    OpenAIChatCompletionRequest,
)
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

from .config import RunpodImplConfig


class RunpodInferenceAdapter(OpenAIMixin):
    """
    Adapter for RunPod's OpenAI-compatible API endpoints.

    Supports vLLM on self-hosted serverless or public endpoints, and works
    with any RunPod endpoint that exposes an OpenAI-compatible API.
    """

    config: RunpodImplConfig

    def get_base_url(self) -> str:
        """Get the base URL for the OpenAI client."""
        return self.config.url

    async def openai_chat_completion(
        self,
        params: OpenAIChatCompletionRequest,
    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
        """Override to add RunPod-specific stream_options requirement."""
        # Copy the request so the caller's params object is not mutated.
        params = params.model_copy()

        # RunPod needs stream_options set to include usage stats in streams.
        if params.stream and not params.stream_options:
            params.stream_options = {"include_usage": True}

        return await super().openai_chat_completion(params)
```
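A hedged usage sketch of the override's effect, assuming `model` and `messages` are the only required fields on the request model (the real constructor may require more): `model_copy()` is pydantic v2's copy method, so the caller's request object is left untouched while the copy passed to the mixin gets `stream_options` injected.

```python
request = OpenAIChatCompletionRequest(
    model="example-model",  # hypothetical model id
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
)

# What the adapter does internally before delegating to OpenAIMixin:
patched = request.model_copy()
if patched.stream and not patched.stream_options:
    patched.stream_options = {"include_usage": True}

assert request.stream_options is None  # original request unchanged
assert patched.stream_options == {"include_usage": True}
```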