mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-03 19:57:35 +00:00
# What does this PR do? update Groq inference provider to use OpenAIMixin for openai-compat endpoints changes on api.groq.com - - json_schema is now supported for specific models, see https://console.groq.com/docs/structured-outputs#supported-models - response_format with streaming is now supported for models that support response_format - groq no longer returns a 400 error if tools are provided and tool_choice is not "required" ## Test Plan ``` $ GROQ_API_KEY=... uv run llama stack build --image-type venv --providers inference=remote::groq --run ... $ LLAMA_STACK_CONFIG=http://localhost:8321 uv run --group test pytest -v -ra --text-model groq/llama-3.3-70b-versatile tests/integration/inference/test_openai_completion.py -k 'not store' ... SKIPPED [3] tests/integration/inference/test_openai_completion.py:44: Model groq/llama-3.3-70b-versatile hosted by remote::groq doesn't support OpenAI completions. SKIPPED [3] tests/integration/inference/test_openai_completion.py:94: Model groq/llama-3.3-70b-versatile hosted by remote::groq doesn't support vllm extra_body parameters. SKIPPED [4] tests/integration/inference/test_openai_completion.py:73: Model groq/llama-3.3-70b-versatile hosted by remote::groq doesn't support n param. SKIPPED [1] tests/integration/inference/test_openai_completion.py💯 Model groq/llama-3.3-70b-versatile hosted by remote::groq doesn't support chat completion calls with base64 encoded files. ======================= 8 passed, 11 skipped, 8 deselected, 2 warnings in 5.13s ======================== ``` --------- Co-authored-by: raghotham <rsm@meta.com>
38 lines
1.2 KiB
Python
38 lines
1.2 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
|
|
from llama_stack.providers.remote.inference.groq.config import GroqConfig
|
|
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
|
|
|
from .models import MODEL_ENTRIES
|
|
|
|
|
|
class GroqInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
|
_config: GroqConfig
|
|
|
|
def __init__(self, config: GroqConfig):
|
|
LiteLLMOpenAIMixin.__init__(
|
|
self,
|
|
model_entries=MODEL_ENTRIES,
|
|
litellm_provider_name="groq",
|
|
api_key_from_config=config.api_key,
|
|
provider_data_api_key_field="groq_api_key",
|
|
)
|
|
self.config = config
|
|
|
|
# Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
|
|
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
|
|
|
def get_base_url(self) -> str:
|
|
return f"{self.config.url}/openai/v1"
|
|
|
|
async def initialize(self):
|
|
await super().initialize()
|
|
|
|
async def shutdown(self):
|
|
await super().shutdown()
|