chore: remove llama_models.llama3.api imports from providers (#1107)
There should be a choke-point for llama3.api imports -- this is the prompt adapter. Creating a ChatFormat() object on demand is inexpensive. The underlying Tokenizer is a singleton anyway.
This commit is contained in:
parent e9b8259cf9
commit cdcbeb005b

13 changed files with 77 additions and 113 deletions
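The commit message names the prompt adapter as the single choke-point for llama3.api imports. As a minimal sketch of that idea, assuming the prompt adapter exposes one accessor (the name get_chat_format is hypothetical; ChatFormat and Tokenizer.get_instance() are taken verbatim from the code removed below):

from llama_models.llama3.api.chat_format import ChatFormat
from llama_models.llama3.api.tokenizer import Tokenizer


def get_chat_format() -> ChatFormat:
    # Tokenizer.get_instance() returns a process-wide singleton, so
    # building a fresh ChatFormat wrapper on demand is inexpensive.
    return ChatFormat(Tokenizer.get_instance())

Providers can then call such a helper (or let the shared utilities do so) instead of importing llama3.api themselves, which is exactly what the diff below removes.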
@@ -7,8 +7,6 @@
 import json
 from typing import AsyncGenerator
 
-from llama_models.llama3.api.chat_format import ChatFormat
-from llama_models.llama3.api.tokenizer import Tokenizer
 from openai import OpenAI
 
 from llama_stack.apis.common.content_types import (
@@ -38,13 +36,8 @@ from .models import MODEL_ALIASES
 
 class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: SambaNovaImplConfig) -> None:
-        ModelRegistryHelper.__init__(
-            self,
-            model_aliases=MODEL_ALIASES,
-        )
+        ModelRegistryHelper.__init__(self, model_aliases=MODEL_ALIASES)
         self.config = config
-        self.formatter = ChatFormat(Tokenizer.get_instance())
 
     async def initialize(self) -> None:
         return
@@ -120,7 +113,7 @@ class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):
                 yield chunk
 
         stream = _to_async_generator()
-        async for chunk in process_chat_completion_stream_response(stream, self.formatter, request):
+        async for chunk in process_chat_completion_stream_response(stream, request):
             yield chunk
 
     async def embeddings(
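The last hunk drops self.formatter from the call to the shared streaming helper, so the formatter must now be created inside the utility layer rather than held by each provider. A hedged sketch of what that implies; only the (stream, request) signature comes from the diff, the body is an assumption:

from llama_models.llama3.api.chat_format import ChatFormat
from llama_models.llama3.api.tokenizer import Tokenizer


async def process_chat_completion_stream_response(stream, request):
    # Assumed body: the formatter is built on demand here instead of
    # being threaded through every provider; the singleton tokenizer
    # makes this construction cheap per call.
    formatter = ChatFormat(Tokenizer.get_instance())
    async for chunk in stream:
        event = ...  # decode `chunk` with `formatter` (details elided)
        yield event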