mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 12:07:34 +00:00
Several smaller fixes to make adapters work
Also, reorganized the pattern of __init__ inside providers so configuration can stay lightweight
This commit is contained in:
parent
2a1552a5eb
commit
45987996c4
23 changed files with 164 additions and 160 deletions
|
@ -6,12 +6,11 @@
|
|||
|
||||
import asyncio
|
||||
|
||||
from typing import AsyncIterator, Dict, Union
|
||||
from typing import AsyncIterator, Union
|
||||
|
||||
from llama_models.llama3.api.datatypes import StopReason
|
||||
from llama_models.sku_list import resolve_model
|
||||
|
||||
from llama_toolchain.distribution.datatypes import Api, ProviderSpec
|
||||
from llama_toolchain.inference.api import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
|
@ -27,18 +26,6 @@ from .config import MetaReferenceImplConfig
|
|||
from .model_parallel import LlamaModelParallelGenerator
|
||||
|
||||
|
||||
async def get_provider_impl(
|
||||
config: MetaReferenceImplConfig, _deps: Dict[Api, ProviderSpec]
|
||||
):
|
||||
assert isinstance(
|
||||
config, MetaReferenceImplConfig
|
||||
), f"Unexpected config type: {type(config)}"
|
||||
|
||||
impl = MetaReferenceInferenceImpl(config)
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
||||
|
||||
# there's a single model parallel process running serving the model. for now,
|
||||
# we don't support multiple concurrent requests to this process.
|
||||
SEMAPHORE = asyncio.Semaphore(1)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue