Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-28 02:53:30 +00:00)
Inference to use provider resource id to register and validate (#428)
This PR changes how a model id is translated into the final model name that gets passed to the provider. Major changes (see the sketch below):

1) Providers are responsible for registering an object and, as part of registration, returning the object with the correct provider-specific model name in provider_resource_id.
2) To help with the common lookups across different names, a new ModelLookup class is created.

Tested all inference providers, including together, fireworks, vllm, ollama, meta reference, and bedrock.
This commit is contained in:
Parent: e51107e019
Commit: fdff24e77a
21 changed files with 460 additions and 290 deletions
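To make the registration contract in the description concrete, here is a minimal sketch. The Model fields mirror llama-stack naming from this PR, but the ModelLookup interface and the provider class are illustrative assumptions, not the actual llama-stack API:

```python
# Hypothetical sketch of the registration contract described above.
from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class Model:
    identifier: str                            # user-facing model id
    provider_resource_id: Optional[str] = None  # provider-specific model name


class ModelLookup:
    """Assumed interface: maps user-facing ids to provider-specific names."""

    def __init__(self, alias_to_provider_id: Dict[str, str]):
        self.alias_to_provider_id = alias_to_provider_id

    def get_provider_model_id(self, identifier: str) -> str:
        if identifier not in self.alias_to_provider_id:
            raise ValueError(f"Unknown model: {identifier}")
        return self.alias_to_provider_id[identifier]


class ExampleInferenceProvider:
    """Illustrative provider showing the contract from this PR."""

    def __init__(self, lookup: ModelLookup):
        self.lookup = lookup

    async def register_model(self, model: Model) -> Model:
        # Per this PR, the provider resolves the name and returns the
        # object with the correct provider-specific provider_resource_id.
        model.provider_resource_id = self.lookup.get_provider_model_id(
            model.identifier
        )
        return model
```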
```diff
@@ -96,7 +96,7 @@ class TestInference:
         response = await inference_impl.completion(
             content="Micheael Jordan is born in ",
             stream=False,
-            model=inference_model,
+            model_id=inference_model,
             sampling_params=SamplingParams(
                 max_tokens=50,
             ),
@@ -110,7 +110,7 @@ class TestInference:
         async for r in await inference_impl.completion(
             content="Roses are red,",
             stream=True,
-            model=inference_model,
+            model_id=inference_model,
             sampling_params=SamplingParams(
                 max_tokens=50,
             ),
@@ -171,7 +171,7 @@ class TestInference:
     ):
         inference_impl, _ = inference_stack
         response = await inference_impl.chat_completion(
-            model=inference_model,
+            model_id=inference_model,
             messages=sample_messages,
             stream=False,
             **common_params,
@@ -204,7 +204,7 @@ class TestInference:
             num_seasons_in_nba: int

         response = await inference_impl.chat_completion(
-            model=inference_model,
+            model_id=inference_model,
             messages=[
                 SystemMessage(content="You are a helpful assistant."),
                 UserMessage(content="Please give me information about Michael Jordan."),
@@ -227,7 +227,7 @@ class TestInference:
         assert answer.num_seasons_in_nba == 15

         response = await inference_impl.chat_completion(
-            model=inference_model,
+            model_id=inference_model,
             messages=[
                 SystemMessage(content="You are a helpful assistant."),
                 UserMessage(content="Please give me information about Michael Jordan."),
@@ -250,7 +250,7 @@ class TestInference:
         response = [
             r
             async for r in await inference_impl.chat_completion(
-                model=inference_model,
+                model_id=inference_model,
                 messages=sample_messages,
                 stream=True,
                 **common_params,
@@ -286,7 +286,7 @@ class TestInference:
         ]

         response = await inference_impl.chat_completion(
-            model=inference_model,
+            model_id=inference_model,
             messages=messages,
             tools=[sample_tool_definition],
             stream=False,
@@ -327,7 +327,7 @@ class TestInference:
         response = [
             r
             async for r in await inference_impl.chat_completion(
-                model=inference_model,
+                model_id=inference_model,
                 messages=messages,
                 tools=[sample_tool_definition],
                 stream=True,
```
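As the diff shows, callers now pass the user-facing id via the model_id keyword rather than model. A minimal call after this change, assuming inference_impl, inference_model, and sample_messages are set up as in the test fixtures above:

```python
# Assumes fixtures as in the tests above: a registered inference
# implementation, a model id, and a list of chat messages.
response = await inference_impl.chat_completion(
    model_id=inference_model,
    messages=sample_messages,
    stream=False,
)
```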