mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-17 11:12:36 +00:00
working fireworks and together
This commit is contained in:
parent
25d8ab0e14
commit
8de4cee373
8 changed files with 205 additions and 86 deletions
|
|
@ -105,9 +105,8 @@ class InferenceRouter(Inference):
|
|||
stream: Optional[bool] = False,
|
||||
logprobs: Optional[LogProbConfig] = None,
|
||||
) -> AsyncGenerator:
|
||||
model = await self.routing_table.get_model(model_id)
|
||||
params = dict(
|
||||
model_id=model.provider_resource_id,
|
||||
model_id=model_id,
|
||||
messages=messages,
|
||||
sampling_params=sampling_params,
|
||||
tools=tools or [],
|
||||
|
|
@ -132,10 +131,9 @@ class InferenceRouter(Inference):
|
|||
stream: Optional[bool] = False,
|
||||
logprobs: Optional[LogProbConfig] = None,
|
||||
) -> AsyncGenerator:
|
||||
model = await self.routing_table.get_model(model_id)
|
||||
provider = self.routing_table.get_provider_impl(model_id)
|
||||
params = dict(
|
||||
model_id=model.provider_resource_id,
|
||||
model_id=model_id,
|
||||
content=content,
|
||||
sampling_params=sampling_params,
|
||||
response_format=response_format,
|
||||
|
|
@ -152,9 +150,8 @@ class InferenceRouter(Inference):
|
|||
model_id: str,
|
||||
contents: List[InterleavedTextMedia],
|
||||
) -> EmbeddingsResponse:
|
||||
model = await self.routing_table.get_model(model_id)
|
||||
return await self.routing_table.get_provider_impl(model_id).embeddings(
|
||||
model_id=model.provider_resource_id,
|
||||
model_id=model_id,
|
||||
contents=contents,
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue