mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-29 16:54:44 +00:00
feat: remote ramalama provider implementation
Implement remote ramalama provider using AsyncOpenAI as the client since ramalama doesn't have its own Async library. Ramalama is similar to ollama, as it is a lightweight local inference server. However, it runs by default in a containerized mode. RAMALAMA_URL is http://localhost:8080 by default Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
parent
94f83382eb
commit
4de45560bf
8 changed files with 680 additions and 0 deletions
|
|
@ -260,6 +260,7 @@ exclude = [
|
|||
"^llama_stack/providers/remote/inference/nvidia/",
|
||||
"^llama_stack/providers/remote/inference/openai/",
|
||||
"^llama_stack/providers/remote/inference/passthrough/",
|
||||
"^llama_stack/providers/remote/inference/ramalama/",
|
||||
"^llama_stack/providers/remote/inference/runpod/",
|
||||
"^llama_stack/providers/remote/inference/sambanova/",
|
||||
"^llama_stack/providers/remote/inference/sample/",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue