mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00
feat: add a configurable category-based logger (#1352)
A self-respecting server needs good observability which starts with
configurable logging. Llama Stack had little until now. This PR adds a
`logcat` facility towards that. Callsites look like:
```python
logcat.debug("inference", f"params to ollama: {params}")
```
- the first parameter is a category. there is a static list of
categories in `llama_stack/logcat.py`
- each category can be associated with a log-level which can be
configured via the `LLAMA_STACK_LOGGING` env var.
- a value `LLAMA_STACK_LOGGING=inference=debug;server=info"` does the
obvious thing. there is a special key called `all` which is an alias for
all categories
## Test Plan
Ran with `LLAMA_STACK_LOGGING="all=debug" llama stack run fireworks` and
saw the following:

Hit it with a client-sdk test case and saw this:

This commit is contained in:
parent
a9a7b11326
commit
754feba61f
12 changed files with 409 additions and 47 deletions
|
|
@ -8,6 +8,7 @@ from typing import AsyncGenerator, List, Optional, Union
|
|||
|
||||
from fireworks.client import Fireworks
|
||||
|
||||
from llama_stack import logcat
|
||||
from llama_stack.apis.common.content_types import (
|
||||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
|
|
@ -226,12 +227,14 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
|
|||
if input_dict["prompt"].startswith("<|begin_of_text|>"):
|
||||
input_dict["prompt"] = input_dict["prompt"][len("<|begin_of_text|>") :]
|
||||
|
||||
return {
|
||||
params = {
|
||||
"model": request.model,
|
||||
**input_dict,
|
||||
"stream": request.stream,
|
||||
**self._build_options(request.sampling_params, request.response_format, request.logprobs),
|
||||
}
|
||||
logcat.debug("inference", f"params to fireworks: {params}")
|
||||
return params
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ from typing import AsyncGenerator, List, Optional, Union
|
|||
import httpx
|
||||
from ollama import AsyncClient
|
||||
|
||||
from llama_stack import logcat
|
||||
from llama_stack.apis.common.content_types import (
|
||||
ImageContentItem,
|
||||
InterleavedContent,
|
||||
|
|
@ -203,12 +204,14 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
|
|||
else:
|
||||
raise ValueError(f"Unknown response format type: {fmt.type}")
|
||||
|
||||
return {
|
||||
params = {
|
||||
"model": request.model,
|
||||
**input_dict,
|
||||
"options": sampling_options,
|
||||
"stream": request.stream,
|
||||
}
|
||||
logcat.debug("inference", f"params to ollama: {params}")
|
||||
return params
|
||||
|
||||
async def _nonstream_chat_completion(self, request: ChatCompletionRequest) -> ChatCompletionResponse:
|
||||
params = await self._get_params(request)
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from typing import AsyncGenerator, List, Optional, Union
|
|||
|
||||
from together import Together
|
||||
|
||||
from llama_stack import logcat
|
||||
from llama_stack.apis.common.content_types import (
|
||||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
|
|
@ -213,12 +214,14 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi
|
|||
assert not media_present, "Together does not support media for Completion requests"
|
||||
input_dict["prompt"] = await completion_request_to_prompt(request)
|
||||
|
||||
return {
|
||||
params = {
|
||||
"model": request.model,
|
||||
**input_dict,
|
||||
"stream": request.stream,
|
||||
**self._build_options(request.sampling_params, request.logprobs, request.response_format),
|
||||
}
|
||||
logcat.debug("inference", f"params to together: {params}")
|
||||
return params
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue