Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-18 15:49:49 +00:00)

Merge branch 'meta-llama:main' into main

Commit fced5ec6dd: 208 changed files with 7952 additions and 1104 deletions
@@ -3,3 +3,5 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
#
# from .distribution.library_client import LlamaStackAsLibraryClient, AsyncLlamaStackAsLibraryClient

@@ -23,6 +23,7 @@ from llama_models.schema_utils import json_schema_type, webmethod
from pydantic import BaseModel, ConfigDict, Field
from typing_extensions import Annotated

from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_models.llama3.api.datatypes import *  # noqa: F403
from llama_stack.apis.common.deployment_types import *  # noqa: F403
from llama_stack.apis.inference import *  # noqa: F403

@@ -339,9 +340,8 @@ class AgentTurnResponseStepProgressPayload(BaseModel):
    step_type: StepType
    step_id: str

    model_response_text_delta: Optional[str] = None
    text_delta: Optional[str] = None
    tool_call_delta: Optional[ToolCallDelta] = None
    tool_response_text_delta: Optional[str] = None


@json_schema_type

@@ -418,6 +418,7 @@ class AgentStepResponse(BaseModel):


@runtime_checkable
@trace_protocol
class Agents(Protocol):
    @webmethod(route="/agents/create")
    async def create_agent(

@@ -121,7 +121,7 @@ class EventLogger:
                else:
                    yield event, LogEvent(
                        role=None,
                        content=event.payload.model_response_text_delta,
                        content=event.payload.text_delta,
                        end="",
                        color="yellow",
                    )

@@ -171,12 +171,14 @@ class EventLogger:
                and event_type == EventType.step_complete.value
            ):
                details = event.payload.step_details
                content = interleaved_text_media_as_str(details.inserted_context)
                content = content[:200] + "..." if len(content) > 200 else content
                inserted_context = interleaved_text_media_as_str(
                    details.inserted_context
                )
                content = f"fetched {len(inserted_context)} bytes from {details.memory_bank_ids}"

                yield event, LogEvent(
                    role=step_type,
                    content=f"Retrieved context from banks: {details.memory_bank_ids}.\n====\n{content}\n>",
                    content=content,
                    color="cyan",
                )

@@ -37,3 +37,8 @@ class DatasetIO(Protocol):
        page_token: Optional[str] = None,
        filter_condition: Optional[str] = None,
    ) -> PaginatedRowsResult: ...

    @webmethod(route="/datasetio/append-rows", method="POST")
    async def append_rows(
        self, dataset_id: str, rows: List[Dict[str, Any]]
    ) -> None: ...

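For orientation, here is a minimal sketch of how a caller might exercise the new append-rows route over HTTP, following the httpx pattern used by the dataset client below; the base URL, dataset id, and row columns are placeholders, not values from this commit.

```python
# Hypothetical sketch (not part of this diff): calling the new append-rows
# endpoint over HTTP. Depending on the server, the route may also be
# prefixed with the API version.
import asyncio
import httpx


async def append_rows_example(base_url: str) -> None:
    async with httpx.AsyncClient() as client:
        response = await client.post(
            f"{base_url}/datasetio/append-rows",  # route from the @webmethod above
            json={
                "dataset_id": "my-dataset",  # assumed: must already be registered
                "rows": [{"input_query": "2+2?", "expected_answer": "4"}],
            },
            headers={"Content-Type": "application/json"},
            timeout=60,
        )
        response.raise_for_status()


# asyncio.run(append_rows_example("http://localhost:5000"))
```
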
@@ -78,6 +78,21 @@ class DatasetsClient(Datasets):

        return [DatasetDefWithProvider(**x) for x in response.json()]

    async def unregister_dataset(
        self,
        dataset_id: str,
    ) -> None:
        async with httpx.AsyncClient() as client:
            response = await client.delete(
                f"{self.base_url}/datasets/unregister",
                params={
                    "dataset_id": dataset_id,
                },
                headers={"Content-Type": "application/json"},
                timeout=60,
            )
            response.raise_for_status()


async def run_main(host: str, port: int):
    client = DatasetsClient(f"http://{host}:{port}")

@@ -64,3 +64,9 @@ class Datasets(Protocol):

    @webmethod(route="/datasets/list", method="GET")
    async def list_datasets(self) -> List[Dataset]: ...

    @webmethod(route="/datasets/unregister", method="POST")
    async def unregister_dataset(
        self,
        dataset_id: str,
    ) -> None: ...

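The removal path pairs with the client method added above; a short, hypothetical usage sketch, with host, port, and dataset id as placeholders.

```python
# Hypothetical cleanup sketch using DatasetsClient.unregister_dataset shown above.
import asyncio


async def cleanup() -> None:
    client = DatasetsClient("http://localhost:5000")  # constructed as in run_main above
    await client.unregister_dataset(dataset_id="mmlu")  # must be a registered dataset id


# asyncio.run(cleanup())
```
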
@ -21,6 +21,8 @@ from llama_models.schema_utils import json_schema_type, webmethod
|
|||
from pydantic import BaseModel, Field
|
||||
from typing_extensions import Annotated
|
||||
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
|
||||
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||
from llama_stack.apis.models import * # noqa: F403
|
||||
|
||||
|
|
@ -220,6 +222,7 @@ class ModelStore(Protocol):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Inference(Protocol):
|
||||
model_store: ModelStore
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from pydantic import BaseModel, Field
|
|||
|
||||
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||
from llama_stack.apis.memory_banks import * # noqa: F403
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
|
@ -43,6 +44,7 @@ class MemoryBankStore(Protocol):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Memory(Protocol):
|
||||
memory_bank_store: MemoryBankStore
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ from llama_models.schema_utils import json_schema_type, webmethod
|
|||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
|
@ -129,6 +130,7 @@ class MemoryBankInput(BaseModel):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class MemoryBanks(Protocol):
|
||||
@webmethod(route="/memory-banks/list", method="GET")
|
||||
async def list_memory_banks(self) -> List[MemoryBank]: ...
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ from llama_models.schema_utils import json_schema_type, webmethod
|
|||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
|
||||
|
||||
class CommonModelFields(BaseModel):
|
||||
|
|
@ -43,6 +44,7 @@ class ModelInput(CommonModelFields):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Models(Protocol):
|
||||
@webmethod(route="/models/list", method="GET")
|
||||
async def list_models(self) -> List[Model]: ...
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ from llama_models.llama3.api.datatypes import * # noqa: F403
|
|||
from pydantic import BaseModel
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_VERSION
|
||||
|
||||
from llama_stack.distribution.datatypes import RemoteProviderConfig
|
||||
|
||||
from llama_stack.apis.safety import * # noqa: F403
|
||||
|
|
@ -45,7 +47,7 @@ class SafetyClient(Safety):
|
|||
) -> RunShieldResponse:
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(
|
||||
f"{self.base_url}/safety/run_shield",
|
||||
f"{self.base_url}/{LLAMA_STACK_API_VERSION}/safety/run-shield",
|
||||
json=dict(
|
||||
shield_id=shield_id,
|
||||
messages=[encodable_dict(m) for m in messages],
|
||||
|
|
@ -91,7 +93,7 @@ async def run_main(host: str, port: int, image_path: str = None):
|
|||
]:
|
||||
cprint(f"User>{message.content}", "green")
|
||||
response = await client.run_shield(
|
||||
shield_id="llama_guard",
|
||||
shield_id="meta-llama/Llama-Guard-3-1B",
|
||||
messages=[message],
|
||||
)
|
||||
print(response)
|
||||
|
|
|
|||
|
|
@ -10,6 +10,8 @@ from typing import Any, Dict, List, Protocol, runtime_checkable
|
|||
from llama_models.schema_utils import json_schema_type, webmethod
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
|
||||
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||
from llama_stack.apis.shields import * # noqa: F403
|
||||
|
||||
|
|
@ -43,6 +45,7 @@ class ShieldStore(Protocol):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Safety(Protocol):
|
||||
shield_store: ShieldStore
|
||||
|
||||
@@ -31,6 +31,15 @@ from llama_stack.apis.resource import Resource, ResourceType
class ScoringFnParamsType(Enum):
    llm_as_judge = "llm_as_judge"
    regex_parser = "regex_parser"
    basic = "basic"


@json_schema_type
class AggregationFunctionType(Enum):
    average = "average"
    median = "median"
    categorical_count = "categorical_count"
    accuracy = "accuracy"


@json_schema_type

@@ -44,6 +53,10 @@ class LLMAsJudgeScoringFnParams(BaseModel):
        description="Regexes to extract the answer from generated response",
        default_factory=list,
    )
    aggregation_functions: Optional[List[AggregationFunctionType]] = Field(
        description="Aggregation functions to apply to the scores of each row",
        default_factory=list,
    )


@json_schema_type

@@ -55,12 +68,26 @@ class RegexParserScoringFnParams(BaseModel):
        description="Regex to extract the answer from generated response",
        default_factory=list,
    )
    aggregation_functions: Optional[List[AggregationFunctionType]] = Field(
        description="Aggregation functions to apply to the scores of each row",
        default_factory=list,
    )


@json_schema_type
class BasicScoringFnParams(BaseModel):
    type: Literal[ScoringFnParamsType.basic.value] = ScoringFnParamsType.basic.value
    aggregation_functions: Optional[List[AggregationFunctionType]] = Field(
        description="Aggregation functions to apply to the scores of each row",
        default_factory=list,
    )


ScoringFnParams = Annotated[
    Union[
        LLMAsJudgeScoringFnParams,
        RegexParserScoringFnParams,
        BasicScoringFnParams,
    ],
    Field(discriminator="type"),
]

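Since the three parameter models share a `type` discriminator, pydantic can pick the concrete class during validation. Below is a standalone sketch of that pattern; it mirrors (rather than imports) the definitions above, and any field name not visible in this hunk, such as `parsing_regexes`, is illustrative.

```python
# Standalone sketch of the discriminated-union pattern used above.
from enum import Enum
from typing import List, Literal, Optional, Union

from pydantic import BaseModel, Field, TypeAdapter
from typing_extensions import Annotated


class AggregationFunctionType(Enum):
    average = "average"
    accuracy = "accuracy"


class RegexParserScoringFnParams(BaseModel):
    type: Literal["regex_parser"] = "regex_parser"
    parsing_regexes: Optional[List[str]] = Field(default_factory=list)  # illustrative field name
    aggregation_functions: Optional[List[AggregationFunctionType]] = Field(default_factory=list)


class BasicScoringFnParams(BaseModel):
    type: Literal["basic"] = "basic"
    aggregation_functions: Optional[List[AggregationFunctionType]] = Field(default_factory=list)


ScoringFnParams = Annotated[
    Union[RegexParserScoringFnParams, BasicScoringFnParams],
    Field(discriminator="type"),
]

# The "type" value selects the concrete model during validation:
params = TypeAdapter(ScoringFnParams).validate_python(
    {"type": "regex_parser", "parsing_regexes": [r"Answer:\s*(\w+)"]}
)
assert isinstance(params, RegexParserScoringFnParams)
```
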
@ -10,6 +10,7 @@ from llama_models.schema_utils import json_schema_type, webmethod
|
|||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
|
||||
|
||||
class CommonShieldFields(BaseModel):
|
||||
|
|
@ -38,6 +39,7 @@ class ShieldInput(CommonShieldFields):
|
|||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Shields(Protocol):
|
||||
@webmethod(route="/shields/list", method="GET")
|
||||
async def list_shields(self) -> List[Shield]: ...
|
||||
|
|
|
|||
|
|
@ -6,12 +6,24 @@
|
|||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Literal, Optional, Protocol, runtime_checkable, Union
|
||||
from typing import (
|
||||
Any,
|
||||
Dict,
|
||||
List,
|
||||
Literal,
|
||||
Optional,
|
||||
Protocol,
|
||||
runtime_checkable,
|
||||
Union,
|
||||
)
|
||||
|
||||
from llama_models.schema_utils import json_schema_type, webmethod
|
||||
from pydantic import BaseModel, Field
|
||||
from typing_extensions import Annotated
|
||||
|
||||
# Add this constant near the top of the file, after the imports
|
||||
DEFAULT_TTL_DAYS = 7
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class SpanStatus(Enum):
|
||||
|
|
@@ -29,6 +41,11 @@ class Span(BaseModel):
    end_time: Optional[datetime] = None
    attributes: Optional[Dict[str, Any]] = Field(default_factory=dict)

    def set_attribute(self, key: str, value: Any):
        if self.attributes is None:
            self.attributes = {}
        self.attributes[key] = value


@json_schema_type
class Trace(BaseModel):

@@ -123,10 +140,73 @@ Event = Annotated[
]


@json_schema_type
class EvalTrace(BaseModel):
    session_id: str
    step: str
    input: str
    output: str
    expected_output: str


@json_schema_type
class SpanWithChildren(Span):
    children: List["SpanWithChildren"] = Field(default_factory=list)
    status: Optional[SpanStatus] = None


@json_schema_type
class QueryConditionOp(Enum):
    EQ = "eq"
    NE = "ne"
    GT = "gt"
    LT = "lt"


@json_schema_type
class QueryCondition(BaseModel):
    key: str
    op: QueryConditionOp
    value: Any


@runtime_checkable
class Telemetry(Protocol):
    @webmethod(route="/telemetry/log-event")
    async def log_event(self, event: Event) -> None: ...
    async def log_event(
        self, event: Event, ttl_seconds: int = DEFAULT_TTL_DAYS * 86400
    ) -> None: ...

    @webmethod(route="/telemetry/get-trace", method="GET")
    async def get_trace(self, trace_id: str) -> Trace: ...
    @webmethod(route="/telemetry/query-traces", method="POST")
    async def query_traces(
        self,
        attribute_filters: Optional[List[QueryCondition]] = None,
        limit: Optional[int] = 100,
        offset: Optional[int] = 0,
        order_by: Optional[List[str]] = None,
    ) -> List[Trace]: ...

    @webmethod(route="/telemetry/get-span-tree", method="POST")
    async def get_span_tree(
        self,
        span_id: str,
        attributes_to_return: Optional[List[str]] = None,
        max_depth: Optional[int] = None,
    ) -> SpanWithChildren: ...

    @webmethod(route="/telemetry/query-spans", method="POST")
    async def query_spans(
        self,
        attribute_filters: List[QueryCondition],
        attributes_to_return: List[str],
        max_depth: Optional[int] = None,
    ) -> List[Span]: ...

    @webmethod(route="/telemetry/save-spans-to-dataset", method="POST")
    async def save_spans_to_dataset(
        self,
        attribute_filters: List[QueryCondition],
        attributes_to_save: List[str],
        dataset_id: str,
        max_depth: Optional[int] = None,
    ) -> None: ...

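A hedged sketch of how these new query methods compose: filter on a span attribute, walk a span tree, and export attributes into a dataset for later scoring. It assumes the types above are importable from `llama_stack.apis.telemetry`, that `telemetry` is some implementation of the protocol (for example the meta-reference TelemetryAdapter wired up elsewhere in this commit), and that `Trace` exposes a `root_span_id` field, which this hunk does not show.

```python
# Hypothetical usage sketch of the expanded Telemetry API.
from llama_stack.apis.telemetry import QueryCondition, QueryConditionOp


async def export_session_spans(telemetry, session_id: str, dataset_id: str) -> None:
    filters = [QueryCondition(key="session_id", op=QueryConditionOp.EQ, value=session_id)]

    # Fetch up to 10 matching traces; ordering semantics are not specified in this hunk.
    traces = await telemetry.query_traces(attribute_filters=filters, limit=10)
    for trace in traces:
        # root_span_id is an assumption about the Trace model.
        tree = await telemetry.get_span_tree(span_id=trace.root_span_id, max_depth=2)
        print(len(tree.children), tree.status)

    # Persist selected span attributes as dataset rows for later scoring.
    await telemetry.save_spans_to_dataset(
        attribute_filters=filters,
        attributes_to_save=["input", "output"],
        dataset_id=dataset_id,
    )
```
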
@@ -51,7 +51,7 @@ class StackBuild(Subcommand):
            "--config",
            type=str,
            default=None,
            help="Path to a config file to use for the build. You can find example configs in llama_stack/distribution/example_configs. If this argument is not provided, you will be prompted to enter information interactively",
            help="Path to a config file to use for the build. You can find example configs in llama_stack/distribution/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
        )

        self.parser.add_argument(

@@ -73,7 +73,7 @@ class StackBuild(Subcommand):
            "--image-type",
            type=str,
            help="Image Type to use for the build. This can be either conda or docker. If not specified, will use the image type from the template config.",
            choices=["conda", "docker"],
            choices=["conda", "docker", "venv"],
            default="conda",
        )

@@ -124,8 +124,8 @@ class StackBuild(Subcommand):
            image_type = prompt(
                "> Enter the image type you want your Llama Stack to be built as (docker or conda): ",
                validator=Validator.from_callable(
                    lambda x: x in ["docker", "conda"],
                    error_message="Invalid image type, please enter conda or docker",
                    lambda x: x in ["docker", "conda", "venv"],
                    error_message="Invalid image type, please enter conda or docker or venv",
                ),
                default="conda",
            )

@ -261,7 +261,6 @@ class StackBuild(Subcommand):
|
|||
) -> None:
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
import yaml
|
||||
from termcolor import cprint
|
||||
|
|
@ -291,20 +290,8 @@ class StackBuild(Subcommand):
|
|||
run_config_file = build_dir / f"{build_config.name}-run.yaml"
|
||||
shutil.copy(template_path, run_config_file)
|
||||
|
||||
with open(template_path, "r") as f:
|
||||
yaml_content = f.read()
|
||||
|
||||
# Find all ${env.VARIABLE} patterns
|
||||
env_vars = set(re.findall(r"\${env\.([A-Za-z0-9_]+)}", yaml_content))
|
||||
cprint("Build Successful! Next steps: ", color="green")
|
||||
cprint(
|
||||
f" 1. Set the environment variables: {list(env_vars)}",
|
||||
color="green",
|
||||
)
|
||||
cprint(
|
||||
f" 2. Run: `llama stack run {template_name}`",
|
||||
color="green",
|
||||
)
|
||||
cprint("Build Successful!", color="green")
|
||||
else:
|
||||
self._generate_run_config(build_config, build_dir)
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ from typing import List
|
|||
|
||||
import pkg_resources
|
||||
from pydantic import BaseModel
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.distribution.utils.exec import run_with_pty
|
||||
|
||||
|
|
@ -37,6 +38,7 @@ SERVER_DEPENDENCIES = [
|
|||
class ImageType(Enum):
|
||||
docker = "docker"
|
||||
conda = "conda"
|
||||
venv = "venv"
|
||||
|
||||
|
||||
class ApiInput(BaseModel):
|
||||
|
|
@ -45,7 +47,7 @@ class ApiInput(BaseModel):
|
|||
|
||||
|
||||
def get_provider_dependencies(
|
||||
config_providers: Dict[str, List[Provider]]
|
||||
config_providers: Dict[str, List[Provider]],
|
||||
) -> tuple[list[str], list[str]]:
|
||||
"""Get normal and special dependencies from provider configuration."""
|
||||
all_providers = get_provider_registry()
|
||||
|
|
@ -90,11 +92,12 @@ def get_provider_dependencies(
|
|||
def print_pip_install_help(providers: Dict[str, List[Provider]]):
|
||||
normal_deps, special_deps = get_provider_dependencies(providers)
|
||||
|
||||
print(
|
||||
f"Please install needed dependencies using the following commands:\n\n\tpip install {' '.join(normal_deps)}"
|
||||
cprint(
|
||||
f"Please install needed dependencies using the following commands:\n\npip install {' '.join(normal_deps)}",
|
||||
"yellow",
|
||||
)
|
||||
for special_dep in special_deps:
|
||||
log.info(f"\tpip install {special_dep}")
|
||||
cprint(f"pip install {special_dep}", "yellow")
|
||||
print()
|
||||
|
||||
|
||||
|
|
@ -118,7 +121,7 @@ def build_image(build_config: BuildConfig, build_file_path: Path):
|
|||
str(BUILDS_BASE_DIR / ImageType.docker.value),
|
||||
" ".join(normal_deps),
|
||||
]
|
||||
else:
|
||||
elif build_config.image_type == ImageType.conda.value:
|
||||
script = pkg_resources.resource_filename(
|
||||
"llama_stack", "distribution/build_conda_env.sh"
|
||||
)
|
||||
|
|
@ -128,6 +131,16 @@ def build_image(build_config: BuildConfig, build_file_path: Path):
|
|||
str(build_file_path),
|
||||
" ".join(normal_deps),
|
||||
]
|
||||
elif build_config.image_type == ImageType.venv.value:
|
||||
script = pkg_resources.resource_filename(
|
||||
"llama_stack", "distribution/build_venv.sh"
|
||||
)
|
||||
args = [
|
||||
script,
|
||||
build_config.name,
|
||||
str(build_file_path),
|
||||
" ".join(normal_deps),
|
||||
]
|
||||
|
||||
if special_deps:
|
||||
args.append("#".join(special_deps))
|
||||
|
|
|
|||
llama_stack/distribution/build_venv.sh (new executable file, 105 lines)
|
|
@ -0,0 +1,105 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
# TODO: combine this with build_conda_env.sh since it is almost identical
|
||||
# the only difference is that we don't do any conda-specific setup
|
||||
|
||||
LLAMA_MODELS_DIR=${LLAMA_MODELS_DIR:-}
|
||||
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
|
||||
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
|
||||
|
||||
if [ -n "$LLAMA_STACK_DIR" ]; then
|
||||
echo "Using llama-stack-dir=$LLAMA_STACK_DIR"
|
||||
fi
|
||||
if [ -n "$LLAMA_MODELS_DIR" ]; then
|
||||
echo "Using llama-models-dir=$LLAMA_MODELS_DIR"
|
||||
fi
|
||||
|
||||
if [ "$#" -lt 3 ]; then
|
||||
echo "Usage: $0 <distribution_type> <build_name> <build_file_path> <pip_dependencies> [<special_pip_deps>]" >&2
|
||||
echo "Example: $0 <distribution_type> mybuild ./my-stack-build.yaml 'numpy pandas scipy'" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
special_pip_deps="$4"
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
build_name="$1"
|
||||
env_name="llamastack-$build_name"
|
||||
build_file_path="$2"
|
||||
pip_dependencies="$3"
|
||||
|
||||
# Define color codes
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# this is set if we actually create a new conda in which case we need to clean up
|
||||
ENVNAME=""
|
||||
|
||||
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
|
||||
source "$SCRIPT_DIR/common.sh"
|
||||
|
||||
run() {
|
||||
local env_name="$1"
|
||||
local pip_dependencies="$2"
|
||||
local special_pip_deps="$3"
|
||||
|
||||
if [ -n "$TEST_PYPI_VERSION" ]; then
|
||||
# these packages are damaged in test-pypi, so install them first
|
||||
pip install fastapi libcst
|
||||
pip install --extra-index-url https://test.pypi.org/simple/ \
|
||||
llama-models==$TEST_PYPI_VERSION llama-stack==$TEST_PYPI_VERSION \
|
||||
$pip_dependencies
|
||||
if [ -n "$special_pip_deps" ]; then
|
||||
IFS='#' read -ra parts <<<"$special_pip_deps"
|
||||
for part in "${parts[@]}"; do
|
||||
echo "$part"
|
||||
pip install $part
|
||||
done
|
||||
fi
|
||||
else
|
||||
# Re-installing llama-stack in the new conda environment
|
||||
if [ -n "$LLAMA_STACK_DIR" ]; then
|
||||
if [ ! -d "$LLAMA_STACK_DIR" ]; then
|
||||
printf "${RED}Warning: LLAMA_STACK_DIR is set but directory does not exist: $LLAMA_STACK_DIR${NC}\n" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
printf "Installing from LLAMA_STACK_DIR: $LLAMA_STACK_DIR\n"
|
||||
pip install --no-cache-dir -e "$LLAMA_STACK_DIR"
|
||||
else
|
||||
pip install --no-cache-dir llama-stack
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMA_MODELS_DIR" ]; then
|
||||
if [ ! -d "$LLAMA_MODELS_DIR" ]; then
|
||||
printf "${RED}Warning: LLAMA_MODELS_DIR is set but directory does not exist: $LLAMA_MODELS_DIR${NC}\n" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
printf "Installing from LLAMA_MODELS_DIR: $LLAMA_MODELS_DIR\n"
|
||||
pip uninstall -y llama-models
|
||||
pip install --no-cache-dir -e "$LLAMA_MODELS_DIR"
|
||||
fi
|
||||
|
||||
# Install pip dependencies
|
||||
printf "Installing pip dependencies\n"
|
||||
pip install $pip_dependencies
|
||||
if [ -n "$special_pip_deps" ]; then
|
||||
IFS='#' read -ra parts <<<"$special_pip_deps"
|
||||
for part in "${parts[@]}"; do
|
||||
echo "$part"
|
||||
pip install $part
|
||||
done
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
run "$env_name" "$pip_dependencies" "$special_pip_deps"
|
||||
|
|
@ -165,5 +165,5 @@ class BuildConfig(BaseModel):
|
|||
)
|
||||
image_type: str = Field(
|
||||
default="conda",
|
||||
description="Type of package to build (conda | container)",
|
||||
description="Type of package to build (conda | docker | venv)",
|
||||
)
|
||||
|
|
|
|||
llama_stack/distribution/library_client.py (new file, 331 lines)
|
|
@ -0,0 +1,331 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
import json
|
||||
import os
|
||||
import queue
|
||||
import threading
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Generator, get_args, get_origin, Optional, Type, TypeVar, Union
|
||||
|
||||
import yaml
|
||||
from llama_stack_client import AsyncLlamaStackClient, LlamaStackClient, NOT_GIVEN
|
||||
from pydantic import BaseModel, TypeAdapter
|
||||
from rich.console import Console
|
||||
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.distribution.build import print_pip_install_help
|
||||
from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
|
||||
from llama_stack.distribution.datatypes import Api
|
||||
from llama_stack.distribution.resolver import ProviderRegistry
|
||||
from llama_stack.distribution.server.endpoints import get_all_api_endpoints
|
||||
from llama_stack.distribution.stack import (
|
||||
construct_stack,
|
||||
get_stack_run_config_from_template,
|
||||
replace_env_vars,
|
||||
)
|
||||
|
||||
from llama_stack.providers.utils.telemetry.tracing import (
|
||||
end_trace,
|
||||
setup_logger,
|
||||
start_trace,
|
||||
)
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def in_notebook():
|
||||
try:
|
||||
from IPython import get_ipython
|
||||
|
||||
if "IPKernelApp" not in get_ipython().config: # pragma: no cover
|
||||
return False
|
||||
except ImportError:
|
||||
return False
|
||||
except AttributeError:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def stream_across_asyncio_run_boundary(
|
||||
async_gen_maker,
|
||||
pool_executor: ThreadPoolExecutor,
|
||||
) -> Generator[T, None, None]:
|
||||
result_queue = queue.Queue()
|
||||
stop_event = threading.Event()
|
||||
|
||||
async def consumer():
|
||||
# make sure we make the generator in the event loop context
|
||||
gen = await async_gen_maker()
|
||||
try:
|
||||
async for item in gen:
|
||||
result_queue.put(item)
|
||||
except Exception as e:
|
||||
print(f"Error in generator {e}")
|
||||
result_queue.put(e)
|
||||
except asyncio.CancelledError:
|
||||
return
|
||||
finally:
|
||||
result_queue.put(StopIteration)
|
||||
stop_event.set()
|
||||
|
||||
def run_async():
|
||||
# Run our own loop to avoid double async generator cleanup which is done
|
||||
# by asyncio.run()
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
task = loop.create_task(consumer())
|
||||
loop.run_until_complete(task)
|
||||
finally:
|
||||
# Handle pending tasks like a generator's athrow()
|
||||
pending = asyncio.all_tasks(loop)
|
||||
if pending:
|
||||
loop.run_until_complete(
|
||||
asyncio.gather(*pending, return_exceptions=True)
|
||||
)
|
||||
loop.close()
|
||||
|
||||
future = pool_executor.submit(run_async)
|
||||
|
||||
try:
|
||||
# yield results as they come in
|
||||
while not stop_event.is_set() or not result_queue.empty():
|
||||
try:
|
||||
item = result_queue.get(timeout=0.1)
|
||||
if item is StopIteration:
|
||||
break
|
||||
if isinstance(item, Exception):
|
||||
raise item
|
||||
yield item
|
||||
except queue.Empty:
|
||||
continue
|
||||
finally:
|
||||
future.result()
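The `stream_across_asyncio_run_boundary` helper above bridges an async generator into a plain synchronous generator by consuming it on a worker thread and handing items back through a queue. Below is a standalone, simplified sketch of the same idea, with no llama_stack imports and error handling trimmed.

```python
# Standalone illustration of the thread + queue bridge used above.
import asyncio
import queue
import threading
from concurrent.futures import ThreadPoolExecutor
from typing import Generator


async def ticker(n: int):
    for i in range(n):
        await asyncio.sleep(0.01)
        yield i


def run_sync(async_gen_maker, pool: ThreadPoolExecutor) -> Generator[int, None, None]:
    results: queue.Queue = queue.Queue()
    done = threading.Event()

    async def consume():
        try:
            async for item in async_gen_maker():
                results.put(item)
        finally:
            results.put(StopIteration)  # sentinel so the consumer can stop
            done.set()

    future = pool.submit(lambda: asyncio.run(consume()))
    try:
        while not done.is_set() or not results.empty():
            try:
                item = results.get(timeout=0.1)
            except queue.Empty:
                continue
            if item is StopIteration:
                break
            yield item
    finally:
        future.result()  # surface any exception from the worker thread


with ThreadPoolExecutor(max_workers=1) as pool:
    print(list(run_sync(lambda: ticker(3), pool)))  # [0, 1, 2]
```
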
|
||||
|
||||
|
||||
def convert_pydantic_to_json_value(value: Any, cast_to: Type) -> dict:
|
||||
if isinstance(value, Enum):
|
||||
return value.value
|
||||
elif isinstance(value, list):
|
||||
return [convert_pydantic_to_json_value(item, cast_to) for item in value]
|
||||
elif isinstance(value, dict):
|
||||
return {k: convert_pydantic_to_json_value(v, cast_to) for k, v in value.items()}
|
||||
elif isinstance(value, BaseModel):
|
||||
# This is quite hacky and we should figure out how to use stuff from
|
||||
# generated client-sdk code (using ApiResponse.parse() essentially)
|
||||
value_dict = json.loads(value.model_dump_json())
|
||||
|
||||
origin = get_origin(cast_to)
|
||||
if origin is Union:
|
||||
args = get_args(cast_to)
|
||||
for arg in args:
|
||||
arg_name = arg.__name__.split(".")[-1]
|
||||
value_name = value.__class__.__name__.split(".")[-1]
|
||||
if arg_name == value_name:
|
||||
return arg(**value_dict)
|
||||
|
||||
# assume we have the correct association between the server-side type and the client-side type
|
||||
return cast_to(**value_dict)
|
||||
|
||||
return value
|
||||
|
||||
|
||||
def convert_to_pydantic(annotation: Any, value: Any) -> Any:
|
||||
if isinstance(annotation, type) and annotation in {str, int, float, bool}:
|
||||
return value
|
||||
|
||||
origin = get_origin(annotation)
|
||||
if origin is list:
|
||||
item_type = get_args(annotation)[0]
|
||||
try:
|
||||
return [convert_to_pydantic(item_type, item) for item in value]
|
||||
except Exception:
|
||||
print(f"Error converting list {value}")
|
||||
return value
|
||||
|
||||
elif origin is dict:
|
||||
key_type, val_type = get_args(annotation)
|
||||
try:
|
||||
return {k: convert_to_pydantic(val_type, v) for k, v in value.items()}
|
||||
except Exception:
|
||||
print(f"Error converting dict {value}")
|
||||
return value
|
||||
|
||||
try:
|
||||
# Handle Pydantic models and discriminated unions
|
||||
return TypeAdapter(annotation).validate_python(value)
|
||||
except Exception as e:
|
||||
cprint(
|
||||
f"Warning: direct client failed to convert parameter {value} into {annotation}: {e}",
|
||||
"yellow",
|
||||
)
|
||||
return value
|
||||
|
||||
|
||||
class LlamaStackAsLibraryClient(LlamaStackClient):
|
||||
def __init__(
|
||||
self,
|
||||
config_path_or_template_name: str,
|
||||
custom_provider_registry: Optional[ProviderRegistry] = None,
|
||||
):
|
||||
super().__init__()
|
||||
self.async_client = AsyncLlamaStackAsLibraryClient(
|
||||
config_path_or_template_name, custom_provider_registry
|
||||
)
|
||||
self.pool_executor = ThreadPoolExecutor(max_workers=4)
|
||||
|
||||
def initialize(self):
|
||||
if in_notebook():
|
||||
import nest_asyncio
|
||||
|
||||
nest_asyncio.apply()
|
||||
|
||||
return asyncio.run(self.async_client.initialize())
|
||||
|
||||
def request(self, *args, **kwargs):
|
||||
if kwargs.get("stream"):
|
||||
return stream_across_asyncio_run_boundary(
|
||||
lambda: self.async_client.request(*args, **kwargs),
|
||||
self.pool_executor,
|
||||
)
|
||||
else:
|
||||
return asyncio.run(self.async_client.request(*args, **kwargs))
|
||||
|
||||
|
||||
class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
||||
def __init__(
|
||||
self,
|
||||
config_path_or_template_name: str,
|
||||
custom_provider_registry: Optional[ProviderRegistry] = None,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
# when using the library client, we should not log to console since many
|
||||
# of our logs are intended for server-side usage
|
||||
os.environ["TELEMETRY_SINKS"] = "sqlite"
|
||||
|
||||
if config_path_or_template_name.endswith(".yaml"):
|
||||
config_path = Path(config_path_or_template_name)
|
||||
if not config_path.exists():
|
||||
raise ValueError(f"Config file {config_path} does not exist")
|
||||
config_dict = replace_env_vars(yaml.safe_load(config_path.read_text()))
|
||||
config = parse_and_maybe_upgrade_config(config_dict)
|
||||
else:
|
||||
# template
|
||||
config = get_stack_run_config_from_template(config_path_or_template_name)
|
||||
|
||||
self.config_path_or_template_name = config_path_or_template_name
|
||||
self.config = config
|
||||
self.custom_provider_registry = custom_provider_registry
|
||||
|
||||
async def initialize(self):
|
||||
try:
|
||||
self.impls = await construct_stack(
|
||||
self.config, self.custom_provider_registry
|
||||
)
|
||||
except ModuleNotFoundError as _e:
|
||||
cprint(
|
||||
"Using llama-stack as a library requires installing dependencies depending on the template (providers) you choose.\n",
|
||||
"yellow",
|
||||
)
|
||||
if self.config_path_or_template_name.endswith(".yaml"):
|
||||
print_pip_install_help(self.config.providers)
|
||||
else:
|
||||
prefix = "!" if in_notebook() else ""
|
||||
cprint(
|
||||
f"Please run:\n\n{prefix}llama stack build --template {self.config_path_or_template_name} --image-type venv\n\n",
|
||||
"yellow",
|
||||
)
|
||||
return False
|
||||
|
||||
if Api.telemetry in self.impls:
|
||||
setup_logger(self.impls[Api.telemetry])
|
||||
|
||||
console = Console()
|
||||
console.print(f"Using config [blue]{self.config_path_or_template_name}[/blue]:")
|
||||
console.print(yaml.dump(self.config.model_dump(), indent=2))
|
||||
|
||||
endpoints = get_all_api_endpoints()
|
||||
endpoint_impls = {}
|
||||
for api, api_endpoints in endpoints.items():
|
||||
for endpoint in api_endpoints:
|
||||
impl = self.impls[api]
|
||||
func = getattr(impl, endpoint.name)
|
||||
endpoint_impls[endpoint.route] = func
|
||||
|
||||
self.endpoint_impls = endpoint_impls
|
||||
return True
|
||||
|
||||
async def request(
|
||||
self,
|
||||
cast_to: Any,
|
||||
options: Any,
|
||||
*,
|
||||
stream=False,
|
||||
stream_cls=None,
|
||||
):
|
||||
if not self.endpoint_impls:
|
||||
raise ValueError("Client not initialized")
|
||||
|
||||
params = options.params or {}
|
||||
params |= options.json_data or {}
|
||||
if stream:
|
||||
return self._call_streaming(options.url, params, cast_to)
|
||||
else:
|
||||
return await self._call_non_streaming(options.url, params, cast_to)
|
||||
|
||||
async def _call_non_streaming(
|
||||
self, path: str, body: dict = None, cast_to: Any = None
|
||||
):
|
||||
await start_trace(path, {"__location__": "library_client"})
|
||||
try:
|
||||
func = self.endpoint_impls.get(path)
|
||||
if not func:
|
||||
raise ValueError(f"No endpoint found for {path}")
|
||||
|
||||
body = self._convert_body(path, body)
|
||||
return convert_pydantic_to_json_value(await func(**body), cast_to)
|
||||
finally:
|
||||
await end_trace()
|
||||
|
||||
async def _call_streaming(self, path: str, body: dict = None, cast_to: Any = None):
|
||||
await start_trace(path, {"__location__": "library_client"})
|
||||
try:
|
||||
func = self.endpoint_impls.get(path)
|
||||
if not func:
|
||||
raise ValueError(f"No endpoint found for {path}")
|
||||
|
||||
body = self._convert_body(path, body)
|
||||
async for chunk in await func(**body):
|
||||
yield convert_pydantic_to_json_value(chunk, cast_to)
|
||||
finally:
|
||||
await end_trace()
|
||||
|
||||
def _convert_body(self, path: str, body: Optional[dict] = None) -> dict:
|
||||
if not body:
|
||||
return {}
|
||||
|
||||
func = self.endpoint_impls[path]
|
||||
sig = inspect.signature(func)
|
||||
|
||||
# Strip NOT_GIVENs to use the defaults in signature
|
||||
body = {k: v for k, v in body.items() if v is not NOT_GIVEN}
|
||||
|
||||
# Convert parameters to Pydantic models where needed
|
||||
converted_body = {}
|
||||
for param_name, param in sig.parameters.items():
|
||||
if param_name in body:
|
||||
value = body.get(param_name)
|
||||
converted_body[param_name] = convert_to_pydantic(
|
||||
param.annotation, value
|
||||
)
|
||||
return converted_body
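Putting the pieces together, a hypothetical usage sketch of the library client defined in this file: run a distribution in-process from a template name instead of talking to a server. The "ollama" template and the printed field are placeholders, and the template's provider dependencies must already be installed; otherwise `initialize()` prints the build hint above and returns False.

```python
# Hypothetical sketch: in-process Llama Stack via the new library client.
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("ollama")  # template name is a placeholder
if client.initialize():
    for model in client.models.list():
        print(model.identifier)
```
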
|
||||
|
|
@ -222,6 +222,12 @@ class DatasetIORouter(DatasetIO):
|
|||
filter_condition=filter_condition,
|
||||
)
|
||||
|
||||
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
|
||||
return await self.routing_table.get_provider_impl(dataset_id).append_rows(
|
||||
dataset_id=dataset_id,
|
||||
rows=rows,
|
||||
)
|
||||
|
||||
|
||||
class ScoringRouter(Scoring):
|
||||
def __init__(
|
||||
|
|
|
|||
|
|
@ -57,6 +57,8 @@ async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None:
|
|||
return await p.unregister_memory_bank(obj.identifier)
|
||||
elif api == Api.inference:
|
||||
return await p.unregister_model(obj.identifier)
|
||||
elif api == Api.datasetio:
|
||||
return await p.unregister_dataset(obj.identifier)
|
||||
else:
|
||||
raise ValueError(f"Unregister not supported for {api}")
|
||||
|
||||
|
|
@ -354,6 +356,12 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
|
|||
)
|
||||
await self.register_object(dataset)
|
||||
|
||||
async def unregister_dataset(self, dataset_id: str) -> None:
|
||||
dataset = await self.get_dataset(dataset_id)
|
||||
if dataset is None:
|
||||
raise ValueError(f"Dataset {dataset_id} not found")
|
||||
await self.unregister_object(dataset)
|
||||
|
||||
|
||||
class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions):
|
||||
async def list_scoring_functions(self) -> List[ScoringFn]:
|
||||
|
|
|
|||
|
|
@ -43,9 +43,9 @@ from llama_stack.distribution.stack import (
|
|||
replace_env_vars,
|
||||
validate_env_pair,
|
||||
)
|
||||
from llama_stack.providers.inline.meta_reference.telemetry.console import (
|
||||
ConsoleConfig,
|
||||
ConsoleTelemetryImpl,
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.telemetry import (
|
||||
TelemetryAdapter,
|
||||
)
|
||||
|
||||
from .endpoints import get_all_api_endpoints
|
||||
|
|
@ -217,7 +217,7 @@ class TracingMiddleware:
|
|||
|
||||
async def __call__(self, scope, receive, send):
|
||||
path = scope["path"]
|
||||
await start_trace(path, {"location": "server"})
|
||||
await start_trace(path, {"__location__": "server"})
|
||||
try:
|
||||
return await self.app(scope, receive, send)
|
||||
finally:
|
||||
|
|
@ -290,7 +290,7 @@ def main():
|
|||
if Api.telemetry in impls:
|
||||
setup_logger(impls[Api.telemetry])
|
||||
else:
|
||||
setup_logger(ConsoleTelemetryImpl(ConsoleConfig()))
|
||||
setup_logger(TelemetryAdapter(TelemetryConfig()))
|
||||
|
||||
all_endpoints = get_all_api_endpoints()
|
||||
|
||||
|
|
|
|||
llama_stack/distribution/tests/library_client_test.py (new file, 128 lines)
|
|
@ -0,0 +1,128 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
|
||||
from llama_stack_client.lib.agents.agent import Agent
|
||||
from llama_stack_client.lib.agents.event_logger import EventLogger as AgentEventLogger
|
||||
from llama_stack_client.lib.inference.event_logger import EventLogger
|
||||
from llama_stack_client.types import Attachment, UserMessage
|
||||
from llama_stack_client.types.agent_create_params import AgentConfig
|
||||
|
||||
|
||||
def main(config_path: str):
|
||||
client = LlamaStackAsLibraryClient(config_path)
|
||||
if not client.initialize():
|
||||
return
|
||||
|
||||
models = client.models.list()
|
||||
print("\nModels:")
|
||||
for model in models:
|
||||
print(model)
|
||||
|
||||
if not models:
|
||||
print("No models found, skipping chat completion test")
|
||||
return
|
||||
|
||||
model_id = models[0].identifier
|
||||
response = client.inference.chat_completion(
|
||||
messages=[UserMessage(content="What is the capital of France?", role="user")],
|
||||
model_id=model_id,
|
||||
stream=False,
|
||||
)
|
||||
print("\nChat completion response (non-stream):")
|
||||
print(response)
|
||||
|
||||
response = client.inference.chat_completion(
|
||||
messages=[UserMessage(content="What is the capital of France?", role="user")],
|
||||
model_id=model_id,
|
||||
stream=True,
|
||||
)
|
||||
|
||||
print("\nChat completion response (stream):")
|
||||
for log in EventLogger().log(response):
|
||||
log.print()
|
||||
|
||||
print("\nAgent test:")
|
||||
agent_config = AgentConfig(
|
||||
model=model_id,
|
||||
instructions="You are a helpful assistant",
|
||||
sampling_params={
|
||||
"strategy": "greedy",
|
||||
"temperature": 1.0,
|
||||
"top_p": 0.9,
|
||||
},
|
||||
tools=(
|
||||
[
|
||||
{
|
||||
"type": "brave_search",
|
||||
"engine": "brave",
|
||||
"api_key": os.getenv("BRAVE_SEARCH_API_KEY"),
|
||||
}
|
||||
]
|
||||
if os.getenv("BRAVE_SEARCH_API_KEY")
|
||||
else []
|
||||
)
|
||||
+ (
|
||||
[
|
||||
{
|
||||
"type": "code_interpreter",
|
||||
}
|
||||
]
|
||||
),
|
||||
tool_choice="required",
|
||||
input_shields=[],
|
||||
output_shields=[],
|
||||
enable_session_persistence=False,
|
||||
)
|
||||
agent = Agent(client, agent_config)
|
||||
user_prompts = [
|
||||
"Hello",
|
||||
"Which players played in the winning team of the NBA western conference semifinals of 2024, please use tools",
|
||||
]
|
||||
user_prompts = [
|
||||
(
|
||||
"Here is a csv, can you describe it ?",
|
||||
[
|
||||
Attachment(
|
||||
content="https://raw.githubusercontent.com/meta-llama/llama-stack-apps/main/examples/resources/inflation.csv",
|
||||
mime_type="test/csv",
|
||||
)
|
||||
],
|
||||
),
|
||||
("Which year ended with the highest inflation ?", None),
|
||||
(
|
||||
"What macro economic situations that led to such high inflation in that period?",
|
||||
None,
|
||||
),
|
||||
("Plot average yearly inflation as a time series", None),
|
||||
]
|
||||
|
||||
session_id = agent.create_session("test-session")
|
||||
|
||||
for prompt, attachments in user_prompts:
|
||||
response = agent.create_turn(
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": prompt,
|
||||
}
|
||||
],
|
||||
attachments=attachments,
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
for log in AgentEventLogger().log(response):
|
||||
log.print()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("config_path", help="Path to the config YAML file")
|
||||
args = parser.parse_args()
|
||||
main(args.config_path)
@@ -1,10 +1,41 @@
# Llama Stack UI
# (Experimental) Llama Stack UI

[!NOTE] This is a work in progress.

## Docker Setup

## Running Streamlit App
:warning: This is a work in progress.

## Developer Setup

1. Start up the Llama Stack API server. More details [here](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html).

```
llama stack build --template together --image-type conda

llama stack run together
```

2. (Optional) Register datasets and eval tasks as resources if you want to run pre-configured evaluation flows (e.g. the Evaluations (Generation + Scoring) page).

```bash
$ llama-stack-client datasets register \
  --dataset-id "mmlu" \
  --provider-id "huggingface" \
  --url "https://huggingface.co/datasets/llamastack/evals" \
  --metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \
  --schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": {"type": "string"}}}'
```

```bash
$ llama-stack-client eval_tasks register \
  --eval-task-id meta-reference-mmlu \
  --provider-id meta-reference \
  --dataset-id mmlu \
  --scoring-functions basic::regex_parser_multiple_choice_answer
```

3. Start the Streamlit UI

```bash
cd llama_stack/distribution/ui
pip install -r requirements.txt
streamlit run app.py
```

@ -3,170 +3,54 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import json
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import streamlit as st
|
||||
|
||||
from modules.api import LlamaStackEvaluation
|
||||
|
||||
from modules.utils import process_dataset
|
||||
|
||||
EVALUATION_API = LlamaStackEvaluation()
|
||||
|
||||
|
||||
def main():
|
||||
# Add collapsible sidebar
|
||||
with st.sidebar:
|
||||
# Add collapse button
|
||||
if "sidebar_state" not in st.session_state:
|
||||
st.session_state.sidebar_state = True
|
||||
|
||||
if st.session_state.sidebar_state:
|
||||
st.title("Navigation")
|
||||
page = st.radio(
|
||||
"Select a Page",
|
||||
["Application Evaluation"],
|
||||
index=0,
|
||||
)
|
||||
else:
|
||||
page = "Application Evaluation" # Default page when sidebar is collapsed
|
||||
|
||||
# Main content area
|
||||
st.title("🦙 Llama Stack Evaluations")
|
||||
|
||||
if page == "Application Evaluation":
|
||||
application_evaluation_page()
|
||||
|
||||
|
||||
def application_evaluation_page():
|
||||
# File uploader
|
||||
uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"])
|
||||
|
||||
if uploaded_file is None:
|
||||
st.error("No file uploaded")
|
||||
return
|
||||
|
||||
# Process uploaded file
|
||||
df = process_dataset(uploaded_file)
|
||||
if df is None:
|
||||
st.error("Error processing file")
|
||||
return
|
||||
|
||||
# Display dataset information
|
||||
st.success("Dataset loaded successfully!")
|
||||
|
||||
# Display dataframe preview
|
||||
st.subheader("Dataset Preview")
|
||||
st.dataframe(df)
|
||||
|
||||
# Select Scoring Functions to Run Evaluation On
|
||||
st.subheader("Select Scoring Functions")
|
||||
scoring_functions = EVALUATION_API.list_scoring_functions()
|
||||
scoring_functions = {sf.identifier: sf for sf in scoring_functions}
|
||||
scoring_functions_names = list(scoring_functions.keys())
|
||||
selected_scoring_functions = st.multiselect(
|
||||
"Choose one or more scoring functions",
|
||||
options=scoring_functions_names,
|
||||
help="Choose one or more scoring functions.",
|
||||
# Evaluation pages
|
||||
application_evaluation_page = st.Page(
|
||||
"page/evaluations/app_eval.py",
|
||||
title="Evaluations (Scoring)",
|
||||
icon="📊",
|
||||
default=False,
|
||||
)
|
||||
native_evaluation_page = st.Page(
|
||||
"page/evaluations/native_eval.py",
|
||||
title="Evaluations (Generation + Scoring)",
|
||||
icon="📊",
|
||||
default=False,
|
||||
)
|
||||
|
||||
available_models = EVALUATION_API.list_models()
|
||||
available_models = [m.identifier for m in available_models]
|
||||
# Playground pages
|
||||
chat_page = st.Page(
|
||||
"page/playground/chat.py", title="Chat", icon="💬", default=True
|
||||
)
|
||||
rag_page = st.Page("page/playground/rag.py", title="RAG", icon="💬", default=False)
|
||||
|
||||
scoring_params = {}
|
||||
if selected_scoring_functions:
|
||||
st.write("Selected:")
|
||||
for scoring_fn_id in selected_scoring_functions:
|
||||
scoring_fn = scoring_functions[scoring_fn_id]
|
||||
st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}")
|
||||
new_params = None
|
||||
if scoring_fn.params:
|
||||
new_params = {}
|
||||
for param_name, param_value in scoring_fn.params.to_dict().items():
|
||||
if param_name == "type":
|
||||
new_params[param_name] = param_value
|
||||
continue
|
||||
# Distribution pages
|
||||
resources_page = st.Page(
|
||||
"page/distribution/resources.py", title="Resources", icon="🔍", default=False
|
||||
)
|
||||
provider_page = st.Page(
|
||||
"page/distribution/providers.py",
|
||||
title="API Providers",
|
||||
icon="🔍",
|
||||
default=False,
|
||||
)
|
||||
|
||||
if param_name == "judge_model":
|
||||
value = st.selectbox(
|
||||
f"Select **{param_name}** for {scoring_fn_id}",
|
||||
options=available_models,
|
||||
index=0,
|
||||
key=f"{scoring_fn_id}_{param_name}",
|
||||
)
|
||||
new_params[param_name] = value
|
||||
else:
|
||||
value = st.text_area(
|
||||
f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format",
|
||||
value=json.dumps(param_value, indent=2),
|
||||
height=80,
|
||||
)
|
||||
try:
|
||||
new_params[param_name] = json.loads(value)
|
||||
except json.JSONDecodeError:
|
||||
st.error(
|
||||
f"Invalid JSON for **{param_name}** in {scoring_fn_id}"
|
||||
)
|
||||
|
||||
st.json(new_params)
|
||||
scoring_params[scoring_fn_id] = new_params
|
||||
|
||||
# Add run evaluation button & slider
|
||||
total_rows = len(df)
|
||||
num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows)
|
||||
|
||||
if st.button("Run Evaluation"):
|
||||
progress_text = "Running evaluation..."
|
||||
progress_bar = st.progress(0, text=progress_text)
|
||||
rows = df.to_dict(orient="records")
|
||||
if num_rows < total_rows:
|
||||
rows = rows[:num_rows]
|
||||
|
||||
# Create separate containers for progress text and results
|
||||
progress_text_container = st.empty()
|
||||
results_container = st.empty()
|
||||
output_res = {}
|
||||
for i, r in enumerate(rows):
|
||||
# Update progress
|
||||
progress = i / len(rows)
|
||||
progress_bar.progress(progress, text=progress_text)
|
||||
|
||||
# Run evaluation for current row
|
||||
score_res = EVALUATION_API.run_scoring(
|
||||
r,
|
||||
scoring_function_ids=selected_scoring_functions,
|
||||
scoring_params=scoring_params,
|
||||
)
|
||||
|
||||
for k in r.keys():
|
||||
if k not in output_res:
|
||||
output_res[k] = []
|
||||
output_res[k].append(r[k])
|
||||
|
||||
for fn_id in selected_scoring_functions:
|
||||
if fn_id not in output_res:
|
||||
output_res[fn_id] = []
|
||||
output_res[fn_id].append(score_res.results[fn_id].score_rows[0])
|
||||
|
||||
# Display current row results using separate containers
|
||||
progress_text_container.write(
|
||||
f"Expand to see current processed result ({i+1}/{len(rows)})"
|
||||
)
|
||||
results_container.json(
|
||||
score_res.to_json(),
|
||||
expanded=2,
|
||||
)
|
||||
|
||||
progress_bar.progress(1.0, text="Evaluation complete!")
|
||||
|
||||
# Display results in dataframe
|
||||
if output_res:
|
||||
output_df = pd.DataFrame(output_res)
|
||||
st.subheader("Evaluation Results")
|
||||
st.dataframe(output_df)
|
||||
pg = st.navigation(
|
||||
{
|
||||
"Playground": [
|
||||
chat_page,
|
||||
rag_page,
|
||||
application_evaluation_page,
|
||||
native_evaluation_page,
|
||||
],
|
||||
"Inspect": [provider_page, resources_page],
|
||||
},
|
||||
expanded=False,
|
||||
)
|
||||
pg.run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
@@ -11,7 +11,7 @@ from typing import Optional
from llama_stack_client import LlamaStackClient


class LlamaStackEvaluation:
class LlamaStackApi:
    def __init__(self):
        self.client = LlamaStackClient(
            base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:5000"),

@@ -22,14 +22,6 @@ class LlamaStackEvaluation:
            },
        )

    def list_scoring_functions(self):
        """List all available scoring functions"""
        return self.client.scoring_functions.list()

    def list_models(self):
        """List all available judge models"""
        return self.client.models.list()

    def run_scoring(
        self, row, scoring_function_ids: list[str], scoring_params: Optional[dict]
    ):

@@ -39,3 +31,6 @@ class LlamaStackEvaluation:
        return self.client.scoring.score(
            input_rows=[row], scoring_functions=scoring_params
        )


llama_stack_api = LlamaStackApi()

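A hypothetical sketch of how a page module consumes this singleton; the row columns mirror the dataset schema used elsewhere in this commit, and the scoring-parameter handling follows the pattern in app_eval.py (map each function id to its params, or None).

```python
# Illustrative only: list scoring functions, then score a single row.
from modules.api import llama_stack_api

scoring_fn_ids = [s.identifier for s in llama_stack_api.client.scoring_functions.list()][:1]

row = {"input_query": "What is 2+2?", "generated_answer": "4", "expected_answer": "4"}
score_res = llama_stack_api.run_scoring(
    row,
    scoring_function_ids=scoring_fn_ids,
    scoring_params={fn_id: None for fn_id in scoring_fn_ids},
)
print({fn_id: score_res.results[fn_id].score_rows[0] for fn_id in scoring_fn_ids})
```
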
@@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import base64
import os

import pandas as pd

@@ -29,3 +30,13 @@ def process_dataset(file):
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        return None


def data_url_from_file(file) -> str:
    file_content = file.getvalue()
    base64_content = base64.b64encode(file_content).decode("utf-8")
    mime_type = file.type

    data_url = f"data:{mime_type};base64,{base64_content}"

    return data_url

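A short, hypothetical Streamlit snippet showing where `data_url_from_file` fits: encode an uploaded file as a data URL so it can be passed along as an attachment or memory-bank document (that downstream call is not shown here).

```python
# Illustrative Streamlit usage of the helper above.
import streamlit as st
from modules.utils import data_url_from_file

uploaded = st.file_uploader("Upload a document", type=["txt", "pdf"])
if uploaded:
    url = data_url_from_file(uploaded)
    st.write(f"Encoded {uploaded.name} as a {len(url)} character data URL")
```
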
llama_stack/distribution/ui/page/__init__.py (new file, 5 lines)
|
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
llama_stack/distribution/ui/page/distribution/datasets.py (new file, 19 lines)
|
|
@ -0,0 +1,19 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import streamlit as st
|
||||
from modules.api import llama_stack_api
|
||||
|
||||
|
||||
def datasets():
|
||||
st.header("Datasets")
|
||||
|
||||
datasets_info = {
|
||||
d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()
|
||||
}
|
||||
|
||||
selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
|
||||
st.json(datasets_info[selected_dataset], expanded=True)
|
||||
llama_stack/distribution/ui/page/distribution/eval_tasks.py (new file, 22 lines)
|
|
@ -0,0 +1,22 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import streamlit as st
|
||||
from modules.api import llama_stack_api
|
||||
|
||||
|
||||
def eval_tasks():
|
||||
# Eval Tasks Section
|
||||
st.header("Eval Tasks")
|
||||
|
||||
eval_tasks_info = {
|
||||
d.identifier: d.to_dict() for d in llama_stack_api.client.eval_tasks.list()
|
||||
}
|
||||
|
||||
selected_eval_task = st.selectbox(
|
||||
"Select an eval task", list(eval_tasks_info.keys()), key="eval_task_inspect"
|
||||
)
|
||||
st.json(eval_tasks_info[selected_eval_task], expanded=True)
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import streamlit as st
|
||||
from modules.api import llama_stack_api
|
||||
|
||||
|
||||
def memory_banks():
|
||||
st.header("Memory Banks")
|
||||
memory_banks_info = {
|
||||
m.identifier: m.to_dict() for m in llama_stack_api.client.memory_banks.list()
|
||||
}
|
||||
|
||||
if len(memory_banks_info) > 0:
|
||||
selected_memory_bank = st.selectbox(
|
||||
"Select a memory bank", list(memory_banks_info.keys())
|
||||
)
|
||||
st.json(memory_banks_info[selected_memory_bank])
|
||||
else:
|
||||
st.info("No memory banks found")
|
||||
llama_stack/distribution/ui/page/distribution/models.py (new file, 19 lines)
|
|
@ -0,0 +1,19 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import streamlit as st
|
||||
from modules.api import llama_stack_api
|
||||
|
||||
|
||||
def models():
|
||||
# Models Section
|
||||
st.header("Models")
|
||||
models_info = {
|
||||
m.identifier: m.to_dict() for m in llama_stack_api.client.models.list()
|
||||
}
|
||||
|
||||
selected_model = st.selectbox("Select a model", list(models_info.keys()))
|
||||
st.json(models_info[selected_model])
|
||||
llama_stack/distribution/ui/page/distribution/providers.py (new file, 20 lines)
|
|
@ -0,0 +1,20 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import streamlit as st
|
||||
from modules.api import llama_stack_api
|
||||
|
||||
|
||||
def providers():
|
||||
st.header("🔍 API Providers")
|
||||
apis_providers_info = llama_stack_api.client.providers.list()
|
||||
# selected_api = st.selectbox("Select an API", list(apis_providers_info.keys()))
|
||||
for api in apis_providers_info.keys():
|
||||
st.markdown(f"###### {api}")
|
||||
st.dataframe([p.to_dict() for p in apis_providers_info[api]], width=500)
|
||||
|
||||
|
||||
providers()
|
||||
llama_stack/distribution/ui/page/distribution/resources.py (new file, 52 lines)
|
|
@ -0,0 +1,52 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from page.distribution.datasets import datasets
|
||||
from page.distribution.eval_tasks import eval_tasks
|
||||
from page.distribution.memory_banks import memory_banks
|
||||
from page.distribution.models import models
|
||||
from page.distribution.scoring_functions import scoring_functions
|
||||
from page.distribution.shields import shields
|
||||
|
||||
from streamlit_option_menu import option_menu
|
||||
|
||||
|
||||
def resources_page():
|
||||
options = [
|
||||
"Models",
|
||||
"Memory Banks",
|
||||
"Shields",
|
||||
"Scoring Functions",
|
||||
"Datasets",
|
||||
"Eval Tasks",
|
||||
]
|
||||
icons = ["magic", "memory", "shield", "file-bar-graph", "database", "list-task"]
|
||||
selected_resource = option_menu(
|
||||
None,
|
||||
options,
|
||||
icons=icons,
|
||||
orientation="horizontal",
|
||||
styles={
|
||||
"nav-link": {
|
||||
"font-size": "12px",
|
||||
},
|
||||
},
|
||||
)
|
||||
if selected_resource == "Eval Tasks":
|
||||
eval_tasks()
|
||||
elif selected_resource == "Memory Banks":
|
||||
memory_banks()
|
||||
elif selected_resource == "Datasets":
|
||||
datasets()
|
||||
elif selected_resource == "Models":
|
||||
models()
|
||||
elif selected_resource == "Scoring Functions":
|
||||
scoring_functions()
|
||||
elif selected_resource == "Shields":
|
||||
shields()
|
||||
|
||||
|
||||
resources_page()
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import streamlit as st
|
||||
from modules.api import llama_stack_api
|
||||
|
||||
|
||||
def scoring_functions():
|
||||
st.header("Scoring Functions")
|
||||
|
||||
scoring_functions_info = {
|
||||
s.identifier: s.to_dict()
|
||||
for s in llama_stack_api.client.scoring_functions.list()
|
||||
}
|
||||
|
||||
selected_scoring_function = st.selectbox(
|
||||
"Select a scoring function", list(scoring_functions_info.keys())
|
||||
)
|
||||
st.json(scoring_functions_info[selected_scoring_function], expanded=True)
|
||||
20
llama_stack/distribution/ui/page/distribution/shields.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import streamlit as st
|
||||
from modules.api import llama_stack_api
|
||||
|
||||
|
||||
def shields():
|
||||
# Shields Section
|
||||
st.header("Shields")
|
||||
|
||||
shields_info = {
|
||||
s.identifier: s.to_dict() for s in llama_stack_api.client.shields.list()
|
||||
}
|
||||
|
||||
selected_shield = st.selectbox("Select a shield", list(shields_info.keys()))
|
||||
st.json(shields_info[selected_shield])
|
||||
5
llama_stack/distribution/ui/page/evaluations/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
148
llama_stack/distribution/ui/page/evaluations/app_eval.py
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import json
|
||||
|
||||
import pandas as pd
|
||||
import streamlit as st
|
||||
|
||||
from modules.api import llama_stack_api
|
||||
from modules.utils import process_dataset
|
||||
|
||||
|
||||
def application_evaluation_page():
|
||||
|
||||
st.set_page_config(page_title="Evaluations (Scoring)", page_icon="🦙")
|
||||
st.title("📊 Evaluations (Scoring)")
|
||||
|
||||
# File uploader
|
||||
uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"])
|
||||
|
||||
if uploaded_file is None:
|
||||
st.error("No file uploaded")
|
||||
return
|
||||
|
||||
# Process uploaded file
|
||||
df = process_dataset(uploaded_file)
|
||||
if df is None:
|
||||
st.error("Error processing file")
|
||||
return
|
||||
|
||||
# Display dataset information
|
||||
st.success("Dataset loaded successfully!")
|
||||
|
||||
# Display dataframe preview
|
||||
st.subheader("Dataset Preview")
|
||||
st.dataframe(df)
|
||||
|
||||
# Select Scoring Functions to Run Evaluation On
|
||||
st.subheader("Select Scoring Functions")
|
||||
scoring_functions = llama_stack_api.client.scoring_functions.list()
|
||||
scoring_functions = {sf.identifier: sf for sf in scoring_functions}
|
||||
scoring_functions_names = list(scoring_functions.keys())
|
||||
selected_scoring_functions = st.multiselect(
|
||||
"Choose one or more scoring functions",
|
||||
options=scoring_functions_names,
|
||||
help="Choose one or more scoring functions.",
|
||||
)
|
||||
|
||||
available_models = llama_stack_api.client.models.list()
|
||||
available_models = [m.identifier for m in available_models]
|
||||
|
||||
scoring_params = {}
|
||||
if selected_scoring_functions:
|
||||
st.write("Selected:")
|
||||
for scoring_fn_id in selected_scoring_functions:
|
||||
scoring_fn = scoring_functions[scoring_fn_id]
|
||||
st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}")
|
||||
new_params = None
|
||||
if scoring_fn.params:
|
||||
new_params = {}
|
||||
for param_name, param_value in scoring_fn.params.to_dict().items():
|
||||
if param_name == "type":
|
||||
new_params[param_name] = param_value
|
||||
continue
|
||||
|
||||
if param_name == "judge_model":
|
||||
value = st.selectbox(
|
||||
f"Select **{param_name}** for {scoring_fn_id}",
|
||||
options=available_models,
|
||||
index=0,
|
||||
key=f"{scoring_fn_id}_{param_name}",
|
||||
)
|
||||
new_params[param_name] = value
|
||||
else:
|
||||
value = st.text_area(
|
||||
f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format",
|
||||
value=json.dumps(param_value, indent=2),
|
||||
height=80,
|
||||
)
|
||||
try:
|
||||
new_params[param_name] = json.loads(value)
|
||||
except json.JSONDecodeError:
|
||||
st.error(
|
||||
f"Invalid JSON for **{param_name}** in {scoring_fn_id}"
|
||||
)
|
||||
|
||||
st.json(new_params)
|
||||
scoring_params[scoring_fn_id] = new_params
|
||||
|
||||
# Add run evaluation button & slider
|
||||
total_rows = len(df)
|
||||
num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows)
|
||||
|
||||
if st.button("Run Evaluation"):
|
||||
progress_text = "Running evaluation..."
|
||||
progress_bar = st.progress(0, text=progress_text)
|
||||
rows = df.to_dict(orient="records")
|
||||
if num_rows < total_rows:
|
||||
rows = rows[:num_rows]
|
||||
|
||||
# Create separate containers for progress text and results
|
||||
progress_text_container = st.empty()
|
||||
results_container = st.empty()
|
||||
output_res = {}
|
||||
for i, r in enumerate(rows):
|
||||
# Update progress
|
||||
progress = i / len(rows)
|
||||
progress_bar.progress(progress, text=progress_text)
|
||||
|
||||
# Run evaluation for current row
|
||||
score_res = llama_stack_api.run_scoring(
|
||||
r,
|
||||
scoring_function_ids=selected_scoring_functions,
|
||||
scoring_params=scoring_params,
|
||||
)
|
||||
|
||||
for k in r.keys():
|
||||
if k not in output_res:
|
||||
output_res[k] = []
|
||||
output_res[k].append(r[k])
|
||||
|
||||
for fn_id in selected_scoring_functions:
|
||||
if fn_id not in output_res:
|
||||
output_res[fn_id] = []
|
||||
output_res[fn_id].append(score_res.results[fn_id].score_rows[0])
|
||||
|
||||
# Display current row results using separate containers
|
||||
progress_text_container.write(
|
||||
f"Expand to see current processed result ({i+1}/{len(rows)})"
|
||||
)
|
||||
results_container.json(
|
||||
score_res.to_json(),
|
||||
expanded=2,
|
||||
)
|
||||
|
||||
progress_bar.progress(1.0, text="Evaluation complete!")
|
||||
|
||||
# Display results in dataframe
|
||||
if output_res:
|
||||
output_df = pd.DataFrame(output_res)
|
||||
st.subheader("Evaluation Results")
|
||||
st.dataframe(output_df)
|
||||
|
||||
|
||||
application_evaluation_page()
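Note: the parameter-editing loop above assembles `scoring_params` keyed by scoring-function id before handing it to `llama_stack_api.run_scoring`. A hedged illustration of the shape it produces for an LLM-as-judge style function (the identifier and field values below are placeholders, not taken from this diff):

# Illustrative shape only; real keys come from the registered scoring functions.
scoring_params = {
    "llm-as-judge::base": {                     # placeholder identifier
        "type": "llm_as_judge",                 # the "type" field is kept as-is by the loop above
        "judge_model": "meta-llama/Llama-3.1-405B-Instruct",
        "prompt_template": "Judge the generated answer against the expected answer.",
    }
}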
|
||||
257
llama_stack/distribution/ui/page/evaluations/native_eval.py
Normal file
|
|
@ -0,0 +1,257 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import json
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import streamlit as st
|
||||
|
||||
from modules.api import llama_stack_api
|
||||
|
||||
|
||||
def select_eval_task_1():
|
||||
# Select Eval Tasks
|
||||
st.subheader("1. Choose An Eval Task")
|
||||
eval_tasks = llama_stack_api.client.eval_tasks.list()
|
||||
eval_tasks = {et.identifier: et for et in eval_tasks}
|
||||
eval_tasks_names = list(eval_tasks.keys())
|
||||
selected_eval_task = st.selectbox(
|
||||
"Choose an eval task.",
|
||||
options=eval_tasks_names,
|
||||
help="Choose an eval task. Each eval task is parameterized by a dataset, and list of scoring functions.",
|
||||
)
|
||||
with st.expander("View Eval Task"):
|
||||
st.json(eval_tasks[selected_eval_task], expanded=True)
|
||||
|
||||
st.session_state["selected_eval_task"] = selected_eval_task
|
||||
st.session_state["eval_tasks"] = eval_tasks
|
||||
if st.button("Confirm", key="confirm_1"):
|
||||
st.session_state["selected_eval_task_1_next"] = True
|
||||
|
||||
|
||||
def define_eval_candidate_2():
|
||||
if not st.session_state.get("selected_eval_task_1_next", None):
|
||||
return
|
||||
|
||||
st.subheader("2. Define Eval Candidate")
|
||||
st.info(
|
||||
"""
|
||||
Define the configurations for the evaluation candidate model or agent used for generation.
|
||||
Select "model" if you want to run generation with inference API, or "agent" if you want to run generation with agent API through specifying AgentConfig.
|
||||
"""
|
||||
)
|
||||
with st.expander("Define Eval Candidate", expanded=True):
|
||||
# Define Eval Candidate
|
||||
candidate_type = st.radio("Candidate Type", ["model", "agent"])
|
||||
|
||||
available_models = llama_stack_api.client.models.list()
|
||||
available_models = [model.identifier for model in available_models]
|
||||
selected_model = st.selectbox(
|
||||
"Choose a model",
|
||||
available_models,
|
||||
index=0,
|
||||
)
|
||||
|
||||
# Sampling Parameters
|
||||
st.markdown("##### Sampling Parameters")
|
||||
strategy = st.selectbox(
|
||||
"Strategy",
|
||||
["greedy", "top_p", "top_k"],
|
||||
index=0,
|
||||
)
|
||||
temperature = st.slider(
|
||||
"Temperature",
|
||||
min_value=0.0,
|
||||
max_value=1.0,
|
||||
value=0.0,
|
||||
step=0.1,
|
||||
help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable",
|
||||
)
|
||||
top_p = st.slider(
|
||||
"Top P",
|
||||
min_value=0.0,
|
||||
max_value=1.0,
|
||||
value=0.95,
|
||||
step=0.1,
|
||||
)
|
||||
max_tokens = st.slider(
|
||||
"Max Tokens",
|
||||
min_value=0,
|
||||
max_value=4096,
|
||||
value=512,
|
||||
step=1,
|
||||
help="The maximum number of tokens to generate",
|
||||
)
|
||||
repetition_penalty = st.slider(
|
||||
"Repetition Penalty",
|
||||
min_value=1.0,
|
||||
max_value=2.0,
|
||||
value=1.0,
|
||||
step=0.1,
|
||||
help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.",
|
||||
)
|
||||
if candidate_type == "model":
|
||||
eval_candidate = {
|
||||
"type": "model",
|
||||
"model": selected_model,
|
||||
"sampling_params": {
|
||||
"strategy": strategy,
|
||||
"temperature": temperature,
|
||||
"top_p": top_p,
|
||||
"max_tokens": max_tokens,
|
||||
"repetition_penalty": repetition_penalty,
|
||||
},
|
||||
}
|
||||
elif candidate_type == "agent":
|
||||
system_prompt = st.text_area(
|
||||
"System Prompt",
|
||||
value="You are a helpful AI assistant.",
|
||||
help="Initial instructions given to the AI to set its behavior and context",
|
||||
)
|
||||
tools_json = st.text_area(
|
||||
"Tools Configuration (JSON)",
|
||||
value=json.dumps(
|
||||
[
|
||||
{
|
||||
"type": "brave_search",
|
||||
"engine": "brave",
|
||||
"api_key": "ENTER_BRAVE_API_KEY_HERE",
|
||||
}
|
||||
]
|
||||
),
|
||||
help="Enter tool configurations in JSON format. Each tool should have a name, description, and parameters.",
|
||||
height=200,
|
||||
)
|
||||
try:
|
||||
tools = json.loads(tools_json)
|
||||
except json.JSONDecodeError:
|
||||
st.error("Invalid JSON format for tools configuration")
|
||||
tools = []
|
||||
eval_candidate = {
|
||||
"type": "agent",
|
||||
"config": {
|
||||
"model": selected_model,
|
||||
"instructions": system_prompt,
|
||||
"tools": tools,
|
||||
"tool_choice": "auto",
|
||||
"tool_prompt_format": "json",
|
||||
"input_shields": [],
|
||||
"output_shields": [],
|
||||
"enable_session_persistence": False,
|
||||
},
|
||||
}
|
||||
st.session_state["eval_candidate"] = eval_candidate
|
||||
|
||||
if st.button("Confirm", key="confirm_2"):
|
||||
st.session_state["selected_eval_candidate_2_next"] = True
|
||||
|
||||
|
||||
def run_evaluation_3():
|
||||
if not st.session_state.get("selected_eval_candidate_2_next", None):
|
||||
return
|
||||
|
||||
st.subheader("3. Run Evaluation")
|
||||
# Add info box to explain configurations being used
|
||||
st.info(
|
||||
"""
|
||||
Review the configurations that will be used for this evaluation run, make any necessary changes, and then click the "Run Evaluation" button.
|
||||
"""
|
||||
)
|
||||
selected_eval_task = st.session_state["selected_eval_task"]
|
||||
eval_tasks = st.session_state["eval_tasks"]
|
||||
eval_candidate = st.session_state["eval_candidate"]
|
||||
|
||||
dataset_id = eval_tasks[selected_eval_task].dataset_id
|
||||
rows = llama_stack_api.client.datasetio.get_rows_paginated(
|
||||
dataset_id=dataset_id,
|
||||
rows_in_page=-1,
|
||||
)
|
||||
total_rows = len(rows.rows)
|
||||
# Add number of examples control
|
||||
num_rows = st.number_input(
|
||||
"Number of Examples to Evaluate",
|
||||
min_value=1,
|
||||
max_value=total_rows,
|
||||
value=5,
|
||||
help="Number of examples from the dataset to evaluate. ",
|
||||
)
|
||||
|
||||
eval_task_config = {
|
||||
"type": "benchmark",
|
||||
"eval_candidate": eval_candidate,
|
||||
"scoring_params": {},
|
||||
}
|
||||
|
||||
with st.expander("View Evaluation Task", expanded=True):
|
||||
st.json(eval_tasks[selected_eval_task], expanded=True)
|
||||
with st.expander("View Evaluation Task Configuration", expanded=True):
|
||||
st.json(eval_task_config, expanded=True)
|
||||
|
||||
# Add run button and handle evaluation
|
||||
if st.button("Run Evaluation"):
|
||||
|
||||
progress_text = "Running evaluation..."
|
||||
progress_bar = st.progress(0, text=progress_text)
|
||||
rows = rows.rows
|
||||
if num_rows < total_rows:
|
||||
rows = rows[:num_rows]
|
||||
|
||||
# Create separate containers for progress text and results
|
||||
progress_text_container = st.empty()
|
||||
results_container = st.empty()
|
||||
output_res = {}
|
||||
for i, r in enumerate(rows):
|
||||
# Update progress
|
||||
progress = i / len(rows)
|
||||
progress_bar.progress(progress, text=progress_text)
|
||||
# Run evaluation for current row
|
||||
eval_res = llama_stack_api.client.eval.evaluate_rows(
|
||||
task_id=selected_eval_task,
|
||||
input_rows=[r],
|
||||
scoring_functions=eval_tasks[selected_eval_task].scoring_functions,
|
||||
task_config=eval_task_config,
|
||||
)
|
||||
|
||||
for k in r.keys():
|
||||
if k not in output_res:
|
||||
output_res[k] = []
|
||||
output_res[k].append(r[k])
|
||||
|
||||
for k in eval_res.generations[0].keys():
|
||||
if k not in output_res:
|
||||
output_res[k] = []
|
||||
output_res[k].append(eval_res.generations[0][k])
|
||||
|
||||
for scoring_fn in eval_tasks[selected_eval_task].scoring_functions:
|
||||
if scoring_fn not in output_res:
|
||||
output_res[scoring_fn] = []
|
||||
output_res[scoring_fn].append(eval_res.scores[scoring_fn].score_rows[0])
|
||||
|
||||
progress_text_container.write(
|
||||
f"Expand to see current processed result ({i+1}/{len(rows)})"
|
||||
)
|
||||
results_container.json(eval_res, expanded=2)
|
||||
|
||||
progress_bar.progress(1.0, text="Evaluation complete!")
|
||||
# Display results in dataframe
|
||||
if output_res:
|
||||
output_df = pd.DataFrame(output_res)
|
||||
st.subheader("Evaluation Results")
|
||||
st.dataframe(output_df)
|
||||
|
||||
|
||||
def native_evaluation_page():
|
||||
|
||||
st.set_page_config(page_title="Evaluations (Generation + Scoring)", page_icon="🦙")
|
||||
st.title("📊 Evaluations (Generation + Scoring)")
|
||||
|
||||
select_eval_task_1()
|
||||
define_eval_candidate_2()
|
||||
run_evaluation_3()
|
||||
|
||||
|
||||
native_evaluation_page()
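For clarity, the row loop above accumulates dataset columns, generations, and per-row scores into `output_res` before rendering the dataframe. A hedged sketch of its final shape for a two-row run (column names depend on the dataset schema and the task's scoring functions; the values are placeholders):

# Illustrative only; not produced by this diff.
output_res = {
    "input_query": ["q1", "q2"],                           # columns copied from each input row
    "expected_answer": ["a1", "a2"],
    "generated_answer": ["g1", "g2"],                      # from eval_res.generations[0]
    "basic::equality": [{"score": 1.0}, {"score": 0.0}],   # one score_row per input row
}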
|
||||
5
llama_stack/distribution/ui/page/playground/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
123
llama_stack/distribution/ui/page/playground/chat.py
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import streamlit as st
|
||||
from modules.api import llama_stack_api
|
||||
|
||||
# Sidebar configurations
|
||||
with st.sidebar:
|
||||
st.header("Configuration")
|
||||
available_models = llama_stack_api.client.models.list()
|
||||
available_models = [model.identifier for model in available_models]
|
||||
selected_model = st.selectbox(
|
||||
"Choose a model",
|
||||
available_models,
|
||||
index=0,
|
||||
)
|
||||
|
||||
temperature = st.slider(
|
||||
"Temperature",
|
||||
min_value=0.0,
|
||||
max_value=1.0,
|
||||
value=0.0,
|
||||
step=0.1,
|
||||
help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable",
|
||||
)
|
||||
|
||||
top_p = st.slider(
|
||||
"Top P",
|
||||
min_value=0.0,
|
||||
max_value=1.0,
|
||||
value=0.95,
|
||||
step=0.1,
|
||||
)
|
||||
|
||||
max_tokens = st.slider(
|
||||
"Max Tokens",
|
||||
min_value=0,
|
||||
max_value=4096,
|
||||
value=512,
|
||||
step=1,
|
||||
help="The maximum number of tokens to generate",
|
||||
)
|
||||
|
||||
repetition_penalty = st.slider(
|
||||
"Repetition Penalty",
|
||||
min_value=1.0,
|
||||
max_value=2.0,
|
||||
value=1.0,
|
||||
step=0.1,
|
||||
help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.",
|
||||
)
|
||||
|
||||
stream = st.checkbox("Stream", value=True)
|
||||
system_prompt = st.text_area(
|
||||
"System Prompt",
|
||||
value="You are a helpful AI assistant.",
|
||||
help="Initial instructions given to the AI to set its behavior and context",
|
||||
)
|
||||
|
||||
# Add clear chat button to sidebar
|
||||
if st.button("Clear Chat", use_container_width=True):
|
||||
st.session_state.messages = []
|
||||
st.rerun()
|
||||
|
||||
|
||||
# Main chat interface
|
||||
st.title("🦙 Chat")
|
||||
|
||||
|
||||
# Initialize chat history
|
||||
if "messages" not in st.session_state:
|
||||
st.session_state.messages = []
|
||||
|
||||
# Display chat messages
|
||||
for message in st.session_state.messages:
|
||||
with st.chat_message(message["role"]):
|
||||
st.markdown(message["content"])
|
||||
|
||||
# Chat input
|
||||
if prompt := st.chat_input("Example: What is Llama Stack?"):
|
||||
# Add user message to chat history
|
||||
st.session_state.messages.append({"role": "user", "content": prompt})
|
||||
|
||||
# Display user message
|
||||
with st.chat_message("user"):
|
||||
st.markdown(prompt)
|
||||
|
||||
# Display assistant response
|
||||
with st.chat_message("assistant"):
|
||||
message_placeholder = st.empty()
|
||||
full_response = ""
|
||||
|
||||
response = llama_stack_api.client.inference.chat_completion(
|
||||
messages=[
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
model_id=selected_model,
|
||||
stream=stream,
|
||||
sampling_params={
|
||||
"temperature": temperature,
|
||||
"top_p": top_p,
|
||||
"max_tokens": max_tokens,
|
||||
"repetition_penalty": repetition_penalty,
|
||||
},
|
||||
)
|
||||
|
||||
if stream:
|
||||
for chunk in response:
|
||||
if chunk.event.event_type == "progress":
|
||||
full_response += chunk.event.delta
|
||||
message_placeholder.markdown(full_response + "▌")
|
||||
message_placeholder.markdown(full_response)
|
||||
else:
|
||||
full_response = response.completion_message.content
|
||||
message_placeholder.markdown(full_response)
|
||||
|
||||
st.session_state.messages.append(
|
||||
{"role": "assistant", "content": full_response}
|
||||
)
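One design note on the request above: only the system prompt and the latest user message are sent, so prior turns held in `st.session_state.messages` are not forwarded to the model. A hedged sketch of how history could be included, assuming `chat_completion` accepts the same role/content message dicts used above:

# Hypothetical variant, not part of this diff: forward prior turns as context.
history = [
    {"role": m["role"], "content": m["content"]}
    for m in st.session_state.messages  # already includes the new user prompt
]
messages = [{"role": "system", "content": system_prompt}] + history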
|
||||
188
llama_stack/distribution/ui/page/playground/rag.py
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import streamlit as st
|
||||
from llama_stack_client.lib.agents.agent import Agent
|
||||
from llama_stack_client.lib.agents.event_logger import EventLogger
|
||||
from llama_stack_client.types.agent_create_params import AgentConfig
|
||||
from llama_stack_client.types.memory_insert_params import Document
|
||||
|
||||
from modules.api import llama_stack_api
|
||||
from modules.utils import data_url_from_file
|
||||
|
||||
|
||||
def rag_chat_page():
|
||||
st.title("🦙 RAG")
|
||||
|
||||
with st.sidebar:
|
||||
# File/Directory Upload Section
|
||||
st.subheader("Upload Documents")
|
||||
uploaded_files = st.file_uploader(
|
||||
"Upload file(s) or directory",
|
||||
accept_multiple_files=True,
|
||||
type=["txt", "pdf", "doc", "docx"], # Add more file types as needed
|
||||
)
|
||||
# Process uploaded files
|
||||
if uploaded_files:
|
||||
st.success(f"Successfully uploaded {len(uploaded_files)} files")
|
||||
# Add memory bank name input field
|
||||
memory_bank_name = st.text_input(
|
||||
"Memory Bank Name",
|
||||
value="rag_bank",
|
||||
help="Enter a unique identifier for this memory bank",
|
||||
)
|
||||
if st.button("Create Memory Bank"):
|
||||
documents = [
|
||||
Document(
|
||||
document_id=uploaded_file.name,
|
||||
content=data_url_from_file(uploaded_file),
|
||||
)
|
||||
for i, uploaded_file in enumerate(uploaded_files)
|
||||
]
|
||||
|
||||
providers = llama_stack_api.client.providers.list()
|
||||
llama_stack_api.client.memory_banks.register(
|
||||
memory_bank_id=memory_bank_name, # Use the user-provided name
|
||||
params={
|
||||
"embedding_model": "all-MiniLM-L6-v2",
|
||||
"chunk_size_in_tokens": 512,
|
||||
"overlap_size_in_tokens": 64,
|
||||
},
|
||||
provider_id=providers["memory"][0].provider_id,
|
||||
)
|
||||
|
||||
# insert documents using the custom bank name
|
||||
llama_stack_api.client.memory.insert(
|
||||
bank_id=memory_bank_name, # Use the user-provided name
|
||||
documents=documents,
|
||||
)
|
||||
st.success("Memory bank created successfully!")
|
||||
|
||||
st.subheader("Configure Agent")
|
||||
# select memory banks
|
||||
memory_banks = llama_stack_api.client.memory_banks.list()
|
||||
memory_banks = [bank.identifier for bank in memory_banks]
|
||||
selected_memory_banks = st.multiselect(
|
||||
"Select Memory Banks",
|
||||
memory_banks,
|
||||
)
|
||||
memory_bank_configs = [
|
||||
{"bank_id": bank_id, "type": "vector"} for bank_id in selected_memory_banks
|
||||
]
|
||||
|
||||
available_models = llama_stack_api.client.models.list()
|
||||
available_models = [model.identifier for model in available_models]
|
||||
selected_model = st.selectbox(
|
||||
"Choose a model",
|
||||
available_models,
|
||||
index=0,
|
||||
)
|
||||
system_prompt = st.text_area(
|
||||
"System Prompt",
|
||||
value="You are a helpful assistant. ",
|
||||
help="Initial instructions given to the AI to set its behavior and context",
|
||||
)
|
||||
temperature = st.slider(
|
||||
"Temperature",
|
||||
min_value=0.0,
|
||||
max_value=1.0,
|
||||
value=0.0,
|
||||
step=0.1,
|
||||
help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable",
|
||||
)
|
||||
|
||||
top_p = st.slider(
|
||||
"Top P",
|
||||
min_value=0.0,
|
||||
max_value=1.0,
|
||||
value=0.95,
|
||||
step=0.1,
|
||||
)
|
||||
|
||||
# Add clear chat button to sidebar
|
||||
if st.button("Clear Chat", use_container_width=True):
|
||||
st.session_state.messages = []
|
||||
st.rerun()
|
||||
|
||||
# Chat Interface
|
||||
if "messages" not in st.session_state:
|
||||
st.session_state.messages = []
|
||||
|
||||
# Display chat history
|
||||
for message in st.session_state.messages:
|
||||
with st.chat_message(message["role"]):
|
||||
st.markdown(message["content"])
|
||||
|
||||
selected_model = llama_stack_api.client.models.list()[0].identifier
|
||||
|
||||
agent_config = AgentConfig(
|
||||
model=selected_model,
|
||||
instructions=system_prompt,
|
||||
sampling_params={
|
||||
"strategy": "greedy",
|
||||
"temperature": temperature,
|
||||
"top_p": top_p,
|
||||
},
|
||||
tools=[
|
||||
{
|
||||
"type": "memory",
|
||||
"memory_bank_configs": memory_bank_configs,
|
||||
"query_generator_config": {"type": "default", "sep": " "},
|
||||
"max_tokens_in_context": 4096,
|
||||
"max_chunks": 10,
|
||||
}
|
||||
],
|
||||
tool_choice="auto",
|
||||
tool_prompt_format="json",
|
||||
input_shields=[],
|
||||
output_shields=[],
|
||||
enable_session_persistence=False,
|
||||
)
|
||||
|
||||
agent = Agent(llama_stack_api.client, agent_config)
|
||||
session_id = agent.create_session("rag-session")
|
||||
|
||||
# Chat input
|
||||
if prompt := st.chat_input("Ask a question about your documents"):
|
||||
# Add user message to chat history
|
||||
st.session_state.messages.append({"role": "user", "content": prompt})
|
||||
|
||||
# Display user message
|
||||
with st.chat_message("user"):
|
||||
st.markdown(prompt)
|
||||
|
||||
response = agent.create_turn(
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": prompt,
|
||||
}
|
||||
],
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Display assistant response
|
||||
with st.chat_message("assistant"):
|
||||
retrieval_message_placeholder = st.empty()
|
||||
message_placeholder = st.empty()
|
||||
full_response = ""
|
||||
retrieval_response = ""
|
||||
for log in EventLogger().log(response):
|
||||
log.print()
|
||||
if log.role == "memory_retrieval":
|
||||
retrieval_response += log.content.replace("====", "").strip()
|
||||
retrieval_message_placeholder.info(retrieval_response)
|
||||
else:
|
||||
full_response += log.content
|
||||
message_placeholder.markdown(full_response + "▌")
|
||||
message_placeholder.markdown(full_response)
|
||||
|
||||
st.session_state.messages.append(
|
||||
{"role": "assistant", "content": full_response}
|
||||
)
|
||||
|
||||
|
||||
rag_chat_page()
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
streamlit
|
||||
pandas
|
||||
llama-stack-client>=0.0.55
|
||||
streamlit-option-menu
|
||||
|
|
|
|||
|
|
@ -53,8 +53,6 @@ class ShieldsProtocolPrivate(Protocol):
|
|||
|
||||
|
||||
class MemoryBanksProtocolPrivate(Protocol):
|
||||
async def list_memory_banks(self) -> List[MemoryBank]: ...
|
||||
|
||||
async def register_memory_bank(self, memory_bank: MemoryBank) -> None: ...
|
||||
|
||||
async def unregister_memory_bank(self, memory_bank_id: str) -> None: ...
|
||||
|
|
@ -63,6 +61,8 @@ class MemoryBanksProtocolPrivate(Protocol):
|
|||
class DatasetsProtocolPrivate(Protocol):
|
||||
async def register_dataset(self, dataset: Dataset) -> None: ...
|
||||
|
||||
async def unregister_dataset(self, dataset_id: str) -> None: ...
|
||||
|
||||
|
||||
class ScoringFunctionsProtocolPrivate(Protocol):
|
||||
async def list_scoring_functions(self) -> List[ScoringFn]: ...
|
||||
|
|
|
|||
|
|
@ -10,9 +10,7 @@ import logging
|
|||
import os
|
||||
import re
|
||||
import secrets
|
||||
import shutil
|
||||
import string
|
||||
import tempfile
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import AsyncGenerator, List, Tuple
|
||||
|
|
@ -57,6 +55,7 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
self,
|
||||
agent_id: str,
|
||||
agent_config: AgentConfig,
|
||||
tempdir: str,
|
||||
inference_api: Inference,
|
||||
memory_api: Memory,
|
||||
memory_banks_api: MemoryBanks,
|
||||
|
|
@ -65,14 +64,13 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
):
|
||||
self.agent_id = agent_id
|
||||
self.agent_config = agent_config
|
||||
self.tempdir = tempdir
|
||||
self.inference_api = inference_api
|
||||
self.memory_api = memory_api
|
||||
self.memory_banks_api = memory_banks_api
|
||||
self.safety_api = safety_api
|
||||
self.storage = AgentPersistence(agent_id, persistence_store)
|
||||
|
||||
self.tempdir = tempfile.mkdtemp()
|
||||
|
||||
builtin_tools = []
|
||||
for tool_defn in agent_config.tools:
|
||||
if isinstance(tool_defn, WolframAlphaToolDefinition):
|
||||
|
|
@ -103,9 +101,6 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
output_shields=agent_config.output_shields,
|
||||
)
|
||||
|
||||
def __del__(self):
|
||||
shutil.rmtree(self.tempdir)
|
||||
|
||||
def turn_to_messages(self, turn: Turn) -> List[Message]:
|
||||
messages = []
|
||||
|
||||
|
|
@ -144,87 +139,91 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
async def create_session(self, name: str) -> str:
|
||||
return await self.storage.create_session(name)
|
||||
|
||||
@tracing.span("create_and_execute_turn")
|
||||
async def create_and_execute_turn(
|
||||
self, request: AgentTurnCreateRequest
|
||||
) -> AsyncGenerator:
|
||||
assert request.stream is True, "Non-streaming not supported"
|
||||
with tracing.span("create_and_execute_turn") as span:
|
||||
span.set_attribute("session_id", request.session_id)
|
||||
span.set_attribute("agent_id", self.agent_id)
|
||||
span.set_attribute("request", request.model_dump_json())
|
||||
assert request.stream is True, "Non-streaming not supported"
|
||||
|
||||
session_info = await self.storage.get_session_info(request.session_id)
|
||||
if session_info is None:
|
||||
raise ValueError(f"Session {request.session_id} not found")
|
||||
session_info = await self.storage.get_session_info(request.session_id)
|
||||
if session_info is None:
|
||||
raise ValueError(f"Session {request.session_id} not found")
|
||||
|
||||
turns = await self.storage.get_session_turns(request.session_id)
|
||||
turns = await self.storage.get_session_turns(request.session_id)
|
||||
|
||||
messages = []
|
||||
if self.agent_config.instructions != "":
|
||||
messages.append(SystemMessage(content=self.agent_config.instructions))
|
||||
messages = []
|
||||
if self.agent_config.instructions != "":
|
||||
messages.append(SystemMessage(content=self.agent_config.instructions))
|
||||
|
||||
for i, turn in enumerate(turns):
|
||||
messages.extend(self.turn_to_messages(turn))
|
||||
for i, turn in enumerate(turns):
|
||||
messages.extend(self.turn_to_messages(turn))
|
||||
|
||||
messages.extend(request.messages)
|
||||
messages.extend(request.messages)
|
||||
|
||||
turn_id = str(uuid.uuid4())
|
||||
start_time = datetime.now()
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseTurnStartPayload(
|
||||
turn_id=turn_id,
|
||||
turn_id = str(uuid.uuid4())
|
||||
span.set_attribute("turn_id", turn_id)
|
||||
start_time = datetime.now()
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseTurnStartPayload(
|
||||
turn_id=turn_id,
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
steps = []
|
||||
output_message = None
|
||||
async for chunk in self.run(
|
||||
session_id=request.session_id,
|
||||
turn_id=turn_id,
|
||||
input_messages=messages,
|
||||
attachments=request.attachments or [],
|
||||
sampling_params=self.agent_config.sampling_params,
|
||||
stream=request.stream,
|
||||
):
|
||||
if isinstance(chunk, CompletionMessage):
|
||||
log.info(
|
||||
f"{chunk.role.capitalize()}: {chunk.content}",
|
||||
)
|
||||
output_message = chunk
|
||||
continue
|
||||
|
||||
assert isinstance(
|
||||
chunk, AgentTurnResponseStreamChunk
|
||||
), f"Unexpected type {type(chunk)}"
|
||||
event = chunk.event
|
||||
if (
|
||||
event.payload.event_type
|
||||
== AgentTurnResponseEventType.step_complete.value
|
||||
steps = []
|
||||
output_message = None
|
||||
async for chunk in self.run(
|
||||
session_id=request.session_id,
|
||||
turn_id=turn_id,
|
||||
input_messages=messages,
|
||||
attachments=request.attachments or [],
|
||||
sampling_params=self.agent_config.sampling_params,
|
||||
stream=request.stream,
|
||||
):
|
||||
steps.append(event.payload.step_details)
|
||||
if isinstance(chunk, CompletionMessage):
|
||||
log.info(
|
||||
f"{chunk.role.capitalize()}: {chunk.content}",
|
||||
)
|
||||
output_message = chunk
|
||||
continue
|
||||
|
||||
yield chunk
|
||||
assert isinstance(
|
||||
chunk, AgentTurnResponseStreamChunk
|
||||
), f"Unexpected type {type(chunk)}"
|
||||
event = chunk.event
|
||||
if (
|
||||
event.payload.event_type
|
||||
== AgentTurnResponseEventType.step_complete.value
|
||||
):
|
||||
steps.append(event.payload.step_details)
|
||||
|
||||
assert output_message is not None
|
||||
yield chunk
|
||||
|
||||
turn = Turn(
|
||||
turn_id=turn_id,
|
||||
session_id=request.session_id,
|
||||
input_messages=request.messages,
|
||||
output_message=output_message,
|
||||
started_at=start_time,
|
||||
completed_at=datetime.now(),
|
||||
steps=steps,
|
||||
)
|
||||
await self.storage.add_turn_to_session(request.session_id, turn)
|
||||
assert output_message is not None
|
||||
|
||||
chunk = AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseTurnCompletePayload(
|
||||
turn=turn,
|
||||
turn = Turn(
|
||||
turn_id=turn_id,
|
||||
session_id=request.session_id,
|
||||
input_messages=request.messages,
|
||||
output_message=output_message,
|
||||
started_at=start_time,
|
||||
completed_at=datetime.now(),
|
||||
steps=steps,
|
||||
)
|
||||
await self.storage.add_turn_to_session(request.session_id, turn)
|
||||
|
||||
chunk = AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseTurnCompletePayload(
|
||||
turn=turn,
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
yield chunk
|
||||
yield chunk
|
||||
|
||||
async def run(
|
||||
self,
|
||||
|
|
@ -273,7 +272,6 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
|
||||
yield final_response
|
||||
|
||||
@tracing.span("run_shields")
|
||||
async def run_multiple_shields_wrapper(
|
||||
self,
|
||||
turn_id: str,
|
||||
|
|
@ -281,23 +279,46 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
shields: List[str],
|
||||
touchpoint: str,
|
||||
) -> AsyncGenerator:
|
||||
if len(shields) == 0:
|
||||
return
|
||||
with tracing.span("run_shields") as span:
|
||||
span.set_attribute("input", [m.model_dump_json() for m in messages])
|
||||
if len(shields) == 0:
|
||||
span.set_attribute("output", "no shields")
|
||||
return
|
||||
|
||||
step_id = str(uuid.uuid4())
|
||||
try:
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseStepStartPayload(
|
||||
step_type=StepType.shield_call.value,
|
||||
step_id=step_id,
|
||||
metadata=dict(touchpoint=touchpoint),
|
||||
step_id = str(uuid.uuid4())
|
||||
try:
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseStepStartPayload(
|
||||
step_type=StepType.shield_call.value,
|
||||
step_id=step_id,
|
||||
metadata=dict(touchpoint=touchpoint),
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
await self.run_multiple_shields(messages, shields)
|
||||
await self.run_multiple_shields(messages, shields)
|
||||
|
||||
except SafetyException as e:
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseStepCompletePayload(
|
||||
step_type=StepType.shield_call.value,
|
||||
step_details=ShieldCallStep(
|
||||
step_id=step_id,
|
||||
turn_id=turn_id,
|
||||
violation=e.violation,
|
||||
),
|
||||
)
|
||||
)
|
||||
)
|
||||
span.set_attribute("output", e.violation.model_dump_json())
|
||||
|
||||
yield CompletionMessage(
|
||||
content=str(e),
|
||||
stop_reason=StopReason.end_of_turn,
|
||||
)
|
||||
yield False
|
||||
|
||||
except SafetyException as e:
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseStepCompletePayload(
|
||||
|
|
@ -305,30 +326,12 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
step_details=ShieldCallStep(
|
||||
step_id=step_id,
|
||||
turn_id=turn_id,
|
||||
violation=e.violation,
|
||||
violation=None,
|
||||
),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
yield CompletionMessage(
|
||||
content=str(e),
|
||||
stop_reason=StopReason.end_of_turn,
|
||||
)
|
||||
yield False
|
||||
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseStepCompletePayload(
|
||||
step_type=StepType.shield_call.value,
|
||||
step_details=ShieldCallStep(
|
||||
step_id=step_id,
|
||||
turn_id=turn_id,
|
||||
violation=None,
|
||||
),
|
||||
)
|
||||
)
|
||||
)
|
||||
span.set_attribute("output", "no violations")
|
||||
|
||||
async def _run(
|
||||
self,
|
||||
|
|
@ -356,10 +359,15 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
|
||||
# TODO: find older context from the session and either replace it
|
||||
# or append with a sliding window. this is really a very simplistic implementation
|
||||
with tracing.span("retrieve_rag_context"):
|
||||
with tracing.span("retrieve_rag_context") as span:
|
||||
rag_context, bank_ids = await self._retrieve_context(
|
||||
session_id, input_messages, attachments
|
||||
)
|
||||
span.set_attribute(
|
||||
"input", [m.model_dump_json() for m in input_messages]
|
||||
)
|
||||
span.set_attribute("output", rag_context)
|
||||
span.set_attribute("bank_ids", bank_ids)
|
||||
|
||||
step_id = str(uuid.uuid4())
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
|
|
@ -396,11 +404,6 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
n_iter = 0
|
||||
while True:
|
||||
msg = input_messages[-1]
|
||||
if len(str(msg)) > 1000:
|
||||
msg_str = f"{str(msg)[:500]}...<more>...{str(msg)[-500:]}"
|
||||
else:
|
||||
msg_str = str(msg)
|
||||
log.info(f"{msg_str}")
|
||||
|
||||
step_id = str(uuid.uuid4())
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
|
|
@ -416,7 +419,7 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
content = ""
|
||||
stop_reason = None
|
||||
|
||||
with tracing.span("inference"):
|
||||
with tracing.span("inference") as span:
|
||||
async for chunk in await self.inference_api.chat_completion(
|
||||
self.agent_config.model,
|
||||
input_messages,
|
||||
|
|
@ -436,14 +439,13 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
if isinstance(delta, ToolCallDelta):
|
||||
if delta.parse_status == ToolCallParseStatus.success:
|
||||
tool_calls.append(delta.content)
|
||||
|
||||
if stream:
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
payload=AgentTurnResponseStepProgressPayload(
|
||||
step_type=StepType.inference.value,
|
||||
step_id=step_id,
|
||||
model_response_text_delta="",
|
||||
text_delta="",
|
||||
tool_call_delta=delta,
|
||||
)
|
||||
)
|
||||
|
|
@ -457,7 +459,7 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
payload=AgentTurnResponseStepProgressPayload(
|
||||
step_type=StepType.inference.value,
|
||||
step_id=step_id,
|
||||
model_response_text_delta=event.delta,
|
||||
text_delta=event.delta,
|
||||
)
|
||||
)
|
||||
)
|
||||
|
|
@ -466,6 +468,13 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
|
||||
if event.stop_reason is not None:
|
||||
stop_reason = event.stop_reason
|
||||
span.set_attribute("stop_reason", stop_reason)
|
||||
span.set_attribute(
|
||||
"input", [m.model_dump_json() for m in input_messages]
|
||||
)
|
||||
span.set_attribute(
|
||||
"output", f"content: {content} tool_calls: {tool_calls}"
|
||||
)
|
||||
|
||||
stop_reason = stop_reason or StopReason.out_of_tokens
|
||||
|
||||
|
|
@ -549,7 +558,13 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
)
|
||||
)
|
||||
|
||||
with tracing.span("tool_execution"):
|
||||
with tracing.span(
|
||||
"tool_execution",
|
||||
{
|
||||
"tool_name": tool_call.tool_name,
|
||||
"input": message.model_dump_json(),
|
||||
},
|
||||
) as span:
|
||||
result_messages = await execute_tool_call_maybe(
|
||||
self.tools_dict,
|
||||
[message],
|
||||
|
|
@ -558,6 +573,7 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
len(result_messages) == 1
|
||||
), "Currently not supporting multiple messages"
|
||||
result_message = result_messages[0]
|
||||
span.set_attribute("output", result_message.model_dump_json())
|
||||
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
|
|
|
|||
|
|
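The agent changes above replace `@tracing.span(...)` decorators with explicit `with tracing.span(...) as span:` blocks so inputs, outputs, and identifiers can be attached while the step runs. A minimal sketch of the pattern, assuming only that the span object exposes `set_attribute` as used in this file (`do_step` is a placeholder, not a function in this codebase):

# Sketch only.
with tracing.span("do_step") as span:
    span.set_attribute("input", request.model_dump_json())
    result = await do_step(request)
    span.set_attribute("output", result.model_dump_json())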
@ -6,9 +6,13 @@
|
|||
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
import tempfile
|
||||
import uuid
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from termcolor import colored
|
||||
|
||||
from llama_stack.apis.inference import Inference
|
||||
from llama_stack.apis.memory import Memory
|
||||
from llama_stack.apis.memory_banks import MemoryBanks
|
||||
|
|
@ -40,10 +44,20 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
self.memory_banks_api = memory_banks_api
|
||||
|
||||
self.in_memory_store = InmemoryKVStoreImpl()
|
||||
self.tempdir = tempfile.mkdtemp()
|
||||
|
||||
async def initialize(self) -> None:
|
||||
self.persistence_store = await kvstore_impl(self.config.persistence_store)
|
||||
|
||||
# check if "bwrap" is available
|
||||
if not shutil.which("bwrap"):
|
||||
print(
|
||||
colored(
|
||||
"Warning: `bwrap` is not available. Code interpreter tool will not work correctly.",
|
||||
"yellow",
|
||||
)
|
||||
)
|
||||
|
||||
async def create_agent(
|
||||
self,
|
||||
agent_config: AgentConfig,
|
||||
|
|
@ -82,6 +96,7 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
return ChatAgent(
|
||||
agent_id=agent_id,
|
||||
agent_config=agent_config,
|
||||
tempdir=self.tempdir,
|
||||
inference_api=self.inference_api,
|
||||
safety_api=self.safety_api,
|
||||
memory_api=self.memory_api,
|
||||
|
|
|
|||
|
|
@ -3,14 +3,17 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from typing import Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import pandas
|
||||
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||
|
||||
from llama_stack.apis.datasetio import * # noqa: F403
|
||||
import base64
|
||||
import os
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from llama_stack.providers.datatypes import DatasetsProtocolPrivate
|
||||
from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_url
|
||||
|
|
@ -97,6 +100,9 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
|
|||
dataset_impl=dataset_impl,
|
||||
)
|
||||
|
||||
async def unregister_dataset(self, dataset_id: str) -> None:
|
||||
del self.dataset_infos[dataset_id]
|
||||
|
||||
async def get_rows_paginated(
|
||||
self,
|
||||
dataset_id: str,
|
||||
|
|
@ -128,3 +134,41 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
|
|||
total_count=len(rows),
|
||||
next_page_token=str(end),
|
||||
)
|
||||
|
||||
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
|
||||
dataset_info = self.dataset_infos.get(dataset_id)
|
||||
if dataset_info is None:
|
||||
raise ValueError(f"Dataset with id {dataset_id} not found")
|
||||
|
||||
dataset_impl = dataset_info.dataset_impl
|
||||
dataset_impl.load()
|
||||
|
||||
new_rows_df = pandas.DataFrame(rows)
|
||||
new_rows_df = dataset_impl._validate_dataset_schema(new_rows_df)
|
||||
dataset_impl.df = pandas.concat(
|
||||
[dataset_impl.df, new_rows_df], ignore_index=True
|
||||
)
|
||||
|
||||
url = str(dataset_info.dataset_def.url)
|
||||
parsed_url = urlparse(url)
|
||||
|
||||
if parsed_url.scheme == "file" or not parsed_url.scheme:
|
||||
file_path = parsed_url.path
|
||||
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
||||
dataset_impl.df.to_csv(file_path, index=False)
|
||||
elif parsed_url.scheme == "data":
|
||||
# For data URLs, we need to update the base64-encoded content
|
||||
if not parsed_url.path.startswith("text/csv;base64,"):
|
||||
raise ValueError("Data URL must be a base64-encoded CSV")
|
||||
|
||||
csv_buffer = dataset_impl.df.to_csv(index=False)
|
||||
base64_content = base64.b64encode(csv_buffer.encode("utf-8")).decode(
|
||||
"utf-8"
|
||||
)
|
||||
dataset_info.dataset_def.url = URL(
|
||||
uri=f"data:text/csv;base64,{base64_content}"
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unsupported URL scheme: {parsed_url.scheme}. Only file:// and data: URLs are supported for writing."
|
||||
)
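A hedged usage sketch of the `append_rows` implementation above, assuming a CSV-backed dataset has already been registered with this provider and that the appended rows match its schema (all identifiers and column names below are placeholders):

# Illustrative only.
await datasetio_impl.append_rows(          # datasetio_impl: a LocalFSDatasetIOImpl instance
    dataset_id="my_eval_dataset",
    rows=[
        {"input_query": "q1", "expected_answer": "a1"},
        {"input_query": "q2", "expected_answer": "a2"},
    ],
)
# Rows are schema-validated, concatenated onto the in-memory dataframe, and written
# back to the dataset's file:// path or re-encoded into its data: URL, as above.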
|
||||
|
|
|
|||
5
llama_stack/providers/inline/eval/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
|
@ -3,12 +3,13 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
|
||||
from llama_stack.providers.utils.kvstore.config import (
|
||||
KVStoreConfig,
|
||||
SqliteKVStoreConfig,
|
||||
)
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class MetaReferenceEvalConfig(BaseModel):
|
||||
|
|
|
|||
|
|
@ -4,7 +4,9 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||
from tqdm import tqdm
|
||||
|
||||
from .....apis.common.job_types import Job
|
||||
from .....apis.eval.eval import Eval, EvalTaskConfig, EvaluateResponse, JobStatus
|
||||
|
|
@ -17,7 +19,6 @@ from llama_stack.apis.inference import Inference
|
|||
from llama_stack.apis.scoring import Scoring
|
||||
from llama_stack.providers.datatypes import EvalTasksProtocolPrivate
|
||||
from llama_stack.providers.utils.kvstore import kvstore_impl
|
||||
from tqdm import tqdm
|
||||
|
||||
from .config import MetaReferenceEvalConfig
|
||||
|
||||
|
|
|
|||
|
|
@ -4,12 +4,12 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .config import ConsoleConfig
|
||||
from .config import ChromaInlineImplConfig
|
||||
|
||||
|
||||
async def get_provider_impl(config: ConsoleConfig, _deps):
|
||||
from .console import ConsoleTelemetryImpl
|
||||
async def get_provider_impl(config: ChromaInlineImplConfig, _deps):
|
||||
from llama_stack.providers.remote.memory.chroma.chroma import ChromaMemoryAdapter
|
||||
|
||||
impl = ConsoleTelemetryImpl(config)
|
||||
impl = ChromaMemoryAdapter(config)
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
|
@ -4,18 +4,14 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum
|
||||
|
||||
from llama_models.schema_utils import json_schema_type
|
||||
from typing import Any, Dict
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class LogFormat(Enum):
|
||||
TEXT = "text"
|
||||
JSON = "json"
|
||||
class ChromaInlineImplConfig(BaseModel):
|
||||
db_path: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ConsoleConfig(BaseModel):
|
||||
log_format: LogFormat = LogFormat.TEXT
|
||||
@classmethod
|
||||
def sample_config(cls) -> Dict[str, Any]:
|
||||
return {"db_path": "{env.CHROMADB_PATH}"}
|
||||
|
|
@ -27,7 +27,6 @@ from llama_stack.providers.utils.memory.vector_store import (
|
|||
BankWithIndex,
|
||||
EmbeddingIndex,
|
||||
)
|
||||
from llama_stack.providers.utils.telemetry import tracing
|
||||
|
||||
from .config import FaissImplConfig
|
||||
|
||||
|
|
@ -95,7 +94,6 @@ class FaissIndex(EmbeddingIndex):
|
|||
|
||||
await self.kvstore.delete(f"faiss_index:v1::{self.bank_id}")
|
||||
|
||||
@tracing.span(name="add_chunks")
|
||||
async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray):
|
||||
indexlen = len(self.id_by_index)
|
||||
for i, chunk in enumerate(chunks):
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
import json
|
||||
from typing import Optional
|
||||
from typing import List, Optional
|
||||
|
||||
from .config import LogFormat
|
||||
|
||||
|
|
@ -49,8 +49,27 @@ class ConsoleTelemetryImpl(Telemetry):
|
|||
if formatted:
|
||||
print(formatted)
|
||||
|
||||
async def get_trace(self, trace_id: str) -> Trace:
|
||||
raise NotImplementedError()
|
||||
async def query_traces(
|
||||
self,
|
||||
attribute_conditions: Optional[List[QueryCondition]] = None,
|
||||
attribute_keys_to_return: Optional[List[str]] = None,
|
||||
limit: Optional[int] = 100,
|
||||
offset: Optional[int] = 0,
|
||||
order_by: Optional[List[str]] = None,
|
||||
) -> List[Trace]:
|
||||
raise NotImplementedError("Console telemetry does not support trace querying")
|
||||
|
||||
async def get_spans(
|
||||
self,
|
||||
span_id: str,
|
||||
attribute_conditions: Optional[List[QueryCondition]] = None,
|
||||
attribute_keys_to_return: Optional[List[str]] = None,
|
||||
max_depth: Optional[int] = None,
|
||||
limit: Optional[int] = 100,
|
||||
offset: Optional[int] = 0,
|
||||
order_by: Optional[List[str]] = None,
|
||||
) -> SpanWithChildren:
|
||||
raise NotImplementedError("Console telemetry does not support span querying")
|
||||
|
||||
|
||||
COLORS = {
|
||||
|
|
|
|||
5
llama_stack/providers/inline/scoring/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
|
@ -113,7 +113,9 @@ class BasicScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
|
|||
score_results = await scoring_fn.score(
|
||||
input_rows, scoring_fn_id, scoring_fn_params
|
||||
)
|
||||
agg_results = await scoring_fn.aggregate(score_results)
|
||||
agg_results = await scoring_fn.aggregate(
|
||||
score_results, scoring_fn_id, scoring_fn_params
|
||||
)
|
||||
res[scoring_fn_id] = ScoringResult(
|
||||
score_rows=score_results,
|
||||
aggregated_results=agg_results,
|
||||
|
|
|
|||
|
|
@ -4,12 +4,12 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.providers.utils.scoring.base_scoring_fn import BaseScoringFn
|
||||
from llama_stack.apis.scoring_functions import * # noqa: F401, F403
|
||||
from llama_stack.apis.scoring import * # noqa: F401, F403
|
||||
from llama_stack.apis.common.type_system import * # noqa: F403
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_accuracy
|
||||
from llama_stack.apis.scoring import ScoringResultRow
|
||||
|
||||
from llama_stack.apis.scoring_functions import ScoringFnParams
|
||||
from llama_stack.providers.utils.scoring.base_scoring_fn import BaseScoringFn
|
||||
|
||||
from .fn_defs.equality import equality
|
||||
|
||||
|
|
@ -42,8 +42,3 @@ class EqualityScoringFn(BaseScoringFn):
|
|||
return {
|
||||
"score": score,
|
||||
}
|
||||
|
||||
async def aggregate(
|
||||
self, scoring_results: List[ScoringResultRow]
|
||||
) -> Dict[str, Any]:
|
||||
return aggregate_accuracy(scoring_results)
|
||||
|
|
|
|||
|
|
@ -5,14 +5,20 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.apis.common.type_system import NumberType
|
||||
from llama_stack.apis.scoring_functions import ScoringFn
|
||||
from llama_stack.apis.scoring_functions import (
|
||||
AggregationFunctionType,
|
||||
BasicScoringFnParams,
|
||||
ScoringFn,
|
||||
)
|
||||
|
||||
|
||||
equality = ScoringFn(
|
||||
identifier="basic::equality",
|
||||
description="Returns 1.0 if the input is equal to the target, 0.0 otherwise.",
|
||||
params=None,
|
||||
provider_id="basic",
|
||||
provider_resource_id="equality",
|
||||
return_type=NumberType(),
|
||||
params=BasicScoringFnParams(
|
||||
aggregation_functions=[AggregationFunctionType.accuracy]
|
||||
),
|
||||
)
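Since aggregation is now declared on the function definition (accuracy, above) and the registries forward the resolved params into `aggregate`, the aggregation can in principle be overridden per request. A hedged illustration using the param classes that appear in this diff:

# Hypothetical request-time override, not part of this diff:
params_override = BasicScoringFnParams(
    aggregation_functions=[AggregationFunctionType.average],  # e.g. average instead of accuracy
)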
|
||||
|
|
|
|||
|
|
@ -4,9 +4,12 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.apis.scoring_functions import * # noqa: F401, F403
|
||||
from llama_stack.apis.scoring import * # noqa: F401, F403
|
||||
from llama_stack.apis.common.type_system import NumberType
|
||||
from llama_stack.apis.scoring_functions import (
|
||||
AggregationFunctionType,
|
||||
RegexParserScoringFnParams,
|
||||
ScoringFn,
|
||||
)
|
||||
|
||||
MULTILINGUAL_ANSWER_REGEXES = [
|
||||
r"Answer\s*:",
|
||||
|
|
@ -67,5 +70,6 @@ regex_parser_multiple_choice_answer = ScoringFn(
|
|||
MULTILINGUAL_ANSWER_PATTERN_TEMPLATE.format(x)
|
||||
for x in MULTILINGUAL_ANSWER_REGEXES
|
||||
],
|
||||
aggregation_functions=[AggregationFunctionType.accuracy],
|
||||
),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -5,7 +5,11 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.apis.common.type_system import NumberType
|
||||
from llama_stack.apis.scoring_functions import ScoringFn
|
||||
from llama_stack.apis.scoring_functions import (
|
||||
AggregationFunctionType,
|
||||
BasicScoringFnParams,
|
||||
ScoringFn,
|
||||
)
|
||||
|
||||
|
||||
subset_of = ScoringFn(
|
||||
|
|
@ -14,4 +18,7 @@ subset_of = ScoringFn(
|
|||
return_type=NumberType(),
|
||||
provider_id="basic",
|
||||
provider_resource_id="subset-of",
|
||||
params=BasicScoringFnParams(
|
||||
aggregation_functions=[AggregationFunctionType.accuracy]
|
||||
),
|
||||
)
|
||||
|
|
|
|||
|
|
@@ -5,11 +5,11 @@
# the root directory of this source tree.
import re

from typing import Any, Dict, Optional

from llama_stack.apis.scoring import ScoringResultRow
from llama_stack.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType
from llama_stack.providers.utils.scoring.base_scoring_fn import BaseScoringFn
from llama_stack.apis.scoring_functions import *  # noqa: F401, F403
from llama_stack.apis.scoring import *  # noqa: F401, F403
from llama_stack.apis.common.type_system import *  # noqa: F403
from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_accuracy

from .fn_defs.regex_parser_multiple_choice_answer import (
    regex_parser_multiple_choice_answer,

@@ -60,8 +60,3 @@ class RegexParserScoringFn(BaseScoringFn):
        return {
            "score": score,
        }

    async def aggregate(
        self, scoring_results: List[ScoringResultRow]
    ) -> Dict[str, Any]:
        return aggregate_accuracy(scoring_results)

@@ -4,11 +4,11 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any, Dict, Optional

from llama_stack.apis.scoring import ScoringResultRow
from llama_stack.apis.scoring_functions import ScoringFnParams
from llama_stack.providers.utils.scoring.base_scoring_fn import BaseScoringFn
from llama_stack.apis.scoring_functions import *  # noqa: F401, F403
from llama_stack.apis.scoring import *  # noqa: F401, F403
from llama_stack.apis.common.type_system import *  # noqa: F403
from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_accuracy

from .fn_defs.subset_of import subset_of


@@ -36,8 +36,3 @@ class SubsetOfScoringFn(BaseScoringFn):
        return {
            "score": score,
        }

    async def aggregate(
        self, scoring_results: List[ScoringResultRow]
    ) -> Dict[str, Any]:
        return aggregate_accuracy(scoring_results)
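Both of the scoring functions above delegate per-row aggregation to `aggregate_accuracy` from `llama_stack.providers.utils.scoring.aggregation_utils`. As a rough illustration of what that aggregation amounts to, here is a minimal sketch; the exact return shape of the real helper is an assumption and may differ, but each `ScoringResultRow` carries a numeric "score" as shown in the hunks above.

# Illustrative sketch only; the real helper lives in
# llama_stack.providers.utils.scoring.aggregation_utils and may differ in detail.
from typing import Any, Dict, List


def sketch_aggregate_accuracy(rows: List[Dict[str, Any]]) -> Dict[str, Any]:
    # Treat each row's "score" (0.0 or 1.0) as a correctness indicator and average it.
    num_total = len(rows)
    num_correct = sum(row["score"] for row in rows)
    return {
        "accuracy": num_correct / num_total if num_total else 0.0,
        "num_correct": num_correct,
        "num_total": num_total,
    }


print(sketch_aggregate_accuracy([{"score": 1.0}, {"score": 0.0}]))  # {'accuracy': 0.5, ...}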
@@ -5,9 +5,10 @@
# the root directory of this source tree.
from typing import Dict

from llama_stack.distribution.datatypes import Api, ProviderSpec
from pydantic import BaseModel

from llama_stack.distribution.datatypes import Api, ProviderSpec

from .config import BraintrustScoringConfig


@@ -16,6 +16,7 @@ import os

from autoevals.llm import Factuality
from autoevals.ragas import AnswerCorrectness

from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate


@@ -85,7 +86,7 @@ class BraintrustScoringImpl(

    async def set_api_key(self) -> None:
        # api key is in the request headers
        if self.config.openai_api_key is None:
        if not self.config.openai_api_key:
            provider_data = self.get_request_provider_data()
            if provider_data is None or not provider_data.openai_api_key:
                raise ValueError(

@@ -146,7 +147,7 @@ class BraintrustScoringImpl(
                await self.score_row(input_row, scoring_fn_id)
                for input_row in input_rows
            ]
            aggregation_functions = [AggregationFunctionType.average]
            agg_results = aggregate_average(score_results)
            res[scoring_fn_id] = ScoringResult(
                score_rows=score_results,

@@ -11,3 +11,9 @@ class BraintrustScoringConfig(BaseModel):
        default=None,
        description="The OpenAI API Key",
    )

    @classmethod
    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
        return {
            "openai_api_key": "${env.OPENAI_API_KEY:}",
        }
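The Braintrust config now exposes a `sample_run_config` that resolves the key from `${env.OPENAI_API_KEY:}`, and `set_api_key` falls back to request provider data when that value is empty. A minimal sketch of constructing the config directly; the import path is an assumption based on the provider layout, not shown verbatim in this diff.

# Minimal sketch, assuming BraintrustScoringConfig only needs the optional openai_api_key
# shown above. In a running stack the key can instead arrive via request provider data,
# which is what set_api_key() falls back to when this config value is empty.
import os

# Assumed module path for the Braintrust scoring provider config.
from llama_stack.providers.inline.scoring.braintrust.config import BraintrustScoringConfig

config = BraintrustScoringConfig(openai_api_key=os.environ.get("OPENAI_API_KEY"))
print(config.openai_api_key is not None)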
@@ -120,7 +120,9 @@ class LlmAsJudgeScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
            score_results = await scoring_fn.score(
                input_rows, scoring_fn_id, scoring_fn_params
            )
            agg_results = await scoring_fn.aggregate(score_results)
            agg_results = await scoring_fn.aggregate(
                score_results, scoring_fn_id, scoring_fn_params
            )
            res[scoring_fn_id] = ScoringResult(
                score_rows=score_results,
                aggregated_results=agg_results,

@@ -5,7 +5,7 @@
# the root directory of this source tree.

from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import ScoringFn
from llama_stack.apis.scoring_functions import LLMAsJudgeScoringFnParams, ScoringFn


llm_as_judge_base = ScoringFn(

@@ -14,4 +14,8 @@ llm_as_judge_base = ScoringFn(
    return_type=NumberType(),
    provider_id="llm-as-judge",
    provider_resource_id="llm-as-judge-base",
    params=LLMAsJudgeScoringFnParams(
        judge_model="meta-llama/Llama-3.1-405B-Instruct",
        prompt_template="Enter custom LLM as Judge Prompt Template",
    ),
)

@@ -3,13 +3,16 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import re

from typing import Any, Dict, Optional

from llama_stack.apis.inference.inference import Inference

from llama_stack.apis.scoring import ScoringResultRow
from llama_stack.apis.scoring_functions import ScoringFnParams

from llama_stack.providers.utils.scoring.base_scoring_fn import BaseScoringFn
from llama_stack.apis.scoring_functions import *  # noqa: F401, F403
from llama_stack.apis.scoring import *  # noqa: F401, F403
from llama_stack.apis.common.type_system import *  # noqa: F403
import re

from .fn_defs.llm_as_judge_405b_simpleqa import llm_as_judge_405b_simpleqa


@@ -85,9 +88,3 @@ class LlmAsJudgeScoringFn(BaseScoringFn):
            "score": judge_rating,
            "judge_feedback": content,
        }

    async def aggregate(
        self, scoring_results: List[ScoringResultRow]
    ) -> Dict[str, Any]:
        # TODO: this needs to be config based aggregation, and only useful w/ Jobs API
        return {}
llama_stack/providers/inline/telemetry/__init__.py (new file, +5 lines)

@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
@@ -0,0 +1,19 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any, Dict

from .config import TelemetryConfig, TelemetrySink

__all__ = ["TelemetryConfig", "TelemetrySink"]


async def get_provider_impl(config: TelemetryConfig, deps: Dict[str, Any]):
    from .telemetry import TelemetryAdapter

    impl = TelemetryAdapter(config, deps)
    await impl.initialize()
    return impl
@@ -0,0 +1,58 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from enum import Enum
from typing import Any, Dict, List

from pydantic import BaseModel, Field, field_validator

from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR


class TelemetrySink(str, Enum):
    OTEL = "otel"
    SQLITE = "sqlite"
    CONSOLE = "console"


class TelemetryConfig(BaseModel):
    otel_endpoint: str = Field(
        default="http://localhost:4318/v1/traces",
        description="The OpenTelemetry collector endpoint URL",
    )
    service_name: str = Field(
        default="llama-stack",
        description="The service name to use for telemetry",
    )
    sinks: List[TelemetrySink] = Field(
        default=[TelemetrySink.CONSOLE, TelemetrySink.SQLITE],
        description="List of telemetry sinks to enable (possible values: otel, sqlite, console)",
    )
    sqlite_db_path: str = Field(
        default=(RUNTIME_BASE_DIR / "trace_store.db").as_posix(),
        description="The path to the SQLite database to use for storing traces",
    )

    @field_validator("sinks", mode="before")
    @classmethod
    def validate_sinks(cls, v):
        if isinstance(v, str):
            return [TelemetrySink(sink.strip()) for sink in v.split(",")]
        return v

    @classmethod
    def sample_run_config(
        cls, __distro_dir__: str = "runtime", db_name: str = "trace_store.db"
    ) -> Dict[str, Any]:
        return {
            "service_name": "${env.OTEL_SERVICE_NAME:llama-stack}",
            "sinks": "${env.TELEMETRY_SINKS:console,sqlite}",
            "sqlite_db_path": "${env.SQLITE_DB_PATH:~/.llama/"
            + __distro_dir__
            + "/"
            + db_name
            + "}",
        }
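A small usage sketch of the `TelemetryConfig` defined above: the `sinks` validator accepts a comma-separated string (as produced by the `${env.TELEMETRY_SINKS:console,sqlite}` sample) and turns it into `TelemetrySink` enum values. The module path matches the `config_class` registered later in this change.

from llama_stack.providers.inline.telemetry.meta_reference.config import (
    TelemetryConfig,
    TelemetrySink,
)

# The "before" validator converts a comma-separated env string into enum members.
config = TelemetryConfig(sinks="console,otel")
assert config.sinks == [TelemetrySink.CONSOLE, TelemetrySink.OTEL]
print(config.otel_endpoint)   # http://localhost:4318/v1/traces by default
print(config.sqlite_db_path)  # RUNTIME_BASE_DIR / "trace_store.db" by default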
@ -0,0 +1,117 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
from opentelemetry.sdk.trace import ReadableSpan
|
||||
from opentelemetry.sdk.trace.export import SpanProcessor
|
||||
from opentelemetry.trace.status import StatusCode
|
||||
|
||||
# Colors for console output
|
||||
COLORS = {
|
||||
"reset": "\033[0m",
|
||||
"bold": "\033[1m",
|
||||
"dim": "\033[2m",
|
||||
"red": "\033[31m",
|
||||
"green": "\033[32m",
|
||||
"yellow": "\033[33m",
|
||||
"blue": "\033[34m",
|
||||
"magenta": "\033[35m",
|
||||
"cyan": "\033[36m",
|
||||
"white": "\033[37m",
|
||||
}
|
||||
|
||||
|
||||
class ConsoleSpanProcessor(SpanProcessor):
|
||||
|
||||
def __init__(self, print_attributes: bool = False):
|
||||
self.print_attributes = print_attributes
|
||||
|
||||
def on_start(self, span: ReadableSpan, parent_context=None) -> None:
|
||||
if span.attributes and span.attributes.get("__autotraced__"):
|
||||
return
|
||||
|
||||
timestamp = datetime.utcfromtimestamp(span.start_time / 1e9).strftime(
|
||||
"%H:%M:%S.%f"
|
||||
)[:-3]
|
||||
|
||||
print(
|
||||
f"{COLORS['dim']}{timestamp}{COLORS['reset']} "
|
||||
f"{COLORS['magenta']}[START]{COLORS['reset']} "
|
||||
f"{COLORS['dim']}{span.name}{COLORS['reset']}"
|
||||
)
|
||||
|
||||
def on_end(self, span: ReadableSpan) -> None:
|
||||
if span.attributes and span.attributes.get("__autotraced__"):
|
||||
return
|
||||
|
||||
timestamp = datetime.utcfromtimestamp(span.end_time / 1e9).strftime(
|
||||
"%H:%M:%S.%f"
|
||||
)[:-3]
|
||||
|
||||
span_context = (
|
||||
f"{COLORS['dim']}{timestamp}{COLORS['reset']} "
|
||||
f"{COLORS['magenta']}[END]{COLORS['reset']} "
|
||||
f"{COLORS['dim']}{span.name}{COLORS['reset']}"
|
||||
)
|
||||
|
||||
if span.status.status_code == StatusCode.ERROR:
|
||||
span_context += f"{COLORS['reset']} {COLORS['red']}[ERROR]{COLORS['reset']}"
|
||||
elif span.status.status_code != StatusCode.UNSET:
|
||||
span_context += f"{COLORS['reset']} [{span.status.status_code}]"
|
||||
|
||||
duration_ms = (span.end_time - span.start_time) / 1e6
|
||||
span_context += f"{COLORS['reset']} ({duration_ms:.2f}ms)"
|
||||
|
||||
print(span_context)
|
||||
|
||||
if self.print_attributes and span.attributes:
|
||||
for key, value in span.attributes.items():
|
||||
if key.startswith("__"):
|
||||
continue
|
||||
str_value = str(value)
|
||||
if len(str_value) > 1000:
|
||||
str_value = str_value[:997] + "..."
|
||||
print(f" {COLORS['dim']}{key}: {str_value}{COLORS['reset']}")
|
||||
|
||||
for event in span.events:
|
||||
event_time = datetime.utcfromtimestamp(event.timestamp / 1e9).strftime(
|
||||
"%H:%M:%S.%f"
|
||||
)[:-3]
|
||||
|
||||
severity = event.attributes.get("severity", "info")
|
||||
message = event.attributes.get("message", event.name)
|
||||
if isinstance(message, (dict, list)):
|
||||
message = json.dumps(message, indent=2)
|
||||
|
||||
severity_colors = {
|
||||
"error": f"{COLORS['bold']}{COLORS['red']}",
|
||||
"warn": f"{COLORS['bold']}{COLORS['yellow']}",
|
||||
"info": COLORS["white"],
|
||||
"debug": COLORS["dim"],
|
||||
}
|
||||
msg_color = severity_colors.get(severity, COLORS["white"])
|
||||
|
||||
print(
|
||||
f" {event_time} "
|
||||
f"{msg_color}[{severity.upper()}] "
|
||||
f"{message}{COLORS['reset']}"
|
||||
)
|
||||
|
||||
if event.attributes:
|
||||
for key, value in event.attributes.items():
|
||||
if key.startswith("__") or key in ["message", "severity"]:
|
||||
continue
|
||||
print(f" {COLORS['dim']}{key}: {value}{COLORS['reset']}")
|
||||
|
||||
def shutdown(self) -> None:
|
||||
"""Shutdown the processor."""
|
||||
pass
|
||||
|
||||
def force_flush(self, timeout_millis: float = None) -> bool:
|
||||
"""Force flush any pending spans."""
|
||||
return True
|
||||
|
|
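The console processor above is a standard OpenTelemetry `SpanProcessor`, so it can be wired into a tracer provider directly, mirroring what the telemetry adapter does when the console sink is enabled. A minimal sketch using only opentelemetry-sdk APIs:

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import (
    ConsoleSpanProcessor,
)

provider = TracerProvider()
provider.add_span_processor(ConsoleSpanProcessor(print_attributes=True))
trace.set_tracer_provider(provider)

with trace.get_tracer(__name__).start_as_current_span("demo-span"):
    pass  # [START] and [END] lines (plus attributes) are printed to the console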
@ -0,0 +1,177 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import json
|
||||
import os
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
|
||||
from opentelemetry.sdk.trace import SpanProcessor
|
||||
from opentelemetry.trace import Span
|
||||
|
||||
|
||||
class SQLiteSpanProcessor(SpanProcessor):
|
||||
def __init__(self, conn_string):
|
||||
"""Initialize the SQLite span processor with a connection string."""
|
||||
self.conn_string = conn_string
|
||||
self.conn = None
|
||||
self.setup_database()
|
||||
|
||||
def _get_connection(self) -> sqlite3.Connection:
|
||||
"""Get the database connection."""
|
||||
if self.conn is None:
|
||||
self.conn = sqlite3.connect(self.conn_string, check_same_thread=False)
|
||||
return self.conn
|
||||
|
||||
def setup_database(self):
|
||||
"""Create the necessary tables if they don't exist."""
|
||||
# Create directory if it doesn't exist
|
||||
os.makedirs(os.path.dirname(self.conn_string), exist_ok=True)
|
||||
|
||||
conn = self._get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
cursor.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS traces (
|
||||
trace_id TEXT PRIMARY KEY,
|
||||
service_name TEXT,
|
||||
root_span_id TEXT,
|
||||
start_time TIMESTAMP,
|
||||
end_time TIMESTAMP,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
cursor.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS spans (
|
||||
span_id TEXT PRIMARY KEY,
|
||||
trace_id TEXT REFERENCES traces(trace_id),
|
||||
parent_span_id TEXT,
|
||||
name TEXT,
|
||||
start_time TIMESTAMP,
|
||||
end_time TIMESTAMP,
|
||||
attributes TEXT,
|
||||
status TEXT,
|
||||
kind TEXT
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
cursor.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS span_events (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
span_id TEXT REFERENCES spans(span_id),
|
||||
name TEXT,
|
||||
timestamp TIMESTAMP,
|
||||
attributes TEXT
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
cursor.execute(
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_traces_created_at
|
||||
ON traces(created_at)
|
||||
"""
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
cursor.close()
|
||||
|
||||
def on_start(self, span: Span, parent_context=None):
|
||||
"""Called when a span starts."""
|
||||
pass
|
||||
|
||||
def on_end(self, span: Span):
|
||||
"""Called when a span ends. Export the span data to SQLite."""
|
||||
try:
|
||||
conn = self._get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
trace_id = format(span.get_span_context().trace_id, "032x")
|
||||
span_id = format(span.get_span_context().span_id, "016x")
|
||||
service_name = span.resource.attributes.get("service.name", "unknown")
|
||||
|
||||
parent_span_id = None
|
||||
parent_context = span.parent
|
||||
if parent_context:
|
||||
parent_span_id = format(parent_context.span_id, "016x")
|
||||
|
||||
# Insert into traces
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO traces (
|
||||
trace_id, service_name, root_span_id, start_time, end_time
|
||||
) VALUES (?, ?, ?, ?, ?)
|
||||
ON CONFLICT(trace_id) DO UPDATE SET
|
||||
root_span_id = COALESCE(root_span_id, excluded.root_span_id),
|
||||
start_time = MIN(excluded.start_time, start_time),
|
||||
end_time = MAX(excluded.end_time, end_time)
|
||||
""",
|
||||
(
|
||||
trace_id,
|
||||
service_name,
|
||||
(span_id if not parent_span_id else None),
|
||||
datetime.fromtimestamp(span.start_time / 1e9).isoformat(),
|
||||
datetime.fromtimestamp(span.end_time / 1e9).isoformat(),
|
||||
),
|
||||
)
|
||||
|
||||
# Insert into spans
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO spans (
|
||||
span_id, trace_id, parent_span_id, name,
|
||||
start_time, end_time, attributes, status,
|
||||
kind
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
span_id,
|
||||
trace_id,
|
||||
parent_span_id,
|
||||
span.name,
|
||||
datetime.fromtimestamp(span.start_time / 1e9).isoformat(),
|
||||
datetime.fromtimestamp(span.end_time / 1e9).isoformat(),
|
||||
json.dumps(dict(span.attributes)),
|
||||
span.status.status_code.name,
|
||||
span.kind.name,
|
||||
),
|
||||
)
|
||||
|
||||
for event in span.events:
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO span_events (
|
||||
span_id, name, timestamp, attributes
|
||||
) VALUES (?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
span_id,
|
||||
event.name,
|
||||
datetime.fromtimestamp(event.timestamp / 1e9).isoformat(),
|
||||
json.dumps(dict(event.attributes)),
|
||||
),
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
cursor.close()
|
||||
except Exception as e:
|
||||
print(f"Error exporting span to SQLite: {e}")
|
||||
|
||||
def shutdown(self):
|
||||
"""Cleanup any resources."""
|
||||
if self.conn:
|
||||
self.conn.close()
|
||||
self.conn = None
|
||||
|
||||
def force_flush(self, timeout_millis=30000):
|
||||
"""Force export of spans."""
|
||||
pass
|
||||
|
|
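Because the SQLite processor persists every finished span, traces can be inspected with plain `sqlite3` using the schema created in `setup_database()` above. The database path below is an assumption; by default it comes from `TelemetryConfig.sqlite_db_path` (RUNTIME_BASE_DIR / "trace_store.db").

import sqlite3

conn = sqlite3.connect("trace_store.db")
rows = conn.execute(
    "SELECT trace_id, service_name, start_time, end_time "
    "FROM traces ORDER BY created_at DESC LIMIT 5"
).fetchall()
for trace_id, service_name, start, end in rows:
    print(trace_id, service_name, start, end)
conn.close()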
@ -5,6 +5,7 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
import threading
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from opentelemetry import metrics, trace
|
||||
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
|
||||
|
|
@ -16,10 +17,21 @@ from opentelemetry.sdk.trace import TracerProvider
|
|||
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
||||
from opentelemetry.semconv.resource import ResourceAttributes
|
||||
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import (
|
||||
ConsoleSpanProcessor,
|
||||
)
|
||||
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.sqlite_span_processor import (
|
||||
SQLiteSpanProcessor,
|
||||
)
|
||||
from llama_stack.providers.utils.telemetry.dataset_mixin import TelemetryDatasetMixin
|
||||
from llama_stack.providers.utils.telemetry.sqlite_trace_store import SQLiteTraceStore
|
||||
|
||||
from llama_stack.apis.telemetry import * # noqa: F403
|
||||
|
||||
from .config import OpenTelemetryConfig
|
||||
from llama_stack.distribution.datatypes import Api
|
||||
|
||||
from .config import TelemetryConfig, TelemetrySink
|
||||
|
||||
_GLOBAL_STORAGE = {
|
||||
"active_spans": {},
|
||||
|
|
@ -45,9 +57,10 @@ def is_tracing_enabled(tracer):
|
|||
return span.is_recording()
|
||||
|
||||
|
||||
class OpenTelemetryAdapter(Telemetry):
|
||||
def __init__(self, config: OpenTelemetryConfig):
|
||||
class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
|
||||
def __init__(self, config: TelemetryConfig, deps: Dict[str, Any]) -> None:
|
||||
self.config = config
|
||||
self.datasetio_api = deps[Api.datasetio]
|
||||
|
||||
resource = Resource.create(
|
||||
{
|
||||
|
|
@ -57,22 +70,29 @@ class OpenTelemetryAdapter(Telemetry):
|
|||
|
||||
provider = TracerProvider(resource=resource)
|
||||
trace.set_tracer_provider(provider)
|
||||
otlp_exporter = OTLPSpanExporter(
|
||||
endpoint=self.config.otel_endpoint,
|
||||
)
|
||||
span_processor = BatchSpanProcessor(otlp_exporter)
|
||||
trace.get_tracer_provider().add_span_processor(span_processor)
|
||||
# Set up metrics
|
||||
metric_reader = PeriodicExportingMetricReader(
|
||||
OTLPMetricExporter(
|
||||
if TelemetrySink.OTEL in self.config.sinks:
|
||||
otlp_exporter = OTLPSpanExporter(
|
||||
endpoint=self.config.otel_endpoint,
|
||||
)
|
||||
)
|
||||
metric_provider = MeterProvider(
|
||||
resource=resource, metric_readers=[metric_reader]
|
||||
)
|
||||
metrics.set_meter_provider(metric_provider)
|
||||
self.meter = metrics.get_meter(__name__)
|
||||
span_processor = BatchSpanProcessor(otlp_exporter)
|
||||
trace.get_tracer_provider().add_span_processor(span_processor)
|
||||
metric_reader = PeriodicExportingMetricReader(
|
||||
OTLPMetricExporter(
|
||||
endpoint=self.config.otel_endpoint,
|
||||
)
|
||||
)
|
||||
metric_provider = MeterProvider(
|
||||
resource=resource, metric_readers=[metric_reader]
|
||||
)
|
||||
metrics.set_meter_provider(metric_provider)
|
||||
self.meter = metrics.get_meter(__name__)
|
||||
if TelemetrySink.SQLITE in self.config.sinks:
|
||||
trace.get_tracer_provider().add_span_processor(
|
||||
SQLiteSpanProcessor(self.config.sqlite_db_path)
|
||||
)
|
||||
self.trace_store = SQLiteTraceStore(self.config.sqlite_db_path)
|
||||
if TelemetrySink.CONSOLE in self.config.sinks:
|
||||
trace.get_tracer_provider().add_span_processor(ConsoleSpanProcessor())
|
||||
self._lock = _global_lock
|
||||
|
||||
async def initialize(self) -> None:
|
||||
|
|
@ -83,15 +103,17 @@ class OpenTelemetryAdapter(Telemetry):
|
|||
trace.get_tracer_provider().shutdown()
|
||||
metrics.get_meter_provider().shutdown()
|
||||
|
||||
async def log_event(self, event: Event) -> None:
|
||||
async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
|
||||
if isinstance(event, UnstructuredLogEvent):
|
||||
self._log_unstructured(event)
|
||||
self._log_unstructured(event, ttl_seconds)
|
||||
elif isinstance(event, MetricEvent):
|
||||
self._log_metric(event)
|
||||
elif isinstance(event, StructuredLogEvent):
|
||||
self._log_structured(event)
|
||||
self._log_structured(event, ttl_seconds)
|
||||
else:
|
||||
raise ValueError(f"Unknown event type: {event}")
|
||||
|
||||
def _log_unstructured(self, event: UnstructuredLogEvent) -> None:
|
||||
def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None:
|
||||
with self._lock:
|
||||
# Use global storage instead of instance storage
|
||||
span_id = string_to_span_id(event.span_id)
|
||||
|
|
@ -104,6 +126,7 @@ class OpenTelemetryAdapter(Telemetry):
|
|||
attributes={
|
||||
"message": event.message,
|
||||
"severity": event.severity.value,
|
||||
"__ttl__": ttl_seconds,
|
||||
**event.attributes,
|
||||
},
|
||||
timestamp=timestamp_ns,
|
||||
|
|
@ -154,11 +177,14 @@ class OpenTelemetryAdapter(Telemetry):
|
|||
)
|
||||
return _GLOBAL_STORAGE["up_down_counters"][name]
|
||||
|
||||
def _log_structured(self, event: StructuredLogEvent) -> None:
|
||||
def _log_structured(self, event: StructuredLogEvent, ttl_seconds: int) -> None:
|
||||
with self._lock:
|
||||
span_id = string_to_span_id(event.span_id)
|
||||
trace_id = string_to_trace_id(event.trace_id)
|
||||
tracer = trace.get_tracer(__name__)
|
||||
if event.attributes is None:
|
||||
event.attributes = {}
|
||||
event.attributes["__ttl__"] = ttl_seconds
|
||||
|
||||
if isinstance(event.payload, SpanStartPayload):
|
||||
# Check if span already exists to prevent duplicates
|
||||
|
|
@ -170,7 +196,6 @@ class OpenTelemetryAdapter(Telemetry):
|
|||
parent_span_id = string_to_span_id(event.payload.parent_span_id)
|
||||
parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id)
|
||||
|
||||
# Create a new trace context with the trace_id
|
||||
context = trace.Context(trace_id=trace_id)
|
||||
if parent_span:
|
||||
context = trace.set_span_in_context(parent_span, context)
|
||||
|
|
@ -179,14 +204,9 @@ class OpenTelemetryAdapter(Telemetry):
|
|||
name=event.payload.name,
|
||||
context=context,
|
||||
attributes=event.attributes or {},
|
||||
start_time=int(event.timestamp.timestamp() * 1e9),
|
||||
)
|
||||
_GLOBAL_STORAGE["active_spans"][span_id] = span
|
||||
|
||||
# Set as current span using context manager
|
||||
with trace.use_span(span, end_on_exit=False):
|
||||
pass # Let the span continue beyond this block
|
||||
|
||||
elif isinstance(event.payload, SpanEndPayload):
|
||||
span = _GLOBAL_STORAGE["active_spans"].get(span_id)
|
||||
if span:
|
||||
|
|
@ -199,10 +219,33 @@ class OpenTelemetryAdapter(Telemetry):
|
|||
else trace.Status(status_code=trace.StatusCode.ERROR)
|
||||
)
|
||||
span.set_status(status)
|
||||
span.end(end_time=int(event.timestamp.timestamp() * 1e9))
|
||||
|
||||
# Remove from active spans
|
||||
span.end()
|
||||
_GLOBAL_STORAGE["active_spans"].pop(span_id, None)
|
||||
else:
|
||||
raise ValueError(f"Unknown structured log event: {event}")
|
||||
|
||||
async def get_trace(self, trace_id: str) -> Trace:
|
||||
raise NotImplementedError("Trace retrieval not implemented yet")
|
||||
async def query_traces(
|
||||
self,
|
||||
attribute_filters: Optional[List[QueryCondition]] = None,
|
||||
limit: Optional[int] = 100,
|
||||
offset: Optional[int] = 0,
|
||||
order_by: Optional[List[str]] = None,
|
||||
) -> List[Trace]:
|
||||
return await self.trace_store.query_traces(
|
||||
attribute_filters=attribute_filters,
|
||||
limit=limit,
|
||||
offset=offset,
|
||||
order_by=order_by,
|
||||
)
|
||||
|
||||
async def get_span_tree(
|
||||
self,
|
||||
span_id: str,
|
||||
attributes_to_return: Optional[List[str]] = None,
|
||||
max_depth: Optional[int] = None,
|
||||
) -> SpanWithChildren:
|
||||
return await self.trace_store.get_span_tree(
|
||||
span_id=span_id,
|
||||
attributes_to_return=attributes_to_return,
|
||||
max_depth=max_depth,
|
||||
)
|
||||
|
|
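A hedged sketch of reading traces back through the new query APIs shown above; `query_traces` and `get_span_tree` delegate to `SQLiteTraceStore`, so the SQLite sink must be enabled. Constructing the adapter directly with a datasetio dependency is an assumption made for illustration; inside a running stack the Telemetry API is normally resolved for you.

from llama_stack.distribution.datatypes import Api
from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
from llama_stack.providers.inline.telemetry.meta_reference.telemetry import TelemetryAdapter


async def show_recent_traces(datasetio_impl) -> None:
    # Default config enables the console and sqlite sinks, so trace_store is available.
    adapter = TelemetryAdapter(TelemetryConfig(), deps={Api.datasetio: datasetio_impl})
    await adapter.initialize()
    traces = await adapter.query_traces(limit=5)
    for t in traces:
        print(t)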
@@ -61,6 +61,17 @@ def available_providers() -> List[ProviderSpec]:
                config_class="llama_stack.providers.remote.inference.sample.SampleConfig",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="cerebras",
                pip_packages=[
                    "cerebras_cloud_sdk",
                ],
                module="llama_stack.providers.remote.inference.cerebras",
                config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
@@ -53,9 +53,16 @@ def available_providers() -> List[ProviderSpec]:
                adapter_type="chromadb",
                pip_packages=EMBEDDING_DEPS + ["chromadb-client"],
                module="llama_stack.providers.remote.memory.chroma",
                config_class="llama_stack.distribution.datatypes.RemoteProviderConfig",
                config_class="llama_stack.providers.remote.memory.chroma.ChromaRemoteImplConfig",
            ),
        ),
        InlineProviderSpec(
            api=Api.memory,
            provider_type="inline::chromadb",
            pip_packages=EMBEDDING_DEPS + ["chromadb"],
            module="llama_stack.providers.inline.memory.chroma",
            config_class="llama_stack.providers.inline.memory.chroma.ChromaInlineImplConfig",
        ),
        remote_provider_spec(
            Api.memory,
            AdapterSpec(
@@ -14,9 +14,13 @@ def available_providers() -> List[ProviderSpec]:
        InlineProviderSpec(
            api=Api.telemetry,
            provider_type="inline::meta-reference",
            pip_packages=[],
            module="llama_stack.providers.inline.meta_reference.telemetry",
            config_class="llama_stack.providers.inline.meta_reference.telemetry.ConsoleConfig",
            pip_packages=[
                "opentelemetry-sdk",
                "opentelemetry-exporter-otlp-proto-http",
            ],
            api_dependencies=[Api.datasetio],
            module="llama_stack.providers.inline.telemetry.meta_reference",
            config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
        ),
        remote_provider_spec(
            api=Api.telemetry,

@@ -27,18 +31,4 @@ def available_providers() -> List[ProviderSpec]:
                config_class="llama_stack.providers.remote.telemetry.sample.SampleConfig",
            ),
        ),
        remote_provider_spec(
            api=Api.telemetry,
            adapter=AdapterSpec(
                adapter_type="opentelemetry-jaeger",
                pip_packages=[
                    "opentelemetry-api",
                    "opentelemetry-sdk",
                    "opentelemetry-exporter-jaeger",
                    "opentelemetry-semantic-conventions",
                ],
                module="llama_stack.providers.remote.telemetry.opentelemetry",
                config_class="llama_stack.providers.remote.telemetry.opentelemetry.OpenTelemetryConfig",
            ),
        ),
    ]
@@ -3,7 +3,7 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Optional
from typing import Any, Dict, List, Optional

from llama_stack.apis.datasetio import *  # noqa: F403


@@ -64,6 +64,11 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
        )
        self.dataset_infos[dataset_def.identifier] = dataset_def

    async def unregister_dataset(self, dataset_id: str) -> None:
        key = f"{DATASETS_PREFIX}{dataset_id}"
        await self.kvstore.delete(key=key)
        del self.dataset_infos[dataset_id]

    async def get_rows_paginated(
        self,
        dataset_id: str,

@@ -95,3 +100,22 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
            total_count=len(rows),
            next_page_token=str(end),
        )

    async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
        dataset_def = self.dataset_infos[dataset_id]
        loaded_dataset = load_hf_dataset(dataset_def)

        # Convert rows to HF Dataset format
        new_dataset = hf_datasets.Dataset.from_list(rows)

        # Concatenate the new rows with existing dataset
        updated_dataset = hf_datasets.concatenate_datasets(
            [loaded_dataset, new_dataset]
        )

        if dataset_def.metadata.get("path", None):
            updated_dataset.push_to_hub(dataset_def.metadata["path"])
        else:
            raise NotImplementedError(
                "Uploading to URL-based datasets is not supported yet"
            )
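An illustrative call pattern for the new `append_rows` API on the Huggingface DatasetIO provider: rows are plain dicts that get converted to a `datasets.Dataset` and pushed back to the hub when the dataset was registered with a metadata "path". The dataset id and column names below are hypothetical.

async def add_eval_rows(datasetio_impl) -> None:
    # Appends rows to a previously registered dataset; identifiers are examples only.
    await datasetio_impl.append_rows(
        dataset_id="my-eval-dataset",
        rows=[
            {"input_query": "What is 2 + 2?", "expected_answer": "4"},
        ],
    )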
llama_stack/providers/remote/inference/cerebras/__init__.py (new file, +21 lines)

@@ -0,0 +1,21 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .config import CerebrasImplConfig


async def get_adapter_impl(config: CerebrasImplConfig, _deps):
    from .cerebras import CerebrasInferenceAdapter

    assert isinstance(
        config, CerebrasImplConfig
    ), f"Unexpected config type: {type(config)}"

    impl = CerebrasInferenceAdapter(config)

    await impl.initialize()

    return impl
llama_stack/providers/remote/inference/cerebras/cerebras.py (new file, +191 lines)
@ -0,0 +1,191 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from cerebras.cloud.sdk import AsyncCerebras
|
||||
|
||||
from llama_models.llama3.api.chat_format import ChatFormat
|
||||
|
||||
from llama_models.llama3.api.datatypes import Message
|
||||
from llama_models.llama3.api.tokenizer import Tokenizer
|
||||
|
||||
from llama_stack.apis.inference import * # noqa: F403
|
||||
|
||||
from llama_models.datatypes import CoreModelId
|
||||
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
build_model_alias,
|
||||
ModelRegistryHelper,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
get_sampling_options,
|
||||
process_chat_completion_response,
|
||||
process_chat_completion_stream_response,
|
||||
process_completion_response,
|
||||
process_completion_stream_response,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
chat_completion_request_to_prompt,
|
||||
completion_request_to_prompt,
|
||||
)
|
||||
|
||||
from .config import CerebrasImplConfig
|
||||
|
||||
|
||||
model_aliases = [
|
||||
build_model_alias(
|
||||
"llama3.1-8b",
|
||||
CoreModelId.llama3_1_8b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
"llama3.1-70b",
|
||||
CoreModelId.llama3_1_70b_instruct.value,
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
|
||||
def __init__(self, config: CerebrasImplConfig) -> None:
|
||||
ModelRegistryHelper.__init__(
|
||||
self,
|
||||
model_aliases=model_aliases,
|
||||
)
|
||||
self.config = config
|
||||
self.formatter = ChatFormat(Tokenizer.get_instance())
|
||||
|
||||
self.client = AsyncCerebras(
|
||||
base_url=self.config.base_url, api_key=self.config.api_key
|
||||
)
|
||||
|
||||
async def initialize(self) -> None:
|
||||
return
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
pass
|
||||
|
||||
async def completion(
|
||||
self,
|
||||
model_id: str,
|
||||
content: InterleavedTextMedia,
|
||||
sampling_params: Optional[SamplingParams] = SamplingParams(),
|
||||
response_format: Optional[ResponseFormat] = None,
|
||||
stream: Optional[bool] = False,
|
||||
logprobs: Optional[LogProbConfig] = None,
|
||||
) -> AsyncGenerator:
|
||||
model = await self.model_store.get_model(model_id)
|
||||
request = CompletionRequest(
|
||||
model=model.provider_resource_id,
|
||||
content=content,
|
||||
sampling_params=sampling_params,
|
||||
response_format=response_format,
|
||||
stream=stream,
|
||||
logprobs=logprobs,
|
||||
)
|
||||
if stream:
|
||||
return self._stream_completion(
|
||||
request,
|
||||
)
|
||||
else:
|
||||
return await self._nonstream_completion(request)
|
||||
|
||||
async def _nonstream_completion(
|
||||
self, request: CompletionRequest
|
||||
) -> CompletionResponse:
|
||||
params = self._get_params(request)
|
||||
|
||||
r = await self.client.completions.create(**params)
|
||||
|
||||
return process_completion_response(r, self.formatter)
|
||||
|
||||
async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator:
|
||||
params = self._get_params(request)
|
||||
|
||||
stream = await self.client.completions.create(**params)
|
||||
|
||||
async for chunk in process_completion_stream_response(stream, self.formatter):
|
||||
yield chunk
|
||||
|
||||
async def chat_completion(
|
||||
self,
|
||||
model_id: str,
|
||||
messages: List[Message],
|
||||
sampling_params: Optional[SamplingParams] = SamplingParams(),
|
||||
tools: Optional[List[ToolDefinition]] = None,
|
||||
tool_choice: Optional[ToolChoice] = ToolChoice.auto,
|
||||
tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json,
|
||||
response_format: Optional[ResponseFormat] = None,
|
||||
stream: Optional[bool] = False,
|
||||
logprobs: Optional[LogProbConfig] = None,
|
||||
) -> AsyncGenerator:
|
||||
model = await self.model_store.get_model(model_id)
|
||||
request = ChatCompletionRequest(
|
||||
model=model.provider_resource_id,
|
||||
messages=messages,
|
||||
sampling_params=sampling_params,
|
||||
tools=tools or [],
|
||||
tool_choice=tool_choice,
|
||||
tool_prompt_format=tool_prompt_format,
|
||||
response_format=response_format,
|
||||
stream=stream,
|
||||
logprobs=logprobs,
|
||||
)
|
||||
|
||||
if stream:
|
||||
return self._stream_chat_completion(request)
|
||||
else:
|
||||
return await self._nonstream_chat_completion(request)
|
||||
|
||||
async def _nonstream_chat_completion(
|
||||
self, request: CompletionRequest
|
||||
) -> CompletionResponse:
|
||||
params = self._get_params(request)
|
||||
|
||||
r = await self.client.completions.create(**params)
|
||||
|
||||
return process_chat_completion_response(r, self.formatter)
|
||||
|
||||
async def _stream_chat_completion(
|
||||
self, request: CompletionRequest
|
||||
) -> AsyncGenerator:
|
||||
params = self._get_params(request)
|
||||
|
||||
stream = await self.client.completions.create(**params)
|
||||
|
||||
async for chunk in process_chat_completion_stream_response(
|
||||
stream, self.formatter
|
||||
):
|
||||
yield chunk
|
||||
|
||||
def _get_params(
|
||||
self, request: Union[ChatCompletionRequest, CompletionRequest]
|
||||
) -> dict:
|
||||
if request.sampling_params and request.sampling_params.top_k:
|
||||
raise ValueError("`top_k` not supported by Cerebras")
|
||||
|
||||
prompt = ""
|
||||
if type(request) == ChatCompletionRequest:
|
||||
prompt = chat_completion_request_to_prompt(
|
||||
request, self.get_llama_model(request.model), self.formatter
|
||||
)
|
||||
elif type(request) == CompletionRequest:
|
||||
prompt = completion_request_to_prompt(request, self.formatter)
|
||||
else:
|
||||
raise ValueError(f"Unknown request type {type(request)}")
|
||||
|
||||
return {
|
||||
"model": request.model,
|
||||
"prompt": prompt,
|
||||
"stream": request.stream,
|
||||
**get_sampling_options(request.sampling_params),
|
||||
}
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: List[InterleavedTextMedia],
|
||||
) -> EmbeddingsResponse:
|
||||
raise NotImplementedError()
|
||||
llama_stack/providers/remote/inference/cerebras/config.py (new file, +32 lines)

@@ -0,0 +1,32 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os
from typing import Any, Dict, Optional

from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel, Field

DEFAULT_BASE_URL = "https://api.cerebras.ai"


@json_schema_type
class CerebrasImplConfig(BaseModel):
    base_url: str = Field(
        default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL),
        description="Base URL for the Cerebras API",
    )
    api_key: Optional[str] = Field(
        default=os.environ.get("CEREBRAS_API_KEY"),
        description="Cerebras API Key",
    )

    @classmethod
    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
        return {
            "base_url": DEFAULT_BASE_URL,
            "api_key": "${env.CEREBRAS_API_KEY}",
        }
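A small sketch of constructing the Cerebras config outside of a run config; both fields default from the CEREBRAS_BASE_URL / CEREBRAS_API_KEY environment variables as defined above.

import os

from llama_stack.providers.remote.inference.cerebras.config import CerebrasImplConfig

config = CerebrasImplConfig(api_key=os.environ.get("CEREBRAS_API_KEY"))
print(config.base_url)  # https://api.cerebras.ai unless CEREBRAS_BASE_URL overrides it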
@ -9,6 +9,7 @@ from typing import AsyncIterator, List, Optional, Union
|
|||
|
||||
from llama_models.datatypes import SamplingParams
|
||||
from llama_models.llama3.api.datatypes import (
|
||||
ImageMedia,
|
||||
InterleavedTextMedia,
|
||||
Message,
|
||||
ToolChoice,
|
||||
|
|
@ -22,6 +23,7 @@ from llama_stack.apis.inference import (
|
|||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionResponseStreamChunk,
|
||||
CompletionRequest,
|
||||
CompletionResponse,
|
||||
CompletionResponseStreamChunk,
|
||||
EmbeddingsResponse,
|
||||
|
|
@ -37,8 +39,11 @@ from llama_stack.providers.utils.inference.model_registry import (
|
|||
from . import NVIDIAConfig
|
||||
from .openai_utils import (
|
||||
convert_chat_completion_request,
|
||||
convert_completion_request,
|
||||
convert_openai_chat_completion_choice,
|
||||
convert_openai_chat_completion_stream,
|
||||
convert_openai_completion_choice,
|
||||
convert_openai_completion_stream,
|
||||
)
|
||||
from .utils import _is_nvidia_hosted, check_health
|
||||
|
||||
|
|
@ -115,7 +120,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
timeout=self._config.timeout,
|
||||
)
|
||||
|
||||
def completion(
|
||||
async def completion(
|
||||
self,
|
||||
model_id: str,
|
||||
content: InterleavedTextMedia,
|
||||
|
|
@ -124,7 +129,38 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
|
|||
stream: Optional[bool] = False,
|
||||
logprobs: Optional[LogProbConfig] = None,
|
||||
) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]:
|
||||
raise NotImplementedError()
|
||||
if isinstance(content, ImageMedia) or (
|
||||
isinstance(content, list)
|
||||
and any(isinstance(c, ImageMedia) for c in content)
|
||||
):
|
||||
raise NotImplementedError("ImageMedia is not supported")
|
||||
|
||||
await check_health(self._config) # this raises errors
|
||||
|
||||
request = convert_completion_request(
|
||||
request=CompletionRequest(
|
||||
model=self.get_provider_model_id(model_id),
|
||||
content=content,
|
||||
sampling_params=sampling_params,
|
||||
response_format=response_format,
|
||||
stream=stream,
|
||||
logprobs=logprobs,
|
||||
),
|
||||
n=1,
|
||||
)
|
||||
|
||||
try:
|
||||
response = await self._client.completions.create(**request)
|
||||
except APIConnectionError as e:
|
||||
raise ConnectionError(
|
||||
f"Failed to connect to NVIDIA NIM at {self._config.url}: {e}"
|
||||
) from e
|
||||
|
||||
if stream:
|
||||
return convert_openai_completion_stream(response)
|
||||
else:
|
||||
# we pass n=1 to get only one completion
|
||||
return convert_openai_completion_choice(response.choices[0])
|
||||
|
||||
async def embeddings(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ from llama_models.llama3.api.datatypes import (
|
|||
ToolDefinition,
|
||||
)
|
||||
from openai import AsyncStream
|
||||
|
||||
from openai.types.chat import (
|
||||
ChatCompletionAssistantMessageParam as OpenAIChatCompletionAssistantMessage,
|
||||
ChatCompletionChunk as OpenAIChatCompletionChunk,
|
||||
|
|
@ -31,10 +30,11 @@ from openai.types.chat.chat_completion import (
|
|||
Choice as OpenAIChoice,
|
||||
ChoiceLogprobs as OpenAIChoiceLogprobs, # same as chat_completion_chunk ChoiceLogprobs
|
||||
)
|
||||
|
||||
from openai.types.chat.chat_completion_message_tool_call_param import (
|
||||
Function as OpenAIFunction,
|
||||
)
|
||||
from openai.types.completion import Completion as OpenAICompletion
|
||||
from openai.types.completion_choice import Logprobs as OpenAICompletionLogprobs
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
|
|
@ -42,6 +42,9 @@ from llama_stack.apis.inference import (
|
|||
ChatCompletionResponseEvent,
|
||||
ChatCompletionResponseEventType,
|
||||
ChatCompletionResponseStreamChunk,
|
||||
CompletionRequest,
|
||||
CompletionResponse,
|
||||
CompletionResponseStreamChunk,
|
||||
JsonSchemaResponseFormat,
|
||||
Message,
|
||||
SystemMessage,
|
||||
|
|
@ -579,3 +582,165 @@ async def convert_openai_chat_completion_stream(
|
|||
stop_reason=stop_reason,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def convert_completion_request(
|
||||
request: CompletionRequest,
|
||||
n: int = 1,
|
||||
) -> dict:
|
||||
"""
|
||||
Convert a ChatCompletionRequest to an OpenAI API-compatible dictionary.
|
||||
"""
|
||||
# model -> model
|
||||
# prompt -> prompt
|
||||
# sampling_params TODO(mattf): review strategy
|
||||
# strategy=greedy -> nvext.top_k = -1, temperature = temperature
|
||||
# strategy=top_p -> nvext.top_k = -1, top_p = top_p
|
||||
# strategy=top_k -> nvext.top_k = top_k
|
||||
# temperature -> temperature
|
||||
# top_p -> top_p
|
||||
# top_k -> nvext.top_k
|
||||
# max_tokens -> max_tokens
|
||||
# repetition_penalty -> nvext.repetition_penalty
|
||||
# response_format -> nvext.guided_json
|
||||
# stream -> stream
|
||||
# logprobs.top_k -> logprobs
|
||||
|
||||
nvext = {}
|
||||
payload: Dict[str, Any] = dict(
|
||||
model=request.model,
|
||||
prompt=request.content,
|
||||
stream=request.stream,
|
||||
extra_body=dict(nvext=nvext),
|
||||
extra_headers={
|
||||
b"User-Agent": b"llama-stack: nvidia-inference-adapter",
|
||||
},
|
||||
n=n,
|
||||
)
|
||||
|
||||
if request.response_format:
|
||||
# this is not openai compliant, it is a nim extension
|
||||
nvext.update(guided_json=request.response_format.json_schema)
|
||||
|
||||
if request.logprobs:
|
||||
payload.update(logprobs=request.logprobs.top_k)
|
||||
|
||||
if request.sampling_params:
|
||||
nvext.update(repetition_penalty=request.sampling_params.repetition_penalty)
|
||||
|
||||
if request.sampling_params.max_tokens:
|
||||
payload.update(max_tokens=request.sampling_params.max_tokens)
|
||||
|
||||
if request.sampling_params.strategy == "top_p":
|
||||
nvext.update(top_k=-1)
|
||||
payload.update(top_p=request.sampling_params.top_p)
|
||||
elif request.sampling_params.strategy == "top_k":
|
||||
if (
|
||||
request.sampling_params.top_k != -1
|
||||
and request.sampling_params.top_k < 1
|
||||
):
|
||||
warnings.warn("top_k must be -1 or >= 1")
|
||||
nvext.update(top_k=request.sampling_params.top_k)
|
||||
elif request.sampling_params.strategy == "greedy":
|
||||
nvext.update(top_k=-1)
|
||||
payload.update(temperature=request.sampling_params.temperature)
|
||||
|
||||
return payload
|
||||
|
||||
|
||||
def _convert_openai_completion_logprobs(
|
||||
logprobs: Optional[OpenAICompletionLogprobs],
|
||||
) -> Optional[List[TokenLogProbs]]:
|
||||
"""
|
||||
Convert an OpenAI CompletionLogprobs into a list of TokenLogProbs.
|
||||
|
||||
OpenAI CompletionLogprobs:
|
||||
text_offset: Optional[List[int]]
|
||||
token_logprobs: Optional[List[float]]
|
||||
tokens: Optional[List[str]]
|
||||
top_logprobs: Optional[List[Dict[str, float]]]
|
||||
|
||||
->
|
||||
|
||||
TokenLogProbs:
|
||||
logprobs_by_token: Dict[str, float]
|
||||
- token, logprob
|
||||
"""
|
||||
if not logprobs:
|
||||
return None
|
||||
|
||||
return [
|
||||
TokenLogProbs(logprobs_by_token=logprobs) for logprobs in logprobs.top_logprobs
|
||||
]
|
||||
|
||||
|
||||
def convert_openai_completion_choice(
|
||||
choice: OpenAIChoice,
|
||||
) -> CompletionResponse:
|
||||
"""
|
||||
Convert an OpenAI Completion Choice into a CompletionResponse.
|
||||
|
||||
OpenAI Completion Choice:
|
||||
text: str
|
||||
finish_reason: str
|
||||
logprobs: Optional[ChoiceLogprobs]
|
||||
|
||||
->
|
||||
|
||||
CompletionResponse:
|
||||
completion_message: CompletionMessage
|
||||
logprobs: Optional[List[TokenLogProbs]]
|
||||
|
||||
CompletionMessage:
|
||||
role: Literal["assistant"]
|
||||
content: str | ImageMedia | List[str | ImageMedia]
|
||||
stop_reason: StopReason
|
||||
tool_calls: List[ToolCall]
|
||||
|
||||
class StopReason(Enum):
|
||||
end_of_turn = "end_of_turn"
|
||||
end_of_message = "end_of_message"
|
||||
out_of_tokens = "out_of_tokens"
|
||||
"""
|
||||
return CompletionResponse(
|
||||
content=choice.text,
|
||||
stop_reason=_convert_openai_finish_reason(choice.finish_reason),
|
||||
logprobs=_convert_openai_completion_logprobs(choice.logprobs),
|
||||
)
|
||||
|
||||
|
||||
async def convert_openai_completion_stream(
|
||||
stream: AsyncStream[OpenAICompletion],
|
||||
) -> AsyncGenerator[CompletionResponse, None]:
|
||||
"""
|
||||
Convert a stream of OpenAI Completions into a stream
|
||||
of ChatCompletionResponseStreamChunks.
|
||||
|
||||
OpenAI Completion:
|
||||
id: str
|
||||
choices: List[OpenAICompletionChoice]
|
||||
created: int
|
||||
model: str
|
||||
system_fingerprint: Optional[str]
|
||||
usage: Optional[OpenAICompletionUsage]
|
||||
|
||||
OpenAI CompletionChoice:
|
||||
finish_reason: str
|
||||
index: int
|
||||
logprobs: Optional[OpenAILogprobs]
|
||||
text: str
|
||||
|
||||
->
|
||||
|
||||
CompletionResponseStreamChunk:
|
||||
delta: str
|
||||
stop_reason: Optional[StopReason]
|
||||
logprobs: Optional[List[TokenLogProbs]]
|
||||
"""
|
||||
async for chunk in stream:
|
||||
choice = chunk.choices[0]
|
||||
yield CompletionResponseStreamChunk(
|
||||
delta=choice.text,
|
||||
stop_reason=_convert_openai_finish_reason(choice.finish_reason),
|
||||
logprobs=_convert_openai_completion_logprobs(choice.logprobs),
|
||||
)
|
||||
|
|
|
|||
|
|
@@ -180,7 +180,6 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
    async def _nonstream_completion(self, request: CompletionRequest) -> AsyncGenerator:
        params = await self._get_params(request)
        r = await self.client.generate(**params)
        assert isinstance(r, dict)

        choice = OpenAICompatCompletionChoice(
            finish_reason=r["done_reason"] if r["done"] else None,

@@ -270,7 +269,6 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
            r = await self.client.chat(**params)
        else:
            r = await self.client.generate(**params)
        assert isinstance(r, dict)

        if "message" in r:
            choice = OpenAICompatCompletionChoice(
@@ -100,6 +100,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
            tool_prompt_format=tool_prompt_format,
            stream=stream,
            logprobs=logprobs,
            response_format=response_format,
        )
        if stream:
            return self._stream_chat_completion(request, self.client)

@@ -180,6 +181,16 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
                self.formatter,
            )

        if fmt := request.response_format:
            if fmt.type == ResponseFormatType.json_schema.value:
                input_dict["extra_body"] = {
                    "guided_json": request.response_format.json_schema
                }
            elif fmt.type == ResponseFormatType.grammar.value:
                raise NotImplementedError("Grammar response format not supported yet")
            else:
                raise ValueError(f"Unknown response format {fmt.type}")

        return {
            "model": request.model,
            **input_dict,
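A hedged example of the JSON-schema guided decoding path added above: when a chat completion request carries a json_schema response format, the vLLM adapter forwards it as `extra_body={"guided_json": ...}`. The `JsonSchemaResponseFormat` name matches the inference API import used elsewhere in this change; its constructor keyword, the `UserMessage` import, and the model id below are assumptions for illustration.

from llama_stack.apis.inference import JsonSchemaResponseFormat, UserMessage


async def structured_chat(inference_impl):
    # Assumed call shape; model_id and schema are illustrative only.
    return await inference_impl.chat_completion(
        model_id="my-vllm-model",
        messages=[UserMessage(content="Give me a JSON object with a 'city' field.")],
        response_format=JsonSchemaResponseFormat(
            json_schema={"type": "object", "properties": {"city": {"type": "string"}}}
        ),
    )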
@@ -4,12 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.distribution.datatypes import RemoteProviderConfig
from .config import ChromaRemoteImplConfig


async def get_adapter_impl(config: RemoteProviderConfig, _deps):
async def get_adapter_impl(config: ChromaRemoteImplConfig, _deps):
    from .chroma import ChromaMemoryAdapter

    impl = ChromaMemoryAdapter(config.url)
    impl = ChromaMemoryAdapter(config)
    await impl.initialize()
    return impl
@ -3,7 +3,7 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import List
|
||||
|
|
@ -12,21 +12,31 @@ from urllib.parse import urlparse
|
|||
import chromadb
|
||||
from numpy.typing import NDArray
|
||||
|
||||
from pydantic import parse_obj_as
|
||||
|
||||
from llama_stack.apis.memory import * # noqa: F403
|
||||
|
||||
from llama_stack.providers.datatypes import MemoryBanksProtocolPrivate
|
||||
from llama_stack.providers.inline.memory.chroma import ChromaInlineImplConfig
|
||||
from llama_stack.providers.utils.memory.vector_store import (
|
||||
BankWithIndex,
|
||||
EmbeddingIndex,
|
||||
)
|
||||
from .config import ChromaRemoteImplConfig
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
ChromaClientType = Union[chromadb.AsyncHttpClient, chromadb.PersistentClient]
|
||||
|
||||
|
||||
# this is a helper to allow us to use async and non-async chroma clients interchangeably
|
||||
async def maybe_await(result):
|
||||
if asyncio.iscoroutine(result):
|
||||
return await result
|
||||
return result
|
||||
|
||||
|
||||
class ChromaIndex(EmbeddingIndex):
|
||||
def __init__(self, client: chromadb.AsyncHttpClient, collection):
|
||||
def __init__(self, client: ChromaClientType, collection):
|
||||
self.client = client
|
||||
self.collection = collection
|
||||
|
||||
|
|
@ -35,19 +45,23 @@ class ChromaIndex(EmbeddingIndex):
|
|||
embeddings
|
||||
), f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
|
||||
|
||||
await self.collection.add(
|
||||
documents=[chunk.json() for chunk in chunks],
|
||||
embeddings=embeddings,
|
||||
ids=[f"{c.document_id}:chunk-{i}" for i, c in enumerate(chunks)],
|
||||
await maybe_await(
|
||||
self.collection.add(
|
||||
documents=[chunk.model_dump_json() for chunk in chunks],
|
||||
embeddings=embeddings,
|
||||
ids=[f"{c.document_id}:chunk-{i}" for i, c in enumerate(chunks)],
|
||||
)
|
||||
)
|
||||
|
||||
async def query(
|
||||
self, embedding: NDArray, k: int, score_threshold: float
|
||||
) -> QueryDocumentsResponse:
|
||||
results = await self.collection.query(
|
||||
query_embeddings=[embedding.tolist()],
|
||||
n_results=k,
|
||||
include=["documents", "distances"],
|
||||
results = await maybe_await(
|
||||
self.collection.query(
|
||||
query_embeddings=[embedding.tolist()],
|
||||
n_results=k,
|
||||
include=["documents", "distances"],
|
||||
)
|
||||
)
|
||||
distances = results["distances"][0]
|
||||
documents = results["documents"][0]
|
||||
|
|
@ -68,31 +82,33 @@ class ChromaIndex(EmbeddingIndex):
|
|||
return QueryDocumentsResponse(chunks=chunks, scores=scores)
|
||||
|
||||
async def delete(self):
|
||||
await self.client.delete_collection(self.collection.name)
|
||||
await maybe_await(self.client.delete_collection(self.collection.name))
|
||||
|
||||
|
||||
class ChromaMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
|
||||
def __init__(self, url: str) -> None:
|
||||
log.info(f"Initializing ChromaMemoryAdapter with url: {url}")
|
||||
url = url.rstrip("/")
|
||||
parsed = urlparse(url)
|
||||
|
||||
if parsed.path and parsed.path != "/":
|
||||
raise ValueError("URL should not contain a path")
|
||||
|
||||
self.host = parsed.hostname
|
||||
self.port = parsed.port
|
||||
|
||||
def __init__(
|
||||
self, config: Union[ChromaRemoteImplConfig, ChromaInlineImplConfig]
|
||||
) -> None:
|
||||
log.info(f"Initializing ChromaMemoryAdapter with url: {config}")
|
||||
self.config = config
|
||||
self.client = None
|
||||
self.cache = {}
|
||||
|
||||
async def initialize(self) -> None:
|
||||
try:
|
||||
log.info(f"Connecting to Chroma server at: {self.host}:{self.port}")
|
||||
self.client = await chromadb.AsyncHttpClient(host=self.host, port=self.port)
|
||||
except Exception as e:
|
||||
log.exception("Could not connect to Chroma server")
|
||||
raise RuntimeError("Could not connect to Chroma server") from e
|
||||
if isinstance(self.config, ChromaRemoteImplConfig):
|
||||
log.info(f"Connecting to Chroma server at: {self.config.url}")
|
||||
url = self.config.url.rstrip("/")
|
||||
parsed = urlparse(url)
|
||||
|
||||
if parsed.path and parsed.path != "/":
|
||||
raise ValueError("URL should not contain a path")
|
||||
|
||||
self.client = await chromadb.AsyncHttpClient(
|
||||
host=parsed.hostname, port=parsed.port
|
||||
)
|
||||
else:
|
||||
log.info(f"Connecting to Chroma local db at: {self.config.db_path}")
|
||||
self.client = chromadb.PersistentClient(path=self.config.db_path)
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
pass
|
||||
|
|
@ -105,33 +121,17 @@ class ChromaMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
|
|||
memory_bank.memory_bank_type == MemoryBankType.vector.value
|
||||
), f"Only vector banks are supported {memory_bank.memory_bank_type}"
|
||||
|
||||
collection = await self.client.get_or_create_collection(
|
||||
name=memory_bank.identifier,
|
||||
metadata={"bank": memory_bank.model_dump_json()},
|
||||
collection = await maybe_await(
|
||||
self.client.get_or_create_collection(
|
||||
name=memory_bank.identifier,
|
||||
metadata={"bank": memory_bank.model_dump_json()},
|
||||
)
|
||||
)
|
||||
bank_index = BankWithIndex(
|
||||
bank=memory_bank, index=ChromaIndex(self.client, collection)
|
||||
)
|
||||
self.cache[memory_bank.identifier] = bank_index
|
||||
|
||||
async def list_memory_banks(self) -> List[MemoryBank]:
|
||||
collections = await self.client.list_collections()
|
||||
for collection in collections:
|
||||
try:
|
||||
data = json.loads(collection.metadata["bank"])
|
||||
bank = parse_obj_as(VectorMemoryBank, data)
|
||||
except Exception:
|
||||
log.exception(f"Failed to parse bank: {collection.metadata}")
|
||||
continue
|
||||
|
||||
index = BankWithIndex(
|
||||
bank=bank,
|
||||
index=ChromaIndex(self.client, collection),
|
||||
)
|
||||
self.cache[bank.identifier] = index
|
||||
|
||||
return [i.bank for i in self.cache.values()]
|
||||
|
||||
async def unregister_memory_bank(self, memory_bank_id: str) -> None:
|
||||
await self.cache[memory_bank_id].index.delete()
|
||||
del self.cache[memory_bank_id]
|
||||
|
|
@ -163,7 +163,7 @@ class ChromaMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
|
|||
bank = await self.memory_bank_store.get_memory_bank(bank_id)
|
||||
if not bank:
|
||||
raise ValueError(f"Bank {bank_id} not found in Llama Stack")
|
||||
collection = await self.client.get_collection(bank_id)
|
||||
collection = await maybe_await(self.client.get_collection(bank_id))
|
||||
if not collection:
|
||||
raise ValueError(f"Bank {bank_id} not found in Chroma")
|
||||
index = BankWithIndex(bank=bank, index=ChromaIndex(self.client, collection))
|
||||
|
|
|
|||
17
llama_stack/providers/remote/memory/chroma/config.py
Normal file
17
llama_stack/providers/remote/memory/chroma/config.py
Normal file
|
|
@@ -0,0 +1,17 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any, Dict

from pydantic import BaseModel


class ChromaRemoteImplConfig(BaseModel):
    url: str

    @classmethod
    def sample_config(cls) -> Dict[str, Any]:
        return {"url": "{env.CHROMADB_URL}"}
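A small usage sketch of the new config class (the URL values here are illustrative, not from the PR). Note that the initialize() hunk earlier rejects URLs that carry a path, so only scheme, host, and port belong in url:

from llama_stack.providers.remote.memory.chroma.config import ChromaRemoteImplConfig

# Accepted by initialize(): a bare scheme://host:port URL.
config = ChromaRemoteImplConfig(url="http://localhost:8000")

# A URL such as "http://localhost:8000/api/v1" would be rejected during
# initialize() because it contains a path component.

print(ChromaRemoteImplConfig.sample_config())  # {'url': '{env.CHROMADB_URL}'}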
@@ -185,17 +185,6 @@ class PGVectorMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
        await self.cache[memory_bank_id].index.delete()
        del self.cache[memory_bank_id]

    async def list_memory_banks(self) -> List[MemoryBank]:
        banks = load_models(self.cursor, VectorMemoryBank)
        for bank in banks:
            if bank.identifier not in self.cache:
                index = BankWithIndex(
                    bank=bank,
                    index=PGVectorIndex(bank, ALL_MINILM_L6_V2_DIMENSION, self.cursor),
                )
                self.cache[bank.identifier] = index
        return banks

    async def insert_documents(
        self,
        bank_id: str,
@@ -127,11 +127,6 @@ class QdrantVectorMemoryAdapter(Memory, MemoryBanksProtocolPrivate):

        self.cache[memory_bank.identifier] = index

    async def list_memory_banks(self) -> List[MemoryBank]:
        # Qdrant doesn't have collection level metadata to store the bank properties
        # So we only return from the cache value
        return [i.bank for i in self.cache.values()]

    async def _get_and_cache_bank_index(self, bank_id: str) -> Optional[BankWithIndex]:
        if bank_id in self.cache:
            return self.cache[bank_id]
@@ -14,7 +14,7 @@ class SampleMemoryImpl(Memory):
    def __init__(self, config: SampleConfig):
        self.config = config

    async def register_memory_bank(self, memory_bank: MemoryBankDef) -> None:
    async def register_memory_bank(self, memory_bank: MemoryBank) -> None:
        # these are the memory banks the Llama Stack will use to route requests to this provider
        # perform validation here if necessary
        pass
@@ -141,13 +141,6 @@ class WeaviateMemoryAdapter(
        )
        self.cache[memory_bank.identifier] = index

    async def list_memory_banks(self) -> List[MemoryBank]:
        # TODO: right now the Llama Stack is the source of truth for these banks. That is
        # not ideal. It should be Weaviate which is the source of truth. Unfortunately,
        # list() happens at Stack startup when the Weaviate client (credentials) is not
        # yet available. We need to figure out a way to make this work.
        return [i.bank for i in self.cache.values()]

    async def _get_and_cache_bank_index(self, bank_id: str) -> Optional[BankWithIndex]:
        if bank_id in self.cache:
            return self.cache[bank_id]
@@ -1,15 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .config import OpenTelemetryConfig


async def get_adapter_impl(config: OpenTelemetryConfig, _deps):
    from .opentelemetry import OpenTelemetryAdapter

    impl = OpenTelemetryAdapter(config)
    await impl.initialize()
    return impl
@@ -1,27 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any, Dict

from pydantic import BaseModel, Field


class OpenTelemetryConfig(BaseModel):
    otel_endpoint: str = Field(
        default="http://localhost:4318/v1/traces",
        description="The OpenTelemetry collector endpoint URL",
    )
    service_name: str = Field(
        default="llama-stack",
        description="The service name to use for telemetry",
    )

    @classmethod
    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
        return {
            "otel_endpoint": "${env.OTEL_ENDPOINT:http://localhost:4318/v1/traces}",
            "service_name": "${env.OTEL_SERVICE_NAME:llama-stack}",
        }
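The removed config relied on the "${env.VAR:default}" placeholder convention in sample_run_config. The stack's own substitution logic is not part of this diff; as a hedged illustration only, a resolver for that convention could look roughly like this:

import os
import re

_ENV_PATTERN = re.compile(r"\$\{env\.([A-Z0-9_]+)(?::([^}]*))?\}")


def expand_env(value: str) -> str:
    # Replace each ${env.NAME:default} with the environment value or the default.
    def repl(match):
        name, default = match.group(1), match.group(2) or ""
        return os.environ.get(name, default)

    return _ENV_PATTERN.sub(repl, value)


# e.g. expand_env("${env.OTEL_ENDPOINT:http://localhost:4318/v1/traces}")
# returns "http://localhost:4318/v1/traces" when OTEL_ENDPOINT is unset.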
@@ -81,6 +81,18 @@ class TestDatasetIO:
        assert len(response) == 1
        assert response[0].identifier == "test_dataset"

        with pytest.raises(Exception) as exc_info:
            # unregister a dataset that does not exist
            await datasets_impl.unregister_dataset("test_dataset2")

        await datasets_impl.unregister_dataset("test_dataset")
        response = await datasets_impl.list_datasets()
        assert isinstance(response, list)
        assert len(response) == 0

        with pytest.raises(Exception) as exc_info:
            await datasets_impl.unregister_dataset("test_dataset")

    @pytest.mark.asyncio
    async def test_get_rows_paginated(self, datasetio_stack):
        datasetio_impl, datasets_impl = datasetio_stack
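The test above pins down the expected semantics of unregister_dataset: removing an unknown id raises, and a successfully removed dataset no longer appears in list_datasets(). A minimal in-memory sketch that would satisfy those assertions (not the actual provider implementation):

from typing import Any, Dict, List


class InMemoryDatasetRegistry:
    def __init__(self) -> None:
        self._datasets: Dict[str, Any] = {}

    async def list_datasets(self) -> List[Any]:
        return list(self._datasets.values())

    async def unregister_dataset(self, dataset_id: str) -> None:
        if dataset_id not in self._datasets:
            # matches the pytest.raises(Exception) expectation in the test
            raise ValueError(f"Dataset {dataset_id} not found")
        del self._datasets[dataset_id]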
Some files were not shown because too many files have changed in this diff.