forked from phoenix-oss/llama-stack-mirror
# What does this PR do? - Update `/eval-tasks` to `/benchmarks` - ⚠️ Remove differentiation between `app` vs. `benchmark` eval task config. Now we only have `BenchmarkConfig`. The overloaded `benchmark` is confusing and does not add any value. Backward compatibility is preserved because the `type` field is not used anywhere. [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan - This change is backward compatible - Run notebook test with ``` pytest -v -s --nbval-lax ./docs/getting_started.ipynb pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb ``` <img width="846" alt="image" src="https://github.com/user-attachments/assets/d2fc06a7-593a-444f-bc1f-10ab9b0c843d" /> [//]: # (## Documentation) [//]: # (- [ ] Added a Changelog entry if the change is significant) --------- Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com> Signed-off-by: Ben Browning <bbrownin@redhat.com> Signed-off-by: Sébastien Han <seb@redhat.com> Signed-off-by: reidliu <reid201711@gmail.com> Co-authored-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com> Co-authored-by: Ben Browning <ben324@gmail.com> Co-authored-by: Sébastien Han <seb@redhat.com> Co-authored-by: Reid <61492567+reidliu41@users.noreply.github.com> Co-authored-by: reidliu <reid201711@gmail.com> Co-authored-by: Yuan Tang <terrytangyuan@gmail.com>
24 KiB
24 KiB
Python SDK Reference
Shared Types
from llama_stack_client.types import (
AgentConfig,
BatchCompletion,
CompletionMessage,
ContentDelta,
Document,
InterleavedContent,
InterleavedContentItem,
Message,
ParamType,
QueryConfig,
QueryResult,
ReturnType,
SafetyViolation,
SamplingParams,
ScoringResult,
SystemMessage,
ToolCall,
ToolParamDefinition,
ToolResponseMessage,
URL,
UserMessage,
)
Toolgroups
Types:
from llama_stack_client.types import (
ListToolGroupsResponse,
ToolGroup,
ToolgroupListResponse,
)
Methods:
client.toolgroups.list() -> ToolgroupListResponse
client.toolgroups.get(toolgroup_id) -> ToolGroup
client.toolgroups.register(**params) -> None
client.toolgroups.unregister(toolgroup_id) -> None
Tools
Types:
from llama_stack_client.types import ListToolsResponse, Tool, ToolListResponse
Methods:
client.tools.list(**params) -> ToolListResponse
client.tools.get(tool_name) -> Tool
ToolRuntime
Types:
from llama_stack_client.types import ToolDef, ToolInvocationResult
Methods:
client.tool_runtime.invoke_tool(**params) -> ToolInvocationResult
client.tool_runtime.list_tools(**params) -> JSONLDecoder[ToolDef]
RagTool
Methods:
client.tool_runtime.rag_tool.insert(**params) -> None
client.tool_runtime.rag_tool.query(**params) -> QueryResult
Agents
Types:
from llama_stack_client.types import (
InferenceStep,
MemoryRetrievalStep,
ShieldCallStep,
ToolExecutionStep,
ToolResponse,
AgentCreateResponse,
)
Methods:
client.agents.create(**params) -> AgentCreateResponse
client.agents.delete(agent_id) -> None
Session
Types:
from llama_stack_client.types.agents import Session, SessionCreateResponse
Methods:
client.agents.session.create(agent_id, **params) -> SessionCreateResponse
client.agents.session.retrieve(session_id, *, agent_id, **params) -> Session
client.agents.session.delete(session_id, *, agent_id) -> None
Steps
Types:
from llama_stack_client.types.agents import StepRetrieveResponse
Methods:
client.agents.steps.retrieve(step_id, *, agent_id, session_id, turn_id) -> StepRetrieveResponse
Turn
Types:
from llama_stack_client.types.agents import Turn, TurnCreateResponse
Methods:
client.agents.turn.create(session_id, *, agent_id, **params) -> TurnCreateResponse
client.agents.turn.retrieve(turn_id, *, agent_id, session_id) -> Turn
BatchInference
Types:
from llama_stack_client.types import BatchInferenceChatCompletionResponse
Methods:
client.batch_inference.chat_completion(**params) -> BatchInferenceChatCompletionResponse
client.batch_inference.completion(**params) -> BatchCompletion
Datasets
Types:
from llama_stack_client.types import (
ListDatasetsResponse,
DatasetRetrieveResponse,
DatasetListResponse,
)
Methods:
client.datasets.retrieve(dataset_id) -> Optional[DatasetRetrieveResponse]
client.datasets.list() -> DatasetListResponse
client.datasets.register(**params) -> None
client.datasets.unregister(dataset_id) -> None
Eval
Types:
from llama_stack_client.types import EvaluateResponse, Job
Methods:
client.eval.evaluate_rows(benchmark_id, **params) -> EvaluateResponse
client.eval.run_eval(benchmark_id, **params) -> Job
Jobs
Types:
from llama_stack_client.types.eval import JobStatusResponse
Methods:
client.eval.jobs.retrieve(job_id, *, benchmark_id) -> EvaluateResponse
client.eval.jobs.cancel(job_id, *, benchmark_id) -> None
client.eval.jobs.status(job_id, *, benchmark_id) -> Optional[JobStatusResponse]
Inspect
Types:
from llama_stack_client.types import HealthInfo, ProviderInfo, RouteInfo, VersionInfo
Methods:
client.inspect.health() -> HealthInfo
client.inspect.version() -> VersionInfo
Inference
Types:
from llama_stack_client.types import (
CompletionResponse,
EmbeddingsResponse,
TokenLogProbs,
InferenceChatCompletionResponse,
InferenceCompletionResponse,
)
Methods:
client.inference.chat_completion(**params) -> InferenceChatCompletionResponse
client.inference.completion(**params) -> InferenceCompletionResponse
client.inference.embeddings(**params) -> EmbeddingsResponse
VectorIo
Types:
from llama_stack_client.types import QueryChunksResponse
Methods:
client.vector_io.insert(**params) -> None
client.vector_io.query(**params) -> QueryChunksResponse
VectorDBs
Types:
from llama_stack_client.types import (
ListVectorDBsResponse,
VectorDBRetrieveResponse,
VectorDBListResponse,
VectorDBRegisterResponse,
)
Methods:
client.vector_dbs.retrieve(vector_db_id) -> Optional[VectorDBRetrieveResponse]
client.vector_dbs.list() -> VectorDBListResponse
client.vector_dbs.register(**params) -> VectorDBRegisterResponse
client.vector_dbs.unregister(vector_db_id) -> None
Models
Types:
from llama_stack_client.types import ListModelsResponse, Model, ModelListResponse
Methods:
client.models.retrieve(model_id) -> Optional[Model]
client.models.list() -> ModelListResponse
client.models.register(**params) -> Model
client.models.unregister(model_id) -> None
PostTraining
Types:
from llama_stack_client.types import ListPostTrainingJobsResponse, PostTrainingJob
Methods:
client.post_training.preference_optimize(**params) -> PostTrainingJob
client.post_training.supervised_fine_tune(**params) -> PostTrainingJob
Job
Types:
from llama_stack_client.types.post_training import (
JobListResponse,
JobArtifactsResponse,
JobStatusResponse,
)
Methods:
client.post_training.job.list() -> JobListResponse
client.post_training.job.artifacts(**params) -> Optional[JobArtifactsResponse]
client.post_training.job.cancel(**params) -> None
client.post_training.job.status(**params) -> Optional[JobStatusResponse]
Providers
Types:
from llama_stack_client.types import ListProvidersResponse, ProviderListResponse
Methods:
client.providers.list() -> ProviderListResponse
Routes
Types:
from llama_stack_client.types import ListRoutesResponse, RouteListResponse
Methods:
client.routes.list() -> RouteListResponse
Safety
Types:
from llama_stack_client.types import RunShieldResponse
Methods:
client.safety.run_shield(**params) -> RunShieldResponse
Shields
Types:
from llama_stack_client.types import ListShieldsResponse, Shield, ShieldListResponse
Methods:
client.shields.retrieve(identifier) -> Optional[Shield]
client.shields.list() -> ShieldListResponse
client.shields.register(**params) -> Shield
SyntheticDataGeneration
Types:
from llama_stack_client.types import SyntheticDataGenerationResponse
Methods:
client.synthetic_data_generation.generate(**params) -> SyntheticDataGenerationResponse
Telemetry
Types:
from llama_stack_client.types import (
QuerySpansResponse,
SpanWithStatus,
Trace,
TelemetryGetSpanResponse,
TelemetryGetSpanTreeResponse,
TelemetryQuerySpansResponse,
TelemetryQueryTracesResponse,
)
Methods:
client.telemetry.get_span(span_id, *, trace_id) -> TelemetryGetSpanResponse
client.telemetry.get_span_tree(span_id, **params) -> TelemetryGetSpanTreeResponse
client.telemetry.get_trace(trace_id) -> Trace
client.telemetry.log_event(**params) -> None
client.telemetry.query_spans(**params) -> TelemetryQuerySpansResponse
client.telemetry.query_traces(**params) -> TelemetryQueryTracesResponse
client.telemetry.save_spans_to_dataset(**params) -> None
Datasetio
Types:
from llama_stack_client.types import PaginatedRowsResult
Methods:
client.datasetio.append_rows(**params) -> None
client.datasetio.get_rows_paginated(**params) -> PaginatedRowsResult
Scoring
Types:
from llama_stack_client.types import ScoringScoreResponse, ScoringScoreBatchResponse
Methods:
client.scoring.score(**params) -> ScoringScoreResponse
client.scoring.score_batch(**params) -> ScoringScoreBatchResponse
ScoringFunctions
Types:
from llama_stack_client.types import (
ListScoringFunctionsResponse,
ScoringFn,
ScoringFunctionListResponse,
)
Methods:
client.scoring_functions.retrieve(scoring_fn_id) -> Optional[ScoringFn]
client.scoring_functions.list() -> ScoringFunctionListResponse
client.scoring_functions.register(**params) -> None
Benchmarks
Types:
from llama_stack_client.types import (
Benchmark,
ListBenchmarksResponse,
BenchmarkListResponse,
)
Methods:
client.benchmarks.retrieve(benchmark_id) -> Optional[Benchmark]
client.benchmarks.list() -> BenchmarkListResponse
client.benchmarks.register(**params) -> None