feat: split API and provider specs into separate llama-stack-api pkg (#3895)

# What does this PR do?

Extract API definitions and provider specifications into a standalone
llama-stack-api package that can be published to PyPI independently of
the main llama-stack server.


See: https://github.com/llamastack/llama-stack/pull/2978 and
https://github.com/llamastack/llama-stack/pull/2978#issuecomment-3145115942

## Motivation

External providers currently import from llama-stack, which overrides
the installed version and causes dependency conflicts. This separation
allows external providers to:

- Install only the type definitions they need without server
dependencies
- Avoid version conflicts with the installed llama-stack package
- Be versioned and released independently

This enables us to re-enable external provider module tests that were
previously blocked by these import conflicts.

## Changes

- Created the llama-stack-api package with minimal dependencies (pydantic,
jsonschema)
- Moved the APIs, provider datatypes, strong_typing, and schema_utils
- Updated all imports from `llama_stack.*` to `llama_stack_api.*` (see the
sketch after this list)
- Configured a local editable install for the development workflow
- Updated linting and type-checking configuration for both packages
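
A minimal sketch of the migration from an external provider's point of
view, assuming the layout above; the `acme` names, module, and package
are hypothetical:

```python
# Before the split, external providers imported from the server package:
# from llama_stack.providers.datatypes import Api, RemoteProviderSpec

# After the split, only the lightweight type package is required:
from llama_stack_api import Api, RemoteProviderSpec

# Hypothetical external provider spec built solely from llama-stack-api types.
acme_spec = RemoteProviderSpec(
    api=Api.inference,
    adapter_type="acme",
    provider_type="remote::acme",
    config_class="acme_stack.config.AcmeConfig",
    module="acme_stack",
    pip_packages=["acme-sdk"],
)
```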

## Next Steps

- Publish llama-stack-api to PyPI
- Update external provider dependencies
- Re-enable external provider module tests


Precursor PRs to this one:

- #4093 
- #3954 
- #4064 

These PRs moved key pieces _out_ of the API package, limiting the scope of
the changes here.


Relates to #3237

## Test Plan

The package builds successfully and can be imported independently. All
pre-commit hooks pass with the expected exclusions maintained.
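
For illustration, a smoke test along these lines (a sketch assuming the
local editable install described above, not the exact CI invocation):

```python
# The type package must import without pulling in the llama-stack server.
import importlib

api_pkg = importlib.import_module("llama_stack_api")
assert hasattr(api_pkg, "ProviderSpec")  # provider specs now live here
print("llama_stack_api imports independently")
```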

---------

Signed-off-by: Charlie Doern <cdoern@redhat.com>

@@ -1,217 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from enum import StrEnum
from typing import Any, Protocol
from urllib.parse import urlparse
from pydantic import BaseModel, Field
from llama_stack.apis.benchmarks import Benchmark
from llama_stack.apis.datasets import Dataset
from llama_stack.apis.datatypes import Api
from llama_stack.apis.models import Model
from llama_stack.apis.scoring_functions import ScoringFn
from llama_stack.apis.shields import Shield
from llama_stack.apis.tools import ToolGroup
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.schema_utils import json_schema_type
class ModelsProtocolPrivate(Protocol):
"""
Protocol for model management.
This allows users to register their preferred model identifiers.
Model registration requires -
- a provider, used to route the registration request
- a model identifier, user's intended name for the model during inference
- a provider model identifier, a model identifier supported by the provider
Providers will only accept registration for provider model ids they support.
Example,
register: provider x my-model-id x provider-model-id
-> Error if provider does not support provider-model-id
-> Error if my-model-id is already registered
-> Success if provider supports provider-model-id
inference: my-model-id x ...
-> Provider uses provider-model-id for inference
"""
# this should be called `on_model_register` or something like that.
# the provider should _not_ be able to change the object in this
# callback
async def register_model(self, model: Model) -> Model: ...
async def unregister_model(self, model_id: str) -> None: ...
# the Stack router will query each provider for their list of models
# if a `refresh_interval_seconds` is provided, this method will be called
# periodically to refresh the list of models
#
# NOTE: each model returned will be registered with the model registry. this means
# a callback to the `register_model()` method will be made. this is duplicative and
# may be removed in the future.
async def list_models(self) -> list[Model] | None: ...
async def should_refresh_models(self) -> bool: ...
class ShieldsProtocolPrivate(Protocol):
async def register_shield(self, shield: Shield) -> None: ...
async def unregister_shield(self, identifier: str) -> None: ...
class VectorStoresProtocolPrivate(Protocol):
async def register_vector_store(self, vector_store: VectorStore) -> None: ...
async def unregister_vector_store(self, vector_store_id: str) -> None: ...
class DatasetsProtocolPrivate(Protocol):
async def register_dataset(self, dataset: Dataset) -> None: ...
async def unregister_dataset(self, dataset_id: str) -> None: ...
class ScoringFunctionsProtocolPrivate(Protocol):
async def list_scoring_functions(self) -> list[ScoringFn]: ...
async def register_scoring_function(self, scoring_fn: ScoringFn) -> None: ...
class BenchmarksProtocolPrivate(Protocol):
async def register_benchmark(self, benchmark: Benchmark) -> None: ...
class ToolGroupsProtocolPrivate(Protocol):
async def register_toolgroup(self, toolgroup: ToolGroup) -> None: ...
async def unregister_toolgroup(self, toolgroup_id: str) -> None: ...
@json_schema_type
class ProviderSpec(BaseModel):
api: Api
provider_type: str
config_class: str = Field(
...,
description="Fully-qualified classname of the config for this provider",
)
api_dependencies: list[Api] = Field(
default_factory=list,
description="Higher-level API surfaces may depend on other providers to provide their functionality",
)
optional_api_dependencies: list[Api] = Field(
default_factory=list,
)
deprecation_warning: str | None = Field(
default=None,
description="If this provider is deprecated, specify the warning message here",
)
deprecation_error: str | None = Field(
default=None,
description="If this provider is deprecated and does NOT work, specify the error message here",
)
module: str | None = Field(
default=None,
description="""
Fully-qualified name of the module to import. The module is expected to have:
- `get_adapter_impl(config, deps)`: returns the adapter implementation
Example: `module: ramalama_stack`
""",
)
pip_packages: list[str] = Field(
default_factory=list,
description="The pip dependencies needed for this implementation",
)
provider_data_validator: str | None = Field(
default=None,
)
is_external: bool = Field(default=False, description="Notes whether this provider is an external provider.")
# used internally by the resolver; this is a hack for now
deps__: list[str] = Field(default_factory=list)
@property
def is_sample(self) -> bool:
return self.provider_type in ("sample", "remote::sample")
class RoutingTable(Protocol):
async def get_provider_impl(self, routing_key: str) -> Any: ...
@json_schema_type
class InlineProviderSpec(ProviderSpec):
container_image: str | None = Field(
default=None,
description="""
The container image to use for this implementation. If one is provided, pip_packages will be ignored.
If a provider depends on other providers, the dependencies MUST NOT specify a container image.
""",
)
description: str | None = Field(
default=None,
description="""
A description of the provider. This is used to display in the documentation.
""",
)
class RemoteProviderConfig(BaseModel):
host: str = "localhost"
port: int | None = None
protocol: str = "http"
@property
def url(self) -> str:
if self.port is None:
return f"{self.protocol}://{self.host}"
return f"{self.protocol}://{self.host}:{self.port}"
@classmethod
def from_url(cls, url: str) -> "RemoteProviderConfig":
parsed = urlparse(url)
attrs = {k: v for k, v in parsed._asdict().items() if v is not None}
return cls(**attrs)
@json_schema_type
class RemoteProviderSpec(ProviderSpec):
adapter_type: str = Field(
...,
description="Unique identifier for this adapter",
)
description: str | None = Field(
default=None,
description="""
A description of the provider. This is used to display in the documentation.
""",
)
@property
def container_image(self) -> str | None:
return None
class HealthStatus(StrEnum):
OK = "OK"
ERROR = "Error"
NOT_IMPLEMENTED = "Not Implemented"
HealthResponse = dict[str, Any]
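
For orientation, a minimal illustrative sketch (not part of this changeset)
of an external provider satisfying the `ModelsProtocolPrivate` protocol
above once it is imported from the new package; the `Acme*` names and model
ids are hypothetical:

```python
from llama_stack_api import Model


class AcmeInferenceAdapter:
    """Hypothetical provider with a static, in-memory model registry."""

    def __init__(self) -> None:
        self._models: dict[str, Model] = {}

    async def register_model(self, model: Model) -> Model:
        # Providers only accept provider model ids they actually serve.
        if model.provider_resource_id not in ("acme-small", "acme-large"):
            raise ValueError(f"unsupported provider model: {model.provider_resource_id}")
        self._models[model.identifier] = model
        return model

    async def unregister_model(self, model_id: str) -> None:
        self._models.pop(model_id, None)

    async def list_models(self) -> list[Model] | None:
        return list(self._models.values())

    async def should_refresh_models(self) -> bool:
        return False  # static catalog; no periodic refresh needed
```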

@@ -5,25 +5,26 @@
# the root directory of this source tree.
from llama_stack.apis.agents import (
from llama_stack_api import (
Agents,
Conversations,
Inference,
ListOpenAIResponseInputItem,
ListOpenAIResponseObject,
OpenAIDeleteResponseObject,
OpenAIResponseInput,
OpenAIResponseInputTool,
OpenAIResponseObject,
OpenAIResponsePrompt,
OpenAIResponseText,
Order,
ResponseGuardrail,
Safety,
ToolGroups,
ToolRuntime,
VectorIO,
)
from llama_stack.apis.agents.agents import ResponseGuardrail
from llama_stack.apis.agents.openai_responses import OpenAIResponsePrompt, OpenAIResponseText
from llama_stack.apis.conversations import Conversations
from llama_stack.apis.inference import (
Inference,
)
from llama_stack.apis.safety import Safety
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.core.datatypes import AccessRule
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl

@@ -8,14 +8,15 @@ import time
import uuid
from collections.abc import AsyncIterator
from pydantic import BaseModel, TypeAdapter
from llama_stack.apis.agents import Order
from llama_stack.apis.agents.agents import ResponseGuardrailSpec
from llama_stack.apis.agents.openai_responses import (
from llama_stack_api import (
ConversationItem,
Conversations,
Inference,
InvalidConversationIdError,
ListOpenAIResponseInputItem,
ListOpenAIResponseObject,
OpenAIDeleteResponseObject,
OpenAIMessageParam,
OpenAIResponseInput,
OpenAIResponseInputMessageContentText,
OpenAIResponseInputTool,
@@ -25,20 +26,16 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponsePrompt,
OpenAIResponseText,
OpenAIResponseTextFormat,
)
from llama_stack.apis.common.errors import (
InvalidConversationIdError,
)
from llama_stack.apis.conversations import Conversations
from llama_stack.apis.conversations.conversations import ConversationItem
from llama_stack.apis.inference import (
Inference,
OpenAIMessageParam,
OpenAISystemMessageParam,
Order,
ResponseGuardrailSpec,
Safety,
ToolGroups,
ToolRuntime,
VectorIO,
)
from llama_stack.apis.safety import Safety
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from pydantic import BaseModel, TypeAdapter
from llama_stack.log import get_logger
from llama_stack.providers.utils.responses.responses_store import (
ResponsesStore,

@@ -8,10 +8,18 @@ import uuid
from collections.abc import AsyncIterator
from typing import Any
from llama_stack.apis.agents.openai_responses import (
from llama_stack_api import (
AllowedToolsFilter,
ApprovalFilter,
Inference,
MCPListToolsTool,
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAIChatCompletionRequestWithExtraBody,
OpenAIChatCompletionToolCall,
OpenAIChoice,
OpenAIMessageParam,
OpenAIResponseContentPartOutputText,
OpenAIResponseContentPartReasoningText,
OpenAIResponseContentPartRefusal,
@@ -56,16 +64,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseUsageOutputTokensDetails,
WebSearchToolTypes,
)
from llama_stack.apis.inference import (
Inference,
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAIChatCompletionRequestWithExtraBody,
OpenAIChatCompletionToolCall,
OpenAIChoice,
OpenAIMessageParam,
)
from llama_stack.core.telemetry import tracing
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
@@ -1023,9 +1022,9 @@ class StreamingResponseOrchestrator:
self, tools: list[OpenAIResponseInputTool], output_messages: list[OpenAIResponseOutput]
) -> AsyncIterator[OpenAIResponseObjectStream]:
"""Process all tools and emit appropriate streaming events."""
from llama_stack_api import ToolDef
from openai.types.chat import ChatCompletionToolParam
from llama_stack.apis.tools import ToolDef
from llama_stack.models.llama.datatypes import ToolDefinition
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool

@@ -9,7 +9,12 @@ import json
from collections.abc import AsyncIterator
from typing import Any
from llama_stack.apis.agents.openai_responses import (
from llama_stack_api import (
ImageContentItem,
OpenAIChatCompletionContentPartImageParam,
OpenAIChatCompletionContentPartTextParam,
OpenAIChatCompletionToolCall,
OpenAIImageURL,
OpenAIResponseInputToolFileSearch,
OpenAIResponseInputToolMCP,
OpenAIResponseObjectStreamResponseFileSearchCallCompleted,
@@ -23,22 +28,15 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseObjectStreamResponseWebSearchCallSearching,
OpenAIResponseOutputMessageFileSearchToolCall,
OpenAIResponseOutputMessageFileSearchToolCallResults,
OpenAIResponseOutputMessageMCPCall,
OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack.apis.common.content_types import (
ImageContentItem,
TextContentItem,
)
from llama_stack.apis.inference import (
OpenAIChatCompletionContentPartImageParam,
OpenAIChatCompletionContentPartTextParam,
OpenAIChatCompletionToolCall,
OpenAIImageURL,
OpenAIToolMessageParam,
TextContentItem,
ToolGroups,
ToolInvocationResult,
ToolRuntime,
VectorIO,
)
from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.core.telemetry import tracing
from llama_stack.log import get_logger
@@ -398,6 +396,10 @@ class ToolExecutor:
# Build output message
message: Any
if mcp_tool_to_server and function.name in mcp_tool_to_server:
from llama_stack_api import (
OpenAIResponseOutputMessageMCPCall,
)
message = OpenAIResponseOutputMessageMCPCall(
id=item_id,
arguments=function.arguments,

@@ -7,10 +7,10 @@
from dataclasses import dataclass
from typing import cast
from openai.types.chat import ChatCompletionToolParam
from pydantic import BaseModel
from llama_stack.apis.agents.openai_responses import (
from llama_stack_api import (
OpenAIChatCompletionToolCall,
OpenAIMessageParam,
OpenAIResponseFormatParam,
OpenAIResponseInput,
OpenAIResponseInputTool,
OpenAIResponseInputToolFileSearch,
@@ -26,7 +26,8 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseTool,
OpenAIResponseToolMCP,
)
from llama_stack.apis.inference import OpenAIChatCompletionToolCall, OpenAIMessageParam, OpenAIResponseFormatParam
from openai.types.chat import ChatCompletionToolParam
from pydantic import BaseModel
class ToolExecutionResult(BaseModel):

@@ -9,9 +9,23 @@ import re
import uuid
from collections.abc import Sequence
from llama_stack.apis.agents.agents import ResponseGuardrailSpec
from llama_stack.apis.agents.openai_responses import (
from llama_stack_api import (
OpenAIAssistantMessageParam,
OpenAIChatCompletionContentPartImageParam,
OpenAIChatCompletionContentPartParam,
OpenAIChatCompletionContentPartTextParam,
OpenAIChatCompletionToolCall,
OpenAIChatCompletionToolCallFunction,
OpenAIChoice,
OpenAIDeveloperMessageParam,
OpenAIImageURL,
OpenAIJSONSchema,
OpenAIMessageParam,
OpenAIResponseAnnotationFileCitation,
OpenAIResponseFormatJSONObject,
OpenAIResponseFormatJSONSchema,
OpenAIResponseFormatParam,
OpenAIResponseFormatText,
OpenAIResponseInput,
OpenAIResponseInputFunctionToolCallOutput,
OpenAIResponseInputMessageContent,
@@ -27,28 +41,12 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseOutputMessageMCPCall,
OpenAIResponseOutputMessageMCPListTools,
OpenAIResponseText,
)
from llama_stack.apis.inference import (
OpenAIAssistantMessageParam,
OpenAIChatCompletionContentPartImageParam,
OpenAIChatCompletionContentPartParam,
OpenAIChatCompletionContentPartTextParam,
OpenAIChatCompletionToolCall,
OpenAIChatCompletionToolCallFunction,
OpenAIChoice,
OpenAIDeveloperMessageParam,
OpenAIImageURL,
OpenAIJSONSchema,
OpenAIMessageParam,
OpenAIResponseFormatJSONObject,
OpenAIResponseFormatJSONSchema,
OpenAIResponseFormatParam,
OpenAIResponseFormatText,
OpenAISystemMessageParam,
OpenAIToolMessageParam,
OpenAIUserMessageParam,
ResponseGuardrailSpec,
Safety,
)
from llama_stack.apis.safety import Safety
async def convert_chat_choice_to_response_message(

@@ -6,8 +6,8 @@
import asyncio
from llama_stack.apis.inference import OpenAIMessageParam
from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel
from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
from llama_stack.core.telemetry import tracing
from llama_stack.log import get_logger

@@ -6,9 +6,8 @@
from typing import Any
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference
from llama_stack.apis.models import Models
from llama_stack_api import Files, Inference, Models
from llama_stack.core.datatypes import AccessRule, Api
from llama_stack.providers.utils.kvstore import kvstore_impl

@@ -13,25 +13,29 @@ import uuid
from io import BytesIO
from typing import Any, Literal
from openai.types.batch import BatchError, Errors
from pydantic import BaseModel
from llama_stack.apis.batches import Batches, BatchObject, ListBatchesResponse
from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
from llama_stack.apis.files import Files, OpenAIFilePurpose
from llama_stack.apis.inference import (
from llama_stack_api import (
Batches,
BatchObject,
ConflictError,
Files,
Inference,
ListBatchesResponse,
Models,
OpenAIAssistantMessageParam,
OpenAIChatCompletionRequestWithExtraBody,
OpenAICompletionRequestWithExtraBody,
OpenAIDeveloperMessageParam,
OpenAIEmbeddingsRequestWithExtraBody,
OpenAIFilePurpose,
OpenAIMessageParam,
OpenAISystemMessageParam,
OpenAIToolMessageParam,
OpenAIUserMessageParam,
ResourceNotFoundError,
)
from llama_stack.apis.models import Models
from openai.types.batch import BatchError, Errors
from pydantic import BaseModel
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore

@@ -5,10 +5,8 @@
# the root directory of this source tree.
from typing import Any
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Dataset
from llama_stack.providers.datatypes import DatasetsProtocolPrivate
from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.pagination import paginate_records

@@ -6,26 +6,29 @@
import json
from typing import Any
from tqdm import tqdm
from llama_stack.apis.agents import Agents
from llama_stack.apis.benchmarks import Benchmark
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.inference import (
from llama_stack_api import (
Agents,
Benchmark,
BenchmarkConfig,
BenchmarksProtocolPrivate,
DatasetIO,
Datasets,
Eval,
EvaluateResponse,
Inference,
Job,
JobStatus,
OpenAIChatCompletionRequestWithExtraBody,
OpenAICompletionRequestWithExtraBody,
OpenAISystemMessageParam,
OpenAIUserMessageParam,
Scoring,
)
from llama_stack.apis.scoring import Scoring
from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
from tqdm import tqdm
from llama_stack.providers.utils.common.data_schema_validator import ColumnName
from llama_stack.providers.utils.kvstore import kvstore_impl
from .....apis.common.job_types import Job, JobStatus
from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
from .config import MetaReferenceEvalConfig
EVAL_TASKS_PREFIX = "benchmarks:"

@@ -10,17 +10,17 @@ from pathlib import Path
from typing import Annotated
from fastapi import Depends, File, Form, Response, UploadFile
from llama_stack.apis.common.errors import ResourceNotFoundError
from llama_stack.apis.common.responses import Order
from llama_stack.apis.files import (
from llama_stack_api import (
ExpiresAfter,
Files,
ListOpenAIFileResponse,
OpenAIFileDeleteResponse,
OpenAIFileObject,
OpenAIFilePurpose,
Order,
ResourceNotFoundError,
)
from llama_stack.core.datatypes import AccessRule
from llama_stack.core.id_generation import generate_object_id
from llama_stack.log import get_logger

@@ -6,9 +6,9 @@
from typing import Any
from llama_stack_api import QuantizationConfig
from pydantic import BaseModel, field_validator
from llama_stack.apis.inference import QuantizationConfig
from llama_stack.providers.utils.inference import supported_inference_models

@@ -8,9 +8,7 @@ import math
from typing import Optional
import torch
from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
from llama_stack.apis.inference import (
from llama_stack_api import (
GreedySamplingStrategy,
JsonSchemaResponseFormat,
OpenAIChatCompletionRequestWithExtraBody,
@@ -20,6 +18,8 @@ from llama_stack.apis.inference import (
SamplingParams,
TopPSamplingStrategy,
)
from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
from llama_stack.models.llama.datatypes import QuantizationMode, ToolPromptFormat
from llama_stack.models.llama.llama3.generation import Llama3
from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer

@@ -9,22 +9,23 @@ import time
import uuid
from collections.abc import AsyncIterator
from llama_stack.apis.inference import (
from llama_stack_api import (
InferenceProvider,
Model,
ModelsProtocolPrivate,
ModelType,
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAIChatCompletionRequestWithExtraBody,
OpenAIChatCompletionUsage,
OpenAIChoice,
OpenAICompletion,
OpenAICompletionRequestWithExtraBody,
OpenAIUserMessageParam,
ToolChoice,
)
from llama_stack.apis.inference.inference import (
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
)
from llama_stack.apis.models import Model, ModelType
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import RawMessage, RawTextItem, ToolDefinition
from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat
@@ -40,7 +41,6 @@ from llama_stack.models.llama.llama4.prompt_templates.system_prompts import (
from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer
from llama_stack.models.llama.sku_list import resolve_model
from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal
from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.embedding_mixin import (
SentenceTransformerEmbeddingMixin,
)
@@ -376,7 +376,7 @@ class MetaReferenceInferenceImpl(
# Convert tool calls to OpenAI format
openai_tool_calls = None
if decoded_message.tool_calls:
from llama_stack.apis.inference import (
from llama_stack_api import (
OpenAIChatCompletionToolCall,
OpenAIChatCompletionToolCallFunction,
)
@@ -441,13 +441,14 @@
params: OpenAIChatCompletionRequestWithExtraBody,
) -> AsyncIterator[OpenAIChatCompletionChunk]:
"""Stream chat completion chunks as they're generated."""
from llama_stack.apis.inference import (
from llama_stack_api import (
OpenAIChatCompletionChunk,
OpenAIChatCompletionToolCall,
OpenAIChatCompletionToolCallFunction,
OpenAIChoiceDelta,
OpenAIChunkChoice,
)
from llama_stack.models.llama.datatypes import StopReason
from llama_stack.providers.utils.inference.prompt_adapter import decode_assistant_message

@@ -6,19 +6,19 @@
from collections.abc import AsyncIterator
from llama_stack.apis.inference import (
from llama_stack_api import (
InferenceProvider,
OpenAIChatCompletionRequestWithExtraBody,
OpenAICompletionRequestWithExtraBody,
)
from llama_stack.apis.inference.inference import (
Model,
ModelsProtocolPrivate,
ModelType,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAIChatCompletionRequestWithExtraBody,
OpenAICompletion,
OpenAICompletionRequestWithExtraBody,
)
from llama_stack.apis.models import ModelType
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
from llama_stack.providers.utils.inference.embedding_mixin import (
SentenceTransformerEmbeddingMixin,
)

@@ -12,11 +12,8 @@
from typing import Any
from llama_stack.apis.common.type_system import (
ChatCompletionInputType,
DialogType,
StringType,
)
from llama_stack_api import ChatCompletionInputType, DialogType, StringType
from llama_stack.providers.utils.common.data_schema_validator import (
ColumnName,
)

@@ -6,11 +6,11 @@
from enum import Enum
from typing import Any
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.post_training import (
from llama_stack_api import (
AlgorithmConfig,
Checkpoint,
DatasetIO,
Datasets,
DPOAlignmentConfig,
JobStatus,
ListPostTrainingJobsResponse,
@@ -19,6 +19,7 @@ from llama_stack.apis.post_training import (
PostTrainingJobStatusResponse,
TrainingConfig,
)
from llama_stack.providers.inline.post_training.huggingface.config import (
HuggingFacePostTrainingConfig,
)

@@ -12,20 +12,20 @@ from typing import Any
import torch
from datasets import Dataset
from llama_stack_api import (
Checkpoint,
DataConfig,
DatasetIO,
Datasets,
LoraFinetuningConfig,
TrainingConfig,
)
from peft import LoraConfig
from transformers import (
AutoTokenizer,
)
from trl import SFTConfig, SFTTrainer
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.post_training import (
Checkpoint,
DataConfig,
LoraFinetuningConfig,
TrainingConfig,
)
from llama_stack.log import get_logger
from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device

@@ -11,18 +11,18 @@ from typing import Any
import torch
from datasets import Dataset
from llama_stack_api import (
Checkpoint,
DatasetIO,
Datasets,
DPOAlignmentConfig,
TrainingConfig,
)
from transformers import (
AutoTokenizer,
)
from trl import DPOConfig, DPOTrainer
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.post_training import (
Checkpoint,
DPOAlignmentConfig,
TrainingConfig,
)
from llama_stack.log import get_logger
from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device

@@ -14,6 +14,7 @@ from typing import TYPE_CHECKING, Any, Protocol
import psutil
import torch
from datasets import Dataset
from llama_stack_api import Checkpoint, DatasetIO, TrainingConfig
from transformers import AutoConfig, AutoModelForCausalLM
if TYPE_CHECKING:
@@ -34,8 +35,6 @@ class HFAutoModel(Protocol):
def save_pretrained(self, save_directory: str | Path) -> None: ...
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.post_training import Checkpoint, TrainingConfig
from llama_stack.log import get_logger
from .config import HuggingFacePostTrainingConfig

@@ -13,6 +13,7 @@
from collections.abc import Callable
import torch
from llama_stack_api import DatasetFormat
from pydantic import BaseModel
from torchtune.data._messages import InputOutputToMessages, ShareGPTToMessages
from torchtune.models.llama3 import llama3_tokenizer
@@ -21,7 +22,6 @@ from torchtune.models.llama3_1 import lora_llama3_1_8b
from torchtune.models.llama3_2 import lora_llama3_2_3b
from torchtune.modules.transforms import Transform
from llama_stack.apis.post_training import DatasetFormat
from llama_stack.models.llama.sku_list import resolve_model
from llama_stack.models.llama.sku_types import Model

@@ -6,11 +6,11 @@
from enum import Enum
from typing import Any
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.post_training import (
from llama_stack_api import (
AlgorithmConfig,
Checkpoint,
DatasetIO,
Datasets,
DPOAlignmentConfig,
JobStatus,
ListPostTrainingJobsResponse,
@@ -20,6 +20,7 @@ from llama_stack.apis.post_training import (
PostTrainingJobStatusResponse,
TrainingConfig,
)
from llama_stack.providers.inline.post_training.torchtune.config import (
TorchtunePostTrainingConfig,
)

@@ -12,6 +12,17 @@ from pathlib import Path
from typing import Any
import torch
from llama_stack_api import (
Checkpoint,
DataConfig,
DatasetIO,
Datasets,
LoraFinetuningConfig,
OptimizerConfig,
PostTrainingMetric,
QATFinetuningConfig,
TrainingConfig,
)
from torch import nn
from torch.optim import Optimizer
from torch.utils.data import DataLoader, DistributedSampler
@@ -32,17 +43,6 @@ from torchtune.training.lr_schedulers import get_cosine_schedule_with_warmup
from torchtune.training.metric_logging import DiskLogger
from tqdm import tqdm
from llama_stack.apis.common.training_types import PostTrainingMetric
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.post_training import (
Checkpoint,
DataConfig,
LoraFinetuningConfig,
OptimizerConfig,
QATFinetuningConfig,
TrainingConfig,
)
from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
from llama_stack.core.utils.model_utils import model_local_dir
from llama_stack.log import get_logger

@@ -10,15 +10,17 @@ from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from codeshield.cs import CodeShieldScanResult
from llama_stack.apis.inference import OpenAIMessageParam
from llama_stack.apis.safety import (
from llama_stack_api import (
ModerationObject,
ModerationObjectResults,
OpenAIMessageParam,
RunShieldResponse,
Safety,
SafetyViolation,
Shield,
ViolationLevel,
)
from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults
from llama_stack.apis.shields import Shield
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.prompt_adapter import (
interleaved_content_as_str,

@@ -9,26 +9,27 @@ import uuid
from string import Template
from typing import Any
from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem
from llama_stack.apis.inference import (
from llama_stack_api import (
ImageContentItem,
Inference,
ModerationObject,
ModerationObjectResults,
OpenAIChatCompletionRequestWithExtraBody,
OpenAIMessageParam,
OpenAIUserMessageParam,
)
from llama_stack.apis.safety import (
RunShieldResponse,
Safety,
SafetyViolation,
Shield,
ShieldsProtocolPrivate,
TextContentItem,
ViolationLevel,
)
from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults
from llama_stack.apis.shields import Shield
from llama_stack.core.datatypes import Api
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import Role
from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import (
interleaved_content_as_str,
)

@@ -7,21 +7,21 @@
from typing import Any
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from llama_stack.apis.inference import OpenAIMessageParam
from llama_stack.apis.safety import (
from llama_stack_api import (
ModerationObject,
OpenAIMessageParam,
RunShieldResponse,
Safety,
SafetyViolation,
Shield,
ShieldsProtocolPrivate,
ShieldStore,
ViolationLevel,
)
from llama_stack.apis.safety.safety import ModerationObject
from llama_stack.apis.shields import Shield
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from llama_stack.core.utils.model_utils import model_local_dir
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ShieldsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
from .config import PromptGuardConfig, PromptGuardType

@@ -5,17 +5,19 @@
# the root directory of this source tree.
from typing import Any
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.scoring import (
from llama_stack_api import (
DatasetIO,
Datasets,
ScoreBatchResponse,
ScoreResponse,
Scoring,
ScoringFn,
ScoringFnParams,
ScoringFunctionsProtocolPrivate,
ScoringResult,
)
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
from llama_stack.core.datatypes import Api
from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
from llama_stack.providers.utils.common.data_schema_validator import (
get_valid_schemas,
validate_dataset_schema,

@@ -8,8 +8,8 @@ import json
import re
from typing import Any
from llama_stack.apis.scoring import ScoringResultRow
from llama_stack.apis.scoring_functions import ScoringFnParams
from llama_stack_api import ScoringFnParams, ScoringResultRow
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
from .fn_defs.docvqa import docvqa

@@ -6,8 +6,8 @@
from typing import Any
from llama_stack.apis.scoring import ScoringResultRow
from llama_stack.apis.scoring_functions import ScoringFnParams
from llama_stack_api import ScoringFnParams, ScoringResultRow
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
from .fn_defs.equality import equality

@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
BasicScoringFnParams,
NumberType,
ScoringFn,
)

@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
BasicScoringFnParams,
NumberType,
ScoringFn,
)

@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
BasicScoringFnParams,
NumberType,
ScoringFn,
)

@@ -4,9 +4,9 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
NumberType,
RegexParserScoringFnParams,
ScoringFn,
)

@@ -4,9 +4,9 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
NumberType,
RegexParserScoringFnParams,
ScoringFn,
)

@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
BasicScoringFnParams,
NumberType,
ScoringFn,
)

@@ -6,8 +6,8 @@
from typing import Any
from llama_stack.apis.scoring import ScoringResultRow
from llama_stack.apis.scoring_functions import ScoringFnParams
from llama_stack_api import ScoringFnParams, ScoringResultRow
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
from .fn_defs.ifeval import (

@@ -5,8 +5,8 @@
# the root directory of this source tree.
from typing import Any
from llama_stack.apis.scoring import ScoringResultRow
from llama_stack.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType
from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
from ..utils.math_utils import first_answer, normalize_final_answer, try_evaluate_frac, try_evaluate_latex

@@ -6,8 +6,8 @@
import re
from typing import Any
from llama_stack.apis.scoring import ScoringResultRow
from llama_stack.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType
from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
from .fn_defs.regex_parser_multiple_choice_answer import (

@@ -6,8 +6,8 @@
from typing import Any
from llama_stack.apis.scoring import ScoringResultRow
from llama_stack.apis.scoring_functions import ScoringFnParams
from llama_stack_api import ScoringFnParams, ScoringResultRow
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
from .fn_defs.subset_of import subset_of

@@ -17,21 +17,22 @@ from autoevals.ragas import (
ContextRelevancy,
Faithfulness,
)
from pydantic import BaseModel
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.scoring import (
from llama_stack_api import (
DatasetIO,
Datasets,
ScoreBatchResponse,
ScoreResponse,
Scoring,
ScoringFn,
ScoringFnParams,
ScoringFunctionsProtocolPrivate,
ScoringResult,
ScoringResultRow,
)
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
from pydantic import BaseModel
from llama_stack.core.datatypes import Api
from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
from llama_stack.providers.utils.common.data_schema_validator import (
get_valid_schemas,
validate_dataset_schema,

@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
BasicScoringFnParams,
NumberType,
ScoringFn,
)

@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
BasicScoringFnParams,
NumberType,
ScoringFn,
)

@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
BasicScoringFnParams,
NumberType,
ScoringFn,
)

@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
BasicScoringFnParams,
NumberType,
ScoringFn,
)

@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
BasicScoringFnParams,
NumberType,
ScoringFn,
)

@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
BasicScoringFnParams,
NumberType,
ScoringFn,
)

@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
BasicScoringFnParams,
NumberType,
ScoringFn,
)

@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
BasicScoringFnParams,
NumberType,
ScoringFn,
)

@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
BasicScoringFnParams,
NumberType,
ScoringFn,
)

@@ -5,18 +5,20 @@
# the root directory of this source tree.
from typing import Any
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.inference import Inference
from llama_stack.apis.scoring import (
from llama_stack_api import (
DatasetIO,
Datasets,
Inference,
ScoreBatchResponse,
ScoreResponse,
Scoring,
ScoringFn,
ScoringFnParams,
ScoringFunctionsProtocolPrivate,
ScoringResult,
)
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
from llama_stack.core.datatypes import Api
from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
from llama_stack.providers.utils.common.data_schema_validator import (
get_valid_schemas,
validate_dataset_schema,

@@ -4,10 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
from llama_stack_api import (
AggregationFunctionType,
LLMAsJudgeScoringFnParams,
NumberType,
ScoringFn,
)

@@ -4,8 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import LLMAsJudgeScoringFnParams, ScoringFn
from llama_stack_api import LLMAsJudgeScoringFnParams, NumberType, ScoringFn
llm_as_judge_base = ScoringFn(
identifier="llm-as-judge::base",

@@ -6,9 +6,8 @@
import re
from typing import Any
from llama_stack.apis.inference import Inference, OpenAIChatCompletionRequestWithExtraBody
from llama_stack.apis.scoring import ScoringResultRow
from llama_stack.apis.scoring_functions import ScoringFnParams
from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody, ScoringFnParams, ScoringResultRow
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
from .fn_defs.llm_as_judge_405b_simpleqa import llm_as_judge_405b_simpleqa

@@ -6,7 +6,7 @@
from typing import Any
from llama_stack.providers.datatypes import Api
from llama_stack_api import Api
from .config import RagToolRuntimeConfig

@@ -6,15 +6,16 @@
from jinja2 import Template
from llama_stack.apis.common.content_types import InterleavedContent
from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
from llama_stack.apis.tools.rag_tool import (
from llama_stack_api import (
DefaultRAGQueryGeneratorConfig,
InterleavedContent,
LLMRAGQueryGeneratorConfig,
OpenAIChatCompletionRequestWithExtraBody,
OpenAIUserMessageParam,
RAGQueryGenerator,
RAGQueryGeneratorConfig,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
interleaved_content_as_str,
)

@@ -12,34 +12,31 @@ from typing import Any
import httpx
from fastapi import UploadFile
from pydantic import TypeAdapter
from llama_stack.apis.common.content_types import (
from llama_stack_api import (
URL,
Files,
Inference,
InterleavedContent,
InterleavedContentItem,
TextContentItem,
)
from llama_stack.apis.files import Files, OpenAIFilePurpose
from llama_stack.apis.inference import Inference
from llama_stack.apis.tools import (
ListToolDefsResponse,
OpenAIFilePurpose,
QueryChunksResponse,
RAGDocument,
RAGQueryConfig,
RAGQueryResult,
TextContentItem,
ToolDef,
ToolGroup,
ToolGroupsProtocolPrivate,
ToolInvocationResult,
ToolRuntime,
)
from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
VectorStoreChunkingStrategyStatic,
VectorStoreChunkingStrategyStaticConfig,
)
from pydantic import TypeAdapter
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
from llama_stack.providers.utils.memory.vector_store import parse_data_url

@@ -6,7 +6,7 @@
from typing import Any
from llama_stack.providers.datatypes import Api
from llama_stack_api import Api
from .config import ChromaVectorIOConfig

@@ -6,10 +6,10 @@
from typing import Any
from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field
from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.schema_utils import json_schema_type
@json_schema_type

@@ -6,7 +6,7 @@
from typing import Any
from llama_stack.providers.datatypes import Api
from llama_stack_api import Api
from .config import FaissVectorIOConfig

@@ -6,10 +6,10 @@
from typing import Any
from llama_stack_api import json_schema_type
from pydantic import BaseModel
from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.schema_utils import json_schema_type
@json_schema_type

@@ -12,15 +12,22 @@ from typing import Any
import faiss # type: ignore[import-untyped]
import numpy as np
from llama_stack_api import (
Chunk,
Files,
HealthResponse,
HealthStatus,
Inference,
InterleavedContent,
QueryChunksResponse,
VectorIO,
VectorStore,
VectorStoreNotFoundError,
VectorStoresProtocolPrivate,
)
from numpy.typing import NDArray
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin

@@ -6,7 +6,7 @@
from typing import Any
from llama_stack.providers.datatypes import Api
from llama_stack_api import Api
from .config import MilvusVectorIOConfig

@@ -6,10 +6,10 @@
from typing import Any
from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field
from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.schema_utils import json_schema_type
@json_schema_type

@@ -6,7 +6,7 @@
from typing import Any
from llama_stack.providers.datatypes import Api
from llama_stack_api import Api
from .config import QdrantVectorIOConfig

@@ -7,10 +7,10 @@
from typing import Any
from llama_stack_api import json_schema_type
from pydantic import BaseModel
from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.schema_utils import json_schema_type
@json_schema_type

@@ -6,7 +6,7 @@
from typing import Any
from llama_stack.providers.datatypes import Api
from llama_stack_api import Api
from .config import SQLiteVectorIOConfig

@@ -12,15 +12,19 @@ from typing import Any
import numpy as np
import sqlite_vec # type: ignore[import-untyped]
from llama_stack_api import (
Chunk,
Files,
Inference,
QueryChunksResponse,
VectorIO,
VectorStore,
VectorStoreNotFoundError,
VectorStoresProtocolPrivate,
)
from numpy.typing import NDArray
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin

@@ -5,11 +5,12 @@
# the root directory of this source tree.
from llama_stack.providers.datatypes import (
from llama_stack_api import (
Api,
InlineProviderSpec,
ProviderSpec,
)
from llama_stack.providers.utils.kvstore import kvstore_dependencies

@@ -5,7 +5,7 @@
# the root directory of this source tree.
from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
from llama_stack_api import Api, InlineProviderSpec, ProviderSpec
def available_providers() -> list[ProviderSpec]:

@@ -5,7 +5,7 @@
# the root directory of this source tree.
from llama_stack.providers.datatypes import (
from llama_stack_api import (
Api,
InlineProviderSpec,
ProviderSpec,

@@ -5,7 +5,7 @@
# the root directory of this source tree.
from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
def available_providers() -> list[ProviderSpec]:

@@ -4,7 +4,8 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages

@@ -5,7 +5,7 @@
# the root directory of this source tree.
from llama_stack.providers.datatypes import (
from llama_stack_api import (
Api,
InlineProviderSpec,
ProviderSpec,

@@ -7,7 +7,7 @@
from typing import cast
from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
# We provide two versions of these providers so that distributions can package the appropriate version of torch.
# The CPU version is used for distributions that don't have GPU support -- they result in smaller container images.

@@ -5,7 +5,7 @@
# the root directory of this source tree.
from llama_stack.providers.datatypes import (
from llama_stack_api import (
Api,
InlineProviderSpec,
ProviderSpec,

@@ -5,7 +5,7 @@
# the root directory of this source tree.
from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
from llama_stack_api import Api, InlineProviderSpec, ProviderSpec
def available_providers() -> list[ProviderSpec]:

@@ -5,12 +5,13 @@
# the root directory of this source tree.
from llama_stack.providers.datatypes import (
from llama_stack_api import (
Api,
InlineProviderSpec,
ProviderSpec,
RemoteProviderSpec,
)
from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS

@@ -5,7 +5,7 @@
# the root directory of this source tree.
from llama_stack.providers.datatypes import (
from llama_stack_api import (
Api,
InlineProviderSpec,
ProviderSpec,
@@ -244,7 +244,7 @@ Two ranker types are supported:
Example using RAGQueryConfig with different search modes:
```python
from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker
# Vector search
config = RAGQueryConfig(mode="vector", max_chunks=5)

@@ -6,10 +6,8 @@
from typing import Any
from urllib.parse import parse_qs, urlparse
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Dataset
from llama_stack.providers.datatypes import DatasetsProtocolPrivate
from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.pagination import paginate_records

@@ -7,11 +7,7 @@
from typing import Any
import aiohttp
from llama_stack.apis.common.content_types import URL
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.common.type_system import ParamType
from llama_stack.apis.datasets import Dataset
from llama_stack_api import URL, Dataset, PaginatedResponse, ParamType
from .config import NvidiaDatasetIOConfig

@@ -6,18 +6,24 @@
from typing import Any
import requests
from llama_stack_api import (
Agents,
Benchmark,
BenchmarkConfig,
BenchmarksProtocolPrivate,
DatasetIO,
Datasets,
Eval,
EvaluateResponse,
Inference,
Job,
JobStatus,
Scoring,
ScoringResult,
)
from llama_stack.apis.agents import Agents
from llama_stack.apis.benchmarks import Benchmark
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.inference import Inference
from llama_stack.apis.scoring import Scoring, ScoringResult
from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from .....apis.common.job_types import Job, JobStatus
from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
from .config import NVIDIAEvalConfig
DEFAULT_NAMESPACE = "nvidia"

@@ -8,17 +8,17 @@ from datetime import UTC, datetime
from typing import Annotated, Any
from fastapi import Depends, File, Form, Response, UploadFile
from llama_stack.apis.common.errors import ResourceNotFoundError
from llama_stack.apis.common.responses import Order
from llama_stack.apis.files import (
from llama_stack_api import (
ExpiresAfter,
Files,
ListOpenAIFileResponse,
OpenAIFileDeleteResponse,
OpenAIFileObject,
OpenAIFilePurpose,
Order,
ResourceNotFoundError,
)
from llama_stack.core.datatypes import AccessRule
from llama_stack.providers.utils.files.form_data import parse_expires_after
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType

@@ -17,16 +17,17 @@ from fastapi import Depends, File, Form, Response, UploadFile
if TYPE_CHECKING:
from mypy_boto3_s3.client import S3Client
from llama_stack.apis.common.errors import ResourceNotFoundError
from llama_stack.apis.common.responses import Order
from llama_stack.apis.files import (
from llama_stack_api import (
ExpiresAfter,
Files,
ListOpenAIFileResponse,
OpenAIFileDeleteResponse,
OpenAIFileObject,
OpenAIFilePurpose,
Order,
ResourceNotFoundError,
)
from llama_stack.core.datatypes import AccessRule
from llama_stack.core.id_generation import generate_object_id
from llama_stack.providers.utils.files.form_data import parse_expires_after

@@ -6,10 +6,10 @@
from typing import Any
from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type
class AnthropicProviderDataValidator(BaseModel):

@@ -7,10 +7,10 @@
import os
from typing import Any
from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field, HttpUrl, SecretStr
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type
class AzureProviderDataValidator(BaseModel):

@@ -6,9 +6,7 @@
from collections.abc import AsyncIterator, Iterable
from openai import AuthenticationError
from llama_stack.apis.inference import (
from llama_stack_api import (
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAIChatCompletionRequestWithExtraBody,
@@ -17,6 +15,8 @@ from llama_stack.apis.inference import (
OpenAIEmbeddingsRequestWithExtraBody,
OpenAIEmbeddingsResponse,
)
from openai import AuthenticationError
from llama_stack.core.telemetry.tracing import get_current_span
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

@@ -6,10 +6,11 @@
from urllib.parse import urljoin
from llama_stack.apis.inference import (
from llama_stack_api import (
OpenAIEmbeddingsRequestWithExtraBody,
OpenAIEmbeddingsResponse,
)
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from .config import CerebrasImplConfig

@@ -7,10 +7,10 @@
import os
from typing import Any
from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type
DEFAULT_BASE_URL = "https://api.cerebras.ai"

@@ -6,10 +6,10 @@
from typing import Any
from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field, SecretStr
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type
class DatabricksProviderDataValidator(BaseModel):

@@ -7,8 +7,8 @@
from collections.abc import Iterable
from databricks.sdk import WorkspaceClient
from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody
from llama_stack.apis.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

@@ -6,10 +6,10 @@
from typing import Any
from llama_stack_api import json_schema_type
from pydantic import Field
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type
@json_schema_type

@@ -6,10 +6,10 @@
from typing import Any
from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type
class GeminiProviderDataValidator(BaseModel):

@@ -6,12 +6,13 @@
from typing import Any
from llama_stack.apis.inference import (
from llama_stack_api import (
OpenAIEmbeddingData,
OpenAIEmbeddingsRequestWithExtraBody,
OpenAIEmbeddingsResponse,
OpenAIEmbeddingUsage,
)
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from .config import GeminiConfig

@@ -6,10 +6,10 @@
from typing import Any
from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type
class GroqProviderDataValidator(BaseModel):

@@ -6,10 +6,10 @@
from typing import Any
from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type
class LlamaProviderDataValidator(BaseModel):

@@ -4,12 +4,13 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.inference.inference import (
from llama_stack_api import (
OpenAICompletion,
OpenAICompletionRequestWithExtraBody,
OpenAIEmbeddingsRequestWithExtraBody,
OpenAIEmbeddingsResponse,
)
from llama_stack.log import get_logger
from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.inference import Inference
from llama_stack_api import Inference
from .config import NVIDIAConfig

@@ -7,10 +7,10 @@
import os
from typing import Any
from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type
class NVIDIAProviderDataValidator(BaseModel):

Some files were not shown because too many files have changed in this diff.