diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index 8268a0085..6de72cd60 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -11,6 +11,8 @@ on: - 'llama_stack/distribution/*.sh' - '.github/workflows/providers-build.yml' - 'llama_stack/templates/**' + - 'pyproject.toml' + pull_request: paths: - 'llama_stack/cli/stack/build.py' @@ -19,6 +21,7 @@ on: - 'llama_stack/distribution/*.sh' - '.github/workflows/providers-build.yml' - 'llama_stack/templates/**' + - 'pyproject.toml' concurrency: group: ${{ github.workflow }}-${{ github.ref }} diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 801e8dc33..f9e4bb38e 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -7390,6 +7390,147 @@ ], "title": "AgentTurnResponseTurnStartPayload" }, + "OpenAIResponseAnnotationCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "url_citation", + "default": "url_citation" + }, + "end_index": { + "type": "integer" + }, + "start_index": { + "type": "integer" + }, + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "type", + "end_index", + "start_index", + "title", + "url" + ], + "title": "OpenAIResponseAnnotationCitation" + }, + "OpenAIResponseAnnotationContainerFileCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "container_file_citation", + "default": "container_file_citation" + }, + "container_id": { + "type": "string" + }, + "end_index": { + "type": "integer" + }, + "file_id": { + "type": "string" + }, + "filename": { + "type": "string" + }, + "start_index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "container_id", + "end_index", + "file_id", + "filename", + "start_index" + ], + "title": "OpenAIResponseAnnotationContainerFileCitation" + }, + "OpenAIResponseAnnotationFileCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "file_citation", + "default": "file_citation" + }, + "file_id": { + "type": "string" + }, + "filename": { + "type": "string" + }, + "index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "file_id", + "filename", + "index" + ], + "title": "OpenAIResponseAnnotationFileCitation" + }, + "OpenAIResponseAnnotationFilePath": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "file_path", + "default": "file_path" + }, + "file_id": { + "type": "string" + }, + "index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "file_id", + "index" + ], + "title": "OpenAIResponseAnnotationFilePath" + }, + "OpenAIResponseAnnotations": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation", + "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation", + "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation", + 
"file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + } + }, "OpenAIResponseInput": { "oneOf": [ { @@ -7764,6 +7905,10 @@ "type": "string", "const": "web_search" }, + { + "type": "string", + "const": "web_search_preview" + }, { "type": "string", "const": "web_search_preview_2025_03_11" @@ -7855,12 +8000,19 @@ "type": "string", "const": "output_text", "default": "output_text" + }, + "annotations": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseAnnotations" + } } }, "additionalProperties": false, "required": [ "text", - "type" + "type", + "annotations" ], "title": "OpenAIResponseOutputMessageContentOutputText" }, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index b736cd904..9175c97fc 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -5263,6 +5263,106 @@ components: - event_type - turn_id title: AgentTurnResponseTurnStartPayload + OpenAIResponseAnnotationCitation: + type: object + properties: + type: + type: string + const: url_citation + default: url_citation + end_index: + type: integer + start_index: + type: integer + title: + type: string + url: + type: string + additionalProperties: false + required: + - type + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + "OpenAIResponseAnnotationContainerFileCitation": + type: object + properties: + type: + type: string + const: container_file_citation + default: container_file_citation + container_id: + type: string + end_index: + type: integer + file_id: + type: string + filename: + type: string + start_index: + type: integer + additionalProperties: false + required: + - type + - container_id + - end_index + - file_id + - filename + - start_index + title: >- + OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: + type: object + properties: + type: + type: string + const: file_citation + default: file_citation + file_id: + type: string + filename: + type: string + index: + type: integer + additionalProperties: false + required: + - type + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + OpenAIResponseAnnotationFilePath: + type: object + properties: + type: + type: string + const: file_path + default: file_path + file_id: + type: string + index: + type: integer + additionalProperties: false + required: + - type + - file_id + - index + title: OpenAIResponseAnnotationFilePath + OpenAIResponseAnnotations: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + discriminator: + propertyName: type + mapping: + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' OpenAIResponseInput: oneOf: - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' @@ -5488,6 +5588,8 @@ components: oneOf: - type: string const: web_search + - type: string + const: web_search_preview - type: string const: web_search_preview_2025_03_11 default: web_search @@ -5547,10 +5649,15 @@ components: type: string const: output_text default: 
output_text + annotations: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseAnnotations' additionalProperties: false required: - text - type + - annotations title: >- OpenAIResponseOutputMessageContentOutputText "OpenAIResponseOutputMessageFileSearchToolCall": diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index 4bc9b37e4..1b50ee712 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -18,7 +18,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -38,7 +38,7 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference @@ -46,7 +46,7 @@ providers: metadata_store: namespace: null type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -85,7 +85,7 @@ providers: # config is a dictionary that contains the configuration for the provider. # in this case, the configuration is the url of the ollama server config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} ``` A few things to note: - A _provider instance_ is identified with an (id, type, configuration) triplet. @@ -94,6 +94,95 @@ A few things to note: - The configuration dictionary is provider-specific. - Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value. +### Environment Variable Substitution + +Llama Stack supports environment variable substitution in configuration values using the +`${env.VARIABLE_NAME}` syntax. This allows you to externalize configuration values and provide +different settings for different environments. The syntax is inspired by [bash parameter expansion](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html) +and follows similar patterns. + +#### Basic Syntax + +The basic syntax for environment variable substitution is: + +```yaml +config: + api_key: ${env.API_KEY} + url: ${env.SERVICE_URL} +``` + +If the environment variable is not set, the server will raise an error during startup. + +#### Default Values + +You can provide default values using the `:=` operator: + +```yaml +config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + port: ${env.PORT:=8321} + timeout: ${env.TIMEOUT:=60} +``` + +If the environment variable is not set, the value after `:=` is used (for example, `http://localhost:11434` for `OLLAMA_URL` above). +Empty defaults are not allowed, so `url: ${env.OLLAMA_URL:=}` will raise an error if the environment variable is not set.
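Under the hood, substitution is a regex-driven rewrite of string config values. Below is a minimal, illustrative sketch of the `:=` default semantics only; the helper name `resolve_defaults` is invented for this example, and the real implementation is `replace_env_vars` in `llama_stack/distribution/stack.py` (shown later in this diff), which also handles the `:+` conditional operator and type conversion:

```python
import os
import re

# Matches ${env.VAR} and ${env.VAR:=default}
_PATTERN = re.compile(r"\$\{env\.([A-Z0-9_]+)(?::=([^}]*))?\}")


def resolve_defaults(value: str) -> str:
    def _sub(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        env_value = os.environ.get(name)
        if env_value:
            return env_value
        if default:  # empty defaults (${env.VAR:=}) are rejected
            return default
        raise ValueError(f"Environment variable '{name}' not set or empty")

    return _PATTERN.sub(_sub, value)


os.environ.pop("OLLAMA_URL", None)
print(resolve_defaults("${env.OLLAMA_URL:=http://localhost:11434}"))
# -> http://localhost:11434
```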
+ +#### Conditional Values + +You can use the `:+` operator to provide a value only when the environment variable is set: + +```yaml +config: + # Only include this field if ENVIRONMENT is set + environment: ${env.ENVIRONMENT:+production} +``` + +If the environment variable is set, the value after `:+` will be used. If it's not set, the field +resolves to `None`. +The bare form `${env.ENVIRONMENT:+}` is also supported; it means the field resolves to `None` if the environment +variable is not set. This can be used to make a field optional by default and enable it at runtime when desired. + +#### Examples + +Here are some common patterns: + +```yaml +# Required environment variable (will error if not set) +api_key: ${env.OPENAI_API_KEY} + +# Optional with default +base_url: ${env.API_BASE_URL:=https://api.openai.com/v1} + +# Conditional field +debug_mode: ${env.DEBUG:+true} + +# Optional field that becomes None if not set +optional_token: ${env.OPTIONAL_TOKEN:+} +``` + +#### Runtime Override + +You can override environment variables at runtime when starting the server: + +```bash +# Override specific environment variables +llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com + +# Or set them in your shell +export API_KEY=sk-123 +export BASE_URL=https://custom-api.com +llama stack run --config run.yaml +``` + +#### Type Safety + +The environment variable substitution system is type-safe: + +- String values remain strings +- Empty conditionals (`${env.VAR:+}`) are converted to `None` for fields that accept `str | None` +- Numeric defaults are properly typed (e.g., `${env.PORT:=8321}` becomes an integer) +- Boolean defaults work correctly (e.g., `${env.DEBUG:=false}` becomes a boolean) + ## Resources Finally, let's look at the `models` section: @@ -152,7 +241,7 @@ server: config: jwks: uri: "https://kubernetes.default.svc:8443/openid/v1/jwks" - token: "${env.TOKEN:}" + token: "${env.TOKEN:+}" key_recheck_period: 3600 tls_cafile: "/path/to/ca.crt" issuer: "https://kubernetes.default.svc" @@ -396,12 +485,12 @@ providers: - provider_id: vllm-0 provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000} + url: ${env.VLLM_URL:=http://localhost:8000} # this vLLM server serves the llama-guard model (e.g., llama-guard:3b) - provider_id: vllm-1 provider_type: remote::vllm config: - url: ${env.SAFETY_VLLM_URL:http://localhost:8001} + url: ${env.SAFETY_VLLM_URL:=http://localhost:8001} ...
models: - metadata: {} diff --git a/docs/source/distributions/k8s/stack-configmap.yaml b/docs/source/distributions/k8s/stack-configmap.yaml index fa7bacd8f..0a08bca03 100644 --- a/docs/source/distributions/k8s/stack-configmap.yaml +++ b/docs/source/distributions/k8s/stack-configmap.yaml @@ -15,10 +15,10 @@ data: - provider_id: vllm-inference provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: @@ -30,10 +30,10 @@ data: provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,34 +45,34 @@ data: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:} + service_name: ${env.OTEL_SERVICE_NAME:+} sinks: ${env.TELEMETRY_SINKS:console} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -82,19 +82,19 @@ data: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: llamastack_kvstore inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: 
${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: embedding_dimension: 384 @@ -106,11 +106,11 @@ data: provider_id: vllm-inference model_type: llm - metadata: {} - model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} provider_id: vllm-safety model_type: llm shields: - - shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} vector_dbs: [] datasets: [] scoring_fns: [] diff --git a/docs/source/distributions/k8s/stack_run_config.yaml b/docs/source/distributions/k8s/stack_run_config.yaml index 8e2773dd1..5ac08134c 100644 --- a/docs/source/distributions/k8s/stack_run_config.yaml +++ b/docs/source/distributions/k8s/stack_run_config.yaml @@ -12,25 +12,25 @@ providers: - provider_id: vllm-inference provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: - url: ${env.VLLM_SAFETY_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -42,34 +42,34 @@ providers: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:} - sinks: ${env.TELEMETRY_SINKS:console} + service_name: ${env.OTEL_SERVICE_NAME:+console} + sinks: ${env.TELEMETRY_SINKS:+console} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: 
rag-runtime provider_type: inline::rag-runtime @@ -79,19 +79,19 @@ providers: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: llamastack_kvstore inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: embedding_dimension: 384 @@ -103,11 +103,11 @@ models: provider_id: vllm-inference model_type: llm - metadata: {} - model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} provider_id: vllm-safety model_type: llm shields: -- shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} +- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} vector_dbs: [] datasets: [] scoring_fns: [] diff --git a/llama_stack/apis/agents/__init__.py b/llama_stack/apis/agents/__init__.py index ab203b6cd..6416b283b 100644 --- a/llama_stack/apis/agents/__init__.py +++ b/llama_stack/apis/agents/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .agents import * # noqa: F401 F403 +from .agents import * diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py index addb72f14..27b85e2d6 100644 --- a/llama_stack/apis/agents/openai_responses.py +++ b/llama_stack/apis/agents/openai_responses.py @@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[ register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent") +@json_schema_type +class OpenAIResponseAnnotationFileCitation(BaseModel): + type: Literal["file_citation"] = "file_citation" + file_id: str + filename: str + index: int + + +@json_schema_type +class OpenAIResponseAnnotationCitation(BaseModel): + type: Literal["url_citation"] = "url_citation" + end_index: int + start_index: int + title: str + url: str + + +@json_schema_type +class OpenAIResponseAnnotationContainerFileCitation(BaseModel): + type: Literal["container_file_citation"] = "container_file_citation" + container_id: str + end_index: int + file_id: str + filename: str + start_index: int + + +@json_schema_type +class OpenAIResponseAnnotationFilePath(BaseModel): + type: Literal["file_path"] = "file_path" + file_id: str + index: int + + +OpenAIResponseAnnotations = Annotated[ + OpenAIResponseAnnotationFileCitation + | OpenAIResponseAnnotationCitation + | OpenAIResponseAnnotationContainerFileCitation + | OpenAIResponseAnnotationFilePath, + Field(discriminator="type"), +] +register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations") + + @json_schema_type class OpenAIResponseOutputMessageContentOutputText(BaseModel): text: str type: Literal["output_text"] = "output_text" + annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list) OpenAIResponseOutputMessageContent = Annotated[ @@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[ register_schema(OpenAIResponseInput, name="OpenAIResponseInput") +# Must match type Literals of OpenAIResponseInputToolWebSearch below +WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"] + + @json_schema_type class OpenAIResponseInputToolWebSearch(BaseModel): - type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search" + # Must match values of WebSearchToolTypes above + type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = ( + "web_search" + ) # TODO: actually use search_context_size somewhere... search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$") # TODO: add user_location diff --git a/llama_stack/apis/batch_inference/__init__.py b/llama_stack/apis/batch_inference/__init__.py index 3249475ee..b9b2944b2 100644 --- a/llama_stack/apis/batch_inference/__init__.py +++ b/llama_stack/apis/batch_inference/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .batch_inference import * # noqa: F401 F403 +from .batch_inference import * diff --git a/llama_stack/apis/benchmarks/__init__.py b/llama_stack/apis/benchmarks/__init__.py index f8f564957..62d1b367c 100644 --- a/llama_stack/apis/benchmarks/__init__.py +++ b/llama_stack/apis/benchmarks/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .benchmarks import * # noqa: F401 F403 +from .benchmarks import * diff --git a/llama_stack/apis/datasetio/__init__.py b/llama_stack/apis/datasetio/__init__.py index 378afbba8..8c087bfa4 100644 --- a/llama_stack/apis/datasetio/__init__.py +++ b/llama_stack/apis/datasetio/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .datasetio import * # noqa: F401 F403 +from .datasetio import * diff --git a/llama_stack/apis/datasets/__init__.py b/llama_stack/apis/datasets/__init__.py index 102b9927f..9c9a128d2 100644 --- a/llama_stack/apis/datasets/__init__.py +++ b/llama_stack/apis/datasets/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .datasets import * # noqa: F401 F403 +from .datasets import * diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index e3de3d5cb..8bf7a48d0 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import Enum, StrEnum from typing import Annotated, Any, Literal, Protocol from pydantic import BaseModel, Field @@ -13,7 +13,7 @@ from llama_stack.apis.resource import Resource, ResourceType from llama_stack.schema_utils import json_schema_type, register_schema, webmethod -class DatasetPurpose(str, Enum): +class DatasetPurpose(StrEnum): """ Purpose of the dataset. Each purpose has a required input data schema. diff --git a/llama_stack/apis/eval/__init__.py b/llama_stack/apis/eval/__init__.py index 5f91ad70d..28a1d6049 100644 --- a/llama_stack/apis/eval/__init__.py +++ b/llama_stack/apis/eval/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .eval import * # noqa: F401 F403 +from .eval import * diff --git a/llama_stack/apis/files/__init__.py b/llama_stack/apis/files/__init__.py index 269baf177..189e4de19 100644 --- a/llama_stack/apis/files/__init__.py +++ b/llama_stack/apis/files/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .files import * # noqa: F401 F403 +from .files import * diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py index 4dfeed448..a72dcd8d4 100644 --- a/llama_stack/apis/files/files.py +++ b/llama_stack/apis/files/files.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Annotated, Literal, Protocol, runtime_checkable from fastapi import File, Form, Response, UploadFile @@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, webmethod # OpenAI Files API Models -class OpenAIFilePurpose(str, Enum): +class OpenAIFilePurpose(StrEnum): """ Valid purpose values for OpenAI Files API. 
""" diff --git a/llama_stack/apis/inference/__init__.py b/llama_stack/apis/inference/__init__.py index f9f77f769..f0c8783c1 100644 --- a/llama_stack/apis/inference/__init__.py +++ b/llama_stack/apis/inference/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .inference import * # noqa: F401 F403 +from .inference import * diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index c64a5f750..222099064 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -20,7 +20,7 @@ from typing_extensions import TypedDict from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem from llama_stack.apis.common.responses import Order from llama_stack.apis.models import Model -from llama_stack.apis.telemetry.telemetry import MetricResponseMixin +from llama_stack.apis.telemetry import MetricResponseMixin from llama_stack.models.llama.datatypes import ( BuiltinTool, StopReason, diff --git a/llama_stack/apis/inspect/__init__.py b/llama_stack/apis/inspect/__init__.py index 88ba8e908..016937e3d 100644 --- a/llama_stack/apis/inspect/__init__.py +++ b/llama_stack/apis/inspect/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .inspect import * # noqa: F401 F403 +from .inspect import * diff --git a/llama_stack/apis/models/__init__.py b/llama_stack/apis/models/__init__.py index 410d8d1f9..ee90106b6 100644 --- a/llama_stack/apis/models/__init__.py +++ b/llama_stack/apis/models/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .models import * # noqa: F401 F403 +from .models import * diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 3d90a92a0..36da97e62 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, ConfigDict, Field @@ -22,7 +22,7 @@ class CommonModelFields(BaseModel): @json_schema_type -class ModelType(str, Enum): +class ModelType(StrEnum): llm = "llm" embedding = "embedding" diff --git a/llama_stack/apis/post_training/__init__.py b/llama_stack/apis/post_training/__init__.py index 7129c4abd..695575a30 100644 --- a/llama_stack/apis/post_training/__init__.py +++ b/llama_stack/apis/post_training/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .post_training import * # noqa: F401 F403 +from .post_training import * diff --git a/llama_stack/apis/providers/__init__.py b/llama_stack/apis/providers/__init__.py index b554a5d23..e35e2fe47 100644 --- a/llama_stack/apis/providers/__init__.py +++ b/llama_stack/apis/providers/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .providers import * # noqa: F401 F403 +from .providers import * diff --git a/llama_stack/apis/safety/__init__.py b/llama_stack/apis/safety/__init__.py index dc3fe90b4..d93bc1355 100644 --- a/llama_stack/apis/safety/__init__.py +++ b/llama_stack/apis/safety/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .safety import * # noqa: F401 F403 +from .safety import * diff --git a/llama_stack/apis/scoring/__init__.py b/llama_stack/apis/scoring/__init__.py index 0739dfc80..624b9e704 100644 --- a/llama_stack/apis/scoring/__init__.py +++ b/llama_stack/apis/scoring/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .scoring import * # noqa: F401 F403 +from .scoring import * diff --git a/llama_stack/apis/scoring_functions/__init__.py b/llama_stack/apis/scoring_functions/__init__.py index b96acb45f..fc1de0311 100644 --- a/llama_stack/apis/scoring_functions/__init__.py +++ b/llama_stack/apis/scoring_functions/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .scoring_functions import * # noqa: F401 F403 +from .scoring_functions import * diff --git a/llama_stack/apis/shields/__init__.py b/llama_stack/apis/shields/__init__.py index edad26100..783a4d124 100644 --- a/llama_stack/apis/shields/__init__.py +++ b/llama_stack/apis/shields/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .shields import * # noqa: F401 F403 +from .shields import * diff --git a/llama_stack/apis/synthetic_data_generation/__init__.py b/llama_stack/apis/synthetic_data_generation/__init__.py index cfdec76ce..bc169e8e6 100644 --- a/llama_stack/apis/synthetic_data_generation/__init__.py +++ b/llama_stack/apis/synthetic_data_generation/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .synthetic_data_generation import * # noqa: F401 F403 +from .synthetic_data_generation import * diff --git a/llama_stack/apis/telemetry/__init__.py b/llama_stack/apis/telemetry/__init__.py index 6a111dc9e..1250767f7 100644 --- a/llama_stack/apis/telemetry/__init__.py +++ b/llama_stack/apis/telemetry/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .telemetry import * # noqa: F401 F403 +from .telemetry import * diff --git a/llama_stack/apis/tools/__init__.py b/llama_stack/apis/tools/__init__.py index be8846ba2..b25310ecf 100644 --- a/llama_stack/apis/tools/__init__.py +++ b/llama_stack/apis/tools/__init__.py @@ -4,5 +4,5 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .rag_tool import * # noqa: F401 F403 -from .tools import * # noqa: F401 F403 +from .rag_tool import * +from .tools import * diff --git a/llama_stack/apis/vector_dbs/__init__.py b/llama_stack/apis/vector_dbs/__init__.py index 158241a6d..af34ba9d4 100644 --- a/llama_stack/apis/vector_dbs/__init__.py +++ b/llama_stack/apis/vector_dbs/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .vector_dbs import * # noqa: F401 F403 +from .vector_dbs import * diff --git a/llama_stack/apis/vector_io/__init__.py b/llama_stack/apis/vector_io/__init__.py index 3fe4fa4b6..3f4c60805 100644 --- a/llama_stack/apis/vector_io/__init__.py +++ b/llama_stack/apis/vector_io/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .vector_io import * # noqa: F401 F403 +from .vector_io import * diff --git a/llama_stack/distribution/access_control/datatypes.py b/llama_stack/distribution/access_control/datatypes.py index bc5ed6645..c833ed51b 100644 --- a/llama_stack/distribution/access_control/datatypes.py +++ b/llama_stack/distribution/access_control/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Self from pydantic import BaseModel, model_validator @@ -12,7 +12,7 @@ from pydantic import BaseModel, model_validator from .conditions import parse_conditions -class Action(str, Enum): +class Action(StrEnum): CREATE = "create" READ = "read" UPDATE = "update" diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index abc3f0065..e07da001e 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from pathlib import Path from typing import Annotated, Any @@ -29,8 +29,8 @@ from llama_stack.providers.datatypes import Api, ProviderSpec from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig -LLAMA_STACK_BUILD_CONFIG_VERSION = "2" -LLAMA_STACK_RUN_CONFIG_VERSION = "2" +LLAMA_STACK_BUILD_CONFIG_VERSION = 2 +LLAMA_STACK_RUN_CONFIG_VERSION = 2 RoutingKey = str | list[str] @@ -159,7 +159,7 @@ class LoggingConfig(BaseModel): ) -class AuthProviderType(str, Enum): +class AuthProviderType(StrEnum): """Supported authentication provider types.""" OAUTH2_TOKEN = "oauth2_token" @@ -182,7 +182,7 @@ class AuthenticationRequiredError(Exception): pass -class QuotaPeriod(str, Enum): +class QuotaPeriod(StrEnum): DAY = "day" @@ -229,7 +229,7 @@ class ServerConfig(BaseModel): class StackRunConfig(BaseModel): - version: str = LLAMA_STACK_RUN_CONFIG_VERSION + version: int = LLAMA_STACK_RUN_CONFIG_VERSION image_name: str = Field( ..., @@ -300,7 +300,7 @@ a default SQLite store will be used.""", class BuildConfig(BaseModel): - version: str = LLAMA_STACK_BUILD_CONFIG_VERSION + version: int = LLAMA_STACK_BUILD_CONFIG_VERSION distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. 
") image_type: str = Field( diff --git a/llama_stack/distribution/routers/inference.py b/llama_stack/distribution/routers/inference.py index 50c429315..b39da7810 100644 --- a/llama_stack/distribution/routers/inference.py +++ b/llama_stack/distribution/routers/inference.py @@ -30,7 +30,13 @@ from llama_stack.apis.inference import ( ListOpenAIChatCompletionResponse, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAICompletionWithInputMessages, + OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, Order, ResponseFormat, SamplingParams, @@ -41,14 +47,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry from llama_stack.log import get_logger diff --git a/llama_stack/distribution/routers/vector_io.py b/llama_stack/distribution/routers/vector_io.py index 6af3bd416..4bd5952dc 100644 --- a/llama_stack/distribution/routers/vector_io.py +++ b/llama_stack/distribution/routers/vector_io.py @@ -16,17 +16,15 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, SearchRankingOptions, VectorIO, - VectorStoreDeleteResponse, - VectorStoreListResponse, - VectorStoreObject, - VectorStoreSearchResponsePage, -) -from llama_stack.apis.vector_io.vector_io import ( VectorStoreChunkingStrategy, + VectorStoreDeleteResponse, VectorStoreFileContentsResponse, VectorStoreFileDeleteResponse, VectorStoreFileObject, VectorStoreFileStatus, + VectorStoreListResponse, + VectorStoreObject, + VectorStoreSearchResponsePage, ) from llama_stack.log import get_logger from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index b33b0d3f7..c86880669 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -127,7 +127,12 @@ class EnvVarError(Exception): def __init__(self, var_name: str, path: str = ""): self.var_name = var_name self.path = path - super().__init__(f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}") + super().__init__( + f"Environment variable '{var_name}' not set or empty {f'at {path}' if path else ''}. " + f"Use ${{env.{var_name}:=default_value}} to provide a default value, " + f"${{env.{var_name}:+value_if_set}} to make the field conditional, " + f"or ensure the environment variable is set." 
+ ) def replace_env_vars(config: Any, path: str = "") -> Any: @@ -150,25 +155,27 @@ def replace_env_vars(config: Any, path: str = "") -> Any: return result elif isinstance(config, str): - # Updated pattern to support both default values (:) and conditional values (+) - pattern = r"\${env\.([A-Z0-9_]+)(?:([:\+])([^}]*))?}" + # Pattern supports bash-like syntax: := for default and :+ for conditional, with an optional value + pattern = r"\${env\.([A-Z0-9_]+)(?::([=+])([^}]*))?}" - def get_env_var(match): + def get_env_var(match: re.Match): env_var = match.group(1) - operator = match.group(2) # ':' for default, '+' for conditional + operator = match.group(2) # '=' for default, '+' for conditional value_expr = match.group(3) env_value = os.environ.get(env_var) - if operator == ":": # Default value syntax: ${env.FOO:default} + if operator == "=": # Default value syntax: ${env.FOO:=default} if not env_value: - if value_expr is None: + # value_expr is an empty string (not None) when no value was matched + # This means ${env.FOO:=} is an error + if value_expr == "": raise EnvVarError(env_var, path) else: value = value_expr else: value = env_value - elif operator == "+": # Conditional value syntax: ${env.FOO+value_if_set} + elif operator == "+": # Conditional value syntax: ${env.FOO:+value_if_set} if env_value: value = value_expr else: @@ -183,13 +190,42 @@ def replace_env_vars(config: Any, path: str = "") -> Any: return os.path.expanduser(value) try: - return re.sub(pattern, get_env_var, config) + result = re.sub(pattern, get_env_var, config) + return _convert_string_to_proper_type(result) except EnvVarError as e: raise EnvVarError(e.var_name, e.path) from None return config +def _convert_string_to_proper_type(value: str) -> Any: + # This might be tricky depending on what the config type is: if 'str | None' we are + # good; if 'str' we would need to keep the empty string... 'str | None' is more common and + # providers config should be typed this way.
+ # TODO: we could try to load the config class and see if the config has a field with type 'str | None' + # and then convert the empty string to None or not + if value == "": + return None + + lowered = value.lower() + if lowered == "true": + return True + elif lowered == "false": + return False + + try: + return int(value) + except ValueError: + pass + + try: + return float(value) + except ValueError: + pass + + return value + + def validate_env_pair(env_pair: str) -> tuple[str, str]: """Validate and split an environment variable key-value pair.""" try: diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py index 11455ed46..9db87b280 100644 --- a/llama_stack/distribution/ui/modules/api.py +++ b/llama_stack/distribution/ui/modules/api.py @@ -25,7 +25,7 @@ class LlamaStackApi: def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None): """Run scoring on a single row""" if not scoring_params: - scoring_params = {fn_id: None for fn_id in scoring_function_ids} + scoring_params = dict.fromkeys(scoring_function_ids) return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params) diff --git a/llama_stack/log.py b/llama_stack/log.py index c14967f0a..fcbb79a5d 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -33,7 +33,7 @@ CATEGORIES = [ ] # Initialize category levels with default level -_category_levels: dict[str, int] = {category: DEFAULT_LOG_LEVEL for category in CATEGORIES} +_category_levels: dict[str, int] = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL) def config_to_category_levels(category: str, level: str): diff --git a/llama_stack/models/llama/datatypes.py b/llama_stack/models/llama/datatypes.py index f9f094c3d..7f1ebed55 100644 --- a/llama_stack/models/llama/datatypes.py +++ b/llama_stack/models/llama/datatypes.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import base64 -from enum import Enum +from enum import Enum, StrEnum from io import BytesIO from typing import Annotated, Any, Literal @@ -171,7 +171,7 @@ class GenerationResult(BaseModel): ignore_token: bool -class QuantizationMode(str, Enum): +class QuantizationMode(StrEnum): none = "none" fp8_mixed = "fp8_mixed" int4_mixed = "int4_mixed" diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index 60b05545b..221ed9027 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from enum import Enum +from enum import StrEnum from typing import Any, Protocol from urllib.parse import urlparse @@ -225,7 +225,7 @@ def remote_provider_spec( ) -class HealthStatus(str, Enum): +class HealthStatus(StrEnum): OK = "OK" ERROR = "Error" NOT_IMPLEMENTED = "Not Implemented" diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py index 4465a32fe..f291593f4 100644 --- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -42,9 +42,10 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAIResponseTextFormat, + WebSearchToolTypes, ) from llama_stack.apis.common.content_types import TextContentItem -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( Inference, OpenAIAssistantMessageParam, OpenAIChatCompletion, @@ -583,7 +584,7 @@ class OpenAIResponsesImpl: from llama_stack.apis.agents.openai_responses import ( MCPListToolsTool, ) - from llama_stack.apis.tools.tools import Tool + from llama_stack.apis.tools import Tool mcp_tool_to_server = {} @@ -609,7 +610,7 @@ class OpenAIResponsesImpl: # TODO: Handle other tool types if input_tool.type == "function": chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump())) - elif input_tool.type == "web_search": + elif input_tool.type in WebSearchToolTypes: tool_name = "web_search" tool = await self.tool_groups_api.get_tool(tool_name) if not tool: diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index bc0898dc5..9ae2018c4 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -208,7 +208,7 @@ class MetaReferenceEvalImpl( for scoring_fn_id in scoring_functions } else: - scoring_functions_dict = {scoring_fn_id: None for scoring_fn_id in scoring_functions} + scoring_functions_dict = dict.fromkeys(scoring_functions) score_response = await self.scoring_api.score( input_rows=score_input_rows, scoring_functions=scoring_functions_dict diff --git a/llama_stack/providers/inline/files/localfs/config.py b/llama_stack/providers/inline/files/localfs/config.py index 757a70742..6c767af8f 100644 --- a/llama_stack/providers/inline/files/localfs/config.py +++ b/llama_stack/providers/inline/files/localfs/config.py @@ -23,7 +23,7 @@ class LocalfsFilesImplConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "storage_dir": "${env.FILES_STORAGE_DIR:" + __distro_dir__ + "/files}", + "storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}", "metadata_store": SqliteSqlStoreConfig.sample_run_config( __distro_dir__=__distro_dir__, db_name="files_metadata.db", diff --git a/llama_stack/providers/inline/inference/meta_reference/config.py b/llama_stack/providers/inline/inference/meta_reference/config.py index 7bc961443..9556b026a 100644 --- a/llama_stack/providers/inline/inference/meta_reference/config.py +++ b/llama_stack/providers/inline/inference/meta_reference/config.py @@ -49,11 +49,11 @@ class MetaReferenceInferenceConfig(BaseModel): def sample_run_config( cls, model: str = "Llama3.2-3B-Instruct", - checkpoint_dir: str = "${env.CHECKPOINT_DIR:null}", - quantization_type: str = 
"${env.QUANTIZATION_TYPE:bf16}", - model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:0}", - max_batch_size: str = "${env.MAX_BATCH_SIZE:1}", - max_seq_len: str = "${env.MAX_SEQ_LEN:4096}", + checkpoint_dir: str = "${env.CHECKPOINT_DIR:=null}", + quantization_type: str = "${env.QUANTIZATION_TYPE:=bf16}", + model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:=0}", + max_batch_size: str = "${env.MAX_BATCH_SIZE:=1}", + max_seq_len: str = "${env.MAX_SEQ_LEN:=4096}", **kwargs, ) -> dict[str, Any]: return { diff --git a/llama_stack/providers/inline/inference/vllm/config.py b/llama_stack/providers/inline/inference/vllm/config.py index ce8743c74..660ef206b 100644 --- a/llama_stack/providers/inline/inference/vllm/config.py +++ b/llama_stack/providers/inline/inference/vllm/config.py @@ -44,10 +44,10 @@ class VLLMConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { - "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:1}", - "max_tokens": "${env.MAX_TOKENS:4096}", - "max_model_len": "${env.MAX_MODEL_LEN:4096}", - "max_num_seqs": "${env.MAX_NUM_SEQS:4}", - "enforce_eager": "${env.ENFORCE_EAGER:False}", - "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:0.3}", + "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:=1}", + "max_tokens": "${env.MAX_TOKENS:=4096}", + "max_model_len": "${env.MAX_MODEL_LEN:=4096}", + "max_num_seqs": "${env.MAX_NUM_SEQS:=4}", + "enforce_eager": "${env.ENFORCE_EAGER:=False}", + "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:=0.3}", } diff --git a/llama_stack/providers/inline/scoring/braintrust/config.py b/llama_stack/providers/inline/scoring/braintrust/config.py index 4a80f1e4f..f44d27f96 100644 --- a/llama_stack/providers/inline/scoring/braintrust/config.py +++ b/llama_stack/providers/inline/scoring/braintrust/config.py @@ -17,5 +17,5 @@ class BraintrustScoringConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "openai_api_key": "${env.OPENAI_API_KEY:}", + "openai_api_key": "${env.OPENAI_API_KEY:+}", } diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py index b705cb9b3..2bd113a94 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py @@ -7,7 +7,7 @@ from typing import Any from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.inference import Inference from llama_stack.apis.scoring import ( ScoreBatchResponse, ScoreResponse, diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py index 51cdf6c3f..340215a53 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py @@ -6,7 +6,7 @@ import re from typing import Any -from llama_stack.apis.inference.inference import Inference, UserMessage +from llama_stack.apis.inference import Inference, UserMessage from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring_functions import ScoringFnParams from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn diff --git 
a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index 93509040c..50dd8a788 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Any from pydantic import BaseModel, Field, field_validator @@ -12,7 +12,7 @@ from pydantic import BaseModel, Field, field_validator from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR -class TelemetrySink(str, Enum): +class TelemetrySink(StrEnum): OTEL_TRACE = "otel_trace" OTEL_METRIC = "otel_metric" SQLITE = "sqlite" @@ -20,12 +20,12 @@ class TelemetrySink(str, Enum): class TelemetryConfig(BaseModel): - otel_trace_endpoint: str = Field( - default="http://localhost:4318/v1/traces", + otel_trace_endpoint: str | None = Field( + default=None, description="The OpenTelemetry collector endpoint URL for traces", ) - otel_metric_endpoint: str = Field( - default="http://localhost:4318/v1/metrics", + otel_metric_endpoint: str | None = Field( + default=None, description="The OpenTelemetry collector endpoint URL for metrics", ) service_name: str = Field( @@ -52,7 +52,7 @@ class TelemetryConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]: return { - "service_name": "${env.OTEL_SERVICE_NAME:\u200b}", - "sinks": "${env.TELEMETRY_SINKS:console,sqlite}", - "sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + "service_name": "${env.OTEL_SERVICE_NAME:=\u200b}", + "sinks": "${env.TELEMETRY_SINKS:=console,sqlite}", + "sqlite_db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, } diff --git a/llama_stack/providers/inline/telemetry/meta_reference/sqlite_span_processor.py b/llama_stack/providers/inline/telemetry/meta_reference/sqlite_span_processor.py index b329a363c..8ab491189 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/sqlite_span_processor.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/sqlite_span_processor.py @@ -14,6 +14,8 @@ from opentelemetry.sdk.trace import SpanProcessor from opentelemetry.trace import Span from opentelemetry.trace.span import format_span_id, format_trace_id +from llama_stack.providers.utils.telemetry.tracing import LOCAL_ROOT_SPAN_MARKER + class SQLiteSpanProcessor(SpanProcessor): def __init__(self, conn_string): @@ -124,7 +126,7 @@ class SQLiteSpanProcessor(SpanProcessor): ( trace_id, service_name, - (span_id if span.attributes.get("__root_span__") == "true" else None), + (span_id if span.attributes.get(LOCAL_ROOT_SPAN_MARKER) else None), datetime.fromtimestamp(span.start_time / 1e9, UTC).isoformat(), datetime.fromtimestamp(span.end_time / 1e9, UTC).isoformat(), ), diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 0f6cf8619..98f5bf5a1 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -87,12 +87,16 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): trace.set_tracer_provider(provider) _TRACER_PROVIDER = provider if TelemetrySink.OTEL_TRACE in self.config.sinks: + if 
self.config.otel_trace_endpoint is None: + raise ValueError("otel_trace_endpoint is required when OTEL_TRACE is enabled") span_exporter = OTLPSpanExporter( endpoint=self.config.otel_trace_endpoint, ) span_processor = BatchSpanProcessor(span_exporter) trace.get_tracer_provider().add_span_processor(span_processor) if TelemetrySink.OTEL_METRIC in self.config.sinks: + if self.config.otel_metric_endpoint is None: + raise ValueError("otel_metric_endpoint is required when OTEL_METRIC is enabled") metric_reader = PeriodicExportingMetricReader( OTLPMetricExporter( endpoint=self.config.otel_metric_endpoint, diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index 12f4d6ad0..355750b25 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -16,8 +16,7 @@ import numpy as np from numpy.typing import NDArray from llama_stack.apis.files import Files -from llama_stack.apis.inference import InterleavedContent -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.inference import Inference, InterleavedContent from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py index 283724b41..7cc91d918 100644 --- a/llama_stack/providers/inline/vector_io/qdrant/config.py +++ b/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -19,5 +19,5 @@ class QdrantVectorIOConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", + "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py index cb806cb39..4c57f4aba 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py @@ -15,5 +15,5 @@ class SQLiteVectorIOConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + "sqlite_vec.db", + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db", } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 3b3c5f486..7e977635a 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -15,8 +15,8 @@ import numpy as np import sqlite_vec from numpy.typing import NDArray -from llama_stack.apis.files.files import Files -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.files import Files +from llama_stack.apis.inference import Inference from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -64,7 +64,7 @@ def _normalize_scores(scores: dict[str, float]) -> dict[str, float]: score_range = max_score - min_score if score_range > 0: return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()} - return {doc_id: 1.0 for doc_id in scores} + return dict.fromkeys(scores, 1.0) def _weighted_rerank( diff --git 
a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 66f2e8bce..47be57eee 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -70,7 +70,7 @@ def available_providers() -> list[ProviderSpec]: api=Api.inference, adapter=AdapterSpec( adapter_type="ollama", - pip_packages=["ollama", "aiohttp"], + pip_packages=["ollama", "aiohttp", "h11>=0.16.0"], config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig", module="llama_stack.providers.remote.inference.ollama", ), diff --git a/llama_stack/providers/registry/safety.py b/llama_stack/providers/registry/safety.py index e0a04be48..f0fe1e9f5 100644 --- a/llama_stack/providers/registry/safety.py +++ b/llama_stack/providers/registry/safety.py @@ -67,7 +67,7 @@ def available_providers() -> list[ProviderSpec]: api=Api.safety, adapter=AdapterSpec( adapter_type="sambanova", - pip_packages=["litellm"], + pip_packages=["litellm", "requests"], module="llama_stack.providers.remote.safety.sambanova", config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig", provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator", diff --git a/llama_stack/providers/registry/scoring.py b/llama_stack/providers/registry/scoring.py index 7980d6a13..244b06842 100644 --- a/llama_stack/providers/registry/scoring.py +++ b/llama_stack/providers/registry/scoring.py @@ -13,7 +13,7 @@ def available_providers() -> list[ProviderSpec]: InlineProviderSpec( api=Api.scoring, provider_type="inline::basic", - pip_packages=[], + pip_packages=["requests"], module="llama_stack.providers.inline.scoring.basic", config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig", api_dependencies=[ diff --git a/llama_stack/providers/remote/datasetio/nvidia/config.py b/llama_stack/providers/remote/datasetio/nvidia/config.py index e616ce25c..0f5ea22e9 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/config.py +++ b/llama_stack/providers/remote/datasetio/nvidia/config.py @@ -54,8 +54,8 @@ class NvidiaDatasetIOConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "api_key": "${env.NVIDIA_API_KEY:}", - "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}", - "project_id": "${env.NVIDIA_PROJECT_ID:test-project}", - "datasets_url": "${env.NVIDIA_DATASETS_URL:http://nemo.test}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}", + "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}", + "datasets_url": "${env.NVIDIA_DATASETS_URL:=http://nemo.test}", } diff --git a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py index 1f22a935b..f723c92cc 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py +++ b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py @@ -66,7 +66,7 @@ class NvidiaDatasetIOAdapter: Returns: Dataset """ - ## add warnings for unsupported params + # add warnings for unsupported params request_body = { "name": dataset_def.identifier, "namespace": self.config.dataset_namespace, diff --git a/llama_stack/providers/remote/eval/nvidia/config.py b/llama_stack/providers/remote/eval/nvidia/config.py index 5c8f9ff76..7a1c04304 100644 --- a/llama_stack/providers/remote/eval/nvidia/config.py +++ b/llama_stack/providers/remote/eval/nvidia/config.py @@ -25,5 +25,5 @@ class 
NVIDIAEvalConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}", + "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}", } diff --git a/llama_stack/providers/remote/inference/anthropic/models.py b/llama_stack/providers/remote/inference/anthropic/models.py index 39cb64440..afaf3c4e4 100644 --- a/llama_stack/providers/remote/inference/anthropic/models.py +++ b/llama_stack/providers/remote/inference/anthropic/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py index 79b1b5f08..1c82ff3a8 100644 --- a/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ -24,6 +24,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, ResponseFormatType, SamplingParams, @@ -33,14 +39,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ( diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py index 027eeab8d..392aed72f 100644 --- a/llama_stack/providers/remote/inference/fireworks/models.py +++ b/llama_stack/providers/remote/inference/fireworks/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/gemini/models.py b/llama_stack/providers/remote/inference/gemini/models.py index ef1cf339f..c4bb4f08b 100644 --- a/llama_stack/providers/remote/inference/gemini/models.py +++ b/llama_stack/providers/remote/inference/gemini/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
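[Editor's note] The telemetry adapter hunk above pairs the now-optional OTEL endpoints with explicit guards. A minimal sketch of that setup-time check, using a simplified stand-in for TelemetryConfig (field and sink names taken from the hunks above, everything else assumed):

```python
from dataclasses import dataclass, field


@dataclass
class TelemetryConfigSketch:
    # Simplified stand-in for TelemetryConfig: the endpoints now default to
    # None and are only required when the matching sink is enabled.
    sinks: list[str] = field(default_factory=lambda: ["console", "sqlite"])
    otel_trace_endpoint: str | None = None
    otel_metric_endpoint: str | None = None


def check_otel_endpoints(cfg: TelemetryConfigSketch) -> None:
    # Fail fast at adapter construction instead of handing None to an exporter.
    if "otel_trace" in cfg.sinks and cfg.otel_trace_endpoint is None:
        raise ValueError("otel_trace_endpoint is required when OTEL_TRACE is enabled")
    if "otel_metric" in cfg.sinks and cfg.otel_metric_endpoint is None:
        raise ValueError("otel_metric_endpoint is required when OTEL_METRIC is enabled")
```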
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py index 27d7d7961..4b295e788 100644 --- a/llama_stack/providers/remote/inference/groq/groq.py +++ b/llama_stack/providers/remote/inference/groq/groq.py @@ -9,7 +9,7 @@ from typing import Any from openai import AsyncOpenAI -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChoiceDelta, diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/llama_stack/providers/remote/inference/nvidia/config.py index 4c449edc2..6369928bb 100644 --- a/llama_stack/providers/remote/inference/nvidia/config.py +++ b/llama_stack/providers/remote/inference/nvidia/config.py @@ -55,7 +55,7 @@ class NVIDIAConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "url": "${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}", - "api_key": "${env.NVIDIA_API_KEY:}", - "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:True}", + "url": "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:=True}", } diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index cb6c6e279..1dd72da3f 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -29,20 +29,18 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, ToolChoice, ToolConfig, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat from llama_stack.providers.utils.inference import ( diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/llama_stack/providers/remote/inference/ollama/config.py index 37b827f4f..b2cc4d8a7 100644 --- a/llama_stack/providers/remote/inference/ollama/config.py +++ b/llama_stack/providers/remote/inference/ollama/config.py @@ -17,7 +17,7 @@ class OllamaImplConfig(BaseModel): @classmethod def sample_run_config( - cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs + cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs ) -> dict[str, Any]: return { "url": url, diff --git a/llama_stack/providers/remote/inference/ollama/models.py b/llama_stack/providers/remote/inference/ollama/models.py index 8f0f0421a..cacf88861 100644 --- a/llama_stack/providers/remote/inference/ollama/models.py +++ b/llama_stack/providers/remote/inference/ollama/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
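[Editor's note] The sample_run_config changes in these hunks migrate every placeholder from `${env.VAR:default}` to the shell-style `${env.VAR:=default}` (default when unset) and `${env.VAR:+}` (conditional on the variable being set). A hypothetical resolver illustrating that reading, borrowed from bash parameter expansion; the stack's actual substitution layer may differ, particularly in how it maps the unset `:+` case:

```python
import os
import re

# Hypothetical resolver, for illustration only. ":=" falls back to the given
# default when the variable is unset. ":+" (bash-style) substitutes the
# alternate word only when the variable IS set; the stack presumably maps the
# unset case to an omitted/None value rather than an empty string.
_PLACEHOLDER = re.compile(r"\$\{env\.(\w+):(=|\+)([^}]*)\}")


def resolve_env(template: str) -> str:
    def _sub(match: re.Match) -> str:
        name, op, word = match.groups()
        value = os.environ.get(name)
        if op == "=":
            return value if value is not None else word
        return word if value is not None else ""  # op == "+"
    return _PLACEHOLDER.sub(_sub, template)


# With OLLAMA_URL unset:
# resolve_env("${env.OLLAMA_URL:=http://localhost:11434}") -> "http://localhost:11434"
```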
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index 2f51920b5..e9df0dcc8 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -32,15 +32,6 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, - ResponseFormat, - SamplingParams, - TextTruncation, - ToolChoice, - ToolConfig, - ToolDefinition, - ToolPromptFormat, -) -from llama_stack.apis.inference.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAICompletion, @@ -48,6 +39,13 @@ from llama_stack.apis.inference.inference import ( OpenAIEmbeddingUsage, OpenAIMessageParam, OpenAIResponseFormatParam, + ResponseFormat, + SamplingParams, + TextTruncation, + ToolChoice, + ToolConfig, + ToolDefinition, + ToolPromptFormat, ) from llama_stack.apis.models import Model, ModelType from llama_stack.log import get_logger diff --git a/llama_stack/providers/remote/inference/openai/models.py b/llama_stack/providers/remote/inference/openai/models.py index e029c456c..14a6955d5 100644 --- a/llama_stack/providers/remote/inference/openai/models.py +++ b/llama_stack/providers/remote/inference/openai/models.py @@ -6,7 +6,7 @@ from dataclasses import dataclass -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py index ed4ec22aa..72428422f 100644 --- a/llama_stack/providers/remote/inference/openai/openai.py +++ b/llama_stack/providers/remote/inference/openai/openai.py @@ -10,7 +10,7 @@ from typing import Any from openai import AsyncOpenAI -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAICompletion, diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py index e9660abb9..d5b3a5973 100644 --- a/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -19,7 +19,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -28,13 +33,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model from llama_stack.distribution.library_client import convert_pydantic_to_json_value, convert_to_pydantic from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/runpod/config.py b/llama_stack/providers/remote/inference/runpod/config.py index 
e3913dc35..ff32a971c 100644 --- a/llama_stack/providers/remote/inference/runpod/config.py +++ b/llama_stack/providers/remote/inference/runpod/config.py @@ -25,6 +25,6 @@ class RunpodImplConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { - "url": "${env.RUNPOD_URL:}", - "api_token": "${env.RUNPOD_API_TOKEN:}", + "url": "${env.RUNPOD_URL:+}", + "api_token": "${env.RUNPOD_API_TOKEN:+}", } diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/llama_stack/providers/remote/inference/runpod/runpod.py index f8c98893e..1863b8a50 100644 --- a/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/llama_stack/providers/remote/inference/runpod/runpod.py @@ -8,7 +8,7 @@ from collections.abc import AsyncGenerator from openai import OpenAI from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.apis.inference.inference import OpenAIEmbeddingsResponse +from llama_stack.apis.inference import OpenAIEmbeddingsResponse # from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/together/config.py b/llama_stack/providers/remote/inference/together/config.py index 5c7f60519..121e2cae7 100644 --- a/llama_stack/providers/remote/inference/together/config.py +++ b/llama_stack/providers/remote/inference/together/config.py @@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel): def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { "url": "https://api.together.xyz/v1", - "api_key": "${env.TOGETHER_API_KEY:}", + "api_key": "${env.TOGETHER_API_KEY:+}", } diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py index f4b259767..85e1b1848 100644 --- a/llama_stack/providers/remote/inference/together/models.py +++ b/llama_stack/providers/remote/inference/together/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
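[Editor's note] A little further down, the VLLMInferenceAdapterConfig hunk removes the boolean-string branch from validate_tls_verify, leaving any string value to be treated as a certificate path (presumably because the env-substitution layer now yields real booleans for `${env.VLLM_TLS_VERIFY:=true}`). A sketch of the post-change validator on a pared-down model; the surrounding fields are assumed:

```python
from pathlib import Path

from pydantic import BaseModel, field_validator


class VLLMTLSSketch(BaseModel):
    # Pared-down model; the real config also carries url, max_tokens,
    # api_token, and more.
    tls_verify: bool | str = True

    @field_validator("tls_verify")
    @classmethod
    def validate_tls_verify(cls, v):
        if isinstance(v, str):
            # Post-change behavior: every string is a cert path, so "true"
            # here would fail the existence check rather than become a bool.
            cert_path = Path(v).expanduser().resolve()
            if not cert_path.exists():
                raise ValueError(f"TLS certificate file does not exist: {v}")
        return v
```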
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index 7030a644d..9e6877b7c 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -23,7 +23,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, ResponseFormatType, SamplingParams, @@ -33,13 +38,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py index 99abddf51..e11efa7f0 100644 --- a/llama_stack/providers/remote/inference/vllm/config.py +++ b/llama_stack/providers/remote/inference/vllm/config.py @@ -34,9 +34,6 @@ class VLLMInferenceAdapterConfig(BaseModel): @classmethod def validate_tls_verify(cls, v): if isinstance(v, str): - # Check if it's a boolean string - if v.lower() in ("true", "false"): - return v.lower() == "true" # Otherwise, treat it as a cert path cert_path = Path(v).expanduser().resolve() if not cert_path.exists(): @@ -54,7 +51,7 @@ class VLLMInferenceAdapterConfig(BaseModel): ): return { "url": url, - "max_tokens": "${env.VLLM_MAX_TOKENS:4096}", - "api_token": "${env.VLLM_API_TOKEN:fake}", - "tls_verify": "${env.VLLM_TLS_VERIFY:true}", + "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}", + "api_token": "${env.VLLM_API_TOKEN:=fake}", + "tls_verify": "${env.VLLM_TLS_VERIFY:=true}", } diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index ae04f206a..d1455acaa 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -38,9 +38,13 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAICompletion, OpenAIEmbeddingData, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -49,12 +53,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall from llama_stack.models.llama.sku_list import all_registered_models diff --git a/llama_stack/providers/remote/inference/watsonx/config.py b/llama_stack/providers/remote/inference/watsonx/config.py index 5eda9c5c0..9534eceeb 100644 --- 
a/llama_stack/providers/remote/inference/watsonx/config.py +++ b/llama_stack/providers/remote/inference/watsonx/config.py @@ -40,7 +40,7 @@ class WatsonXConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "url": "${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}", - "api_key": "${env.WATSONX_API_KEY:}", - "project_id": "${env.WATSONX_PROJECT_ID:}", + "url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}", + "api_key": "${env.WATSONX_API_KEY:+}", + "project_id": "${env.WATSONX_PROJECT_ID:+}", } diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py index 7cdd06a1f..78161d1cb 100644 --- a/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -18,10 +18,16 @@ from llama_stack.apis.inference import ( CompletionRequest, EmbeddingsResponse, EmbeddingTaskType, + GreedySamplingStrategy, Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -29,14 +35,6 @@ from llama_stack.apis.inference import ( ToolConfig, ToolDefinition, ToolPromptFormat, -) -from llama_stack.apis.inference.inference import ( - GreedySamplingStrategy, - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, TopKSamplingStrategy, TopPSamplingStrategy, ) diff --git a/llama_stack/providers/remote/post_training/nvidia/config.py b/llama_stack/providers/remote/post_training/nvidia/config.py index fa08b6e3f..ea6dff0b5 100644 --- a/llama_stack/providers/remote/post_training/nvidia/config.py +++ b/llama_stack/providers/remote/post_training/nvidia/config.py @@ -55,10 +55,10 @@ class NvidiaPostTrainingConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "api_key": "${env.NVIDIA_API_KEY:}", - "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}", - "project_id": "${env.NVIDIA_PROJECT_ID:test-project}", - "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}", + "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}", + "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}", } diff --git a/llama_stack/providers/remote/safety/nvidia/config.py b/llama_stack/providers/remote/safety/nvidia/config.py index ddf5a3a0b..1c618f4f4 100644 --- a/llama_stack/providers/remote/safety/nvidia/config.py +++ b/llama_stack/providers/remote/safety/nvidia/config.py @@ -35,6 +35,6 @@ class NVIDIASafetyConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}", - "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}", + "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}", + "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}", } diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/config.py b/llama_stack/providers/remote/tool_runtime/brave_search/config.py index 37ba21304..93b97a1b2 100644 --- a/llama_stack/providers/remote/tool_runtime/brave_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/brave_search/config.py @@ -22,6 +22,6 @@ class 
BraveSearchToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.BRAVE_SEARCH_API_KEY:}", + "api_key": "${env.BRAVE_SEARCH_API_KEY:+}", "max_results": 3, } diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index c9b18d30d..5bdd27807 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -22,6 +22,6 @@ class TavilySearchToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.TAVILY_SEARCH_API_KEY:}", + "api_key": "${env.TAVILY_SEARCH_API_KEY:+}", "max_results": 3, } diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py index aefc86bd6..b5b10e371 100644 --- a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +++ b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py @@ -17,5 +17,5 @@ class WolframAlphaToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "api_key": "${env.WOLFRAM_ALPHA_API_KEY:}", + "api_key": "${env.WOLFRAM_ALPHA_API_KEY:+}", } diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/llama_stack/providers/remote/vector_io/pgvector/config.py index 04b92a2e4..041e864ca 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -22,8 +22,8 @@ class PGVectorVectorIOConfig(BaseModel): @classmethod def sample_run_config( cls, - host: str = "${env.PGVECTOR_HOST:localhost}", - port: int = "${env.PGVECTOR_PORT:5432}", + host: str = "${env.PGVECTOR_HOST:=localhost}", + port: int = "${env.PGVECTOR_PORT:=5432}", db: str = "${env.PGVECTOR_DB}", user: str = "${env.PGVECTOR_USER}", password: str = "${env.PGVECTOR_PASSWORD}", diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index c21f379c9..d19908368 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -23,6 +23,13 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -31,16 +38,7 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIEmbeddingUsage, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) -from llama_stack.apis.models.models import Model +from llama_stack.apis.models import Model from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index d707e36c2..de67e5288 100644 --- 
a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -8,7 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference import ( diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index 01dfb8d61..47144ee0e 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -95,27 +95,25 @@ from llama_stack.apis.inference import ( CompletionResponse, CompletionResponseStreamChunk, GreedySamplingStrategy, - Message, - SamplingParams, - SystemMessage, - TokenLogProbs, - ToolChoice, - ToolResponseMessage, - TopKSamplingStrategy, - TopPSamplingStrategy, - UserMessage, -) -from llama_stack.apis.inference.inference import ( JsonSchemaResponseFormat, + Message, OpenAIChatCompletion, OpenAICompletion, OpenAICompletionChoice, OpenAIEmbeddingData, OpenAIMessageParam, OpenAIResponseFormatParam, + SamplingParams, + SystemMessage, + TokenLogProbs, + ToolChoice, ToolConfig, + ToolResponseMessage, + TopKSamplingStrategy, + TopPSamplingStrategy, + UserMessage, ) -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChoice as OpenAIChatCompletionChoice, ) from llama_stack.models.llama.datatypes import ( diff --git a/llama_stack/providers/utils/kvstore/config.py b/llama_stack/providers/utils/kvstore/config.py index e966e13ba..a45ff4ce8 100644 --- a/llama_stack/providers/utils/kvstore/config.py +++ b/llama_stack/providers/utils/kvstore/config.py @@ -45,8 +45,8 @@ class RedisKVStoreConfig(CommonConfig): return { "type": "redis", "namespace": None, - "host": "${env.REDIS_HOST:localhost}", - "port": "${env.REDIS_PORT:6379}", + "host": "${env.REDIS_HOST:=localhost}", + "port": "${env.REDIS_PORT:=6379}", } @@ -66,7 +66,7 @@ class SqliteKVStoreConfig(CommonConfig): return { "type": "sqlite", "namespace": None, - "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, } @@ -84,12 +84,12 @@ class PostgresKVStoreConfig(CommonConfig): return { "type": "postgres", "namespace": None, - "host": "${env.POSTGRES_HOST:localhost}", - "port": "${env.POSTGRES_PORT:5432}", - "db": "${env.POSTGRES_DB:llamastack}", - "user": "${env.POSTGRES_USER:llamastack}", - "password": "${env.POSTGRES_PASSWORD:llamastack}", - "table_name": "${env.POSTGRES_TABLE_NAME:" + table_name + "}", + "host": "${env.POSTGRES_HOST:=localhost}", + "port": "${env.POSTGRES_PORT:=5432}", + "db": "${env.POSTGRES_DB:=llamastack}", + "user": "${env.POSTGRES_USER:=llamastack}", + "password": "${env.POSTGRES_PASSWORD:=llamastack}", + "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}", } @classmethod @@ -131,12 +131,12 @@ class MongoDBKVStoreConfig(CommonConfig): return { "type": "mongodb", "namespace": None, - "host": "${env.MONGODB_HOST:localhost}", - "port": "${env.MONGODB_PORT:5432}", + "host": "${env.MONGODB_HOST:=localhost}", + "port": "${env.MONGODB_PORT:=5432}", "db": "${env.MONGODB_DB}", "user": "${env.MONGODB_USER}", "password": "${env.MONGODB_PASSWORD}", - "collection_name": "${env.MONGODB_COLLECTION_NAME:" + 
collection_name + "}", } diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 8b962db76..d00624aed 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -12,8 +12,7 @@ import uuid from abc import ABC, abstractmethod from typing import Any -from llama_stack.apis.files import Files -from llama_stack.apis.files.files import OpenAIFileObject +from llama_stack.apis.files import Files, OpenAIFileObject from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlstore.py index edc7672a3..d558a2a26 100644 --- a/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -50,7 +50,7 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): return cls( type="sqlite", - db_path="${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, ) @property @@ -78,11 +78,11 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): def sample_run_config(cls, **kwargs): return cls( type="postgres", - host="${env.POSTGRES_HOST:localhost}", - port="${env.POSTGRES_PORT:5432}", - db="${env.POSTGRES_DB:llamastack}", - user="${env.POSTGRES_USER:llamastack}", - password="${env.POSTGRES_PASSWORD:llamastack}", + host="${env.POSTGRES_HOST:=localhost}", + port="${env.POSTGRES_PORT:=5432}", + db="${env.POSTGRES_DB:=llamastack}", + user="${env.POSTGRES_USER:=llamastack}", + password="${env.POSTGRES_PASSWORD:=llamastack}", ) diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py index 10081f037..c85722bdc 100644 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -35,6 +35,9 @@ INVALID_SPAN_ID = 0x0000000000000000 INVALID_TRACE_ID = 0x00000000000000000000000000000000 ROOT_SPAN_MARKERS = ["__root__", "__root_span__"] +# The logical root span may not be visible to this process if a parent context +# is passed in. The local root span is the first local span in a trace. +LOCAL_ROOT_SPAN_MARKER = "__local_root_span__" def trace_id_to_str(trace_id: int) -> str: @@ -180,7 +183,13 @@ async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceCont trace_id = generate_trace_id() context = TraceContext(BACKGROUND_LOGGER, trace_id) - attributes = {marker: True for marker in ROOT_SPAN_MARKERS} | (attributes or {}) + # Mark this span as the root of the trace for now. Processing of the + # traceparent context, if one was supplied, comes later and will result + # in the ROOT_SPAN_MARKERS being removed. Also mark this as the 'local' + # root, i.e. the root of the spans originating in this process, as this + # is needed to ensure that we insert this 'local' root span's id into + # the trace record in the sqlite store.
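[Editor's note] The `attributes = ...` line added immediately after this comment block merges three mappings: dict.fromkeys replaces the removed comprehension, and `|` keeps caller-supplied attributes winning on collisions. A quick standalone illustration (the caller dict is hypothetical):

```python
# Marker names as defined earlier in tracing.py; the caller dict is made up.
ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
LOCAL_ROOT_SPAN_MARKER = "__local_root_span__"
caller_attributes = {"session": "abc123"}

# dict.fromkeys maps every marker to True; `|` merges left to right, so the
# rightmost (caller-supplied) dict wins on any key collision.
attributes = (
    dict.fromkeys(ROOT_SPAN_MARKERS, True)
    | {LOCAL_ROOT_SPAN_MARKER: True}
    | caller_attributes
)
assert attributes == {
    "__root__": True,
    "__root_span__": True,
    "__local_root_span__": True,
    "session": "abc123",
}
```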
+ attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | {LOCAL_ROOT_SPAN_MARKER: True} | (attributes or {}) context.push_span(name, attributes) CURRENT_TRACE_CONTEXT.set(context) diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml index 97a06f77a..1a2c883fa 100644 --- a/llama_stack/templates/bedrock/build.yaml +++ b/llama_stack/templates/bedrock/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use AWS Bedrock for running LLM inference and safety providers: diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index 8033b2086..61bc83f02 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: bedrock apis: - agents @@ -22,7 +22,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db safety: - provider_id: bedrock provider_type: remote::bedrock @@ -34,17 +34,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -52,7 +52,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -60,14 +60,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -78,17 +78,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - 
provider_id: rag-runtime provider_type: inline::rag-runtime @@ -98,10 +98,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/inference_store.db models: - metadata: {} model_id: meta.llama3-1-8b-instruct-v1:0 diff --git a/llama_stack/templates/cerebras/build.yaml b/llama_stack/templates/cerebras/build.yaml index f26f4ed9b..ecd0ac418 100644 --- a/llama_stack/templates/cerebras/build.yaml +++ b/llama_stack/templates/cerebras/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Cerebras for running LLM inference providers: diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index d891502d8..f341a88c1 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index 490648302..9bd8fcc7c 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: cerebras apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -40,10 +40,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -51,7 +51,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -59,14 +59,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -77,34 +77,34 @@ providers: - 
provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/inference_store.db models: - metadata: {} model_id: llama3.1-8b diff --git a/llama_stack/templates/ci-tests/build.yaml b/llama_stack/templates/ci-tests/build.yaml index 9f4fbbdda..c061d0793 100644 --- a/llama_stack/templates/ci-tests/build.yaml +++ b/llama_stack/templates/ci-tests/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running e2e tests in CI providers: diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py index afa8a23ce..7de8069ae 100644 --- a/llama_stack/templates/ci-tests/ci_tests.py +++ b/llama_stack/templates/ci-tests/ci_tests.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
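[Editor's note] ci_tests.py below joins the rest of this diff in importing ModelType from llama_stack.apis.models rather than the llama_stack.apis.models.models submodule. For both paths to resolve, the package `__init__.py` is assumed to re-export the submodule's public names (the diff itself depends on this); a quick check of that assumption:

```python
import importlib

# Assumes llama_stack is installed and that llama_stack/apis/models/__init__.py
# re-exports the submodule's public names.
pkg = importlib.import_module("llama_stack.apis.models")
sub = importlib.import_module("llama_stack.apis.models.models")
assert pkg.ModelType is sub.ModelType  # both import paths name one object
```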
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 92497b0bf..4b7de1c0c 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ci-tests apis: - agents @@ -24,7 +24,7 @@ providers: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/sqlite_vec.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -37,17 +37,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -55,7 +55,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -63,14 +63,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -81,17 +81,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -101,10 +101,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db inference_store: type: 
sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/dell/build.yaml b/llama_stack/templates/dell/build.yaml index 513df16c1..ff8d58a08 100644 --- a/llama_stack/templates/dell/build.yaml +++ b/llama_stack/templates/dell/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Dell's distribution of Llama Stack. TGI inference via Dell's custom container diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py index a7ec5f3b8..5a6f52a89 100644 --- a/llama_stack/templates/dell/dell.py +++ b/llama_stack/templates/dell/dell.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml index 22cf1fd24..7f1d0a8c0 100644 --- a/llama_stack/templates/dell/run-with-safety.yaml +++ b/llama_stack/templates/dell/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: dell apis: - agents @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,27 +84,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: 
remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml index aeca2fc26..310f3cc20 100644 --- a/llama_stack/templates/dell/run.yaml +++ b/llama_stack/templates/dell/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: dell apis: - agents @@ -36,17 +36,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -54,7 +54,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -62,14 +62,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -80,27 +80,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: 
type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml index 53b47da41..eb08c1d43 100644 --- a/llama_stack/templates/fireworks/build.yaml +++ b/llama_stack/templates/fireworks/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Fireworks.AI for running LLM inference providers: diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index 5e8935361..ad29c648f 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index 302328486..6265f5cae 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: fireworks apis: - agents @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -46,17 +46,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -64,7 +64,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -72,14 +72,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: 
null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -90,30 +90,30 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/fireworks/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/files_metadata.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -122,10 +122,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index a31ed732b..e10404e92 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: fireworks apis: - agents @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -41,17 +41,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/trace_store.db eval: - provider_id: meta-reference provider_type: 
inline::meta-reference @@ -59,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -67,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -85,30 +85,30 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/fireworks/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/files_metadata.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -117,10 +117,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/groq/build.yaml b/llama_stack/templates/groq/build.yaml index 819df22f0..7e50a899f 100644 --- a/llama_stack/templates/groq/build.yaml +++ b/llama_stack/templates/groq/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Groq for running LLM inference providers: diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py index 4e52aa42d..9e166a288 100644 --- a/llama_stack/templates/groq/groq.py +++ b/llama_stack/templates/groq/groq.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git 
a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index 7f1912a6f..21c8f7e0f 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: groq apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,27 +84,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/inference_store.db models: - metadata: {} model_id: groq/llama3-8b-8192 diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml 
index 8ede83694..9fca9ac22 100644 --- a/llama_stack/templates/hf-endpoint/build.yaml +++ b/llama_stack/templates/hf-endpoint/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Hugging Face Inference Endpoint for running LLM inference providers: diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py index 69e037299..23887469f 100644 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index 8b00f4ba5..2ae1d7685 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-endpoint apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: 
inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index 8a9cd5c49..3ec5ae9c1 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-endpoint apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic 
provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml index d0752db9a..214245116 100644 --- a/llama_stack/templates/hf-serverless/build.yaml +++ b/llama_stack/templates/hf-serverless/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Hugging Face Inference Endpoint for running LLM inference providers: diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py index ecfe2a167..c58c0921d 100644 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ b/llama_stack/templates/hf-serverless/hf_serverless.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
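A note on the import hunk that follows, which this diff repeats for every template module (fireworks.py, groq.py, hf_endpoint.py, hf_serverless.py, llama_api.py, meta_reference.py, ollama.py, open_benchmark.py): the deep module path llama_stack.apis.models.models is replaced by the package-level import llama_stack.apis.models. That only resolves if the package's __init__.py re-exports the symbol. A minimal sketch of the re-export pattern, under the assumption that the package does so; the enum members shown are illustrative stand-ins, not copied from the repo:

    # llama_stack/apis/models/models.py -- illustrative stand-in, not the repo's file
    from enum import Enum

    class ModelType(str, Enum):
        llm = "llm"
        embedding = "embedding"

    # llama_stack/apis/models/__init__.py -- the assumed re-export that lets
    # `from llama_stack.apis.models import ModelType` replace the deep path
    from .models import ModelType

    __all__ = ["ModelType"]

Importing from the package keeps these call sites stable if models.py is later split or renamed, which the deep path would not survive.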
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml index fec64c1df..3871b77e7 100644 --- a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-serverless apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index d4a6286d7..0a5b59400 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-serverless apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 
- provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/llama_api/build.yaml b/llama_stack/templates/llama_api/build.yaml index 857e5f014..44a42594a 100644 --- a/llama_stack/templates/llama_api/build.yaml +++ b/llama_stack/templates/llama_api/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running e2e tests in CI providers: diff --git a/llama_stack/templates/llama_api/llama_api.py b/llama_stack/templates/llama_api/llama_api.py index b4641b9da..7631781af 100644 --- a/llama_stack/templates/llama_api/llama_api.py +++ b/llama_stack/templates/llama_api/llama_api.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -41,7 +41,7 @@ def get_inference_providers() -> tuple[list[Provider], list[ModelInput]]: ( "llama-openai-compat", LLLAMA_MODEL_ENTRIES, - LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:}"), + LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:+}"), ), ] inference_providers = [] @@ -85,17 +85,17 @@ def get_distribution_template() -> DistributionTemplate: config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/llama_api/run.yaml b/llama_stack/templates/llama_api/run.yaml index 2185eb4fc..b627ed2f1 100644 --- a/llama_stack/templates/llama_api/run.yaml +++ b/llama_stack/templates/llama_api/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: llama_api apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::llama-openai-compat config: openai_compat_api_base: https://api.llama.com/compat/v1/ - api_key: ${env.LLAMA_API_KEY:} + api_key: ${env.LLAMA_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -24,19 +24,19 @@ providers: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} 
provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -49,17 +49,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -67,7 +67,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -75,14 +75,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -93,17 +93,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -113,10 +113,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/inference_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/inference_store.db models: - metadata: {} model_id: Llama-3.3-70B-Instruct diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml index 53ad411e3..2119eeddd 100644 --- a/llama_stack/templates/meta-reference-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-gpu/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Meta Reference for running LLM inference providers: diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 95d126095..57fb8f2af 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index e65445a9e..6b15a1e01 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: meta-reference-gpu apis: - agents @@ -18,10 +18,10 @@ providers: model: ${env.INFERENCE_MODEL} checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -31,10 +31,10 @@ providers: model: ${env.SAFETY_MODEL} checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -42,7 +42,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -55,17 +55,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -73,7 +73,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -81,14 +81,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -99,17 +99,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -119,10 +119,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index 8ef02f14d..1b44a0b3e 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: meta-reference-gpu apis: - agents @@ -18,10 +18,10 @@ providers: model: ${env.INFERENCE_MODEL} checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml index 6bd8a0100..51685b2e3 100644 --- a/llama_stack/templates/nvidia/build.yaml +++ b/llama_stack/templates/nvidia/build.yaml @@ -1,4 +1,4 @@ -version: '2' 
+version: 2 distribution_spec: description: Use NVIDIA NIM for running LLM inference, evaluation and safety providers: diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml index eebfa1066..875fccc9d 100644 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: nvidia apis: - agents @@ -16,14 +16,14 @@ providers: - provider_id: nvidia provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True} + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:+} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -31,13 +31,13 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -45,30 +45,30 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/trace_store.db eval: - provider_id: nvidia provider_type: remote::nvidia config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} post_training: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} datasetio: - provider_id: localfs 
provider_type: inline::localfs @@ -76,14 +76,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} scoring: - provider_id: basic provider_type: inline::basic @@ -94,10 +94,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index be0e3f6d1..4477d5244 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: nvidia apis: - agents @@ -16,9 +16,9 @@ providers: - provider_id: nvidia provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True} + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:+} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -26,13 +26,13 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -40,38 +40,38 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/trace_store.db eval: - provider_id: nvidia provider_type: remote::nvidia config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} post_training: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} datasetio: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} scoring: - provider_id: basic provider_type: inline::basic @@ -82,10 +82,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db models: - metadata: {} model_id: meta/llama3-8b-instruct diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index ebe0849f3..cbf4281a2 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Ollama server for running LLM inference providers: diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 46c4852a4..cba25296b 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index 2e1b7fdcc..5e906a12c 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ollama apis: - agents @@ -17,7 +17,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} raise_on_connect_error: true vector_io: - provider_id: faiss @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -41,17 +41,17 @@ providers: persistence_store: type: sqlite namespace: 
null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -59,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -67,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -85,15 +85,15 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ollama/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/files_metadata.db post_training: - provider_id: huggingface provider_type: inline::huggingface @@ -105,12 +105,12 @@ providers: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -121,13 +121,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} 
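Since the remaining templates repeat the same substitutions, it is worth pinning down the two operators this diff migrates to, which follow bash parameter-expansion conventions: ${env.VAR:=default} expands to the default when VAR is unset or empty, while ${env.VAR:+value} expands to the given value only when VAR is set and non-empty, so optional api_key entries and the ${env.ENABLE_CHROMADB:+chromadb} / ${env.ENABLE_PGVECTOR:+pgvector} provider ids simply collapse when their flag is absent (note the old ${env.ENABLE_CHROMADB+chromadb} form lacked the colon). A minimal Python sketch of a resolver with these semantics; the function name, regex, and empty-string fallbacks are assumptions for illustration, not the actual llama_stack implementation:

    import os
    import re

    # Matches ${env.NAME}, ${env.NAME:=default}, and ${env.NAME:+value};
    # an illustrative pattern, not the resolver llama_stack actually uses.
    _PATTERN = re.compile(
        r"\$\{env\.(?P<name>[A-Za-z0-9_]+)(?::(?P<op>[=+])(?P<value>[^}]*))?\}"
    )

    def resolve_env_vars(text: str) -> str:
        def substitute(match: re.Match) -> str:
            name, op, value = match.group("name", "op", "value")
            env_value = os.environ.get(name)
            if op == "=":   # ${env.VAR:=default}: default when VAR unset/empty
                return env_value if env_value else value
            if op == "+":   # ${env.VAR:+alt}: alt only when VAR is set, else empty
                return value if env_value else ""
            return env_value or ""  # bare ${env.VAR}
        return _PATTERN.sub(substitute, text)

    # e.g. with OLLAMA_URL unset:
    #   resolve_env_vars("url: ${env.OLLAMA_URL:=http://localhost:11434}")
    #   -> "url: http://localhost:11434"
    # and with ENABLE_CHROMADB unset:
    #   resolve_env_vars("provider_id: ${env.ENABLE_CHROMADB:+chromadb}")
    #   -> "provider_id: "

The zero-width-space default in "${env.OTEL_SERVICE_NAME:=\u200B}" appears to be a way to supply a visually empty yet non-null default under these semantics, though that reading is an inference, not something the diff states.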
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 8c2b17ef1..d2b4e3978 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ollama apis: - agents @@ -17,7 +17,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} raise_on_connect_error: true vector_io: - provider_id: faiss @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -39,17 +39,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -57,7 +57,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -65,14 +65,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -83,15 +83,15 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ollama/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/files_metadata.db post_training: - provider_id: huggingface provider_type: inline::huggingface @@ -103,12 +103,12 @@ providers: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: 
${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -119,13 +119,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/open-benchmark/build.yaml b/llama_stack/templates/open-benchmark/build.yaml index 840f1e1db..5f82c5243 100644 --- a/llama_stack/templates/open-benchmark/build.yaml +++ b/llama_stack/templates/open-benchmark/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running open benchmarks providers: diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py b/llama_stack/templates/open-benchmark/open_benchmark.py index d944d4eff..b4cfbdb52 100644 --- a/llama_stack/templates/open-benchmark/open_benchmark.py +++ b/llama_stack/templates/open-benchmark/open_benchmark.py @@ -6,7 +6,7 @@ from llama_stack.apis.datasets import DatasetPurpose, URIDataSource -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( BenchmarkInput, DatasetInput, @@ -120,17 +120,17 @@ def get_distribution_template() -> DistributionTemplate: config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index 051ca6f8e..403b0fd3d 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: open-benchmark apis: - agents @@ -33,24 +33,24 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} vector_io: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db + - provider_id: 
${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -63,17 +63,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -81,7 +81,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -89,14 +89,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -107,17 +107,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -127,10 +127,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/registry.db inference_store: type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db models: - metadata: {} model_id: openai/gpt-4o diff --git a/llama_stack/templates/passthrough/build.yaml b/llama_stack/templates/passthrough/build.yaml index 46b99cb75..e2e041dbc 100644 --- a/llama_stack/templates/passthrough/build.yaml +++ b/llama_stack/templates/passthrough/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Passthrough hosted llama-stack endpoint for LLM inference providers: diff --git a/llama_stack/templates/passthrough/passthrough.py b/llama_stack/templates/passthrough/passthrough.py index 6a30625c5..1b94a9aae 100644 --- a/llama_stack/templates/passthrough/passthrough.py +++ b/llama_stack/templates/passthrough/passthrough.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/passthrough/run-with-safety.yaml b/llama_stack/templates/passthrough/run-with-safety.yaml index 3168eeb9f..c5b047511 100644 --- a/llama_stack/templates/passthrough/run-with-safety.yaml +++ b/llama_stack/templates/passthrough/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: passthrough apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,22 +89,22 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -113,10 +113,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/inference_store.db models: - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml index 48abf8577..896b3c91e 100644 --- a/llama_stack/templates/passthrough/run.yaml +++ b/llama_stack/templates/passthrough/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: passthrough apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,22 +84,22 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -108,10 +108,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/inference_store.db models: - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/postgres-demo/build.yaml b/llama_stack/templates/postgres-demo/build.yaml index 6416cd00f..645b59613 100644 --- a/llama_stack/templates/postgres-demo/build.yaml +++ b/llama_stack/templates/postgres-demo/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Quick start template for running Llama Stack with several popular providers providers: diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py index 759281567..5d42b8901 100644 --- a/llama_stack/templates/postgres-demo/postgres_demo.py +++ b/llama_stack/templates/postgres-demo/postgres_demo.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -50,9 +50,9 @@ def get_distribution_template() -> DistributionTemplate: vector_io_providers = [ Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), ] default_tool_groups = [ @@ -114,9 +114,9 @@ def get_distribution_template() -> DistributionTemplate: provider_id="meta-reference", provider_type="inline::meta-reference", config=dict( - service_name="${env.OTEL_SERVICE_NAME:}", - sinks="${env.TELEMETRY_SINKS:console,otel_trace}", - otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces}", + service_name="${env.OTEL_SERVICE_NAME:+}", + sinks="${env.TELEMETRY_SINKS:=console,otel_trace}", + otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}", ), ) ], diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml index 0e0d020b2..03b7a59fb 100644 --- a/llama_stack/templates/postgres-demo/run.yaml +++ b/llama_stack/templates/postgres-demo/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: postgres-demo apis: - agents @@ -13,17 +13,17 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -35,35 +35,35 @@ providers: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:} - sinks: ${env.TELEMETRY_SINKS:console,otel_trace} - otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces} + service_name: ${env.OTEL_SERVICE_NAME:+} + sinks: ${env.TELEMETRY_SINKS:=console,otel_trace} + otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces} tool_runtime: - 
provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -73,19 +73,19 @@ providers: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} - table_name: ${env.POSTGRES_TABLE_NAME:llamastack_kvstore} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml index 16fe5d4fd..0298b01c7 100644 --- a/llama_stack/templates/remote-vllm/build.yaml +++ b/llama_stack/templates/remote-vllm/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) vLLM server for running LLM inference providers: diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index 64f71087a..b297f1489 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: remote-vllm apis: - agents @@ -16,16 +16,16 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: url: ${env.SAFETY_VLLM_URL} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -36,7 +36,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -49,10 +49,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -60,7 +60,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -68,14 +68,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -86,24 +86,24 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -114,13 +114,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index 353b9902d..6bd332cc9 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: remote-vllm apis: - agents @@ -16,9 +16,9 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + 
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -29,7 +29,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -42,10 +42,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -53,7 +53,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -61,14 +61,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -79,24 +79,24 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -107,13 +107,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/registry.db inference_store: type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py index 2782a3ea0..94606e9d0 100644 --- a/llama_stack/templates/remote-vllm/vllm.py +++ b/llama_stack/templates/remote-vllm/vllm.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/sambanova/build.yaml b/llama_stack/templates/sambanova/build.yaml index 14b1c8974..ba70f88c6 100644 --- a/llama_stack/templates/sambanova/build.yaml +++ b/llama_stack/templates/sambanova/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use SambaNova for running LLM inference and safety providers: diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index 58d0d36e3..b96621b58 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: sambanova apis: - agents @@ -24,19 +24,19 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: - provider_id: sambanova provider_type: remote::sambanova @@ -50,27 +50,27 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: 
${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -81,13 +81,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/inference_store.db models: - metadata: {} model_id: sambanova/Meta-Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py index 54a49423d..428577697 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -73,17 +73,17 @@ def get_distribution_template() -> DistributionTemplate: ), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml index 9bf4913a7..3b48dcf7a 100644 --- a/llama_stack/templates/starter/build.yaml +++ b/llama_stack/templates/starter/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Quick start template for running Llama Stack with several popular providers providers: diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index 30df39e5d..f7c53170b 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: starter apis: - agents @@ -16,47 +16,47 @@ providers: - provider_id: openai provider_type: remote::openai config: - api_key: ${env.OPENAI_API_KEY:} + api_key: ${env.OPENAI_API_KEY:+} - provider_id: fireworks provider_type: remote::fireworks config: url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY:} + api_key: ${env.FIREWORKS_API_KEY:+} - provider_id: together provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} raise_on_connect_error: false - provider_id: anthropic provider_type: remote::anthropic config: - api_key: ${env.ANTHROPIC_API_KEY:} + api_key: ${env.ANTHROPIC_API_KEY:+} - provider_id: gemini 
provider_type: remote::gemini config: - api_key: ${env.GEMINI_API_KEY:} + api_key: ${env.GEMINI_API_KEY:+} - provider_id: groq provider_type: remote::groq config: url: https://api.groq.com - api_key: ${env.GROQ_API_KEY:} + api_key: ${env.GROQ_API_KEY:+} - provider_id: sambanova provider_type: remote::sambanova config: url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY:} + api_key: ${env.SAMBANOVA_API_KEY:+} - provider_id: vllm provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -67,31 +67,31 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db - provider_id: ${env.ENABLE_SQLITE_VEC+sqlite-vec} provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/starter/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -104,17 +104,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db eval: - provider_id: meta-reference provider_type: 
inline::meta-reference @@ -122,7 +122,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -130,14 +130,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -148,17 +148,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -168,10 +168,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db models: - metadata: {} model_id: openai/gpt-4o @@ -538,15 +538,15 @@ models: provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 model_type: llm - metadata: {} - model_id: ollama/${env.OLLAMA_INFERENCE_MODEL:__disabled__} + model_id: ollama/${env.OLLAMA_INFERENCE_MODEL:=__disabled__} provider_id: ollama - provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:__disabled__} + provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:=__disabled__} model_type: llm - metadata: - embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:384} - model_id: ollama/${env.OLLAMA_EMBEDDING_MODEL:__disabled__} + embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:=384} + model_id: ollama/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} provider_id: ollama - provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:__disabled__} + provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} model_type: embedding - metadata: {} model_id: anthropic/claude-3-5-sonnet-latest @@ -802,9 +802,9 @@ models: provider_model_id: sambanova/Meta-Llama-Guard-3-8B model_type: llm - metadata: {} - model_id: vllm/${env.VLLM_INFERENCE_MODEL:__disabled__} + model_id: vllm/${env.VLLM_INFERENCE_MODEL:=__disabled__} provider_id: vllm - provider_model_id: ${env.VLLM_INFERENCE_MODEL:__disabled__} + provider_model_id: ${env.VLLM_INFERENCE_MODEL:=__disabled__} model_type: llm - metadata: embedding_dimension: 384 diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index ec01d08e9..df31fed84 100644 --- a/llama_stack/templates/starter/starter.py +++ 
b/llama_stack/templates/starter/starter.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -69,67 +69,67 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo ( "openai", OPENAI_MODEL_ENTRIES, - OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:}"), + OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:+}"), ), ( "fireworks", FIREWORKS_MODEL_ENTRIES, - FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"), + FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:+}"), ), ( "together", TOGETHER_MODEL_ENTRIES, - TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"), + TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:+}"), ), ( "ollama", [ ProviderModelEntry( - provider_model_id="${env.OLLAMA_INFERENCE_MODEL:__disabled__}", + provider_model_id="${env.OLLAMA_INFERENCE_MODEL:=__disabled__}", model_type=ModelType.llm, ), ProviderModelEntry( - provider_model_id="${env.OLLAMA_EMBEDDING_MODEL:__disabled__}", + provider_model_id="${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}", model_type=ModelType.embedding, metadata={ - "embedding_dimension": "${env.OLLAMA_EMBEDDING_DIMENSION:384}", + "embedding_dimension": "${env.OLLAMA_EMBEDDING_DIMENSION:=384}", }, ), ], OllamaImplConfig.sample_run_config( - url="${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error=False + url="${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error=False ), ), ( "anthropic", ANTHROPIC_MODEL_ENTRIES, - AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:}"), + AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:+}"), ), ( "gemini", GEMINI_MODEL_ENTRIES, - GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:}"), + GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:+}"), ), ( "groq", GROQ_MODEL_ENTRIES, - GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"), + GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:+}"), ), ( "sambanova", SAMBANOVA_MODEL_ENTRIES, - SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"), + SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:+}"), ), ( "vllm", [ ProviderModelEntry( - provider_model_id="${env.VLLM_INFERENCE_MODEL:__disabled__}", + provider_model_id="${env.VLLM_INFERENCE_MODEL:=__disabled__}", model_type=ModelType.llm, ), ], VLLMInferenceAdapterConfig.sample_run_config( - url="${env.VLLM_URL:http://localhost:8000/v1}", + url="${env.VLLM_URL:=http://localhost:8000/v1}", ), ), ] @@ -180,17 +180,17 @@ def get_distribution_template() -> DistributionTemplate: config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + 
user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 712d2dcb4..7badff140 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -13,7 +13,7 @@ import yaml from pydantic import BaseModel, Field from llama_stack.apis.datasets import DatasetPurpose -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( Api, BenchmarkInput, diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml index 361b0b680..3ac3968e8 100644 --- a/llama_stack/templates/tgi/build.yaml +++ b/llama_stack/templates/tgi/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) TGI server for running LLM inference providers: diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index 22b7bcde6..63da62a03 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: tgi apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} 
tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index dd012323c..430494121 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: tgi apis: - agents @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -39,17 +39,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -57,7 +57,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -65,14 +65,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -83,17 +83,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search 
config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -103,10 +103,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py index 2c97cbf80..394cde18e 100644 --- a/llama_stack/templates/tgi/tgi.py +++ b/llama_stack/templates/tgi/tgi.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml index 5ffeac873..518a843da 100644 --- a/llama_stack/templates/together/build.yaml +++ b/llama_stack/templates/together/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Together.AI for running LLM inference providers: diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index a24843416..7ae2a1d1a 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: together apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -110,13 +110,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/inference_store.db models: - metadata: {} model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index c71f960bd..dc09aeac9 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: together apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: 
${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -105,13 +105,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/inference_store.db models: - metadata: {} model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index 7761bd9fd..4c64ff3cd 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/vllm-gpu/build.yaml b/llama_stack/templates/vllm-gpu/build.yaml index d5ff0f1f4..147dca50d 100644 --- a/llama_stack/templates/vllm-gpu/build.yaml +++ b/llama_stack/templates/vllm-gpu/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use a built-in vLLM engine for running LLM inference providers: diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml index 6878c22b2..104b3a239 100644 --- a/llama_stack/templates/vllm-gpu/run.yaml +++ 
b/llama_stack/templates/vllm-gpu/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: vllm-gpu apis: - agents @@ -15,12 +15,12 @@ providers: - provider_id: vllm provider_type: inline::vllm config: - tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:1} - max_tokens: ${env.MAX_TOKENS:4096} - max_model_len: ${env.MAX_MODEL_LEN:4096} - max_num_seqs: ${env.MAX_NUM_SEQS:4} - enforce_eager: ${env.ENFORCE_EAGER:False} - gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:0.3} + tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:=1} + max_tokens: ${env.MAX_TOKENS:=4096} + max_model_len: ${env.MAX_MODEL_LEN:=4096} + max_num_seqs: ${env.MAX_NUM_SEQS:=4} + enforce_eager: ${env.ENFORCE_EAGER:=False} + gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:=0.3} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -31,7 +31,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -44,17 +44,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -62,7 +62,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -70,14 +70,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -88,17 +88,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: 
${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -108,10 +108,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py index 5775138b1..443fcd7a3 100644 --- a/llama_stack/templates/vllm-gpu/vllm.py +++ b/llama_stack/templates/vllm-gpu/vllm.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/watsonx/build.yaml b/llama_stack/templates/watsonx/build.yaml index e68ace183..08ee2c5ce 100644 --- a/llama_stack/templates/watsonx/build.yaml +++ b/llama_stack/templates/watsonx/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use watsonx for running LLM inference providers: diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml index d60a87906..678bf72ff 100644 --- a/llama_stack/templates/watsonx/run.yaml +++ b/llama_stack/templates/watsonx/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: watsonx apis: - agents @@ -15,9 +15,9 @@ providers: - provider_id: watsonx provider_type: remote::watsonx config: - url: ${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com} - api_key: ${env.WATSONX_API_KEY:} - project_id: ${env.WATSONX_PROJECT_ID:} + url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + api_key: ${env.WATSONX_API_KEY:+} + project_id: ${env.WATSONX_PROJECT_ID:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -41,17 +41,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db 
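Every template touched above migrates the same environment-variable substitution syntax: "${env.VAR:default}" becomes "${env.VAR:=default}" (use the default when the variable is unset or empty, mirroring bash's ":=" expansion), and "${env.VAR:}" becomes "${env.VAR:+}" (resolve to a value only when the variable is set, otherwise to nothing). As a rough illustration of those semantics, here is a minimal sketch; the function name replace_env_vars is taken from the unit tests near the end of this diff, but this body is an assumption, not the project's implementation, which among other things also walks nested dicts and lists:

import os
import re

# Sketch only: approximates the ":=" (default) and ":+" (conditional)
# semantics asserted in tests/unit/server/test_replace_env_vars.py.
_ENV = re.compile(r"\$\{env\.(\w+)(?::(=|\+)([^}]*))?\}")

def replace_env_vars(value: str) -> str | None:
    def resolve(m: re.Match) -> str:
        name, op, arg = m.group(1), m.group(2), m.group(3)
        current = os.environ.get(name)
        if op == "=":                 # ${env.VAR:=default}
            return current if current else arg
        if op == "+":                 # ${env.VAR:+word}
            return arg if current else ""
        return current or ""          # plain ${env.VAR}

    result = _ENV.sub(resolve, value)
    # A string that consists solely of an unset ":+" expansion collapses to
    # None, so YAML consumers see a null rather than an empty string.
    if result == "" and _ENV.fullmatch(value):
        return None
    return result

With TEST_VAR=test_value exported, replace_env_vars("${env.NOT_SET:=default}") yields "default", while "${env.NOT_SET:+conditional}" on its own resolves to None; embedded in a longer string it simply disappears.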
eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -59,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -67,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -85,17 +85,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -105,10 +105,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db models: - metadata: {} model_id: meta-llama/llama-3-3-70b-instruct diff --git a/llama_stack/templates/watsonx/watsonx.py b/llama_stack/templates/watsonx/watsonx.py index 802aaf8f1..7fa3a55e5 100644 --- a/llama_stack/templates/watsonx/watsonx.py +++ b/llama_stack/templates/watsonx/watsonx.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/pyproject.toml b/pyproject.toml index 968a3ae60..99be1a80a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,8 +22,8 @@ classifiers = [ ] dependencies = [ "aiohttp", - "fastapi>=0.115.0,<1.0", - "fire", + "fastapi>=0.115.0,<1.0", # server + "fire", # for MCP in LLS client "httpx", "huggingface-hub>=0.30.0,<1.0", "jinja2>=3.1.6", @@ -34,15 +34,18 @@ dependencies = [ "python-dotenv", "python-jose", "pydantic>=2", - "requests", "rich", - "setuptools", "starlette", "termcolor", "tiktoken", "pillow", "h11>=0.16.0", - "python-multipart>=0.0.20", + "python-multipart>=0.0.20", # For fastapi Form + "uvicorn>=0.34.0", # server + "opentelemetry-sdk", # server + "opentelemetry-exporter-otlp-proto-http", # server + "aiosqlite>=0.21.0", # server - for metadata store + "asyncpg", # for metadata store ] [project.optional-dependencies] @@ -67,7 +70,6 @@ dev = [ "types-requests", "types-setuptools", 
"pre-commit", - "uvicorn", "ruamel.yaml", # needed for openapi generator ] # These are the dependencies required for running unit tests. @@ -80,7 +82,6 @@ unit = [ "mcp", "chardet", "qdrant-client", - "opentelemetry-exporter-otlp-proto-http", "sqlalchemy", "sqlalchemy[asyncio]>=2.0.41", "blobfile", @@ -96,8 +97,6 @@ test = [ "aiohttp", "torch>=2.6.0", "torchvision>=0.21.0", - "opentelemetry-sdk", - "opentelemetry-exporter-otlp-proto-http", "chardet", "pypdf", "mcp", @@ -106,6 +105,7 @@ test = [ "transformers", "sqlalchemy", "sqlalchemy[asyncio]>=2.0.41", + "requests", ] docs = [ "sphinx-autobuild", @@ -122,6 +122,7 @@ docs = [ "tomli", "linkify", "sphinxcontrib.openapi", + "requests", ] codegen = ["rich", "pydantic", "jinja2>=3.1.6"] @@ -204,6 +205,9 @@ unfixable = [ "RUF001", "PLE2515", ] +"llama_stack/apis/**/__init__.py" = [ + "F403", +] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API [tool.mypy] mypy_path = ["llama_stack"] diff --git a/requirements.txt b/requirements.txt index 7e7aa38ce..2e016ef72 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,8 @@ aiohttp==3.11.13 # via llama-stack aiosignal==1.3.2 # via aiohttp +aiosqlite==0.21.0 + # via llama-stack annotated-types==0.7.0 # via pydantic anyio==4.8.0 @@ -14,6 +16,8 @@ anyio==4.8.0 # llama-stack-client # openai # starlette +asyncpg==0.30.0 + # via llama-stack attrs==25.1.0 # via # aiohttp @@ -27,11 +31,18 @@ certifi==2025.1.31 charset-normalizer==3.4.1 # via requests click==8.1.8 - # via llama-stack-client + # via + # llama-stack-client + # uvicorn colorama==0.4.6 ; sys_platform == 'win32' # via # click # tqdm +deprecated==1.2.18 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions distro==1.9.0 # via # llama-stack-client @@ -50,10 +61,13 @@ frozenlist==1.5.0 # aiosignal fsspec==2024.12.0 # via huggingface-hub +googleapis-common-protos==1.67.0 + # via opentelemetry-exporter-otlp-proto-http h11==0.16.0 # via # httpcore # llama-stack + # uvicorn hf-xet==1.1.5 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' # via huggingface-hub httpcore==1.0.9 @@ -71,6 +85,8 @@ idna==3.10 # httpx # requests # yarl +importlib-metadata==8.5.0 + # via opentelemetry-api jinja2==3.1.6 # via llama-stack jiter==0.8.2 @@ -95,6 +111,25 @@ numpy==2.2.3 # via pandas openai==1.71.0 # via llama-stack +opentelemetry-api==1.30.0 + # via + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp-proto-common==1.30.0 + # via opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-http==1.30.0 + # via llama-stack +opentelemetry-proto==1.30.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.30.0 + # via + # llama-stack + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.51b0 + # via opentelemetry-sdk packaging==24.2 # via huggingface-hub pandas==2.2.3 @@ -109,6 +144,10 @@ propcache==0.3.0 # via # aiohttp # yarl +protobuf==5.29.3 + # via + # googleapis-common-protos + # opentelemetry-proto pyaml==25.1.0 # via llama-stack-client pyasn1==0.4.8 @@ -148,7 +187,7 @@ regex==2024.11.6 requests==2.32.4 # via # huggingface-hub - # llama-stack + # opentelemetry-exporter-otlp-proto-http # tiktoken rich==13.9.4 # via @@ -160,8 +199,6 @@ rpds-py==0.22.3 # referencing rsa==4.9 # via python-jose 
-setuptools==80.8.0 - # via llama-stack six==1.17.0 # via # ecdsa @@ -189,11 +226,13 @@ tqdm==4.67.1 # openai typing-extensions==4.12.2 # via + # aiosqlite # anyio # fastapi # huggingface-hub # llama-stack-client # openai + # opentelemetry-sdk # pydantic # pydantic-core # referencing @@ -201,7 +240,13 @@ tzdata==2025.1 # via pandas urllib3==2.3.0 # via requests +uvicorn==0.34.0 + # via llama-stack wcwidth==0.2.13 # via prompt-toolkit +wrapt==1.17.2 + # via deprecated yarl==1.18.3 # via aiohttp +zipp==3.21.0 + # via importlib-metadata diff --git a/tests/external-provider/llama-stack-provider-ollama/run.yaml b/tests/external-provider/llama-stack-provider-ollama/run.yaml index 158f6800f..60cff7503 100644 --- a/tests/external-provider/llama-stack-provider-ollama/run.yaml +++ b/tests/external-provider/llama-stack-provider-ollama/run.yaml @@ -1,71 +1,101 @@ -version: '2' +version: 2 image_name: ollama apis: +- agents +- datasetio +- eval - inference +- safety +- scoring - telemetry - tool_runtime -- datasetio - vector_io + providers: inference: - - provider_id: custom_ollama - provider_type: remote::custom_ollama + - provider_id: ollama + provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} vector_io: - provider_id: faiss provider_type: inline::faiss config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + agents_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200b}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + metadata_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db datasetio: - provider_id: huggingface provider_type: remote::huggingface config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} 
max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} + metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 9cbdc8e51..0eeb68167 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -13,7 +13,7 @@ import pytest from llama_stack.apis.common.type_system import NumberType from llama_stack.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource from llama_stack.apis.datatypes import Api -from llama_stack.apis.models.models import Model, ModelType +from llama_stack.apis.models import Model, ModelType from llama_stack.apis.shields.shields import Shield from llama_stack.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup, ToolParameter from llama_stack.apis.vector_dbs.vector_dbs import VectorDB diff --git a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py index e112bb6e5..2ebcd9970 100644 --- a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py +++ b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py @@ -8,7 +8,7 @@ import os import yaml -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, ) diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index a3d798083..0d1ef8eca 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -27,8 +27,9 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAIResponseTextFormat, + WebSearchToolTypes, ) -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartTextParam, OpenAIDeveloperMessageParam, @@ -161,11 +162,6 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon input_text = "What is the capital of Ireland?" 
model = "meta-llama/Llama-3.1-8B-Instruct" - mock_inference_api.openai_chat_completion.side_effect = [ - fake_stream("tool_call_completion.yaml"), - fake_stream(), - ] - openai_responses_impl.tool_groups_api.get_tool.return_value = Tool( identifier="web_search", provider_id="client", @@ -182,39 +178,50 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon ) # Execute - result = await openai_responses_impl.create_openai_response( - input=input_text, - model=model, - temperature=0.1, - tools=[ - OpenAIResponseInputToolWebSearch( - name="web_search", - ) - ], - ) + for tool_name in WebSearchToolTypes: + # Reset mock states as we loop through each tool type + mock_inference_api.openai_chat_completion.side_effect = [ + fake_stream("tool_call_completion.yaml"), + fake_stream(), + ] + openai_responses_impl.tool_groups_api.get_tool.reset_mock() + openai_responses_impl.tool_runtime_api.invoke_tool.reset_mock() + openai_responses_impl.responses_store.store_response_object.reset_mock() - # Verify - first_call = mock_inference_api.openai_chat_completion.call_args_list[0] - assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" - assert first_call.kwargs["tools"] is not None - assert first_call.kwargs["temperature"] == 0.1 + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + temperature=0.1, + tools=[ + OpenAIResponseInputToolWebSearch( + name=tool_name, + ) + ], + ) - second_call = mock_inference_api.openai_chat_completion.call_args_list[1] - assert second_call.kwargs["messages"][-1].content == "Dublin" - assert second_call.kwargs["temperature"] == 0.1 + # Verify + first_call = mock_inference_api.openai_chat_completion.call_args_list[0] + assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" 
+ assert first_call.kwargs["tools"] is not None + assert first_call.kwargs["temperature"] == 0.1 - openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search") - openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with( - tool_name="web_search", - kwargs={"query": "What is the capital of Ireland?"}, - ) + second_call = mock_inference_api.openai_chat_completion.call_args_list[1] + assert second_call.kwargs["messages"][-1].content == "Dublin" + assert second_call.kwargs["temperature"] == 0.1 - openai_responses_impl.responses_store.store_response_object.assert_called_once() + openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search") + openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with( + tool_name="web_search", + kwargs={"query": "What is the capital of Ireland?"}, + ) - # Check that we got the content from our mocked tool execution result - assert len(result.output) >= 1 - assert isinstance(result.output[1], OpenAIResponseMessage) - assert result.output[1].content[0].text == "Dublin" + openai_responses_impl.responses_store.store_response_object.assert_called_once() + + # Check that we got the content from our mocked tool execution result + assert len(result.output) >= 1 + assert isinstance(result.output[1], OpenAIResponseMessage) + assert result.output[1].content[0].text == "Dublin" + assert result.output[1].content[0].annotations == [] @pytest.mark.asyncio diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py index 8c74f178b..73fc32a02 100644 --- a/tests/unit/providers/nvidia/test_safety.py +++ b/tests/unit/providers/nvidia/test_safety.py @@ -11,7 +11,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from llama_stack.apis.inference.inference import CompletionMessage, UserMessage +from llama_stack.apis.inference import CompletionMessage, UserMessage from llama_stack.apis.safety import RunShieldResponse, ViolationLevel from llama_stack.apis.shields import Shield from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig diff --git a/tests/unit/providers/utils/inference/test_openai_compat.py b/tests/unit/providers/utils/inference/test_openai_compat.py index 4c75b8a2f..3598e4810 100644 --- a/tests/unit/providers/utils/inference/test_openai_compat.py +++ b/tests/unit/providers/utils/inference/test_openai_compat.py @@ -7,7 +7,7 @@ import pytest from llama_stack.apis.common.content_types import TextContentItem -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( CompletionMessage, OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartTextParam, diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py index 67f8a138f..10fa1e075 100644 --- a/tests/unit/providers/utils/test_model_registry.py +++ b/tests/unit/providers/utils/test_model_registry.py @@ -35,7 +35,7 @@ import pytest -from llama_stack.apis.models.models import Model +from llama_stack.apis.models import Model from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry diff --git a/tests/unit/server/test_replace_env_vars.py b/tests/unit/server/test_replace_env_vars.py index 7fcbbfde9..0fb7c395e 100644 --- a/tests/unit/server/test_replace_env_vars.py +++ b/tests/unit/server/test_replace_env_vars.py @@ -26,39 +26,44 @@ class TestReplaceEnvVars(unittest.TestCase): 
         self.assertEqual(replace_env_vars("${env.TEST_VAR}"), "test_value")
 
     def test_default_value_when_not_set(self):
-        self.assertEqual(replace_env_vars("${env.NOT_SET:default}"), "default")
+        self.assertEqual(replace_env_vars("${env.NOT_SET:=default}"), "default")
 
     def test_default_value_when_set(self):
-        self.assertEqual(replace_env_vars("${env.TEST_VAR:default}"), "test_value")
+        self.assertEqual(replace_env_vars("${env.TEST_VAR:=default}"), "test_value")
 
     def test_default_value_when_empty(self):
-        self.assertEqual(replace_env_vars("${env.EMPTY_VAR:default}"), "default")
+        self.assertEqual(replace_env_vars("${env.EMPTY_VAR:=default}"), "default")
+
+    def test_empty_var_no_default(self):
+        self.assertEqual(replace_env_vars("${env.EMPTY_VAR_NO_DEFAULT:+}"), None)
 
     def test_conditional_value_when_set(self):
-        self.assertEqual(replace_env_vars("${env.TEST_VAR+conditional}"), "conditional")
+        self.assertEqual(replace_env_vars("${env.TEST_VAR:+conditional}"), "conditional")
 
     def test_conditional_value_when_not_set(self):
-        self.assertEqual(replace_env_vars("${env.NOT_SET+conditional}"), "")
+        self.assertEqual(replace_env_vars("${env.NOT_SET:+conditional}"), None)
 
     def test_conditional_value_when_empty(self):
-        self.assertEqual(replace_env_vars("${env.EMPTY_VAR+conditional}"), "")
+        self.assertEqual(replace_env_vars("${env.EMPTY_VAR:+conditional}"), None)
 
     def test_conditional_value_with_zero(self):
-        self.assertEqual(replace_env_vars("${env.ZERO_VAR+conditional}"), "conditional")
+        self.assertEqual(replace_env_vars("${env.ZERO_VAR:+conditional}"), "conditional")
 
     def test_mixed_syntax(self):
-        self.assertEqual(replace_env_vars("${env.TEST_VAR:default} and ${env.NOT_SET+conditional}"), "test_value and ")
         self.assertEqual(
-            replace_env_vars("${env.NOT_SET:default} and ${env.TEST_VAR+conditional}"), "default and conditional"
+            replace_env_vars("${env.TEST_VAR:=default} and ${env.NOT_SET:+conditional}"), "test_value and "
+        )
+        self.assertEqual(
+            replace_env_vars("${env.NOT_SET:=default} and ${env.TEST_VAR:+conditional}"), "default and conditional"
         )
 
     def test_nested_structures(self):
         data = {
-            "key1": "${env.TEST_VAR:default}",
-            "key2": ["${env.NOT_SET:default}", "${env.TEST_VAR+conditional}"],
-            "key3": {"nested": "${env.NOT_SET+conditional}"},
+            "key1": "${env.TEST_VAR:=default}",
+            "key2": ["${env.NOT_SET:=default}", "${env.TEST_VAR:+conditional}"],
+            "key3": {"nested": "${env.NOT_SET:+conditional}"},
         }
-        expected = {"key1": "test_value", "key2": ["default", "conditional"], "key3": {"nested": ""}}
+        expected = {"key1": "test_value", "key2": ["default", "conditional"], "key3": {"nested": None}}
         self.assertEqual(replace_env_vars(data), expected)
diff --git a/uv.lock b/uv.lock
index 42eece4e1..31e296642 100644
--- a/uv.lock
+++ b/uv.lock
@@ -158,6 +158,38 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918, upload-time = "2024-11-30T04:30:10.946Z" },
 ]
 
+[[package]]
+name = "asyncpg"
+version = "0.30.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746, upload-time = "2024-10-20T00:30:41.127Z" }
+wheels = [
+    { url = 
"https://files.pythonhosted.org/packages/4c/0e/f5d708add0d0b97446c402db7e8dd4c4183c13edaabe8a8500b411e7b495/asyncpg-0.30.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5e0511ad3dec5f6b4f7a9e063591d407eee66b88c14e2ea636f187da1dcfff6a", size = 674506, upload-time = "2024-10-20T00:29:27.988Z" }, + { url = "https://files.pythonhosted.org/packages/6a/a0/67ec9a75cb24a1d99f97b8437c8d56da40e6f6bd23b04e2f4ea5d5ad82ac/asyncpg-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:915aeb9f79316b43c3207363af12d0e6fd10776641a7de8a01212afd95bdf0ed", size = 645922, upload-time = "2024-10-20T00:29:29.391Z" }, + { url = "https://files.pythonhosted.org/packages/5c/d9/a7584f24174bd86ff1053b14bb841f9e714380c672f61c906eb01d8ec433/asyncpg-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c198a00cce9506fcd0bf219a799f38ac7a237745e1d27f0e1f66d3707c84a5a", size = 3079565, upload-time = "2024-10-20T00:29:30.832Z" }, + { url = "https://files.pythonhosted.org/packages/a0/d7/a4c0f9660e333114bdb04d1a9ac70db690dd4ae003f34f691139a5cbdae3/asyncpg-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3326e6d7381799e9735ca2ec9fd7be4d5fef5dcbc3cb555d8a463d8460607956", size = 3109962, upload-time = "2024-10-20T00:29:33.114Z" }, + { url = "https://files.pythonhosted.org/packages/3c/21/199fd16b5a981b1575923cbb5d9cf916fdc936b377e0423099f209e7e73d/asyncpg-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51da377487e249e35bd0859661f6ee2b81db11ad1f4fc036194bc9cb2ead5056", size = 3064791, upload-time = "2024-10-20T00:29:34.677Z" }, + { url = "https://files.pythonhosted.org/packages/77/52/0004809b3427534a0c9139c08c87b515f1c77a8376a50ae29f001e53962f/asyncpg-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc6d84136f9c4d24d358f3b02be4b6ba358abd09f80737d1ac7c444f36108454", size = 3188696, upload-time = "2024-10-20T00:29:36.389Z" }, + { url = "https://files.pythonhosted.org/packages/52/cb/fbad941cd466117be58b774a3f1cc9ecc659af625f028b163b1e646a55fe/asyncpg-0.30.0-cp311-cp311-win32.whl", hash = "sha256:574156480df14f64c2d76450a3f3aaaf26105869cad3865041156b38459e935d", size = 567358, upload-time = "2024-10-20T00:29:37.915Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0a/0a32307cf166d50e1ad120d9b81a33a948a1a5463ebfa5a96cc5606c0863/asyncpg-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:3356637f0bd830407b5597317b3cb3571387ae52ddc3bca6233682be88bbbc1f", size = 629375, upload-time = "2024-10-20T00:29:39.987Z" }, + { url = "https://files.pythonhosted.org/packages/4b/64/9d3e887bb7b01535fdbc45fbd5f0a8447539833b97ee69ecdbb7a79d0cb4/asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e", size = 673162, upload-time = "2024-10-20T00:29:41.88Z" }, + { url = "https://files.pythonhosted.org/packages/6e/eb/8b236663f06984f212a087b3e849731f917ab80f84450e943900e8ca4052/asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a", size = 637025, upload-time = "2024-10-20T00:29:43.352Z" }, + { url = "https://files.pythonhosted.org/packages/cc/57/2dc240bb263d58786cfaa60920779af6e8d32da63ab9ffc09f8312bd7a14/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3", size = 3496243, upload-time = "2024-10-20T00:29:44.922Z" }, + { url = 
"https://files.pythonhosted.org/packages/f4/40/0ae9d061d278b10713ea9021ef6b703ec44698fe32178715a501ac696c6b/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737", size = 3575059, upload-time = "2024-10-20T00:29:46.891Z" }, + { url = "https://files.pythonhosted.org/packages/c3/75/d6b895a35a2c6506952247640178e5f768eeb28b2e20299b6a6f1d743ba0/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a", size = 3473596, upload-time = "2024-10-20T00:29:49.201Z" }, + { url = "https://files.pythonhosted.org/packages/c8/e7/3693392d3e168ab0aebb2d361431375bd22ffc7b4a586a0fc060d519fae7/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af", size = 3641632, upload-time = "2024-10-20T00:29:50.768Z" }, + { url = "https://files.pythonhosted.org/packages/32/ea/15670cea95745bba3f0352341db55f506a820b21c619ee66b7d12ea7867d/asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e", size = 560186, upload-time = "2024-10-20T00:29:52.394Z" }, + { url = "https://files.pythonhosted.org/packages/7e/6b/fe1fad5cee79ca5f5c27aed7bd95baee529c1bf8a387435c8ba4fe53d5c1/asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305", size = 621064, upload-time = "2024-10-20T00:29:53.757Z" }, + { url = "https://files.pythonhosted.org/packages/3a/22/e20602e1218dc07692acf70d5b902be820168d6282e69ef0d3cb920dc36f/asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70", size = 670373, upload-time = "2024-10-20T00:29:55.165Z" }, + { url = "https://files.pythonhosted.org/packages/3d/b3/0cf269a9d647852a95c06eb00b815d0b95a4eb4b55aa2d6ba680971733b9/asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3", size = 634745, upload-time = "2024-10-20T00:29:57.14Z" }, + { url = "https://files.pythonhosted.org/packages/8e/6d/a4f31bf358ce8491d2a31bfe0d7bcf25269e80481e49de4d8616c4295a34/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33", size = 3512103, upload-time = "2024-10-20T00:29:58.499Z" }, + { url = "https://files.pythonhosted.org/packages/96/19/139227a6e67f407b9c386cb594d9628c6c78c9024f26df87c912fabd4368/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4", size = 3592471, upload-time = "2024-10-20T00:30:00.354Z" }, + { url = "https://files.pythonhosted.org/packages/67/e4/ab3ca38f628f53f0fd28d3ff20edff1c975dd1cb22482e0061916b4b9a74/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4", size = 3496253, upload-time = "2024-10-20T00:30:02.794Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5f/0bf65511d4eeac3a1f41c54034a492515a707c6edbc642174ae79034d3ba/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba", size = 3662720, upload-time = "2024-10-20T00:30:04.501Z" }, + { url = 
"https://files.pythonhosted.org/packages/e7/31/1513d5a6412b98052c3ed9158d783b1e09d0910f51fbe0e05f56cc370bc4/asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590", size = 560404, upload-time = "2024-10-20T00:30:06.537Z" }, + { url = "https://files.pythonhosted.org/packages/c8/a4/cec76b3389c4c5ff66301cd100fe88c318563ec8a520e0b2e792b5b84972/asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e", size = 621623, upload-time = "2024-10-20T00:30:09.024Z" }, +] + [[package]] name = "attrs" version = "25.1.0" @@ -1187,6 +1219,8 @@ version = "0.2.12" source = { editable = "." } dependencies = [ { name = "aiohttp" }, + { name = "aiosqlite" }, + { name = "asyncpg" }, { name = "fastapi" }, { name = "fire" }, { name = "h11" }, @@ -1196,18 +1230,19 @@ dependencies = [ { name = "jsonschema" }, { name = "llama-stack-client" }, { name = "openai" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, { name = "pillow" }, { name = "prompt-toolkit" }, { name = "pydantic" }, { name = "python-dotenv" }, { name = "python-jose" }, { name = "python-multipart" }, - { name = "requests" }, { name = "rich" }, - { name = "setuptools" }, { name = "starlette" }, { name = "termcolor" }, { name = "tiktoken" }, + { name = "uvicorn" }, ] [package.optional-dependencies] @@ -1238,11 +1273,11 @@ dev = [ { name = "ruff" }, { name = "types-requests" }, { name = "types-setuptools" }, - { name = "uvicorn" }, ] docs = [ { name = "linkify" }, { name = "myst-parser" }, + { name = "requests" }, { name = "sphinx" }, { name = "sphinx-autobuild" }, { name = "sphinx-copybutton" }, @@ -1264,9 +1299,8 @@ test = [ { name = "datasets" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, { name = "pypdf" }, + { name = "requests" }, { name = "sqlalchemy", extra = ["asyncio"] }, { name = "torch", version = "2.6.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, { name = "torch", version = "2.6.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, @@ -1282,7 +1316,6 @@ unit = [ { name = "faiss-cpu" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "pypdf" }, { name = "qdrant-client" }, { name = "sqlalchemy", extra = ["asyncio"] }, @@ -1292,6 +1325,8 @@ unit = [ [package.metadata] requires-dist = [ { name = "aiohttp" }, + { name = "aiosqlite", specifier = ">=0.21.0" }, + { name = "asyncpg" }, { name = "fastapi", specifier = ">=0.115.0,<1.0" }, { name = "fire" }, { name = "h11", specifier = ">=0.16.0" }, @@ -1302,6 +1337,8 @@ requires-dist = [ { name = "llama-stack-client", specifier = ">=0.2.12" }, { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.12" }, { name = "openai", specifier = ">=1.66" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, { name = "prompt-toolkit" }, @@ -1309,14 +1346,13 @@ requires-dist = [ { name = "python-dotenv" }, { name = "python-jose" }, { name = "python-multipart", specifier = ">=0.0.20" }, - { name = "requests" }, { name = "rich" }, - { name = "setuptools" }, { name = "starlette" }, { name = "streamlit", marker = "extra == 'ui'" }, { name = "streamlit-option-menu", marker = 
"extra == 'ui'" }, { name = "termcolor" }, { name = "tiktoken" }, + { name = "uvicorn", specifier = ">=0.34.0" }, ] provides-extras = ["ui"] @@ -1340,11 +1376,11 @@ dev = [ { name = "ruff" }, { name = "types-requests" }, { name = "types-setuptools" }, - { name = "uvicorn" }, ] docs = [ { name = "linkify" }, { name = "myst-parser" }, + { name = "requests" }, { name = "sphinx" }, { name = "sphinx-autobuild" }, { name = "sphinx-copybutton" }, @@ -1366,9 +1402,8 @@ test = [ { name = "datasets" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, { name = "pypdf" }, + { name = "requests" }, { name = "sqlalchemy" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "torch", specifier = ">=2.6.0", index = "https://download.pytorch.org/whl/cpu" }, @@ -1383,7 +1418,6 @@ unit = [ { name = "faiss-cpu" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "pypdf" }, { name = "qdrant-client" }, { name = "sqlalchemy" },