Merge branch 'main' into restructure_scoring

This commit is contained in:
Xi Yan 2024-11-12 09:59:35 -05:00 committed by GitHub
commit 86fdad3be3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
26 changed files with 89 additions and 80 deletions

View file

@ -23,7 +23,7 @@ providers:
region_name: <AWS_REGION>
memory:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}
safety:
- provider_id: bedrock0
@ -35,12 +35,12 @@ providers:
region_name: <AWS_REGION>
agents:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
db_path: ~/.llama/runtime/kvstore.db
telemetry:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}

View file

@ -29,11 +29,11 @@ providers:
model: Prompt-Guard-86M
memory:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}
agents:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
namespace: null
@ -41,5 +41,5 @@ providers:
db_path: ~/.llama/runtime/kvstore.db
telemetry:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}

View file

@ -31,7 +31,7 @@ providers:
model: Prompt-Guard-86M
memory:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}
# Uncomment to use weaviate memory provider
# - provider_id: weaviate0
@ -39,7 +39,7 @@ providers:
# config: {}
agents:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
namespace: null
@ -47,5 +47,5 @@ providers:
db_path: ~/.llama/runtime/kvstore.db
telemetry:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}

View file

@ -42,7 +42,7 @@ providers:
# model: Prompt-Guard-86M
memory:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}
# Uncomment to use pgvector
# - provider_id: pgvector
@ -55,7 +55,7 @@ providers:
# password: mysecretpassword
agents:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
namespace: null
@ -63,5 +63,5 @@ providers:
db_path: ~/.llama/runtime/agents_store.db
telemetry:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}

View file

@ -14,7 +14,7 @@ apis:
providers:
inference:
- provider_id: inference0
provider_type: meta-reference
provider_type: inline::meta-reference
config:
model: Llama3.2-3B-Instruct
quantization: null
@ -22,7 +22,7 @@ providers:
max_seq_len: 4096
max_batch_size: 1
- provider_id: inference1
provider_type: meta-reference
provider_type: inline::meta-reference
config:
model: Llama-Guard-3-1B
quantization: null
@ -44,7 +44,7 @@ providers:
# model: Prompt-Guard-86M
memory:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}
# Uncomment to use pgvector
# - provider_id: pgvector
@ -57,7 +57,7 @@ providers:
# password: mysecretpassword
agents:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
namespace: null
@ -65,5 +65,5 @@ providers:
db_path: ~/.llama/runtime/agents_store.db
telemetry:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}

View file

@ -14,7 +14,7 @@ apis:
providers:
inference:
- provider_id: meta0
provider_type: meta-reference-quantized
provider_type: inline::meta-reference-quantized
config:
model: Llama3.2-3B-Instruct:int4-qlora-eo8
quantization:
@ -23,7 +23,7 @@ providers:
max_seq_len: 2048
max_batch_size: 1
- provider_id: meta1
provider_type: meta-reference-quantized
provider_type: inline::meta-reference-quantized
config:
# not a quantized model !
model: Llama-Guard-3-1B
@ -43,11 +43,11 @@ providers:
model: Prompt-Guard-86M
memory:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}
agents:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
namespace: null
@ -55,5 +55,5 @@ providers:
db_path: ~/.llama/runtime/kvstore.db
telemetry:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}

View file

@ -29,11 +29,11 @@ providers:
model: Prompt-Guard-86M
memory:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}
agents:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
namespace: null
@ -41,5 +41,5 @@ providers:
db_path: ~/.llama/runtime/kvstore.db
telemetry:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}

View file

@ -29,11 +29,11 @@ providers:
model: Prompt-Guard-86M
memory:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}
agents:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
namespace: null
@ -41,5 +41,5 @@ providers:
db_path: ~/.llama/runtime/kvstore.db
telemetry:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}

View file

@ -29,11 +29,11 @@ providers:
model: Prompt-Guard-86M
memory:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}
agents:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
namespace: null
@ -41,5 +41,5 @@ providers:
db_path: ~/.llama/runtime/kvstore.db
telemetry:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}

View file

@ -29,11 +29,11 @@ providers:
model: Prompt-Guard-86M
memory:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}
agents:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
namespace: null
@ -41,5 +41,5 @@ providers:
db_path: ~/.llama/runtime/kvstore.db
telemetry:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}

View file

@ -34,7 +34,7 @@ providers:
config: {}
agents:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
namespace: null
@ -42,5 +42,5 @@ providers:
db_path: ~/.llama/runtime/kvstore.db
telemetry:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config: {}

View file

@ -35,14 +35,14 @@ the provider types (implementations) you want to use for these APIs.
Tip: use <TAB> to see options for the providers.
> Enter provider for API inference: meta-reference
> Enter provider for API inference: inline::meta-reference
> Enter provider for API safety: inline::llama-guard
> Enter provider for API agents: meta-reference
> Enter provider for API agents: inline::meta-reference
> Enter provider for API memory: inline::faiss
> Enter provider for API datasetio: meta-reference
> Enter provider for API scoring: meta-reference
> Enter provider for API eval: meta-reference
> Enter provider for API telemetry: meta-reference
> Enter provider for API datasetio: inline::meta-reference
> Enter provider for API scoring: inline::meta-reference
> Enter provider for API eval: inline::meta-reference
> Enter provider for API telemetry: inline::meta-reference
> (Optional) Enter a short description for your Llama Stack:

View file

@ -59,7 +59,7 @@ You may change the `config.model` in `run.yaml` to update the model currently be
```
inference:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config:
model: Llama3.2-11B-Vision-Instruct
quantization: null

View file

@ -400,7 +400,7 @@ You may change the `config.model` in `run.yaml` to update the model currently be
```
inference:
- provider_id: meta0
provider_type: meta-reference
provider_type: inline::meta-reference
config:
model: Llama3.2-11B-Vision-Instruct
quantization: null

View file

@ -67,7 +67,7 @@
"providers:\n",
" inference:\n",
" - provider_id: meta-reference\n",
" provider_type: meta-reference\n",
" provider_type: inline::meta-reference\n",
" config:\n",
" model: Llama3.1-8B-Instruct\n",
" torch_seed: 42\n",
@ -77,7 +77,7 @@
" checkpoint_dir: null\n",
" safety:\n",
" - provider_id: meta-reference\n",
" provider_type: meta-reference\n",
" provider_type: inline::meta-reference\n",
" config:\n",
" llama_guard_shield:\n",
" model: Llama-Guard-3-1B\n",
@ -94,7 +94,7 @@
"```bash\n",
"inference:\n",
" - provider_id: meta-reference\n",
" provider_type: meta-reference\n",
" provider_type: inline::meta-reference\n",
" config:\n",
" model: Llama3.1-8B-Instruct\n",
" torch_seed: null\n",
@ -103,7 +103,7 @@
" create_distributed_process_group: true\n",
" checkpoint_dir: null\n",
" - provider_id: meta1\n",
" provider_type: meta-reference\n",
" provider_type: inline::meta-reference\n",
" config:\n",
" model: Llama-Guard-3-1B\n",
" torch_seed: null\n",

View file

@ -25,11 +25,11 @@ def up_to_date_config():
providers:
inference:
- provider_id: provider1
provider_type: meta-reference
provider_type: inline::meta-reference
config: {{}}
safety:
- provider_id: provider1
provider_type: meta-reference
provider_type: inline::meta-reference
config:
llama_guard_shield:
model: Llama-Guard-3-1B
@ -39,7 +39,7 @@ def up_to_date_config():
enable_prompt_guard: false
memory:
- provider_id: provider1
provider_type: meta-reference
provider_type: inline::meta-reference
config: {{}}
""".format(
version=LLAMA_STACK_RUN_CONFIG_VERSION, built_at=datetime.now().isoformat()
@ -61,13 +61,13 @@ def old_config():
host: localhost
port: 11434
routing_key: Llama3.2-1B-Instruct
- provider_type: meta-reference
- provider_type: inline::meta-reference
config:
model: Llama3.1-8B-Instruct
routing_key: Llama3.1-8B-Instruct
safety:
- routing_key: ["shield1", "shield2"]
provider_type: meta-reference
provider_type: inline::meta-reference
config:
llama_guard_shield:
model: Llama-Guard-3-1B
@ -77,7 +77,7 @@ def old_config():
enable_prompt_guard: false
memory:
- routing_key: vector
provider_type: meta-reference
provider_type: inline::meta-reference
config: {{}}
api_providers:
telemetry:

View file

@ -5,7 +5,7 @@
# the root directory of this source tree.
from enum import Enum
from typing import List, Optional
from typing import List
import pkg_resources
from pydantic import BaseModel
@ -38,11 +38,6 @@ class ImageType(Enum):
conda = "conda"
class Dependencies(BaseModel):
pip_packages: List[str]
docker_image: Optional[str] = None
class ApiInput(BaseModel):
api: Api
provider: str
@ -103,17 +98,12 @@ def print_pip_install_help(providers: Dict[str, List[Provider]]):
def build_image(build_config: BuildConfig, build_file_path: Path):
package_deps = Dependencies(
docker_image=build_config.distribution_spec.docker_image or "python:3.10-slim",
pip_packages=SERVER_DEPENDENCIES,
)
docker_image = build_config.distribution_spec.docker_image or "python:3.10-slim"
# extend package dependencies based on providers spec
normal_deps, special_deps = get_provider_dependencies(
build_config.distribution_spec.providers
)
package_deps.pip_packages.extend(normal_deps)
package_deps.pip_packages.extend(special_deps)
normal_deps += SERVER_DEPENDENCIES
if build_config.image_type == ImageType.docker.value:
script = pkg_resources.resource_filename(
@ -122,7 +112,7 @@ def build_image(build_config: BuildConfig, build_file_path: Path):
args = [
script,
build_config.name,
package_deps.docker_image,
docker_image,
str(build_file_path),
str(BUILDS_BASE_DIR / ImageType.docker.value),
" ".join(normal_deps),

View file

@ -147,8 +147,19 @@ else
image_name="$image_name-$(curl -s $URL | jq -r '.info.version')"
fi
# Detect platform architecture
ARCH=$(uname -m)
if [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then
PLATFORM="--platform linux/arm64"
elif [ "$ARCH" = "x86_64" ]; then
PLATFORM="--platform linux/amd64"
else
echo "Unsupported architecture: $ARCH"
exit 1
fi
set -x
$DOCKER_BINARY build $DOCKER_OPTS -t $image_name -f "$TEMP_DIR/Dockerfile" "$REPO_DIR" $mounts
$DOCKER_BINARY build $DOCKER_OPTS $PLATFORM -t $image_name -f "$TEMP_DIR/Dockerfile" "$REPO_DIR" $mounts
# clean up tmp/configs
set +x

View file

@ -10,6 +10,8 @@ DOCKER_BINARY=${DOCKER_BINARY:-docker}
DOCKER_OPTS=${DOCKER_OPTS:-}
LLAMA_CHECKPOINT_DIR=${LLAMA_CHECKPOINT_DIR:-}
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
PYPI_VERSION=${PYPI_VERSION:-}
set -euo pipefail
@ -54,6 +56,12 @@ if [ -n "$LLAMA_CHECKPOINT_DIR" ]; then
DOCKER_OPTS="$DOCKER_OPTS --gpus=all"
fi
if [ -n "$PYPI_VERSION" ]; then
docker_image="$docker_image-$PYPI_VERSION"
elif [ -n "$TEST_PYPI_VERSION" ]; then
docker_image="$docker_image-test-$TEST_PYPI_VERSION"
fi
$DOCKER_BINARY run $DOCKER_OPTS -it \
-p $port:$port \
-v "$yaml_config:/app/config.yaml" \

View file

@ -14,7 +14,7 @@ def available_providers() -> List[ProviderSpec]:
return [
InlineProviderSpec(
api=Api.agents,
provider_type="meta-reference",
provider_type="inline::meta-reference",
pip_packages=[
"matplotlib",
"pillow",

View file

@ -13,7 +13,7 @@ def available_providers() -> List[ProviderSpec]:
return [
InlineProviderSpec(
api=Api.eval,
provider_type="meta-reference",
provider_type="inline::meta-reference",
pip_packages=[],
module="llama_stack.providers.inline.eval.meta_reference",
config_class="llama_stack.providers.inline.eval.meta_reference.MetaReferenceEvalConfig",

View file

@ -25,14 +25,14 @@ def available_providers() -> List[ProviderSpec]:
return [
InlineProviderSpec(
api=Api.inference,
provider_type="meta-reference",
provider_type="inline::meta-reference",
pip_packages=META_REFERENCE_DEPS,
module="llama_stack.providers.inline.inference.meta_reference",
config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
),
InlineProviderSpec(
api=Api.inference,
provider_type="meta-reference-quantized",
provider_type="inline::meta-reference-quantized",
pip_packages=(
META_REFERENCE_DEPS
+ [

View file

@ -34,7 +34,7 @@ def available_providers() -> List[ProviderSpec]:
return [
InlineProviderSpec(
api=Api.memory,
provider_type="meta-reference",
provider_type="inline::meta-reference",
pip_packages=EMBEDDING_DEPS + ["faiss-cpu"],
module="llama_stack.providers.inline.memory.faiss",
config_class="llama_stack.providers.inline.memory.faiss.FaissImplConfig",

View file

@ -19,7 +19,7 @@ def available_providers() -> List[ProviderSpec]:
return [
InlineProviderSpec(
api=Api.safety,
provider_type="meta-reference",
provider_type="inline::meta-reference",
pip_packages=[
"transformers",
"torch --index-url https://download.pytorch.org/whl/cpu",
@ -30,7 +30,7 @@ def available_providers() -> List[ProviderSpec]:
Api.inference,
],
deprecation_error="""
Provider `meta-reference` for API `safety` does not work with the latest Llama Stack.
Provider `inline::meta-reference` for API `safety` does not work with the latest Llama Stack.
- if you are using Llama Guard v3, please use the `inline::llama-guard` provider instead.
- if you are using Prompt Guard, please use the `inline::prompt-guard` provider instead.

View file

@ -13,7 +13,7 @@ def available_providers() -> List[ProviderSpec]:
return [
InlineProviderSpec(
api=Api.scoring,
provider_type="basic",
provider_type="inline::basic",
pip_packages=[],
module="llama_stack.providers.inline.scoring.basic",
config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig",
@ -24,7 +24,7 @@ def available_providers() -> List[ProviderSpec]:
),
InlineProviderSpec(
api=Api.scoring,
provider_type="llm-as-judge",
provider_type="inline::llm-as-judge",
pip_packages=[],
module="llama_stack.providers.inline.scoring.llm_as_judge",
config_class="llama_stack.providers.inline.scoring.llm_as_judge.LlmAsJudgeScoringConfig",
@ -36,7 +36,7 @@ def available_providers() -> List[ProviderSpec]:
),
InlineProviderSpec(
api=Api.scoring,
provider_type="braintrust",
provider_type="inline::braintrust",
pip_packages=["autoevals", "openai"],
module="llama_stack.providers.inline.scoring.braintrust",
config_class="llama_stack.providers.inline.scoring.braintrust.BraintrustScoringConfig",

View file

@ -13,7 +13,7 @@ def available_providers() -> List[ProviderSpec]:
return [
InlineProviderSpec(
api=Api.telemetry,
provider_type="meta-reference",
provider_type="inline::meta-reference",
pip_packages=[],
module="llama_stack.providers.inline.meta_reference.telemetry",
config_class="llama_stack.providers.inline.meta_reference.telemetry.ConsoleConfig",