Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-06-28 10:54:19 +00:00
Merge branch 'main' into vest_search
commit 7893641de1
5 changed files with 42 additions and 6 deletions
.github/workflows/test-external-providers.yml (vendored, 10 changes)
@@ -45,20 +45,22 @@ jobs:

       - name: Build distro from config file
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml

       - name: Start Llama Stack server in background
         if: ${{ matrix.image-type }} == 'venv'
         env:
           INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
         run: |
-          uv run pip list
-          nohup uv run --active llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+          # Use the virtual environment created by the build step (name comes from build config)
+          source ci-test/bin/activate
+          uv pip list
+          nohup llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &

       - name: Wait for Llama Stack server to be ready
         run: |
           for i in {1..30}; do
-            if ! grep -q "remote::custom_ollama from /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml" server.log; then
+            if ! grep -q "Successfully loaded external provider remote::custom_ollama" server.log; then
               echo "Waiting for Llama Stack server to load the provider..."
               sleep 1
             else
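Two things change in this workflow hunk: the build and run steps drop the `uv run` / `uv run --active` wrappers in favor of activating the `ci-test` virtual environment the build step creates, and the readiness check now greps for the `Successfully loaded external provider` message (added to the registry loader in the next hunk) instead of a hard-coded provider file path. For reference, a minimal Python sketch of the same poll-for-a-log-line pattern; the function name and defaults are illustrative, not part of this change:

import time
from pathlib import Path


def wait_for_marker(log_path: str, marker: str, attempts: int = 30, delay: float = 1.0) -> bool:
    """Poll a log file until a marker line appears, like the shell loop above."""
    for _ in range(attempts):
        log = Path(log_path)
        if log.exists() and marker in log.read_text():
            return True
        time.sleep(delay)
    return False


# wait_for_marker("server.log", "Successfully loaded external provider remote::custom_ollama")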
@@ -180,6 +180,7 @@ def get_provider_registry(
                     if provider_type_key in ret[api]:
                         logger.warning(f"Overriding already registered provider {provider_type_key} for {api.name}")
                     ret[api][provider_type_key] = spec
+                    logger.info(f"Successfully loaded external provider {provider_type_key}")
                 except yaml.YAMLError as yaml_err:
                     logger.error(f"Failed to parse YAML file {spec_path}: {yaml_err}")
                     raise yaml_err
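The single added line here is the other half of the CI change above: `get_provider_registry` now logs `Successfully loaded external provider <provider_type_key>` once a spec is registered, and that message is exactly what the workflow's wait loop greps for. The readiness check is therefore coupled to this string; renaming the log message would silently break the CI wait step.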
@@ -115,7 +115,7 @@ def parse_environment_config(env_config: str) -> dict[str, int]:

 class CustomRichHandler(RichHandler):
     def __init__(self, *args, **kwargs):
-        kwargs["console"] = Console(width=120)
+        kwargs["console"] = Console(width=150)
         super().__init__(*args, **kwargs)

     def emit(self, record):
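This hunk widens the fixed-width `rich` console used by the custom log handler from 120 to 150 columns, so long log lines (such as the provider-load messages above) wrap less often. A minimal sketch of the effect, assuming only that `rich` is installed; the printed string is illustrative:

from rich.console import Console

# A Console constructed with an explicit width wraps at that column
# regardless of the real terminal size.
console = Console(width=150)
console.print("x" * 140)  # fits on one line at width=150; would wrap at width=120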
@@ -76,7 +76,7 @@ class WeaviateIndex(EmbeddingIndex):
                 continue

             chunks.append(chunk)
-            scores.append(1.0 / doc.metadata.distance)
+            scores.append(1.0 / doc.metadata.distance if doc.metadata.distance != 0 else float("inf"))

         return QueryChunksResponse(chunks=chunks, scores=scores)
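Weaviate reports a distance per result, where 0.0 means the stored vector is identical to the query vector. The old code computed the score as `1.0 / distance`, which raises `ZeroDivisionError` on an exact match; the fix maps a zero distance to an infinite score instead. Restated as a standalone function (the name is illustrative, not from the change):

def distance_to_score(distance: float) -> float:
    # Score is the inverse of distance; an exact match (distance 0.0)
    # gets float("inf") rather than raising ZeroDivisionError.
    return 1.0 / distance if distance != 0 else float("inf")


assert distance_to_score(0.5) == 2.0
assert distance_to_score(0.0) == float("inf")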
@@ -154,3 +154,36 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, embedding_model_id):
     assert len(response.chunks) > 0
     assert response.chunks[0].metadata["document_id"] == "doc1"
     assert response.chunks[0].metadata["source"] == "precomputed"
+
+
+def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(client_with_empty_registry, embedding_model_id):
+    vector_db_id = "test_precomputed_embeddings_db"
+    client_with_empty_registry.vector_dbs.register(
+        vector_db_id=vector_db_id,
+        embedding_model=embedding_model_id,
+        embedding_dimension=384,
+    )
+
+    chunks_with_embeddings = [
+        Chunk(
+            content="duplicate",
+            metadata={"document_id": "doc1", "source": "precomputed"},
+            embedding=[0.1] * 384,
+        ),
+    ]
+
+    client_with_empty_registry.vector_io.insert(
+        vector_db_id=vector_db_id,
+        chunks=chunks_with_embeddings,
+    )
+
+    response = client_with_empty_registry.vector_io.query(
+        vector_db_id=vector_db_id,
+        query="duplicate",
+    )
+
+    # Verify the top result is the expected document
+    assert response is not None
+    assert len(response.chunks) > 0
+    assert response.chunks[0].metadata["document_id"] == "doc1"
+    assert response.chunks[0].metadata["source"] == "precomputed"
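Judging by its name, the new test covers the case the Weaviate hunk above fixes: a query whose embedding can be identical to one already stored in the vector DB, which previously caused a division by zero. It registers a 384-dimension vector DB, inserts a chunk with a precomputed embedding, queries with the same text as the chunk content, and asserts that a valid, non-empty response with the expected metadata still comes back.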