[llama stack ui] add native eval & inspect distro & playground pages (#541)

# What does this PR do?

New pages added:

- (1) Inspect Distro
- (2) Evaluations:
  - (a) native evaluations (generation + scoring)
  - (b) application evaluations (scoring only, no generation; see the sketch after this list)
- (3) Playground:
  - (a) chat
  - (b) RAG
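
For the application (scoring-only) flow, the page goes straight to the client's scoring API, which this PR wraps in `run_scoring`. A minimal sketch, assuming a locally running distribution; the row columns and the `basic::equality` scoring-function id are illustrative placeholders:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

# A pre-generated row: no model call happens here, we only score existing output.
row = {
    "input_query": "What is the capital of France?",
    "generated_answer": "Paris",
    "expected_answer": "Paris",
}

# Map each scoring function id to its (optional) params, mirroring run_scoring.
scoring_params = {"basic::equality": None}

response = client.scoring.score(input_rows=[row], scoring_functions=scoring_params)

# Assumption: results come back keyed by scoring function id.
for fn_id, result in response.results.items():
    print(fn_id, result.score_rows)
```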

## Test Plan

```bash
streamlit run app.py
```
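
The UI talks to whatever distribution `LLAMA_STACK_ENDPOINT` points at, defaulting to `http://localhost:5000` as in the client setup in the diff below. A quick sanity-check sketch before launching Streamlit, assuming a server is already running:

```python
import os

from llama_stack_client import LlamaStackClient

# Same endpoint resolution the UI uses: env var first, localhost:5000 otherwise.
client = LlamaStackClient(
    base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:5000"),
)

# If the server is reachable, this prints the registered model identifiers.
print([model.identifier for model in client.models.list()])
```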

#### Playground

https://github.com/user-attachments/assets/6ca617e8-32ca-49b2-9774-185020ff5204
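
Roughly what the chat page does under the hood, as a minimal Streamlit sketch. The `model_id`/`messages` parameter names and the `completion_message.content` attribute are assumptions about the `llama-stack-client` version in use, so treat this as illustrative rather than the page's actual code:

```python
import streamlit as st
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

# Let the user pick any model registered with the distribution.
models = [m.identifier for m in client.models.list()]
model = st.sidebar.selectbox("Model", models)

if prompt := st.chat_input("Ask something"):
    st.chat_message("user").write(prompt)
    # Parameter names here are assumptions about the client API surface.
    response = client.inference.chat_completion(
        model_id=model,
        messages=[{"role": "user", "content": prompt}],
        stream=False,
    )
    st.chat_message("assistant").write(response.completion_message.content)
```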

#### Inspect

https://github.com/user-attachments/assets/01d52b2d-92af-4e3a-b623-a9b8ba22ba99
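
The Inspect page is essentially a read-only view over the client's registry listings. A rough sketch of the kind of information it can pull; the real page may also surface providers, routes, and other resources, and the `identifier` attribute is an assumption:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

# Registered models served by the distribution.
for model in client.models.list():
    print("model:", model.identifier)

# Scoring functions available for evaluations.
for fn in client.scoring_functions.list():
    print("scoring function:", fn.identifier)
```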


#### Evaluations (Generation + Scoring)

https://github.com/user-attachments/assets/345845c7-2a2b-4095-960a-9ae40f6a93cf

#### Evaluations (Scoring)

https://github.com/user-attachments/assets/6cc1659f-eba4-49ca-a0a5-7c243557b4f5


## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor
guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md),
      Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
Commit 16769256b7 (parent caf1dac114) by Xi Yan, 2024-12-04: 22 changed files with 1000 additions and 166 deletions.

```diff
@@ -11,7 +11,7 @@ from typing import Optional
 from llama_stack_client import LlamaStackClient
 
 
-class LlamaStackEvaluation:
+class LlamaStackApi:
     def __init__(self):
         self.client = LlamaStackClient(
             base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:5000"),
@@ -22,14 +22,6 @@ class LlamaStackEvaluation:
             },
         )
 
-    def list_scoring_functions(self):
-        """List all available scoring functions"""
-        return self.client.scoring_functions.list()
-
-    def list_models(self):
-        """List all available judge models"""
-        return self.client.models.list()
-
     def run_scoring(
         self, row, scoring_function_ids: list[str], scoring_params: Optional[dict]
     ):
@@ -39,3 +31,6 @@ class LlamaStackEvaluation:
         return self.client.scoring.score(
             input_rows=[row], scoring_functions=scoring_params
         )
+
+
+llama_stack_api = LlamaStackApi()
```
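
The diff above is the core refactor behind these pages: the evaluation-specific `LlamaStackEvaluation` wrapper becomes a general `LlamaStackApi`, and a module-level `llama_stack_api` singleton is exported so every page shares one configured client instead of building its own. A hypothetical page-side usage sketch (the import path is illustrative, not the actual file layout):

```python
# Hypothetical import path; the real module location may differ.
from modules.api import llama_stack_api

# Pages can reach any client API through the shared singleton...
judge_models = llama_stack_api.client.models.list()

# ...or keep using the thin run_scoring wrapper for scoring-only evaluations.
result = llama_stack_api.run_scoring(
    row={"input_query": "2+2?", "generated_answer": "4", "expected_answer": "4"},
    scoring_function_ids=["basic::equality"],
    scoring_params={"basic::equality": None},
)
```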