Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-17 16:42:44 +00:00)
chat playground
This commit is contained in:
parent 371259ca5b
commit c544e4b015
8 changed files with 295 additions and 158 deletions
llama_stack/distribution/ui/page/playground/__init__.py (new file, 5 additions)
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
llama_stack/distribution/ui/page/playground/chat.py (new file, 109 additions)
@@ -0,0 +1,109 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st
from modules.api import llama_stack_api

# Sidebar configurations
with st.sidebar:
    st.header("Configuration")
    available_models = llama_stack_api.list_models()
    available_models = [model.identifier for model in available_models]
    selected_model = st.selectbox(
        "Choose a model",
        available_models,
        index=0,
    )

    temperature = st.slider(
        "Temperature",
        min_value=0.0,
        max_value=1.0,
        value=0.0,
        step=0.1,
        help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable",
    )

    top_p = st.slider(
        "Top P",
        min_value=0.0,
        max_value=1.0,
        value=0.95,
        step=0.1,
    )

    max_tokens = st.slider(
        "Max Tokens",
        min_value=0,
        max_value=4096,
        value=512,
        step=1,
        help="The maximum number of tokens to generate",
    )

    repetition_penalty = st.slider(
        "Repetition Penalty",
        min_value=1.0,
        max_value=2.0,
        value=1.0,
        step=0.1,
        help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.",
    )

    stream = st.checkbox("Stream", value=True)


# Main chat interface
st.title("🦙 Chat")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chat input
if prompt := st.chat_input("Example: What is Llama Stack?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Display user message
    with st.chat_message("user"):
        st.markdown(prompt)

    # Display assistant response
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""

        response = llama_stack_api.client.inference.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            model_id=selected_model,
            stream=stream,
            sampling_params={
                "temperature": temperature,
                "top_p": top_p,
                "max_tokens": max_tokens,
                "repetition_penalty": repetition_penalty,
            },
        )

        if stream:
            for chunk in response:
                if chunk.event.event_type == "progress":
                    full_response += chunk.event.delta
                    message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)
        else:
            full_response = response.completion_message.content
            message_placeholder.markdown(full_response)

    st.session_state.messages.append(
        {"role": "assistant", "content": full_response}
    )
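
Note that chat.py imports llama_stack_api from modules.api, a helper that is not part of the hunks shown on this page. The sketch below is one plausible shape for that wrapper, assuming the llama-stack-client Python SDK; the class name, the LLAMA_STACK_ENDPOINT environment variable, and the default port are illustrative assumptions, not code from this commit.

# Hypothetical sketch of modules/api.py -- not part of this commit.
# Assumes the llama-stack-client Python SDK is installed and a Llama Stack
# server is reachable at the configured endpoint.
import os

from llama_stack_client import LlamaStackClient


class LlamaStackApi:
    def __init__(self):
        # Expose the raw client so pages can call client.inference.chat_completion(...)
        self.client = LlamaStackClient(
            base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:5000"),
        )

    def list_models(self):
        # Used by the sidebar above to populate the model selector
        return self.client.models.list()


llama_stack_api = LlamaStackApi()

Since chat.py is a plain Streamlit script, it would presumably be launched with something like "streamlit run llama_stack/distribution/ui/page/playground/chat.py", run from a working directory where modules.api is importable.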