import gradio as gr
from IPython.display import IFrame, display
import os
from llama_cpp import Llama
import json

# Close any Gradio apps left over from a previous run
gr.close_all()
Running a Gradio App on JupyterHub¶
This code demonstrates how to run a Gradio app (a simple web-based interface for Python functions) inside a JupyterHub environment such as DataHub or CloudBank.
Gradio normally serves on localhost, but on JupyterHub the notebook server sits behind a proxy, so we use the JUPYTERHUB_SERVICE_PREFIX environment variable to build a root_path that routes requests correctly through the hub's proxy.
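On a hub the prefix typically looks like /user/&lt;username&gt;/, so the app ends up reachable at /user/&lt;username&gt;/proxy/7860/. A quick sanity check (a sketch; outside JupyterHub the variable is unset and falls back to /):
# JUPYTERHUB_SERVICE_PREFIX is set by the hub, e.g. "/user/<username>/";
# when running locally it is absent, so fall back to "/".
prefix = os.environ.get("JUPYTERHUB_SERVICE_PREFIX", "/")
print(f"The app will be served at: {prefix}proxy/7860/")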
demo1 = gr.Interface(fn=lambda x: f"Hello {x}!", inputs="text", outputs="text")
base_url = os.environ.get('JUPYTERHUB_SERVICE_PREFIX', '/')
demo1.launch(
    share=False,
    prevent_thread_lock=True,
    server_port=7860,
    root_path=f"{base_url}proxy/7860",
    inline=False
)
proxy_url = f"{base_url}proxy/7860/"
display(IFrame(src=proxy_url, width=1000, height=600))

# Close the app to free port 7860 for the next demo
demo1.close()

# Confirm the service prefix in use
print(base_url)
Set up the llama-cpp-python framework¶
Next we load a small local model with llama-cpp-python and run it behind the Gradio chatbot.
path="/home/jovyan/shared/"model = Llama(
model_path=os.path.join(path, "qwen2-1_5b-instruct-q4_0.gguf"),
n_ctx=2048,
n_threads=None,
verbose=True,
chat_format="chatml"
)# Define function to call the model
def chat_with_model(prompt):
    messages = [{"role": "user", "content": prompt}]
    response = model.create_chat_completion(messages=messages, max_tokens=256)
    return response["choices"][0]["message"]["content"]
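Before wiring the function into a UI, it's worth a quick smoke test with a direct call (the exact reply will vary by model):
# Smoke test: call the wrapper once, outside of Gradio (reply varies by model)
print(chat_with_model("Say hello in one sentence."))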
# Define Gradio interface
demo2 = gr.Interface(fn=chat_with_model, inputs="text", outputs="text", title="Small Model Chat")
# Launch on JupyterHub proxy
base_url = os.environ.get('JUPYTERHUB_SERVICE_PREFIX', '/')
demo2.launch(
    share=False,
    prevent_thread_lock=True,
    server_port=7860,
    root_path=f"{base_url}proxy/7860",
    inline=False
)
# Display inline in the notebook
proxy_url = f"{base_url}proxy/7860/"
display(IFrame(src=proxy_url, width=1000, height=600))

# Close the app before launching the persistent version on the same port
demo2.close()
Let's Build the History of the Chat¶
The conversation will be stored in a JSON file so it persists across notebook sessions.
# ===== Persistent History File =====
HISTORY_FILE = "chat_history.json"
def load_history():
    if os.path.exists(HISTORY_FILE):
        try:
            with open(HISTORY_FILE, "r") as f:
                content = f.read().strip()
                if not content:
                    return []  # empty file
                return json.loads(content)
        except json.JSONDecodeError:
            print("⚠️ Warning: history file is corrupted or empty, resetting it.")
            return []
    return []
def save_history(history):
    with open(HISTORY_FILE, "w") as f:
        json.dump(history, f, indent=2)
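Each turn is stored as a dict with "user" and "model" keys. Printing an example shows the exact on-disk format (the content here is illustrative, not from a real session):
# Illustrative example of the on-disk format (not a real session)
example_turn = {"user": "Hi!", "model": "Hello! How can I help?"}
print(json.dumps([example_turn], indent=2))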
history = load_history()
# ===== Chat Function =====
def chat_with_model(user_input):
    """Append user input to history, generate response, and persist conversation."""
    # Reload history each time in case the file changed externally
    global history
    history = load_history()

    # Build messages list from history
    messages = []
    for h in history:
        messages.append({"role": "user", "content": h['user']})
        messages.append({"role": "assistant", "content": h['model']})
    messages.append({"role": "user", "content": user_input})

    # Generate model response
    response = model.create_chat_completion(messages=messages, max_tokens=100)
    response_text = response["choices"][0]["message"]["content"]

    # Update and persist history
    history.append({"user": user_input, "model": response_text})
    save_history(history)
    return response_text
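One caveat: every stored turn is replayed into the prompt on each call, so a long history will eventually overflow the n_ctx=2048 context window. A minimal guard is to replay only the most recent turns (a sketch, not part of the original notebook; MAX_TURNS is a hypothetical cap to tune for your model):
MAX_TURNS = 10  # hypothetical cap; tune so the replayed turns fit within n_ctx

def build_recent_messages(history, user_input, max_turns=MAX_TURNS):
    """Replay only the most recent turns so the prompt stays inside the context window."""
    messages = []
    for h in history[-max_turns:]:
        messages.append({"role": "user", "content": h["user"]})
        messages.append({"role": "assistant", "content": h["model"]})
    messages.append({"role": "user", "content": user_input})
    return messages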
# ===== Gradio Interface =====
demo3 = gr.Interface(
    fn=chat_with_model,
    inputs="text",
    outputs="text",
    title="Persistent Small Model Chat",
    description="Chat with a local llama-cpp-python model that remembers previous conversations."
)
base_url = os.environ.get('JUPYTERHUB_SERVICE_PREFIX', '/')
demo3.launch(
    share=False,
    prevent_thread_lock=True,
    server_port=7860,
    root_path=f"{base_url}proxy/7860",
    inline=False
)
proxy_url = f"{base_url}proxy/7860/"
display(IFrame(src=proxy_url, width=1000, height=600))
Now we can check the history that we have¶
for turn in json.load(open("chat_history.json")):
    print(f"🧑 User: {turn['user']}\n🤖 Model: {turn['model']}\n")

# If you want to clear the history:
# open("chat_history.json", "w").write("[]")

# Shut down the app and close any remaining Gradio servers
demo3.close()
gr.close_all()