Replace browser UI with Basic Streamlit UI Implementation (pytorch#908)
Remove the existing browser UI and replace it with a UI built with Streamlit. This reduces complexity and leverages the functionality introduced in PR pytorch#906 to display chunked responses.
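For context, a minimal sketch (not part of the commit; `fake_token_stream` is a hypothetical stand-in for the chunked completion responses from pytorch#906) of how Streamlit can render a streamed response in a chat layout:

```python
import time

import streamlit as st


def fake_token_stream():
    # Hypothetical stand-in for the chunked completion responses that
    # OpenAiApiGenerator.completion() yields in torchchat.
    for token in ["Hello", ", ", "world", "!"]:
        time.sleep(0.1)  # simulate generation latency
        yield token


if prompt := st.chat_input("Say something"):
    with st.chat_message("user"):
        st.markdown(prompt)
    with st.chat_message("assistant"):
        # st.write_stream consumes the generator, rendering each chunk as it
        # arrives and returning the full concatenated text.
        full_text = st.write_stream(fake_token_stream())
```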

**Testing**
```
streamlit run torchchat.py -- browser stories110M --compile --max-new-tokens 256

  You can now view your Streamlit app in your browser.

  Local URL: http://localhost:8501
  Network URL: http://192.0.0.2:8501
```
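Note (mine, not from the commit): in `streamlit run`, flags placed before the standalone `--` are consumed by Streamlit itself, while everything after it is forwarded to `torchchat.py` as script arguments. For example, assuming the default port 8501 is already in use, the same test could be run on another port:

```
streamlit run torchchat.py --server.port 8502 -- browser stories110M --compile --max-new-tokens 256
```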
<img width="1002" alt="image" src="https://github.com/user-attachments/assets/df305943-2326-4d01-a48b-61dd2006fa28">
vmpuri authored Jul 18, 2024
1 parent b0081ed commit 3e71b61
Showing 7 changed files with 105 additions and 283 deletions.
18 changes: 9 additions & 9 deletions README.md
````diff
@@ -123,22 +123,22 @@ For more information run `python3 torchchat.py generate --help`
 
 ### Browser
+This mode provides access to the model via the browser's localhost.
 
+Launch an interactive chat with your model. Running the command will automatically open a tab in your browser. [Streamlit](https://streamlit.io/) should already be installed by the `install_requirements.sh` script.
+```
+streamlit run torchchat.py -- browser <model_name> <model_args>
+```
+
+For example, to quantize and chat with LLaMA3:
 [skip default]: begin
 ```
-python3 torchchat.py browser llama3
+streamlit run torchchat.py -- browser llama3 --quantize '{"precision": {"dtype":"float16"}, "executor":{"accelerator":"cpu"}}' --max-new-tokens 256 --compile
 ```
 [skip default]: end
 
-*Running on http://127.0.0.1:5000* should be printed out on the
-terminal. Click the link or go to
-[http://127.0.0.1:5000](http://127.0.0.1:5000) on your browser to
-start interacting with it.
 
-Enter some text in the input box, then hit the enter key or click the
-“SEND” button. After a second or two, the text you entered together
-with the generated text will be displayed. Repeat to have a
-conversation.
````
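For readability, the inline quantization config passed in the command above is the following JSON (content unchanged, only reformatted):

```
{
  "precision": { "dtype": "float16" },
  "executor": { "accelerator": "cpu" }
}
```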
114 changes: 81 additions & 33 deletions browser/browser.py
```diff
@@ -4,40 +4,88 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 
-import subprocess
-import sys
+import time
+
+import streamlit as st
+from api.api import CompletionRequest, OpenAiApiGenerator
+
+from build.builder import BuilderArgs, TokenizerArgs
+
+from generate import GeneratorArgs
 
 
 def main(args):
-    # Directory Containing the server file "chat_in_browser.py"
-    server_dir = "browser"
-
-    # Look for port from cmd args. Default to 5000 if not found.
-    port = 5000
-    i = 2
-    while i < len(sys.argv):
-        if sys.argv[i] == "--port":
-            if i + 1 < len(sys.argv):
-                # Extract the value and remove '--port' and the value from sys.argv
-                port = sys.argv[i + 1]
-                del sys.argv[i : i + 2]
-                break
-        else:
-            i += 1
-
-    # Construct arguments for the flask app minus 'browser' command
-    # plus '--chat'
-    args_plus_chat = ["'{}'".format(s) for s in sys.argv[1:] if s != "browser"] + [
-        '"--chat"'
-    ]
-    formatted_args = ", ".join(args_plus_chat)
-    command = [
-        "flask",
-        "--app",
-        f"{server_dir}/chat_in_browser:create_app(" + formatted_args + ")",
-        "run",
-        "--port",
-        f"{port}",
-    ]
-    subprocess.run(command)
+    builder_args = BuilderArgs.from_args(args)
+    speculative_builder_args = BuilderArgs.from_speculative_args(args)
+    tokenizer_args = TokenizerArgs.from_args(args)
+    generator_args = GeneratorArgs.from_args(args)
+    generator_args.chat_mode = False
+
+    @st.cache_resource
+    def initialize_generator() -> OpenAiApiGenerator:
+        return OpenAiApiGenerator(
+            builder_args,
+            speculative_builder_args,
+            tokenizer_args,
+            generator_args,
+            args.profile,
+            args.quantize,
+            args.draft_quantize,
+        )
+
+    gen = initialize_generator()
+
+    st.title("torchchat")
+
+    # Initialize chat history
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+
+    # Display chat messages from history on app rerun
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+
+    # Accept user input
+    if prompt := st.chat_input("What is up?"):
+        # Add user message to chat history
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        # Display user message in chat message container
+        with st.chat_message("user"):
+            st.markdown(prompt)
+
+        # Display assistant response in chat message container
+        with st.chat_message("assistant"), st.status(
+            "Generating... ", expanded=True
+        ) as status:
+
+            req = CompletionRequest(
+                model=gen.builder_args.checkpoint_path,
+                prompt=prompt,
+                temperature=generator_args.temperature,
+                messages=[],
+            )
+
+            def unwrap(completion_generator):
+                start = time.time()
+                tokcount = 0
+                for chunk_response in completion_generator:
+                    content = chunk_response.choices[0].delta.content
+                    if not gen.is_llama3_model or content not in set(
+                        gen.tokenizer.special_tokens.keys()
+                    ):
+                        yield content
+                    if content == gen.tokenizer.eos_id():
+                        yield "."
+                    tokcount += 1
+                status.update(
+                    label="Done, averaged {:.2f} tokens/second".format(
+                        tokcount / (time.time() - start)
+                    ),
+                    state="complete",
+                )
+
+            response = st.write_stream(unwrap(gen.completion(req)))
+
+        # Add assistant response to chat history
+        st.session_state.messages.append({"role": "assistant", "content": response})
```
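A note on the caching pattern above (my gloss, not part of the commit): Streamlit re-executes the whole script on every user interaction, so `initialize_generator` is wrapped in `@st.cache_resource` to build the model-backed generator once and reuse the same object across reruns and sessions. A minimal sketch of the same pattern with a hypothetical loader:

```python
import streamlit as st


@st.cache_resource
def load_model(checkpoint_path: str) -> dict:
    # Hypothetical expensive load; in browser.py this is OpenAiApiGenerator(...).
    # The decorated body runs once per unique argument set; later reruns of the
    # script get the cached object back instead of reloading the model.
    return {"checkpoint": checkpoint_path}  # stand-in for a real model object


model = load_model("stories110M.pt")
st.write("Model loaded from:", model["checkpoint"])
```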
107 changes: 0 additions & 107 deletions browser/chat_in_browser.py

This file was deleted.

96 changes: 0 additions & 96 deletions browser/static/css/style.css

This file was deleted.

27 changes: 0 additions & 27 deletions browser/templates/chat.html

This file was deleted.
