From 6af0a7c8a618f87e05151dcf4939766c8aa59754 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Fri, 8 Sep 2023 15:21:18 -0700
Subject: [PATCH] Update UI and sponsors (#2387)

---
 fastchat/serve/gradio_block_arena_anony.py | 6 +++---
 fastchat/serve/gradio_block_arena_named.py | 4 ++--
 fastchat/serve/gradio_web_server.py        | 4 ++--
 fastchat/serve/monitor/monitor.py          | 9 +++++++--
 4 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/fastchat/serve/gradio_block_arena_anony.py b/fastchat/serve/gradio_block_arena_anony.py
index 978f76b75..a598a8c9a 100644
--- a/fastchat/serve/gradio_block_arena_anony.py
+++ b/fastchat/serve/gradio_block_arena_anony.py
@@ -196,7 +196,7 @@ def share_click(state0, state1, model_selector0, model_selector1, request: gr.Re
     "chatglm-6b": 0.5,
 }
 
-SAMPLING_BOOST_MODELS = ["llama-2-70b-chat", "codellama-34b-instruct"]
+SAMPLING_BOOST_MODELS = ["wizardlm-70b"]
 
 model_pairs = []
 model_pairs_weights = []
@@ -420,12 +420,12 @@ def build_side_by_side_ui_anony(models):
         with gr.Column(scale=20):
             textbox = gr.Textbox(
                 show_label=False,
-                placeholder="Enter text and press ENTER",
+                placeholder="Enter your prompt here and press ENTER",
                 visible=False,
                 container=False,
             )
         with gr.Column(scale=1, min_width=50):
-            send_btn = gr.Button(value="Send", visible=False)
+            send_btn = gr.Button(value="Battle", visible=False, variant="primary")
 
     with gr.Row() as button_row2:
         regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
diff --git a/fastchat/serve/gradio_block_arena_named.py b/fastchat/serve/gradio_block_arena_named.py
index b26172f3e..c031d28c2 100644
--- a/fastchat/serve/gradio_block_arena_named.py
+++ b/fastchat/serve/gradio_block_arena_named.py
@@ -352,12 +352,12 @@ def build_side_by_side_ui_named(models):
         with gr.Column(scale=20):
             textbox = gr.Textbox(
                 show_label=False,
-                placeholder="Enter text and press ENTER",
+                placeholder="Enter your prompt here and press ENTER",
                 visible=False,
                 container=False,
             )
         with gr.Column(scale=1, min_width=50):
-            send_btn = gr.Button(value="Send", visible=False)
+            send_btn = gr.Button(value="Battle", visible=False, variant="primary")
 
     with gr.Row() as button_row2:
         regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
diff --git a/fastchat/serve/gradio_web_server.py b/fastchat/serve/gradio_web_server.py
index c2e22e562..2fae670dc 100644
--- a/fastchat/serve/gradio_web_server.py
+++ b/fastchat/serve/gradio_web_server.py
@@ -591,12 +591,12 @@ def build_single_model_ui(models, add_promotion_links=False):
         with gr.Column(scale=20):
             textbox = gr.Textbox(
                 show_label=False,
-                placeholder="Enter text and press ENTER",
+                placeholder="Enter your prompt here and press ENTER",
                 visible=False,
                 container=False,
             )
         with gr.Column(scale=1, min_width=50):
-            send_btn = gr.Button(value="Send", visible=False)
+            send_btn = gr.Button(value="Send", visible=False, variant="primary")
 
     with gr.Row(visible=False) as button_row:
         upvote_btn = gr.Button(value="👍 Upvote", interactive=False)
diff --git a/fastchat/serve/monitor/monitor.py b/fastchat/serve/monitor/monitor.py
index 395f2bf84..b2081bc0d 100644
--- a/fastchat/serve/monitor/monitor.py
+++ b/fastchat/serve/monitor/monitor.py
@@ -30,11 +30,11 @@ def make_leaderboard_md(elo_results):
 | [Blog](https://lmsys.org/blog/2023-05-03-arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2306.05685) | [Dataset](https://huggingface.co/datasets/lmsys/chatbot_arena_conversations) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |
 
 🏆 This leaderboard is based on the following three benchmarks.
-- [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) - a crowdsourced, randomized battle platform. We use 50K+ user votes to compute Elo ratings.
+- [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) - a crowdsourced, randomized battle platform. We use 70K+ user votes to compute Elo ratings.
 - [MT-Bench](https://arxiv.org/abs/2306.05685) - a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
 - [MMLU](https://arxiv.org/abs/2009.03300) (5-shot) - a test to measure a model's multitask accuracy on 57 tasks.
 
-💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are computed by [InstructEval](https://github.com/declare-lab/instruct-eval) and [Chain-of-Thought Hub](https://github.com/FranxYao/chain-of-thought-hub). Higher values are better for all benchmarks. Empty cells mean not available.
+💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are computed by [InstructEval](https://github.com/declare-lab/instruct-eval) and [Chain-of-Thought Hub](https://github.com/FranxYao/chain-of-thought-hub). Higher values are better for all benchmarks. Empty cells mean not available. Last updated: Sept, 2023.
 """
     return leaderboard_md
 
@@ -241,6 +241,11 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file):
                 "#### Figure 4: Average Win Rate Against All Other Models (Assuming Uniform Sampling and No Ties)"
             )
             plot_4 = gr.Plot(p4, show_label=False)
+
+    from fastchat.serve.gradio_web_server import acknowledgment_md
+
+    gr.Markdown(acknowledgment_md)
+
     return [md_1, plot_1, plot_2, plot_3, plot_4]
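
For context on the first hunk: in the anonymous battle arena, models listed in SAMPLING_BOOST_MODELS are drawn into battles more often than their base sampling weights alone would give them, so this patch shifts the extra traffic from the two Llama-2-family models to wizardlm-70b. The sketch below shows one way such a boost can feed into weighted pair sampling; the example weights, the helper function, and the 5x multiplier are illustrative assumptions, not FastChat's exact implementation.

import random

# Illustrative per-model base weights (the patch context shows FastChat keeping
# these in a dict, e.g. "chatglm-6b": 0.5; values here are made up).
SAMPLING_WEIGHTS = {"gpt-4": 1.5, "wizardlm-70b": 1.0, "chatglm-6b": 0.5}

# As in the patch: models whose battles should be sampled more often.
SAMPLING_BOOST_MODELS = ["wizardlm-70b"]

BOOST_FACTOR = 5  # assumed multiplier, chosen here only for illustration


def sample_model_pair(models):
    """Pick two distinct models, upweighting pairs that include a boosted model."""
    pairs, weights = [], []
    for i, model_a in enumerate(models):
        for model_b in models[i + 1:]:
            weight = SAMPLING_WEIGHTS.get(model_a, 1.0) * SAMPLING_WEIGHTS.get(model_b, 1.0)
            if model_a in SAMPLING_BOOST_MODELS or model_b in SAMPLING_BOOST_MODELS:
                weight *= BOOST_FACTOR
            pairs.append((model_a, model_b))
            weights.append(weight)
    return random.choices(pairs, weights=weights, k=1)[0]


print(sample_model_pair(["gpt-4", "wizardlm-70b", "chatglm-6b"]))

Because the boost multiplies the pair weight rather than replacing it, a boosted model still meets strong and weak opponents in proportion to their base weights.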
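
The UI hunks use stock Gradio component options, so their visual effect can be previewed outside FastChat. A minimal standalone sketch, assuming a recent gradio release (scale and min_width are copied from the patch; the visible=False flags are dropped so the preview renders immediately, since the serve UI reveals those widgets only after the model list loads):

import gradio as gr

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=20):
            textbox = gr.Textbox(
                show_label=False,
                placeholder="Enter your prompt here and press ENTER",
                container=False,  # render bare, without the labeled container box
            )
        with gr.Column(scale=1, min_width=50):
            # variant="primary" uses the theme's accent styling, which is what
            # makes the renamed "Battle" button stand out next to the textbox.
            send_btn = gr.Button(value="Battle", variant="primary")

demo.launch()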