
Commit

Update UI and sponsors (#2387)
merrymercy authored Sep 8, 2023
1 parent 56744d1 commit 6af0a7c
Showing 4 changed files with 14 additions and 9 deletions.
6 changes: 3 additions & 3 deletions fastchat/serve/gradio_block_arena_anony.py
@@ -196,7 +196,7 @@ def share_click(state0, state1, model_selector0, model_selector1, request: gr.Re
"chatglm-6b": 0.5,
}

SAMPLING_BOOST_MODELS = ["llama-2-70b-chat", "codellama-34b-instruct"]
SAMPLING_BOOST_MODELS = ["wizardlm-70b"]

model_pairs = []
model_pairs_weights = []
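
Context on this change: SAMPLING_BOOST_MODELS feeds the pair-sampling code that fills model_pairs and model_pairs_weights below (that construction is collapsed out of this hunk). Purely as a sketch, not the repository's actual implementation, boosted models could be favored by multiplying the weight of any pair that contains one; the weight values and the boost factor here are assumptions for illustration:

```python
import random

# Assumed values for illustration; only the variable names mirror the diff above.
SAMPLING_WEIGHTS = {"gpt-4": 1.5, "wizardlm-70b": 1.0, "chatglm-6b": 0.5}
SAMPLING_BOOST_MODELS = ["wizardlm-70b"]
BOOST_FACTOR = 5  # hypothetical multiplier

model_pairs = []
model_pairs_weights = []
models = list(SAMPLING_WEIGHTS)
for i, model_a in enumerate(models):
    for model_b in models[i + 1:]:
        weight = SAMPLING_WEIGHTS[model_a] * SAMPLING_WEIGHTS[model_b]
        # Pairs involving a boosted model are sampled more often.
        if model_a in SAMPLING_BOOST_MODELS or model_b in SAMPLING_BOOST_MODELS:
            weight *= BOOST_FACTOR
        model_pairs.append((model_a, model_b))
        model_pairs_weights.append(weight)

pair = random.choices(model_pairs, weights=model_pairs_weights, k=1)[0]
print(pair)
```

After this commit only wizardlm-70b carries the boost, so under a scheme like this, battles involving it would be drawn more often.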
@@ -420,12 +420,12 @@ def build_side_by_side_ui_anony(models):
with gr.Column(scale=20):
textbox = gr.Textbox(
show_label=False,
placeholder="Enter text and press ENTER",
placeholder="Enter your prompt here and press ENTER",
visible=False,
container=False,
)
with gr.Column(scale=1, min_width=50):
send_btn = gr.Button(value="Send", visible=False)
send_btn = gr.Button(value="Battle", visible=False, variant="primary")

with gr.Row() as button_row2:
regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
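The three UI hunks in this commit make the same two edits: a friendlier textbox placeholder and a primary-styled send button (relabeled "Battle" in the two arena files). A minimal standalone Gradio sketch of just this input row, with the visible=False flags dropped so it renders immediately; this is not the arena's full layout:

```python
import gradio as gr

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=20):
            textbox = gr.Textbox(
                show_label=False,
                placeholder="Enter your prompt here and press ENTER",
                container=False,
            )
        with gr.Column(scale=1, min_width=50):
            # variant="primary" renders the button in the theme's accent color.
            send_btn = gr.Button(value="Battle", variant="primary")

if __name__ == "__main__":
    demo.launch()
```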
4 changes: 2 additions & 2 deletions fastchat/serve/gradio_block_arena_named.py
@@ -352,12 +352,12 @@ def build_side_by_side_ui_named(models):
with gr.Column(scale=20):
textbox = gr.Textbox(
show_label=False,
placeholder="Enter text and press ENTER",
placeholder="Enter your prompt here and press ENTER",
visible=False,
container=False,
)
with gr.Column(scale=1, min_width=50):
send_btn = gr.Button(value="Send", visible=False)
send_btn = gr.Button(value="Battle", visible=False, variant="primary")

with gr.Row() as button_row2:
regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
4 changes: 2 additions & 2 deletions fastchat/serve/gradio_web_server.py
@@ -591,12 +591,12 @@ def build_single_model_ui(models, add_promotion_links=False):
with gr.Column(scale=20):
textbox = gr.Textbox(
show_label=False,
placeholder="Enter text and press ENTER",
placeholder="Enter your prompt here and press ENTER",
visible=False,
container=False,
)
with gr.Column(scale=1, min_width=50):
send_btn = gr.Button(value="Send", visible=False)
send_btn = gr.Button(value="Send", visible=False, variant="primary")

with gr.Row(visible=False) as button_row:
upvote_btn = gr.Button(value="👍 Upvote", interactive=False)
9 changes: 7 additions & 2 deletions fastchat/serve/monitor/monitor.py
@@ -30,11 +30,11 @@ def make_leaderboard_md(elo_results):
| [Blog](https://lmsys.org/blog/2023-05-03-arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2306.05685) | [Dataset](https://huggingface.co/datasets/lmsys/chatbot_arena_conversations) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |
🏆 This leaderboard is based on the following three benchmarks.
-- [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) - a crowdsourced, randomized battle platform. We use 50K+ user votes to compute Elo ratings.
+- [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) - a crowdsourced, randomized battle platform. We use 70K+ user votes to compute Elo ratings.
- [MT-Bench](https://arxiv.org/abs/2306.05685) - a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
- [MMLU](https://arxiv.org/abs/2009.03300) (5-shot) - a test to measure a model's multitask accuracy on 57 tasks.
-💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are computed by [InstructEval](https://github.com/declare-lab/instruct-eval) and [Chain-of-Thought Hub](https://github.com/FranxYao/chain-of-thought-hub). Higher values are better for all benchmarks. Empty cells mean not available.
+💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are computed by [InstructEval](https://github.com/declare-lab/instruct-eval) and [Chain-of-Thought Hub](https://github.com/FranxYao/chain-of-thought-hub). Higher values are better for all benchmarks. Empty cells mean not available. Last updated: Sept, 2023.
"""
return leaderboard_md
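
The leaderboard text above states that Arena ratings are computed from pairwise user votes; the exact computation lives in the linked notebook. Purely as an illustration of the idea, the textbook online Elo update for a single battle looks like this (the K-factor is an arbitrary choice, not the notebook's):

```python
def elo_update(r_a, r_b, winner, k=32):
    """One standard Elo update; winner is 'a', 'b', or 'tie'."""
    expected_a = 1 / (1 + 10 ** ((r_b - r_a) / 400))
    score_a = {"a": 1.0, "b": 0.0, "tie": 0.5}[winner]
    r_a_new = r_a + k * (score_a - expected_a)
    r_b_new = r_b + k * ((1 - score_a) - (1 - expected_a))
    return r_a_new, r_b_new

# Example: a 1200-rated model beats a 1100-rated one.
print(elo_update(1200, 1100, "a"))
```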

@@ -241,6 +241,11 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file):
"#### Figure 4: Average Win Rate Against All Other Models (Assuming Uniform Sampling and No Ties)"
)
plot_4 = gr.Plot(p4, show_label=False)

+from fastchat.serve.gradio_web_server import acknowledgment_md
+
+gr.Markdown(acknowledgment_md)

return [md_1, plot_1, plot_2, plot_3, plot_4]
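
The added lines reuse the sponsor acknowledgment markdown defined in gradio_web_server.py so the leaderboard tab ends with the same footer as the chat UI. A minimal sketch of that pattern; the actual acknowledgment_md text is not part of this diff, so the string below is a placeholder:

```python
import gradio as gr

# Placeholder: the real acknowledgment_md lives in fastchat/serve/gradio_web_server.py
# and its content is not shown in this diff.
acknowledgment_md = "### Acknowledgment\nWe thank our sponsors for supporting this project."

with gr.Blocks() as demo:
    gr.Markdown("## Leaderboard")    # stand-in for the plots built above
    gr.Markdown(acknowledgment_md)   # shared footer, as added by this commit

if __name__ == "__main__":
    demo.launch()
```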


