Skip to content

Commit

Permalink
Merge pull request #1251 from zachlasiuk/demo-functionality
Browse files Browse the repository at this point in the history
Finalizing demo templatizing
  • Loading branch information
pareenaverma authored Sep 13, 2024
2 parents b57ce1b + 876329b commit beaca96
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 18 deletions.
2 changes: 1 addition & 1 deletion assets/css/demo.css
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@
#notification-popup {
position: absolute;
/*top: -34px;*/
bottom: 46px;
bottom: 46px; /* 66 at 2 lines....86 at 3 lines....106px at 4 lines */

left: 0;
width: calc(100% - 32px); /* Subtract gutter space from width */
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
---
title: Run a llama.cpp chatbot powered by Arm Kleidi technology
overview: Running a chatbot can be expensive at scale, and surprising performance can be achieved with quantized (4 or 8 bit) small LLMs (~8 billion parameters) on Arm Neoverse CPUs. CPUs are generally more accessible and lower cost than traditional GPU solutions. Chat with the Llama-3.1-8b LLM here to see the performance for yourself. This demo is running on AWS Graviton 3, on C7g.16xlarge instances.
overview: |
This Arm Kleidi learning path shows how to use a single AWS Graviton instance -- powered by an Arm Neoverse CPU -- to build a simple “Token as a Service” server, used below to provide a chat-bot to serve a small number of concurrent users.
This architecture would be suitable for businesses looking to deploy the latest Generative AI technologies using their existing CPU compute capacity and deployment pipelines. The demo uses the open source llama.cpp framework, which Arm has enhanced by contributing the latest Arm Kleidi Technologies. Further optimizations are achieved by using the smaller 8 billion parameter Llama 3.1 model, which has been quantized to optimize memory usage.
Chat with the Llama-3.1-8B LLM below to see the performance for yourself, then follow the learning path to build your own Generative AI service on Arm Neoverse.
demo_steps:
- Type & send a message to the chatbot.
- Receive the chatbot's reply.
- View stats showing how well AWS Graviton runs LLMs.

diagram: config-diagram-dark.png
Expand Down Expand Up @@ -65,7 +71,7 @@ tps_ranges:
### FIXED, DO NOT MODIFY
# ================================================================================
demo_template_name: llm_chatbot_first_demo # allows the 'demo.html' partial to route to the correct Configuration and Demo/Stats sub partials for page render.
weight: 1 # _index.md always has weight of 1 to order correctly
weight: 2 # _index.md always has weight of 1 to order correctly
layout: "learningpathall" # All files under learning paths have this same wrapper
learning_path_main_page: "yes" # This should be surfaced when looking for related content. Only set for _index.md of learning path content.
---
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: Run a Large Language model (LLM) chatbot on Arm servers
weight: 2
weight: 3

### FIXED, DO NOT MODIFY
layout: learningpathall
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: Access the chatbot using the OpenAI-compatible API
weight: 3
weight: 4

### FIXED, DO NOT MODIFY
layout: learningpathall
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@
const chatbot_response_div = chatbot_messages[0];
const chatbot_message_span = chatbot_response_div.querySelector('span');

chatbot_message_span.textContent += messageData.replace(/</g, "").replace(/>/g, "");
chatbot_message_span.textContent += messageData;

}

Expand Down Expand Up @@ -379,7 +379,22 @@
}
}

// Auto-hide the notification popup once the chat textarea grows past its one-line height.
function hidePopupOnInputHeightIncrease() {
    const popup = document.getElementById('notification-popup');
    const inputBox = document.getElementById('user-input-for-demo');
    // 44px is the default single-line height of the textarea; anything taller means wrapped input.
    const singleLineMaxHeight = 44;
    if (inputBox.scrollHeight > singleLineMaxHeight) {
        popup.classList.remove('show-popup');
        popup.classList.add('hide-popup');
    }
}

// Grow or shrink the chat textarea to fit its content, then hide the popup if it now spans multiple lines.
function adjustInputHeight() {
    const inputBox = document.getElementById('user-input-for-demo');
    inputBox.style.height = 'auto'; // collapse first so scrollHeight reflects the current content
    const targetHeight = Math.max(inputBox.scrollHeight, inputBox.clientHeight);
    inputBox.style.height = `${targetHeight}px`;
    hidePopupOnInputHeightIncrease(); // Auto-hide popup if height is above default (multiple lines)
}



Expand Down Expand Up @@ -410,6 +425,7 @@
else {
// Clear message box & toggle button
text_area.value = '';
adjustInputHeight();
toggleButton();
text_area.focus();

Expand All @@ -426,8 +442,9 @@

// User enters any text in textinput
text_area.addEventListener('input', function() {
this.style.height = 'auto';
this.style.height = (this.scrollHeight > this.clientHeight ? this.scrollHeight : this.clientHeight) + 'px';
//this.style.height = 'auto';
//this.style.height = (this.scrollHeight > this.clientHeight ? this.scrollHeight : this.clientHeight) + 'px';
adjustInputHeight();
toggleButton(); // enable input button
});
// User hits enter in textinput
Expand Down Expand Up @@ -491,17 +508,17 @@

// Update ping & popup status
if (data.cluster_utilization == 'normal') {
showPopupPostConnection('Connected to Graviton-based LLM! Start chatting now.',"success");
showPopupPostConnection('Connected to Arm Neoverse V2 based LLM! Start chatting now.',"success");
//ping_txt.textContent = `Ping: ${ping}`
traffic_txt.textContent = 'Server traffic: Low'
}
else if (data.cluster_utilization == 'high') {
showPopupPostConnection('Connected to Graviton-based LLM! Traffic is high, delays may occur. Start chatting now.',"success");
showPopupPostConnection('Connected to Arm Neoverse V2 based LLM! Traffic is high, delays may occur. Start chatting now.',"success");
//ping_txt.textContent = `Ping: ${ping}`
traffic_txt.textContent = `Server traffic: High`
}
else if (data.cluster_utilization == 'at-limit') {
showPopupPostConnection('Connected to Graviton-based LLM! Traffic is high, delays may occur. Start chatting now.',"warning");
showPopupPostConnection('Connected to Arm Neoverse V2 based LLM! Traffic is high, delays may occur. Start chatting now.',"warning");
//ping_txt.textContent = `Ping: ${ping}`
traffic_txt.textContent = `Server traffic: High`
}
Expand All @@ -513,7 +530,7 @@
entry_div.style.opacity = '0.75';
entry_div.setAttribute('placeholder','No connection');

showPopupPostConnection('Unable to connect. The problem may be with us; try refreshing the page.',"error");
showPopupPostConnection('Unable to connect to the server; try refreshing the page or returning later.',"error");
});
}

Expand Down Expand Up @@ -571,7 +588,7 @@

if (return_json.message_type == 'initial_response') {
if (return_json.value == 'NO_RESOURCES_AVAILABLE') {
showPopupPostConnection("No open resources - try sending a new message.","warning");
showPopupPostConnection("I'm sorry, too many people are trying to access me at once! Try again a bit later.","warning");
}
else if (return_json.value == 'STREAM_STARTING') {
hidePopup();
Expand All @@ -584,7 +601,7 @@
updateLLMMetrics(return_json.value);
}
else {
showPopupPostConnection("Unexpected server response - try a new message.","error");
showPopupPostConnection("Unexpected server response - try a new message or return later.","error");
}
}
}
Expand All @@ -594,7 +611,7 @@
return readStream(); // Read the stream
} else {
console.error('Error sending message to the server');
showPopupPostConnection("Problem sending message - try a new message.","error");
showPopupPostConnection("Problem sending message - try sending a new message.","error");
}
})
.catch(error => {
Expand All @@ -612,7 +629,7 @@
}
else {
console.error('An unexpected error occurred:', error);
showPopupPostConnection('Unknown error - try sending a new message.',"error");
showPopupPostConnection('Server error - try sending a new message.',"error");
}
});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

<div class="content-box">
<h2>Overview</h2>
<p class="u-margin-left-1">{{.Params.overview}}</p>
<p class="u-margin-left-1">{{.Params.overview | markdownify }}</p>

<div class="c-row u-gap-1/2 lg:u-flex-nowrap">
<!-- Left - Demo steps -->
Expand Down

0 comments on commit beaca96

Please sign in to comment.