Skip to content

Commit

Permalink
LLM performance metric tracking (#2825)
Browse files Browse the repository at this point in the history
* WIP performance metric tracking

* fix: patch UI trying to .toFixed() null metric
Anthropic tracking migration
cleanup logs

* Apipie implementation, not tested

* Cleanup Anthropic notes, Add support for AzureOpenAI tracking

* bedrock token metric tracking

* Cohere support

* feat: improve default stream handler to track usage for providers that are actually OpenAI-compliant in usage reporting
add deepseek support

* feat: Add FireworksAI tracking reporting
fix: improve handler when usage:null is reported (why?)

* Add token reporting for GenericOpenAI

* token reporting for koboldcpp + lmstudio

* lint

* support Groq token tracking

* HF token tracking

* token tracking for togetherai

* LiteLLM token tracking

* linting + Mistral token tracking support

* XAI token metric reporting

* native provider runner

* LocalAI token tracking

* Novita token tracking

* OpenRouter token tracking

* Apipie stream metrics

* textgenwebui token tracking

* perplexity token reporting

* ollama token reporting

* lint

* put back comment

* Rip out LC ollama wrapper and use official library

* patch images with new ollama lib

* improve ollama offline message

* fix image handling in ollama llm provider

* lint

* NVIDIA NIM token tracking

* update openai compatibility responses

* UI/UX show/hide metrics on click for user preference

* update bedrock client

---------

Co-authored-by: shatfield4 <seanhatfield5@gmail.com>
  • Loading branch information
timothycarambat and shatfield4 authored Dec 16, 2024
1 parent 15abc3f commit dd7c467
Show file tree
Hide file tree
Showing 42 changed files with 1,770 additions and 566 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import { numberWithCommas } from "@/utils/numbers";
import React, { useEffect, useState, useContext } from "react";
// Context shared by MetricsProvider (which supplies the value) and RenderMetrics (which reads it).
const MetricsContext = React.createContext();
// localStorage key holding the "show metrics automatically" preference; the string "true" means enabled.
const SHOW_METRICS_KEY = "anythingllm_show_chat_metrics";
// Name of the CustomEvent dispatched on `window` whenever that preference is toggled.
const SHOW_METRICS_EVENT = "anythingllm_show_metrics_change";

/**
 * Format a response duration for display.
 * Values below one second are shown as whole milliseconds (e.g. `250ms`);
 * everything else is shown as seconds with three decimals (e.g. `1.500s`).
 * @param {number} duration - duration in seconds
 * @returns {string} formatted duration, or an empty string when `duration` is not a finite number
 */
function formatDuration(duration) {
  // Guard up front: NaN would otherwise render as "NaNs" and null as "0ms".
  if (typeof duration !== "number" || !Number.isFinite(duration)) return "";
  return duration < 1
    ? `${(duration * 1000).toFixed(0)}ms`
    : `${duration.toFixed(3)}s`;
}

/**
 * Turn an output tokens-per-second figure into display text.
 * Below 1000 the value keeps two decimals; otherwise it is rounded to a whole
 * number and passed through `numberWithCommas`.
 * @param {number} outputTps - output tokens per second
 * @returns {string} formatted value, or an empty string if formatting throws
 */
function formatTps(outputTps) {
  try {
    if (outputTps < 1000) return outputTps.toFixed(2);
    const wholeTps = outputTps.toFixed(0);
    return numberWithCommas(wholeTps);
  } catch {
    return "";
  }
}

/**
 * Read the "show metrics automatically" preference from localStorage
 * (key `anythingllm_show_chat_metrics`).
 * @returns {boolean} true only when the stored value is exactly the string "true"
 */
function getAutoShowMetrics() {
  const storedPreference = window?.localStorage?.getItem(SHOW_METRICS_KEY);
  return storedPreference === "true";
}

/**
 * Flip the "show metrics automatically" preference.
 * Writes the new value to localStorage under the `anythingllm_show_chat_metrics` key and
 * dispatches an `anythingllm_show_metrics_change` CustomEvent on `window` whose
 * `detail.showMetricsAutomatically` carries the new value.
 * @returns {boolean} the new value of the preference
 */
function toggleAutoShowMetrics() {
  const nextValue = !getAutoShowMetrics();
  // Storage holds strings only; serialize explicitly rather than relying on implicit coercion.
  window?.localStorage?.setItem(SHOW_METRICS_KEY, String(nextValue));
  window.dispatchEvent(
    new CustomEvent(SHOW_METRICS_EVENT, {
      detail: { showMetricsAutomatically: nextValue },
    })
  );
  return nextValue;
}

/**
* Provider for the metrics context that controls the visibility of the metrics
* per-chat based on the user's preference.
* @param {React.ReactNode} children
* @returns {React.ReactNode}
*/
export function MetricsProvider({ children }) {
const [showMetricsAutomatically, setShowMetricsAutomatically] =
useState(getAutoShowMetrics());

useEffect(() => {
function handleShowingMetricsEvent(e) {
if (!e?.detail?.hasOwnProperty("showMetricsAutomatically")) return;
setShowMetricsAutomatically(e.detail.showMetricsAutomatically);
}
console.log("Adding event listener for metrics visibility");
window.addEventListener(SHOW_METRICS_EVENT, handleShowingMetricsEvent);
return () =>
window.removeEventListener(SHOW_METRICS_EVENT, handleShowingMetricsEvent);
}, []);

return (
<MetricsContext.Provider
value={{ showMetricsAutomatically, setShowMetricsAutomatically }}
>
{children}
</MetricsContext.Provider>
);
}

/**
* Render the metrics for a given chat, if available
* @param {metrics: {duration:number, outputTps: number}} props
* @returns
*/
export default function RenderMetrics({ metrics = {} }) {
// Inherit the showMetricsAutomatically state from the MetricsProvider so the state is shared across all chats
const { showMetricsAutomatically, setShowMetricsAutomatically } =
useContext(MetricsContext);
if (!metrics?.duration || !metrics?.outputTps) return null;

return (
<button
type="button"
onClick={() => setShowMetricsAutomatically(toggleAutoShowMetrics())}
data-tooltip-id="metrics-visibility"
data-tooltip-content={
showMetricsAutomatically
? "Click to only show metrics when hovering"
: "Click to show metrics as soon as they are available"
}
className={`border-none flex justify-end items-center gap-x-[8px] ${showMetricsAutomatically ? "opacity-100" : "opacity-0"} md:group-hover:opacity-100 transition-all duration-300`}
>
<p className="cursor-pointer text-xs font-mono text-theme-text-secondary opacity-50">
{formatDuration(metrics.duration)} ({formatTps(metrics.outputTps)}{" "}
tok/s)
</p>
</button>
);
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import useCopyText from "@/hooks/useCopyText";
import { Check, ThumbsUp, ArrowsClockwise, Copy } from "@phosphor-icons/react";
import Workspace from "@/models/workspace";
import { EditMessageAction } from "./EditMessage";
import RenderMetrics from "./RenderMetrics";
import ActionMenu from "./ActionMenu";

const Actions = ({
Expand All @@ -15,6 +16,7 @@ const Actions = ({
forkThread,
isEditing,
role,
metrics = {},
}) => {
const [selectedFeedback, setSelectedFeedback] = useState(feedbackScore);
const handleFeedback = async (newFeedback) => {
Expand Down Expand Up @@ -58,6 +60,7 @@ const Actions = ({
/>
</div>
</div>
<RenderMetrics metrics={metrics} />
</div>
);
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const HistoricalMessage = ({
regenerateMessage,
saveEditedMessage,
forkThread,
metrics = {},
}) => {
const { isEditing } = useEditMessage({ chatId, role });
const { isDeleted, completeDelete, onEndAnimation } = useWatchDeleteMessage({
Expand Down Expand Up @@ -117,6 +118,7 @@ const HistoricalMessage = ({
isEditing={isEditing}
role={role}
forkThread={forkThread}
metrics={metrics}
/>
</div>
{role === "assistant" && <Citations sources={sources} />}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ export default function ChatHistory({
isLastMessage={isLastBotReply}
saveEditedMessage={saveEditedMessage}
forkThread={forkThread}
metrics={props.metrics}
/>
);
})}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ export function ChatTooltips() {
// as the citation modal is z-indexed above the chat history
className="tooltip !text-xs z-[100]"
/>
<Tooltip
id="metrics-visibility"
place="bottom"
delayShow={300}
className="tooltip !text-xs"
/>
</>
);
}
19 changes: 11 additions & 8 deletions frontend/src/components/WorkspaceChat/ChatContainer/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import SpeechRecognition, {
useSpeechRecognition,
} from "react-speech-recognition";
import { ChatTooltips } from "./ChatTooltips";
import { MetricsProvider } from "./ChatHistory/HistoricalMessage/Actions/RenderMetrics";

export default function ChatContainer({ workspace, knownHistory = [] }) {
const { threadSlug = null } = useParams();
Expand Down Expand Up @@ -268,14 +269,16 @@ export default function ChatContainer({ workspace, knownHistory = [] }) {
>
{isMobile && <SidebarMobileHeader />}
<DnDFileUploaderWrapper>
<ChatHistory
history={chatHistory}
workspace={workspace}
sendCommand={sendCommand}
updateHistory={setChatHistory}
regenerateAssistantMessage={regenerateAssistantMessage}
hasAttachments={files.length > 0}
/>
<MetricsProvider>
<ChatHistory
history={chatHistory}
workspace={workspace}
sendCommand={sendCommand}
updateHistory={setChatHistory}
regenerateAssistantMessage={regenerateAssistantMessage}
hasAttachments={files.length > 0}
/>
</MetricsProvider>
<PromptInput
submit={handleSubmit}
onChange={handleMessageChange}
Expand Down
49 changes: 38 additions & 11 deletions frontend/src/utils/chat/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export default function handleChat(
close,
chatId = null,
action = null,
metrics = {},
} = chatResult;

if (type === "abort" || type === "statusResponse") {
Expand All @@ -35,6 +36,7 @@ export default function handleChat(
error,
animate: false,
pending: false,
metrics,
},
]);
_chatHistory.push({
Expand All @@ -47,6 +49,7 @@ export default function handleChat(
error,
animate: false,
pending: false,
metrics,
});
} else if (type === "textResponse") {
setLoadingResponse(false);
Expand All @@ -62,6 +65,7 @@ export default function handleChat(
animate: !close,
pending: false,
chatId,
metrics,
},
]);
_chatHistory.push({
Expand All @@ -74,21 +78,42 @@ export default function handleChat(
animate: !close,
pending: false,
chatId,
metrics,
});
} else if (type === "textResponseChunk") {
} else if (
type === "textResponseChunk" ||
type === "finalizeResponseStream"
) {
const chatIdx = _chatHistory.findIndex((chat) => chat.uuid === uuid);
if (chatIdx !== -1) {
const existingHistory = { ..._chatHistory[chatIdx] };
const updatedHistory = {
...existingHistory,
content: existingHistory.content + textResponse,
sources,
error,
closed: close,
animate: !close,
pending: false,
chatId,
};
let updatedHistory;

// If the response is finalized, we can set the loading state to false.
// and append the metrics to the history.
if (type === "finalizeResponseStream") {
updatedHistory = {
...existingHistory,
closed: close,
animate: !close,
pending: false,
chatId,
metrics,
};
setLoadingResponse(false);
} else {
updatedHistory = {
...existingHistory,
content: existingHistory.content + textResponse,
sources,
error,
closed: close,
animate: !close,
pending: false,
chatId,
metrics,
};
}
_chatHistory[chatIdx] = updatedHistory;
} else {
_chatHistory.push({
Expand All @@ -101,6 +126,7 @@ export default function handleChat(
animate: !close,
pending: false,
chatId,
metrics,
});
}
setChatHistory([..._chatHistory]);
Expand All @@ -125,6 +151,7 @@ export default function handleChat(
error: null,
animate: false,
pending: false,
metrics,
};
_chatHistory[chatIdx] = updatedHistory;

Expand Down
4 changes: 4 additions & 0 deletions server/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# PERPLEXITY_API_KEY='my-perplexity-key'
# PERPLEXITY_MODEL_PREF='codellama-34b-instruct'

# LLM_PROVIDER='deepseek'
# DEEPSEEK_API_KEY=YOUR_API_KEY
# DEEPSEEK_MODEL_PREF='deepseek-chat'

# LLM_PROVIDER='openrouter'
# OPENROUTER_API_KEY='my-openrouter-key'
# OPENROUTER_MODEL_PREF='openrouter/auto'
Expand Down
48 changes: 20 additions & 28 deletions server/endpoints/api/admin/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -610,24 +610,20 @@ function apiAdminEndpoints(app) {
const workspaceUsers = await Workspace.workspaceUsers(workspace.id);

if (!workspace) {
response
.status(404)
.json({
success: false,
error: `Workspace ${workspaceSlug} not found`,
users: workspaceUsers,
});
response.status(404).json({
success: false,
error: `Workspace ${workspaceSlug} not found`,
users: workspaceUsers,
});
return;
}

if (userIds.length === 0) {
response
.status(404)
.json({
success: false,
error: `No valid user IDs provided.`,
users: workspaceUsers,
});
response.status(404).json({
success: false,
error: `No valid user IDs provided.`,
users: workspaceUsers,
});
return;
}

Expand All @@ -637,13 +633,11 @@ function apiAdminEndpoints(app) {
workspace.id,
userIds
);
return response
.status(200)
.json({
success,
error,
users: await Workspace.workspaceUsers(workspace.id),
});
return response.status(200).json({
success,
error,
users: await Workspace.workspaceUsers(workspace.id),
});
}

// Add new users to the workspace if they are not already in the workspace
Expand All @@ -653,13 +647,11 @@ function apiAdminEndpoints(app) {
);
if (usersToAdd.length > 0)
await WorkspaceUser.createManyUsers(usersToAdd, workspace.id);
response
.status(200)
.json({
success: true,
error: null,
users: await Workspace.workspaceUsers(workspace.id),
});
response.status(200).json({
success: true,
error: null,
users: await Workspace.workspaceUsers(workspace.id),
});
} catch (e) {
console.error(e);
response.sendStatus(500).end();
Expand Down
Loading

0 comments on commit dd7c467

Please sign in to comment.