diff --git a/api/core/workflow/nodes/llm/node.py b/api/core/workflow/nodes/llm/node.py index 6909b30c9e82ca..6a4f8c4e207bb2 100644 --- a/api/core/workflow/nodes/llm/node.py +++ b/api/core/workflow/nodes/llm/node.py @@ -185,6 +185,8 @@ def _run(self) -> Generator[NodeEvent | InNodeEvent, None, None]: result_text = event.text usage = event.usage finish_reason = event.finish_reason + # deduct quota + self.deduct_llm_quota(tenant_id=self.tenant_id, model_instance=model_instance, usage=usage) break except LLMNodeError as e: yield RunCompletedEvent( @@ -240,17 +242,7 @@ def _invoke_llm( user=self.user_id, ) - # handle invoke result - generator = self._handle_invoke_result(invoke_result=invoke_result) - - usage = LLMUsage.empty_usage() - for event in generator: - yield event - if isinstance(event, ModelInvokeCompletedEvent): - usage = event.usage - - # deduct quota - self.deduct_llm_quota(tenant_id=self.tenant_id, model_instance=model_instance, usage=usage) + return self._handle_invoke_result(invoke_result=invoke_result) def _handle_invoke_result(self, invoke_result: LLMResult | Generator) -> Generator[NodeEvent, None, None]: if isinstance(invoke_result, LLMResult):