Skip to content

Commit

Permalink
fix update segment keyword with same content (#12908)
Browse files Browse the repository at this point in the history
  • Loading branch information
JohnJyong authored Jan 21, 2025
1 parent 3d1ce4c commit 162a8c4
Showing 1 changed file with 13 additions and 8 deletions.
21 changes: 13 additions & 8 deletions api/services/dataset_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import random
import time
import uuid
+ from collections import Counter
from typing import Any, Optional

from flask_login import current_user # type: ignore
Expand Down Expand Up @@ -1610,8 +1611,11 @@ def update_segment(cls, args: SegmentUpdateArgs, segment: DocumentSegment, docum
segment.answer = args.answer
segment.word_count += len(args.answer) if args.answer else 0
word_count_change = segment.word_count - word_count_change
+ keyword_changed = False
  if args.keywords:
-     segment.keywords = args.keywords
+     if Counter(segment.keywords) != Counter(args.keywords):
+         segment.keywords = args.keywords
+         keyword_changed = True
segment.enabled = True
segment.disabled_at = None
segment.disabled_by = None
Expand All @@ -1622,13 +1626,6 @@ def update_segment(cls, args: SegmentUpdateArgs, segment: DocumentSegment, docum
document.word_count = max(0, document.word_count + word_count_change)
db.session.add(document)
# update segment index task
- if args.enabled:
-     VectorService.create_segments_vector(
-         [args.keywords] if args.keywords else None,
-         [segment],
-         dataset,
-         document.doc_form,
-     )
if document.doc_form == IndexType.PARENT_CHILD_INDEX and args.regenerate_child_chunks:
# regenerate child chunks
# get embedding model instance
Expand Down Expand Up @@ -1661,6 +1658,14 @@ def update_segment(cls, args: SegmentUpdateArgs, segment: DocumentSegment, docum
VectorService.generate_child_chunks(
segment, document, dataset, embedding_model_instance, processing_rule, True
)
+ elif document.doc_form in (IndexType.PARAGRAPH_INDEX, IndexType.QA_INDEX):
+     if args.enabled or keyword_changed:
+         VectorService.create_segments_vector(
+             [args.keywords] if args.keywords else None,
+             [segment],
+             dataset,
+             document.doc_form,
+         )
else:
segment_hash = helper.generate_text_hash(content)
tokens = 0
Expand Down

0 comments on commit 162a8c4

Please sign in to comment.