diff --git a/api/db/db_models.py b/api/db/db_models.py index fbf0f3cde10..1ce8b51283d 100644 --- a/api/db/db_models.py +++ b/api/db/db_models.py @@ -840,7 +840,7 @@ class Task(DataBaseModel): doc_id = CharField(max_length=32, null=False, index=True) from_page = IntegerField(default=0) - to_page = IntegerField(default=-1) + to_page = IntegerField(default=100000000) begin_at = DateTimeField(null=True, index=True) process_duation = FloatField(default=0) diff --git a/deepdoc/parser/docx_parser.py b/deepdoc/parser/docx_parser.py index 57804109938..1c1c14d3041 100644 --- a/deepdoc/parser/docx_parser.py +++ b/deepdoc/parser/docx_parser.py @@ -110,7 +110,7 @@ def blockType(b): return lines return ["\n".join(lines)] - def __call__(self, fnm, from_page=0, to_page=100000): + def __call__(self, fnm, from_page=0, to_page=100000000): self.doc = Document(fnm) if isinstance( fnm, str) else Document(BytesIO(fnm)) pn = 0 # parsed page @@ -130,7 +130,7 @@ def __call__(self, fnm, from_page=0, to_page=100000): if 'lastRenderedPageBreak' in run._element.xml: pn += 1 - secs.append(("".join(runs_within_single_paragraph), p.style.name)) # then concat run.text as part of the paragraph + secs.append(("".join(runs_within_single_paragraph), p.style.name if hasattr(p.style, 'name') else '')) # then concat run.text as part of the paragraph tbls = [self.__extract_table_content(tb) for tb in self.doc.tables] return secs, tbls