From c813902320e9fbd89da6224d21eaa89ba2e46f24 Mon Sep 17 00:00:00 2001 From: "Loibl Johannes (IFAG DES PTS TI EA DE)" Date: Wed, 17 May 2023 13:46:39 +0200 Subject: [PATCH 1/3] Replace /content? endpoint by /content/scan? Background: /content GET endpoint was blocked by our IT because it was causing a bug. Workaround: Use the /content/scan endpoint for searching pages. Since /scan does not support filtering for a page title, all pages have to be searched with pagination (sub-optimal, but our only option). See https://jira.atlassian.com/browse/CONFSERVER-57639 --- sphinxcontrib/confluencebuilder/publisher.py | 46 ++++++++++++++++--- .../confluencebuilder/storage/translator.py | 7 +++ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/sphinxcontrib/confluencebuilder/publisher.py b/sphinxcontrib/confluencebuilder/publisher.py index 160a130f..0e09c377 100644 --- a/sphinxcontrib/confluencebuilder/publisher.py +++ b/sphinxcontrib/confluencebuilder/publisher.py @@ -550,13 +550,9 @@ def get_page(self, page_name, expand='version', status='current'): 'title': page_name, }) else: - rsp = self.rest.get(f'{self.APIV1}content', { - 'type': 'page', - 'spaceKey': self.space_key, - 'title': page_name, - 'status': status, - 'expand': expand, - }) + # Workaround for https://jira.atlassian.com/browse/CONFSERVER-57639: + # Hitting the base Content API endpoint can cause performance problem for large instances + return self.get_page_by_page_name(page_name=page_name, expand=expand, status=status) if rsp['results']: page = rsp['results'][0] @@ -639,6 +635,42 @@ def get_page_by_id(self, page_id, expand='version'): return page_id, page + def get_page_by_page_name(self, page_name, expand='version', status='current', page_size=3, cursor=None): + """ + Workaround for https://jira.atlassian.com/browse/CONFSERVER-57639: + Hitting the base Content API endpoint can cause performance problem for large instances + + For some companies the GET /content endpoint is blocked by IT, so we 
need to use /content/scan. + But scan does not support filtering by title, so we have to iterate through all the pages (with paging) + and find the page brute force. + """ + params = { + 'type': 'page', + 'spaceKey': self.space_key, + 'status': status, + 'expand': expand, + 'limit': page_size, + } + if cursor: + params["cursor"] = cursor + + if self.api_mode == 'v2': + raise NotImplementedError("get_page_by_page_name is not supported in v2 API") + rsp = self.rest.get(f'{self.APIV1}content/scan', params) + + if rsp['size'] != 0: + for page in rsp["results"]: + page_id = page['id'] + self._name_cache[page_id] = page["title"] + if page["title"] == page_name: + return page_id, page + + if "nextCursor" in rsp: + return self.get_page_by_page_name( + page_name=page_name, expand=expand, status=status, page_size=page_size, cursor=rsp["nextCursor"] + ) + return None, None + def get_page_case_insensitive(self, page_name): """ get page information with the provided page name (case-insensitive) diff --git a/sphinxcontrib/confluencebuilder/storage/translator.py b/sphinxcontrib/confluencebuilder/storage/translator.py index ef00cdb9..e48e74a3 100644 --- a/sphinxcontrib/confluencebuilder/storage/translator.py +++ b/sphinxcontrib/confluencebuilder/storage/translator.py @@ -1279,6 +1279,13 @@ def _visit_reference_intern_id(self, node): # identifier value instead target = self.state.target(anchorname) if target: + # Johannes Loibl: If multiple anchors are generated for the same reference (e.g. when an explicit reference + # is placed directly before a heading), Sphinx will generate two anchors and increase the suffix counter, + # e.g. HEADING, HEADING.1, HEADING.2, etc. This leads to a wrong name for the final anchor link, + # since the heading can only be accessed via the root name (HEADING in this case). + # So we have to strip off the number suffix. + if "." 
in target: + target = target.split(".")[0] anchor_value = target anchor_value = self.encode(anchor_value) else: From acf6ce97a3dad8349ac77ecad0991e5cecef952d Mon Sep 17 00:00:00 2001 From: Johannes Loibl Date: Tue, 4 Jun 2024 10:29:08 +0200 Subject: [PATCH 2/3] Fix style --- sphinxcontrib/confluencebuilder/publisher.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sphinxcontrib/confluencebuilder/publisher.py b/sphinxcontrib/confluencebuilder/publisher.py index 0e09c377..675e8cea 100644 --- a/sphinxcontrib/confluencebuilder/publisher.py +++ b/sphinxcontrib/confluencebuilder/publisher.py @@ -655,7 +655,8 @@ def get_page_by_page_name(self, page_name, expand='version', status='current', p params["cursor"] = cursor if self.api_mode == 'v2': - raise NotImplementedError("get_page_by_page_name is not supported in v2 API") + msg = "get_page_by_page_name is not supported in v2 API" + raise NotImplementedError(msg) rsp = self.rest.get(f'{self.APIV1}content/scan', params) if rsp['size'] != 0: @@ -667,7 +668,7 @@ def get_page_by_page_name(self, page_name, expand='version', status='current', p if "nextCursor" in rsp: return self.get_page_by_page_name( - page_name=page_name, expand=expand, status=status, page_size=page_size, cursor=rsp["nextCursor"] + page_name=page_name, expand=expand, status=status, page_size=page_size, cursor=rsp["nextCursor"], ) return None, None From c969b386b47a67fa0a8b1e5425cf4d64bc8facbb Mon Sep 17 00:00:00 2001 From: Johannes Loibl Date: Thu, 13 Jun 2024 12:43:32 +0200 Subject: [PATCH 3/3] Replace /content/scan by /content/search --- sphinxcontrib/confluencebuilder/publisher.py | 44 +++----------------- 1 file changed, 6 insertions(+), 38 deletions(-) diff --git a/sphinxcontrib/confluencebuilder/publisher.py b/sphinxcontrib/confluencebuilder/publisher.py index 675e8cea..38f31cf8 100644 --- a/sphinxcontrib/confluencebuilder/publisher.py +++ b/sphinxcontrib/confluencebuilder/publisher.py @@ -552,7 +552,12 @@ def get_page(self, 
page_name, expand='version', status='current'): else: # Workaround for https://jira.atlassian.com/browse/CONFSERVER-57639: # Hitting the base Content API endpoint can cause performance problem for large instances - return self.get_page_by_page_name(page_name=page_name, expand=expand, status=status) + rsp = self.rest.get(f'{self.APIV1}content/search', { + "cql": f"title='{page_name}'", + "cqlcontext": json.dumps({"contentStatuses": [status], "spaceKey": self.space_key}), + "expand": ["version"], + "limit": 1 + }) if rsp['results']: page = rsp['results'][0] @@ -635,43 +640,6 @@ def get_page_by_id(self, page_id, expand='version'): return page_id, page - def get_page_by_page_name(self, page_name, expand='version', status='current', page_size=3, cursor=None): - """ - Workaround for https://jira.atlassian.com/browse/CONFSERVER-57639: - Hitting the base Content API endpoint can cause performance problem for large instances - - For some companies the GET /content endpoint is blocked by IT, so we need to use /content/scan. - But scan does not support filtering by title, so we have to iterate through all the pages (with paging) - and find the page brute force. 
- """ - params = { - 'type': 'page', - 'spaceKey': self.space_key, - 'status': status, - 'expand': expand, - 'limit': page_size, - } - if cursor: - params["cursor"] = cursor - - if self.api_mode == 'v2': - msg = "get_page_by_page_name is not supported in v2 API" - raise NotImplementedError(msg) - rsp = self.rest.get(f'{self.APIV1}content/scan', params) - - if rsp['size'] != 0: - for page in rsp["results"]: - page_id = page['id'] - self._name_cache[page_id] = page["title"] - if page["title"] == page_name: - return page_id, page - - if "nextCursor" in rsp: - return self.get_page_by_page_name( - page_name=page_name, expand=expand, status=status, page_size=page_size, cursor=rsp["nextCursor"], - ) - return None, None - def get_page_case_insensitive(self, page_name): """ get page information with the provided page name (case-insensitive)