From 7a0f5b8aac840109d8500579fae5e49d12a46549 Mon Sep 17 00:00:00 2001 From: Youssef Victor Date: Fri, 18 Mar 2022 17:25:34 +0200 Subject: [PATCH] Handle anchors correctly --- .../TocSectionNumberBuilder.cs | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tools/StandardAnchorTags/TocSectionNumberBuilder.cs b/tools/StandardAnchorTags/TocSectionNumberBuilder.cs index 1767480e3..777311443 100644 --- a/tools/StandardAnchorTags/TocSectionNumberBuilder.cs +++ b/tools/StandardAnchorTags/TocSectionNumberBuilder.cs @@ -151,12 +151,7 @@ private SectionLink BuildSectionLink(SectionHeader header, string filename) string newSectionNumber = isAnnexes ? string.Join('.', headings.Take(header.level).Select((n, index) => (index == 0) ? ((char)(n + 64)).ToString() : n.ToString())) : string.Join('.', headings.Take(header.level).Select(n => n.ToString())); - string anchor = $"{newSectionNumber} {header.title}" - .Replace(' ', '-').Replace(".", "").Replace(",", "").Replace("`", "") - .Replace("/", "").Replace(":", "").Replace("?", "").Replace("&", "") - .Replace("|", "").Replace("!", "").Replace("\\<", "").Replace("\\>", "").Replace("\\#", "") - .Replace("…", "") - .ToLower(); + string anchor = UrilizeAsGfm($"{newSectionNumber} {header.title}"); // Top-level annex references (e.g. just to "Annex D") need a leading "annex-" as that's // in the title of the page. @@ -167,6 +162,21 @@ private SectionLink BuildSectionLink(SectionHeader header, string filename) return new SectionLink(header.sectionHeaderText, newSectionNumber, $"{filename}#{anchor}"); } + // Copy from https://github.com/xoofx/markdig/blob/0cfe6d7da48ea6621072eb50ade32141ea92bc35/src/Markdig/Helpers/LinkHelper.cs#L100-L113 + private static string UrilizeAsGfm(string headingText) + { + // Following https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/toc_filter.rb + var headingBuffer = new StringBuilder(); + for (int i = 0; i < headingText.Length; i++) + { + var c = headingText[i]; + if (char.IsLetterOrDigit(c) || c == ' ' || c == '-' || c == '_') + { + headingBuffer.Append(c == ' ' ? '-' : char.ToLowerInvariant(c)); + } + } + return headingBuffer.ToString(); + } // A line in the standard is either a paragraph of text or // a header. this method determines which and returns one