Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor markup rendering to accept general "protocol:" prefix #29276

Merged
merged 5 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions modules/markup/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,38 +53,38 @@ var (
// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)

// anySHA1Pattern splits url containing SHA into parts
// anyHashPattern splits url containing SHA into parts
anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`)

// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)

validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
// fullURLPattern matches full URL like "mailto:...", "https://..." and "ssh+git://..."
fullURLPattern = regexp.MustCompile(`^[a-z][-+\w]+:`)

// While this email regex is definitely not perfect and I'm sure you can come up
// with edge cases, it is still accepted by the CommonMark specification, as
// well as the HTML5 spec:
// emailRegex is definitely not perfect with edge cases,
// it is still accepted by the CommonMark specification, as well as the HTML5 spec:
// http://spec.commonmark.org/0.28/#email-address
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))")

// blackfriday extensions create IDs like fn:user-content-footnote
// blackfridayExtRegex is for blackfriday extensions create IDs like fn:user-content-footnote
blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)

// EmojiShortCodeRegex find emoji by alias like :smile:
EmojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
// emojiShortCodeRegex find emoji by alias like :smile:
emojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
)

// CSS class for action keywords (e.g. "closes: #1")
const keywordClass = "issue-keyword"

// IsLink reports whether link fits valid format.
func IsLink(link []byte) bool {
return validLinksPattern.Match(link)
// IsFullURLBytes reports whether link fits valid format.
func IsFullURLBytes(link []byte) bool {
return fullURLPattern.Match(link)
}

func IsLinkStr(link string) bool {
return validLinksPattern.MatchString(link)
func IsFullURLString(link string) bool {
return fullURLPattern.MatchString(link)
}

// regexp for full links to issues/pulls
Expand Down Expand Up @@ -399,7 +399,7 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) {
if attr.Key != "src" {
continue
}
if len(attr.Val) > 0 && !IsLinkStr(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") {
if len(attr.Val) > 0 && !IsFullURLString(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") {
attr.Val = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), attr.Val)
}
attr.Val = camoHandleLink(attr.Val)
Expand Down Expand Up @@ -650,7 +650,7 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
// There is no equal in this argument; this is a mandatory arg
if props["name"] == "" {
if IsLinkStr(v) {
if IsFullURLString(v) {
// If we clearly see it is a link, we save it so

// But first we need to ensure, that if both mandatory args provided
Expand Down Expand Up @@ -725,7 +725,7 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
DataAtom: atom.A,
}
childNode.Parent = linkNode
absoluteLink := IsLinkStr(link)
absoluteLink := IsFullURLString(link)
if !absoluteLink {
if image {
link = strings.ReplaceAll(link, " ", "+")
Expand Down Expand Up @@ -1059,7 +1059,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
start := 0
next := node.NextSibling
for node != nil && node != next && start < len(node.Data) {
m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
m := emojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
if m == nil {
return
}
Expand Down
15 changes: 15 additions & 0 deletions modules/markup/html_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,15 @@ func TestRender_links(t *testing.T) {
test(
"magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download",
`<p><a href="magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&amp;dn=download" rel="nofollow">magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&amp;dn=download</a></p>`)
test(
`[link](https://example.com)`,
`<p><a href="https://example.com" rel="nofollow">link</a></p>`)
test(
`[link](mailto:test@example.com)`,
`<p><a href="mailto:test@example.com" rel="nofollow">link</a></p>`)
test(
`[link](javascript:xss)`,
`<p>link</p>`)

// Test that should *not* be turned into URL
test(
Expand Down Expand Up @@ -673,3 +682,9 @@ func TestIssue18471(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, "<a href=\"http://domain/org/repo/compare/783b039...da951ce\" class=\"compare\"><code class=\"nohighlight\">783b039...da951ce</code></a>", res.String())
}

func TestIsFullURL(t *testing.T) {
assert.True(t, markup.IsFullURLString("https://example.com"))
assert.True(t, markup.IsFullURLString("mailto:test@example.com"))
assert.False(t, markup.IsFullURLString("/foo:bar"))
}
18 changes: 5 additions & 13 deletions modules/markup/markdown/goldmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ import (
"github.com/yuin/goldmark/util"
)

var byteMailto = []byte("mailto:")

// ASTTransformer is a default transformer of the goldmark tree.
type ASTTransformer struct{}

Expand Down Expand Up @@ -84,7 +82,7 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
// 2. If they're not wrapped with a link they need a link wrapper

// Check if the destination is a real link
if len(v.Destination) > 0 && !markup.IsLink(v.Destination) {
if len(v.Destination) > 0 && !markup.IsFullURLBytes(v.Destination) {
v.Destination = []byte(giteautil.URLJoin(
ctx.Links.ResolveMediaLink(ctx.IsWiki),
strings.TrimLeft(string(v.Destination), "/"),
Expand Down Expand Up @@ -130,23 +128,17 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
case *ast.Link:
// Links need their href to munged to be a real value
link := v.Destination
if len(link) > 0 && !markup.IsLink(link) &&
link[0] != '#' && !bytes.HasPrefix(link, byteMailto) {
// special case: this is not a link, a hash link or a mailto:, so it's a
// relative URL

var base string
isAnchorFragment := len(link) > 0 && link[0] == '#'
if !isAnchorFragment && !markup.IsFullURLBytes(link) {
base := ctx.Links.Base
if ctx.IsWiki {
base = ctx.Links.WikiLink()
} else if ctx.Links.HasBranchInfo() {
base = ctx.Links.SrcLink()
} else {
base = ctx.Links.Base
}

link = []byte(giteautil.URLJoin(base, string(link)))
}
if len(link) > 0 && link[0] == '#' {
if isAnchorFragment {
link = []byte("#user-content-" + string(link)[1:])
}
v.Destination = link
Expand Down
3 changes: 1 addition & 2 deletions modules/markup/orgmode/orgmode.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,7 @@ type Writer struct {
func (r *Writer) resolveLink(kind, link string) string {
link = strings.TrimPrefix(link, "file:")
if !strings.HasPrefix(link, "#") && // not a URL fragment
!markup.IsLinkStr(link) && // not an absolute URL
!strings.HasPrefix(link, "mailto:") {
!markup.IsFullURLString(link) {
if kind == "regular" {
// orgmode reports the link kind as "regular" for "[[ImageLink.svg][The Image Desc]]"
// so we need to try to guess the link kind again here
Expand Down
Loading