Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sitemap support #18407

Merged
merged 14 commits into from
Jun 25, 2022
66 changes: 66 additions & 0 deletions modules/sitemap/sitemap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package sitemap

import (
"bytes"
"encoding/xml"
"fmt"
"io"
"time"
)

// Url represents a single sitemap entry
type URL struct {
URL string `xml:"loc"`
LastMod *time.Time `xml:"lastmod,omitempty"`
}

// SitemapUrl represents a sitemap
type Sitemap struct {
XMLName xml.Name
Namespace string `xml:"xmlns,attr"`

URLs []URL `xml:"url"`
}

// NewSitemap creates a sitemap
func NewSitemap() *Sitemap {
return &Sitemap{
XMLName: xml.Name{Local: "urlset"},
Namespace: "http://www.sitemaps.org/schemas/sitemap/0.9",
}
}

// NewSitemap creates a sitemap index.
func NewSitemapIndex() *Sitemap {
return &Sitemap{
XMLName: xml.Name{Local: "sitemapindex"},
Namespace: "http://www.sitemaps.org/schemas/sitemap/0.9",
}
}

// Add adds a URL to the sitemap
func (s *Sitemap) Add(u URL) {
s.URLs = append(s.URLs, u)
}

// Write writes the sitemap to a response
func (s *Sitemap) WriteTo(w io.Writer) (int64, error) {
if len(s.URLs) > 50000 {
return 0, fmt.Errorf("The sitemap contains too many URLs: %d", len(s.URLs))
}
buf := bytes.NewBufferString(xml.Header)
if err := xml.NewEncoder(buf).Encode(s); err != nil {
return 0, err
}
if err := buf.WriteByte('\n'); err != nil {
return 0, err
}
if buf.Len() > 50*1024*1024 {
danog marked this conversation as resolved.
Show resolved Hide resolved
return 0, fmt.Errorf("The sitemap is too big: %d", buf.Len())
}
return buf.WriteTo(w)
}
77 changes: 77 additions & 0 deletions modules/sitemap/sitemap_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package sitemap

import (
"bytes"
"encoding/xml"
"fmt"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
)

func TestOk(t *testing.T) {
testReal := func(s *Sitemap, name string, urls []URL, expected string) {
for _, url := range urls {
s.Add(url)
}
buf := &bytes.Buffer{}
_, err := s.WriteTo(buf)
assert.NoError(t, nil, err)
assert.Equal(t, xml.Header+"<"+name+" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"+expected+"</"+name+">\n", buf.String())
}
test := func(urls []URL, expected string) {
testReal(NewSitemap(), "urlset", urls, expected)
testReal(NewSitemapIndex(), "sitemapindex", urls, expected)
}

ts := time.Unix(1651322008, 0)

test(
[]URL{},
"",
)
test(
[]URL{
{URL: "https://gitea.io/test1", LastMod: &ts},
},
"<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T14:33:28+02:00</lastmod></url>",
)
test(
[]URL{
{URL: "https://gitea.io/test2", LastMod: nil},
},
"<url><loc>https://gitea.io/test2</loc></url>",
)
test(
[]URL{
{URL: "https://gitea.io/test1", LastMod: &ts},
{URL: "https://gitea.io/test2", LastMod: nil},
},
"<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T14:33:28+02:00</lastmod></url>"+
"<url><loc>https://gitea.io/test2</loc></url>",
)
}

func TestTooManyURLs(t *testing.T) {
s := NewSitemap()
for i := 0; i < 50001; i++ {
s.Add(URL{URL: fmt.Sprintf("https://gitea.io/test%d", i)})
}
buf := &bytes.Buffer{}
_, err := s.WriteTo(buf)
assert.EqualError(t, err, "The sitemap contains too many URLs: 50001")
}

func TestSitemapTooBig(t *testing.T) {
s := NewSitemap()
s.Add(URL{URL: strings.Repeat("b", 50*1024*1024)})
buf := &bytes.Buffer{}
_, err := s.WriteTo(buf)
assert.EqualError(t, err, "The sitemap is too big: 52428931")
}
22 changes: 21 additions & 1 deletion routers/web/explore/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ import (
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/sitemap"
)

const (
Expand All @@ -30,7 +32,13 @@ type RepoSearchOptions struct {

// RenderRepoSearch render repositories search page
func RenderRepoSearch(ctx *context.Context, opts *RepoSearchOptions) {
page := ctx.FormInt("page")
// Sitemap index for sitemap paths
page := int(ctx.ParamsInt64("idx"))
isSitemap := ctx.Params("idx") != ""
if page <= 1 {
page = ctx.FormInt("page")
}

if page <= 0 {
page = 1
}
Expand Down Expand Up @@ -100,6 +108,18 @@ func RenderRepoSearch(ctx *context.Context, opts *RepoSearchOptions) {
ctx.ServerError("SearchRepository", err)
return
}
if isSitemap {
m := sitemap.NewSitemap()
for _, item := range repos {
m.Add(sitemap.URL{URL: item.HTMLURL(), LastMod: item.UpdatedUnix.AsTimePtr()})
}
ctx.Resp.Header().Set("Content-Type", "text/xml")
if _, err := m.WriteTo(ctx.Resp); err != nil {
log.Error("Failed writing sitemap: %v", err)
}
return
}

ctx.Data["Keyword"] = keyword
ctx.Data["Total"] = count
ctx.Data["Repos"] = repos
Expand Down
21 changes: 20 additions & 1 deletion routers/web/explore/user.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ import (
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/sitemap"
"code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/util"
)
Expand All @@ -33,7 +35,12 @@ func isKeywordValid(keyword string) bool {

// RenderUserSearch render user search page
func RenderUserSearch(ctx *context.Context, opts *user_model.SearchUserOptions, tplName base.TplName) {
opts.Page = ctx.FormInt("page")
// Sitemap index for sitemap paths
opts.Page = int(ctx.ParamsInt64("idx"))
isSitemap := ctx.Params("idx") != ""
if opts.Page <= 1 {
opts.Page = ctx.FormInt("page")
}
if opts.Page <= 1 {
opts.Page = 1
}
Expand Down Expand Up @@ -73,6 +80,18 @@ func RenderUserSearch(ctx *context.Context, opts *user_model.SearchUserOptions,
return
}
}
if isSitemap {
m := sitemap.NewSitemap()
for _, item := range users {
m.Add(sitemap.URL{URL: item.HTMLURL(), LastMod: item.UpdatedUnix.AsTimePtr()})
}
ctx.Resp.Header().Set("Content-Type", "text/xml")
if _, err := m.WriteTo(ctx.Resp); err != nil {
log.Error("Failed writing sitemap: %v", err)
}
return
}

ctx.Data["Keyword"] = opts.Keyword
ctx.Data["Total"] = count
ctx.Data["Users"] = users
Expand Down
54 changes: 54 additions & 0 deletions routers/web/home.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,18 @@ package web

import (
"net/http"
"strconv"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/models/db"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/sitemap"
"code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/util"
"code.gitea.io/gitea/modules/web/middleware"
"code.gitea.io/gitea/routers/web/auth"
"code.gitea.io/gitea/routers/web/user"
Expand Down Expand Up @@ -59,6 +66,53 @@ func Home(ctx *context.Context) {
ctx.HTML(http.StatusOK, tplHome)
}

// HomeSitemap renders the main sitemap
func HomeSitemap(ctx *context.Context) {
m := sitemap.NewSitemapIndex()
if !setting.Service.Explore.DisableUsersPage {
_, cnt, err := user_model.SearchUsers(&user_model.SearchUserOptions{
Actor: nil,
danog marked this conversation as resolved.
Show resolved Hide resolved
Type: user_model.UserTypeIndividual,
ListOptions: db.ListOptions{PageSize: 1},
IsActive: util.OptionalBoolTrue,
Visible: []structs.VisibleType{structs.VisibleTypePublic},
})
if err != nil {
ctx.ServerError("SearchUsers", err)
return
}
count := int(cnt)
idx := 1
for i := 0; i < count; i += setting.UI.ExplorePagingNum {
m.Add(sitemap.URL{URL: setting.AppURL + "explore/users/sitemap-" + strconv.Itoa(idx) + ".xml"})
idx++
}
}

_, cnt, err := models.SearchRepository(&models.SearchRepoOptions{
danog marked this conversation as resolved.
Show resolved Hide resolved
ListOptions: db.ListOptions{
PageSize: 1,
},
Actor: ctx.Doer,
AllPublic: true,
})
if err != nil {
ctx.ServerError("SearchRepository", err)
return
}
count := int(cnt)
idx := 1
for i := 0; i < count; i += setting.UI.ExplorePagingNum {
danog marked this conversation as resolved.
Show resolved Hide resolved
m.Add(sitemap.URL{URL: setting.AppURL + "explore/repos/sitemap-" + strconv.Itoa(idx) + ".xml"})
idx++
}

ctx.Resp.Header().Set("Content-Type", "text/xml")
if _, err := m.WriteTo(ctx.Resp); err != nil {
log.Error("Failed writing sitemap: %v", err)
}
}

// NotFound render 404 page
func NotFound(ctx *context.Context) {
ctx.Data["Title"] = "Page Not Found"
Expand Down
3 changes: 3 additions & 0 deletions routers/web/web.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ func RegisterRoutes(m *web.Route) {
// Routers.
// for health check
m.Get("/", Home)
m.Get("/sitemap.xml", ignExploreSignIn, HomeSitemap)
m.Group("/.well-known", func() {
m.Get("/openid-configuration", auth.OIDCWellKnown)
m.Group("", func() {
Expand All @@ -310,7 +311,9 @@ func RegisterRoutes(m *web.Route) {
ctx.Redirect(setting.AppSubURL + "/explore/repos")
})
m.Get("/repos", explore.Repos)
m.Get("/repos/sitemap-{idx}.xml", explore.Repos)
m.Get("/users", explore.Users)
m.Get("/users/sitemap-{idx}.xml", explore.Users)
m.Get("/organizations", explore.Organizations)
m.Get("/code", explore.Code)
m.Get("/topics/search", explore.TopicSearch)
Expand Down