diff --git a/cmd/capcli/cli.go b/cmd/capcli/cli.go index 2eaf35ad4ab..872f5bed018 100644 --- a/cmd/capcli/cli.go +++ b/cmd/capcli/cli.go @@ -578,7 +578,7 @@ func (d *DownloadSnapshots) Run(ctx *Context) error { if err != nil { return err } - downlo, err := downloader.New(ctx, downloaderCfg, dirs, log.Root(), log.LvlInfo, true) + downlo, err := downloader.New(ctx, downloaderCfg, log.Root(), log.LvlInfo, true) if err != nil { return err } diff --git a/cmd/downloader/main.go b/cmd/downloader/main.go index 93b9415e4e2..a00e7b8b1dd 100644 --- a/cmd/downloader/main.go +++ b/cmd/downloader/main.go @@ -4,9 +4,12 @@ import ( "context" "errors" "fmt" + "io/fs" "net" + "net/url" "os" "path/filepath" + "sort" "strings" "time" @@ -109,9 +112,18 @@ func init() { rootCmd.AddCommand(torrentCat) rootCmd.AddCommand(torrentMagnet) + withDataDir(torrentClean) + rootCmd.AddCommand(torrentClean) + withDataDir(manifestCmd) + withChainFlag(manifestCmd) rootCmd.AddCommand(manifestCmd) + manifestVerifyCmd.Flags().StringVar(&webseeds, utils.WebSeedsFlag.Name, utils.WebSeedsFlag.Value, utils.WebSeedsFlag.Usage) + manifestVerifyCmd.PersistentFlags().BoolVar(&verifyFailfast, "verify.failfast", false, "Stop on first found error. 
Report it and exit") + withChainFlag(manifestVerifyCmd) + rootCmd.AddCommand(manifestVerifyCmd) + withDataDir(printTorrentHashes) withChainFlag(printTorrentHashes) printTorrentHashes.PersistentFlags().BoolVar(&forceRebuild, "rebuild", false, "Force re-create .torrent files") @@ -216,7 +228,7 @@ func Downloader(ctx context.Context, logger log.Logger) error { cfg.AddTorrentsFromDisk = true // always true unless using uploader - which wants control of torrent files - d, err := downloader.New(ctx, cfg, dirs, logger, log.LvlInfo, seedbox) + d, err := downloader.New(ctx, cfg, logger, log.LvlInfo, seedbox) if err != nil { return err } @@ -276,7 +288,7 @@ var printTorrentHashes = &cobra.Command{ var manifestCmd = &cobra.Command{ Use: "manifest", - Example: "go run ./cmd/downloader torrent_hashes --datadir ", + Example: "go run ./cmd/downloader manifest --datadir ", RunE: func(cmd *cobra.Command, args []string) error { logger := debug.SetupCobra(cmd, "downloader") if err := manifest(cmd.Context(), logger); err != nil { @@ -286,6 +298,18 @@ var manifestCmd = &cobra.Command{ }, } +var manifestVerifyCmd = &cobra.Command{ + Use: "manifest-verify", + Example: "go run ./cmd/downloader manifest-verify --chain [--webseeds 'a','b','c']", + RunE: func(cmd *cobra.Command, args []string) error { + logger := debug.SetupCobra(cmd, "downloader") + if err := manifestVerify(cmd.Context(), logger); err != nil { + log.Error(err.Error()) + } + return nil + }, +} + var torrentCat = &cobra.Command{ Use: "torrent_cat", Example: "go run ./cmd/downloader torrent_cat ", @@ -308,6 +332,44 @@ var torrentCat = &cobra.Command{ return nil }, } +var torrentClean = &cobra.Command{ + Use: "torrent_clean", + Short: "Remove all .torrent files from datadir directory", + Example: "go run ./cmd/downloader torrent_clean --datadir=", + RunE: func(cmd *cobra.Command, args []string) error { + dirs := datadir.New(datadirCli) + + logger.Info("[snapshots.webseed] processing local file etags") + removedTorrents := 0 + 
walker := func(path string, de fs.DirEntry, err error) error { + if err != nil || de.IsDir() { + if err != nil { + logger.Warn("[snapshots.torrent] walk and cleanup", "err", err, "path", path) + } + return nil //nolint + } + + if !strings.HasSuffix(de.Name(), ".torrent") || strings.HasPrefix(de.Name(), ".") { + return nil + } + err = os.Remove(filepath.Join(dirs.Snap, path)) + if err != nil { + logger.Warn("[snapshots.torrent] remove", "err", err, "path", path) + return err + } + removedTorrents++ + return nil + } + + sfs := os.DirFS(dirs.Snap) + if err := fs.WalkDir(sfs, ".", walker); err != nil { + return err + } + logger.Info("[snapshots.torrent] cleanup finished", "count", removedTorrents) + return nil + }, +} + var torrentMagnet = &cobra.Command{ Use: "torrent_magnet", Example: "go run ./cmd/downloader torrent_magnet ", @@ -325,25 +387,76 @@ var torrentMagnet = &cobra.Command{ }, } +func manifestVerify(ctx context.Context, logger log.Logger) error { + webseedsList := common.CliString2Array(webseeds) + if known, ok := snapcfg.KnownWebseeds[chain]; ok { + webseedsList = append(webseedsList, known...) + } + + webseedUrlsOrFiles := webseedsList + webseedHttpProviders := make([]*url.URL, 0, len(webseedUrlsOrFiles)) + webseedFileProviders := make([]string, 0, len(webseedUrlsOrFiles)) + for _, webseed := range webseedUrlsOrFiles { + if !strings.HasPrefix(webseed, "v") { // has marker v1/v2/... 
+ uri, err := url.ParseRequestURI(webseed) + if err != nil { + if strings.HasSuffix(webseed, ".toml") && dir.FileExist(webseed) { + webseedFileProviders = append(webseedFileProviders, webseed) + } + continue + } + webseedHttpProviders = append(webseedHttpProviders, uri) + continue + } + + if strings.HasPrefix(webseed, "v1:") { + withoutVersionPrefix := webseed[3:] + if !strings.HasPrefix(withoutVersionPrefix, "https:") { + continue + } + uri, err := url.ParseRequestURI(withoutVersionPrefix) + if err != nil { + logger.Warn("[webseed] can't parse url", "err", err, "url", withoutVersionPrefix) + continue + } + webseedHttpProviders = append(webseedHttpProviders, uri) + } else { + continue + } + } + + _ = webseedFileProviders // todo add support of file providers + logger.Warn("file providers are not supported yet", "fileProviders", webseedFileProviders) + + wseed := downloader.NewWebSeeds(webseedHttpProviders, log.LvlDebug, logger) + return wseed.VerifyManifestedBuckets(ctx, verifyFailfast) +} + func manifest(ctx context.Context, logger log.Logger) error { dirs := datadir.New(datadirCli) + + files, err := downloader.SeedableFiles(dirs, chain) + if err != nil { + return err + } + extList := []string{ ".torrent", - ".seg", ".idx", // e2 - ".kv", ".kvi", ".bt", ".kvei", // e3 domain - ".v", ".vi", //e3 hist - ".ef", ".efi", //e3 idx - ".txt", //salt.txt + //".seg", ".idx", // e2 + //".kv", ".kvi", ".bt", ".kvei", // e3 domain + //".v", ".vi", //e3 hist + //".ef", ".efi", //e3 idx + ".txt", //salt.txt, manifest.txt } l, _ := dir.ListFiles(dirs.Snap, extList...) for _, fPath := range l { _, fName := filepath.Split(fPath) - fmt.Printf("%s\n", fName) + files = append(files, fName) } l, _ = dir.ListFiles(dirs.SnapDomain, extList...) for _, fPath := range l { _, fName := filepath.Split(fPath) - fmt.Printf("domain/%s\n", fName) + files = append(files, "domain/"+fName) } l, _ = dir.ListFiles(dirs.SnapHistory, extList...) 
for _, fPath := range l { @@ -351,7 +464,7 @@ func manifest(ctx context.Context, logger log.Logger) error { if strings.Contains(fName, "commitment") { continue } - fmt.Printf("history/%s\n", fName) + files = append(files, "history/"+fName) } l, _ = dir.ListFiles(dirs.SnapIdx, extList...) for _, fPath := range l { @@ -359,15 +472,12 @@ func manifest(ctx context.Context, logger log.Logger) error { if strings.Contains(fName, "commitment") { continue } - fmt.Printf("idx/%s\n", fName) + files = append(files, "idx/"+fName) } - l, _ = dir.ListFiles(dirs.SnapAccessors, extList...) - for _, fPath := range l { - _, fName := filepath.Split(fPath) - if strings.Contains(fName, "commitment") { - continue - } - fmt.Printf("accessors/%s\n", fName) + + sort.Strings(files) + for _, f := range files { + fmt.Printf("%s\n", f) } return nil } diff --git a/cmd/downloader/readme.md b/cmd/downloader/readme.md index 4fadac96e92..8146934d6e9 100644 --- a/cmd/downloader/readme.md +++ b/cmd/downloader/readme.md @@ -153,6 +153,14 @@ downloader --datadir= --chain=mainnet --webseed= downloader torrent_cat /path/to.torrent downloader torrent_magnet /path/to.torrent + +downloader torrent_clean --datadir # remove all .torrent files in datadir +``` + +## Remote manifest verify +To check that the remote webseeds have a manifest available, that all manifested files are available and have a correctly formatted ETag, that there are no dangling torrents, etc. 
+``` +downloader manifest-verify --chain [--webseeds 'a','b','c'] ``` ## Faster rsync diff --git a/erigon-lib/downloader/downloader.go b/erigon-lib/downloader/downloader.go index 253f716bc86..7d270c8bfd8 100644 --- a/erigon-lib/downloader/downloader.go +++ b/erigon-lib/downloader/downloader.go @@ -96,6 +96,7 @@ type Downloader struct { type webDownloadInfo struct { url *url.URL length int64 + md5 string torrent *torrent.Torrent } @@ -118,7 +119,7 @@ type AggStats struct { LocalFileHashTime time.Duration } -func New(ctx context.Context, cfg *downloadercfg.Cfg, dirs datadir.Dirs, logger log.Logger, verbosity log.Lvl, discover bool) (*Downloader, error) { +func New(ctx context.Context, cfg *downloadercfg.Cfg, logger log.Logger, verbosity log.Lvl, discover bool) (*Downloader, error) { db, c, m, torrentClient, err := openClient(ctx, cfg.Dirs.Downloader, cfg.Dirs.Snap, cfg.ClientConfig) if err != nil { return nil, fmt.Errorf("openClient: %w", err) @@ -151,7 +152,7 @@ func New(ctx context.Context, cfg *downloadercfg.Cfg, dirs datadir.Dirs, logger torrentClient: torrentClient, lock: mutex, stats: stats, - webseeds: &WebSeeds{logger: logger, verbosity: verbosity, downloadTorrentFile: cfg.DownloadTorrentFilesFromWebseed, torrentsWhitelist: lock.Downloads}, + webseeds: NewWebSeeds(cfg.WebSeedUrls, verbosity, logger), logger: logger, verbosity: verbosity, torrentFiles: &TorrentFiles{dir: cfg.Dirs.Snap}, @@ -161,13 +162,13 @@ func New(ctx context.Context, cfg *downloadercfg.Cfg, dirs datadir.Dirs, logger downloading: map[string]struct{}{}, webseedsDiscover: discover, } + d.webseeds.SetTorrent(d.torrentFiles, lock.Downloads, cfg.DownloadTorrentFilesFromWebseed) if cfg.ClientConfig.DownloadRateLimiter != nil { downloadLimit := cfg.ClientConfig.DownloadRateLimiter.Limit() d.downloadLimit = &downloadLimit } - d.webseeds.torrentFiles = d.torrentFiles d.ctx, d.stopMainLoop = context.WithCancel(ctx) if cfg.AddTorrentsFromDisk { @@ -342,7 +343,7 @@ func initSnapshotLock(ctx 
context.Context, cfg *downloadercfg.Cfg, db kv.RoDB, s Chain: cfg.ChainName, } - files, err := seedableFiles(cfg.Dirs, cfg.ChainName) + files, err := SeedableFiles(cfg.Dirs, cfg.ChainName) if err != nil { return nil, err } @@ -656,7 +657,8 @@ func (d *Downloader) mainLoop(silent bool) error { d.wg.Add(1) go func() { defer d.wg.Done() - d.webseeds.Discover(d.ctx, d.cfg.WebSeedUrls, d.cfg.WebSeedFiles, d.cfg.Dirs.Snap) + // webseeds.Discover may create new .torrent files on disk + d.webseeds.Discover(d.ctx, d.cfg.WebSeedFiles, d.cfg.Dirs.Snap) // apply webseeds to existing torrents if err := d.addTorrentFilesFromDisk(true); err != nil && !errors.Is(err, context.Canceled) { d.logger.Warn("[snapshots] addTorrentFilesFromDisk", "err", err) @@ -1272,8 +1274,6 @@ func (d *Downloader) checkComplete(name string) (bool, int64, *time.Time) { } func (d *Downloader) getWebDownloadInfo(t *torrent.Torrent) (webDownloadInfo, []*seedHash, error) { - torrentHash := t.InfoHash() - d.lock.RLock() info, ok := d.webDownloadInfo[t.Name()] d.lock.RUnlock() @@ -1282,46 +1282,16 @@ func (d *Downloader) getWebDownloadInfo(t *torrent.Torrent) (webDownloadInfo, [] return info, nil, nil } - seedHashMismatches := make([]*seedHash, 0, len(d.cfg.WebSeedUrls)) - - for _, webseed := range d.cfg.WebSeedUrls { - downloadUrl := webseed.JoinPath(t.Name()) - - if headRequest, err := http.NewRequestWithContext(d.ctx, "HEAD", downloadUrl.String(), nil); err == nil { - headResponse, err := http.DefaultClient.Do(headRequest) - - if err != nil { - continue - } - - headResponse.Body.Close() - - if headResponse.StatusCode == http.StatusOK { - if meta, err := getWebpeerTorrentInfo(d.ctx, downloadUrl); err == nil { - if bytes.Equal(torrentHash.Bytes(), meta.HashInfoBytes().Bytes()) { - // TODO check the torrent's hash matches this hash - return webDownloadInfo{ - url: downloadUrl, - length: headResponse.ContentLength, - torrent: t, - }, seedHashMismatches, nil - } else { - hash := meta.HashInfoBytes() - 
seedHashMismatches = append(seedHashMismatches, &seedHash{url: webseed, hash: &hash}) - continue - } - } - } - } - - seedHashMismatches = append(seedHashMismatches, &seedHash{url: webseed}) + // todo this function does not exit on first matched webseed hash, could make unexpected results + infos, seedHashMismatches, err := d.webseeds.getWebDownloadInfo(d.ctx, t) + if err != nil || len(infos) == 0 { + return webDownloadInfo{}, seedHashMismatches, fmt.Errorf("can't find download info: %w", err) } - - return webDownloadInfo{}, seedHashMismatches, fmt.Errorf("can't find download info") + return infos[0], seedHashMismatches, nil } func getWebpeerTorrentInfo(ctx context.Context, downloadUrl *url.URL) (*metainfo.MetaInfo, error) { - torrentRequest, err := http.NewRequestWithContext(ctx, "GET", downloadUrl.String()+".torrent", nil) + torrentRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, downloadUrl.String()+".torrent", nil) if err != nil { return nil, err @@ -2215,7 +2185,7 @@ func (d *Downloader) AddMagnetLink(ctx context.Context, infoHash metainfo.Hash, return nil } -func seedableFiles(dirs datadir.Dirs, chainName string) ([]string, error) { +func SeedableFiles(dirs datadir.Dirs, chainName string) ([]string, error) { files, err := seedableSegmentFiles(dirs.Snap, chainName) if err != nil { return nil, fmt.Errorf("seedableSegmentFiles: %w", err) diff --git a/erigon-lib/downloader/downloader_test.go b/erigon-lib/downloader/downloader_test.go index 598cad68c8b..dd0b8f0d062 100644 --- a/erigon-lib/downloader/downloader_test.go +++ b/erigon-lib/downloader/downloader_test.go @@ -18,7 +18,7 @@ func TestChangeInfoHashOfSameFile(t *testing.T) { dirs := datadir.New(t.TempDir()) cfg, err := downloadercfg2.New(dirs, "", lg.Info, 0, 0, 0, 0, 0, nil, nil, "testnet", false) require.NoError(err) - d, err := New(context.Background(), cfg, dirs, log.New(), log.LvlInfo, true) + d, err := New(context.Background(), cfg, log.New(), log.LvlInfo, true) require.NoError(err) defer 
d.Close() err = d.AddMagnetLink(d.ctx, snaptype.Hex2InfoHash("aa"), "a.seg") diff --git a/erigon-lib/downloader/util.go b/erigon-lib/downloader/util.go index 86f9aab7a8c..375bbf48cb5 100644 --- a/erigon-lib/downloader/util.go +++ b/erigon-lib/downloader/util.go @@ -167,7 +167,7 @@ func BuildTorrentFilesIfNeed(ctx context.Context, dirs datadir.Dirs, torrentFile logEvery := time.NewTicker(20 * time.Second) defer logEvery.Stop() - files, err := seedableFiles(dirs, chain) + files, err := SeedableFiles(dirs, chain) if err != nil { return err } diff --git a/erigon-lib/downloader/webseed.go b/erigon-lib/downloader/webseed.go index ae2e287a229..bc2088b499f 100644 --- a/erigon-lib/downloader/webseed.go +++ b/erigon-lib/downloader/webseed.go @@ -3,12 +3,15 @@ package downloader import ( "bytes" "context" + "errors" "fmt" + "github.com/anacrolix/torrent" "io" "net/http" "net/url" "os" "path/filepath" + "sort" "strings" "sync" @@ -33,21 +36,263 @@ type WebSeeds struct { downloadTorrentFile bool torrentsWhitelist snapcfg.Preverified + seeds []*url.URL + logger log.Logger verbosity log.Lvl torrentFiles *TorrentFiles } -func (d *WebSeeds) Discover(ctx context.Context, urls []*url.URL, files []string, rootDir string) { - listsOfFiles := d.constructListsOfFiles(ctx, urls, files) +func NewWebSeeds(seeds []*url.URL, verbosity log.Lvl, logger log.Logger) *WebSeeds { + return &WebSeeds{ + seeds: seeds, + logger: logger, + verbosity: verbosity, + } +} + +func (d *WebSeeds) getWebDownloadInfo(ctx context.Context, t *torrent.Torrent) (infos []webDownloadInfo, seedHashMismatches []*seedHash, err error) { + torrentHash := t.InfoHash().Bytes() + + for _, webseed := range d.seeds { + downloadUrl := webseed.JoinPath(t.Name()) + + if headRequest, err := http.NewRequestWithContext(ctx, http.MethodHead, downloadUrl.String(), nil); err == nil { + headResponse, err := http.DefaultClient.Do(headRequest) + + if err != nil { + continue + } + + headResponse.Body.Close() + + if headResponse.StatusCode 
!= http.StatusOK { + d.logger.Warn("[snapshots] webseed HEAD request failed", "url", downloadUrl, "status", headResponse.Status) + continue + } + if meta, err := getWebpeerTorrentInfo(ctx, downloadUrl); err == nil { + if bytes.Equal(torrentHash, meta.HashInfoBytes().Bytes()) { + md5tag := headResponse.Header.Get("Etag") + if md5tag != "" { + md5tag = strings.Trim(md5tag, "\"") + } + + infos = append(infos, webDownloadInfo{ + url: downloadUrl, + length: headResponse.ContentLength, + md5: md5tag, + torrent: t, + }) + } else { + hash := meta.HashInfoBytes() + seedHashMismatches = append(seedHashMismatches, &seedHash{url: webseed, hash: &hash}) + } + } + } + seedHashMismatches = append(seedHashMismatches, &seedHash{url: webseed}) + } + + return infos, seedHashMismatches, nil +} + +func (d *WebSeeds) SetTorrent(t *TorrentFiles, whiteList snapcfg.Preverified, downloadTorrentFile bool) { + d.downloadTorrentFile = downloadTorrentFile + d.torrentsWhitelist = whiteList + d.torrentFiles = t +} + +func (d *WebSeeds) checkHasTorrents(manifestResponse snaptype.WebSeedsFromProvider, report *webSeedCheckReport) { + // check that for each file in the manifest, there is a corresponding .torrent file + torrentNames := make(map[string]struct{}) + for name := range manifestResponse { + if strings.HasSuffix(name, ".torrent") { + torrentNames[name] = struct{}{} + } + } + hasTorrents := len(torrentNames) > 0 + report.missingTorrents = make([]string, 0) + for name := range manifestResponse { + // todo extract list of extensions which are + // seeded as torrents (kv, ef, v, seg) + // seeded as is (.txt, efi) + // temporarily not seedable (.idx) + if !strings.HasSuffix(name, ".torrent") && !strings.HasSuffix(name, ".txt") { + tname := name + ".torrent" + if _, ok := torrentNames[tname]; !ok { + report.missingTorrents = append(report.missingTorrents, name) + continue + } + delete(torrentNames, tname) + } + } + + if len(torrentNames) > 0 { + report.danglingTorrents = make([]string, 0, 
len(torrentNames)) + for file := range torrentNames { + report.danglingTorrents = append(report.danglingTorrents, file) + } + } + report.torrentsOK = len(report.missingTorrents) == 0 && len(report.danglingTorrents) == 0 && hasTorrents +} + +func (d *WebSeeds) fetchFileEtags(ctx context.Context, manifestResponse snaptype.WebSeedsFromProvider) (tags map[string]string, invalidTags, etagFetchFailed []string, err error) { + etagFetchFailed = make([]string, 0) + tags = make(map[string]string) + invalidTagsMap := make(map[string]string) + + for name, wurl := range manifestResponse { + u, err := url.Parse(wurl) + if err != nil { + return nil, nil, nil, fmt.Errorf("webseed.fetchFileEtags: %w", err) + } + md5Tag, err := d.retrieveFileEtag(ctx, u) + if err != nil { + if errors.Is(err, ErrInvalidEtag) { + invalidTagsMap[name] = md5Tag + continue + } + if errors.Is(err, ErrEtagNotFound) { + etagFetchFailed = append(etagFetchFailed, name) + continue + } + d.logger.Debug("[snapshots.webseed] get file ETag", "err", err, "url", u.String()) + return nil, nil, nil, fmt.Errorf("webseed.fetchFileEtags: %w", err) + } + tags[name] = md5Tag + } + + invalidTags = make([]string, 0) + if len(invalidTagsMap) > 0 { + for name, tag := range invalidTagsMap { + invalidTags = append(invalidTags, fmt.Sprintf("%-50s %s", name, tag)) + } + } + return tags, invalidTags, etagFetchFailed, nil +} + +func (d *WebSeeds) VerifyManifestedBuckets(ctx context.Context, failFast bool) error { + var supErr error + for _, webSeedProviderURL := range d.seeds { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + d.logger.Debug("[snapshots.webseed] verify manifest", "url", webSeedProviderURL.String()) + + if err := d.VerifyManifestedBucket(ctx, webSeedProviderURL); err != nil { + d.logger.Warn("[snapshots.webseed] verify manifest", "err", err) + if failFast { + return err + } else { + supErr = err + } + } + } + return supErr +} + +type webSeedCheckReport struct { + seed *url.URL + manifestExist bool + 
torrentsOK bool + missingTorrents []string + danglingTorrents []string + totalEtags int + invalidEtags []string + etagFetchFailed []string +} + +func (w *webSeedCheckReport) sort() { + sort.Strings(w.missingTorrents) + sort.Strings(w.invalidEtags) + sort.Strings(w.etagFetchFailed) + sort.Strings(w.danglingTorrents) +} + +func (w *webSeedCheckReport) String() string { + if !w.manifestExist { + return fmt.Sprintf("## REPORT on %s: manifest not found\n", w.seed) + } + w.sort() + var b strings.Builder + b.WriteString(fmt.Sprintf("## REPORT on %s\n", w.seed)) + b.WriteString(fmt.Sprintf(" - manifest exist: %t\n", w.manifestExist)) + b.WriteString(fmt.Sprintf(" - missing torrents (files without torrents): %d\n", len(w.missingTorrents))) + b.WriteString(fmt.Sprintf(" - dangling (data file not found) torrents: %d\n", len(w.danglingTorrents))) + b.WriteString(fmt.Sprintf(" - invalid ETags format: %d/%d\n", len(w.invalidEtags), w.totalEtags)) + b.WriteString(fmt.Sprintf(" - ETag fetch failed: %d/%d\n", len(w.etagFetchFailed), w.totalEtags)) + + titles := []string{ + "Missing torrents", + "Dangling torrents", + "Invalid ETags format", + "ETag fetch failed", + } + + fnamess := [][]string{ + w.missingTorrents, + w.danglingTorrents, + w.invalidEtags, + w.etagFetchFailed, + } + + var printedAnything bool + for ti, names := range fnamess { + if len(names) == 0 { + continue + } + if ti == 0 { + b.WriteByte(10) + } + printedAnything = true + b.WriteString(fmt.Sprintf("# %s\n", titles[ti])) + for _, name := range names { + b.WriteString(fmt.Sprintf("%s\n", name)) + } + if ti != len(fnamess)-1 { + b.WriteByte(10) + } + } + if !printedAnything { + b.WriteString(fmt.Sprintf("== OK %s\n", w.seed.String())) + } else { + b.WriteString(fmt.Sprintf("== BAD %s\n", w.seed.String())) + } + return b.String() +} + +func (d *WebSeeds) VerifyManifestedBucket(ctx context.Context, webSeedProviderURL *url.URL) error { + report := &webSeedCheckReport{seed: webSeedProviderURL} + manifestResponse, err := 
d.retrieveManifest(ctx, webSeedProviderURL) + report.manifestExist = len(manifestResponse) != 0 + defer func() { fmt.Printf("%s\n", report.String()) }() + if err != nil { + return err + } + + d.checkHasTorrents(manifestResponse, report) + remoteTags, invalidTags, noTags, err := d.fetchFileEtags(ctx, manifestResponse) + if err != nil { + return err + } + + report.invalidEtags = invalidTags + report.etagFetchFailed = noTags + report.totalEtags = len(remoteTags) + len(noTags) + return nil +} + +func (d *WebSeeds) Discover(ctx context.Context, files []string, rootDir string) { + listsOfFiles := d.constructListsOfFiles(ctx, d.seeds, files) torrentMap := d.makeTorrentUrls(listsOfFiles) webSeedMap := d.downloadTorrentFilesFromProviders(ctx, rootDir, torrentMap) d.makeWebSeedUrls(listsOfFiles, webSeedMap) } func (d *WebSeeds) constructListsOfFiles(ctx context.Context, httpProviders []*url.URL, diskProviders []string) []snaptype.WebSeedsFromProvider { - log.Debug("[snapshots] webseed providers", "http", len(httpProviders), "disk", len(diskProviders)) + log.Debug("[snapshots.webseed] providers", "http", len(httpProviders), "disk", len(diskProviders)) listsOfFiles := make([]snaptype.WebSeedsFromProvider, 0, len(httpProviders)+len(diskProviders)) for _, webSeedProviderURL := range httpProviders { @@ -154,6 +399,40 @@ func (d *WebSeeds) ByFileName(name string) (metainfo.UrlList, bool) { v, ok := d.byFileName[name] return v, ok } + +var ErrInvalidEtag = fmt.Errorf("invalid etag") +var ErrEtagNotFound = fmt.Errorf("not found") + +func (d *WebSeeds) retrieveFileEtag(ctx context.Context, file *url.URL) (string, error) { + request, err := http.NewRequest(http.MethodHead, file.String(), nil) + if err != nil { + return "", err + } + + request = request.WithContext(ctx) + resp, err := http.DefaultClient.Do(request) + if err != nil { + return "", fmt.Errorf("webseed.http: %w, url=%s", err, file.String()) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + if 
resp.StatusCode == http.StatusNotFound { + return "", ErrEtagNotFound + } + return "", fmt.Errorf("webseed.http: status code %d, url=%s", resp.StatusCode, file.String()) + } + + etag := resp.Header.Get("Etag") // file md5 + if etag == "" { + return "", fmt.Errorf("webseed.http: file has no etag, url=%s", file.String()) + } + etag = strings.Trim(etag, "\"") + if strings.Contains(etag, "-") { + return etag, ErrInvalidEtag + } + return etag, nil +} + func (d *WebSeeds) retrieveManifest(ctx context.Context, webSeedProviderUrl *url.URL) (snaptype.WebSeedsFromProvider, error) { baseUrl := webSeedProviderUrl.String() ref, err := url.Parse("manifest.txt") @@ -169,22 +448,35 @@ func (d *WebSeeds) retrieveManifest(ctx context.Context, webSeedProviderUrl *url request = request.WithContext(ctx) resp, err := http.DefaultClient.Do(request) if err != nil { - return nil, fmt.Errorf("webseed.http: %w, host=%s, url=%s", err, webSeedProviderUrl.Hostname(), webSeedProviderUrl.EscapedPath()) + return nil, fmt.Errorf("webseed.http: %w, url=%s", err, u.String()) } defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + fmt.Printf("## Seed %s manifest.txt fetch failed: %s\n", webSeedProviderUrl.String(), resp.Status) + return nil, fmt.Errorf("webseed.http: status=%d, url=%s", resp.StatusCode, u.String()) + } + b, err := io.ReadAll(resp.Body) if err != nil { - return nil, fmt.Errorf("webseed.http: %w, host=%s, url=%s, ", err, webSeedProviderUrl.Hostname(), webSeedProviderUrl.EscapedPath()) + return nil, fmt.Errorf("webseed.http: %w, url=%s, ", err, u.String()) } + response := snaptype.WebSeedsFromProvider{} fileNames := strings.Split(string(b), "\n") - for _, f := range fileNames { + for fi, f := range fileNames { + if strings.TrimSpace(f) == "" { + if fi != len(fileNames)-1 { + fmt.Printf("## Seed %s empty line in manifest.txt at line %d\n", webSeedProviderUrl.String(), fi) + } + continue + } + response[f], err = url.JoinPath(baseUrl, f) if err != nil { return nil, err } } - 
d.logger.Debug("[snapshots.webseed] get from HTTP provider", "urls", len(response), "host", webSeedProviderUrl.Hostname(), "url", webSeedProviderUrl.EscapedPath()) + d.logger.Debug("[snapshots.webseed] get from HTTP provider", "urls", len(response), "url", webSeedProviderUrl.EscapedPath()) return response, nil } func (d *WebSeeds) readWebSeedsFile(webSeedProviderPath string) (snaptype.WebSeedsFromProvider, error) { diff --git a/eth/backend.go b/eth/backend.go index ba09dd766a9..529595a2abc 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -1246,7 +1246,7 @@ func (s *Ethereum) setUpSnapDownloader(ctx context.Context, downloaderCfg *downl } discover := true - s.downloader, err = downloader.New(ctx, downloaderCfg, s.config.Dirs, s.logger, log.LvlDebug, discover) + s.downloader, err = downloader.New(ctx, downloaderCfg, s.logger, log.LvlDebug, discover) if err != nil { return err }