Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stagedSync: Optimize prune old chunks #10019

Merged
merged 7 commits into from
Apr 24, 2024
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 7 additions & 17 deletions eth/stagedsync/stage_log_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -395,11 +395,8 @@ func pruneOldLogChunks(tx kv.RwTx, bucket string, inMem *etl.Collector, pruneTo
return err
}
var blockNum uint64
if bucket == kv.Log {
blockNum = binary.BigEndian.Uint64(k)
} else {
blockNum = uint64(binary.BigEndian.Uint32(k[len(key):]))
}
blockNum = uint64(binary.BigEndian.Uint32(k[len(key):]))

if !bytes.HasPrefix(k, key) || blockNum >= pruneTo {
break
}
Expand Down Expand Up @@ -434,11 +431,10 @@ func PruneLogIndex(s *PruneState, tx kv.RwTx, cfg LogIndexCfg, ctx context.Conte
}

pruneTo := cfg.prune.Receipts.PruneTo(s.ForwardProgress)
// s.PruneProgress
if err = pruneLogIndex(logPrefix, tx, cfg.tmpdir, s.PruneProgress, pruneTo, ctx, logger, cfg.noPruneContracts); err != nil {
return err
}
if err = s.Done(tx); err != nil {
if err = s.DoneAt(tx, pruneTo); err != nil {
return err
}

Expand All @@ -460,12 +456,10 @@ func pruneLogIndex(logPrefix string, tx kv.RwTx, tmpDir string, pruneFrom, prune
defer topics.Close()
addrs := etl.NewCollector(logPrefix, tmpDir, etl.NewOldestEntryBuffer(bufferSize), logger)
defer addrs.Close()
pruneLogKeyCollector := etl.NewCollector(logPrefix, tmpDir, etl.NewOldestEntryBuffer(bufferSize), logger)
defer pruneLogKeyCollector.Close()

reader := bytes.NewReader(nil)
{
c, err := tx.Cursor(kv.Log)
c, err := tx.RwCursor(kv.Log)
if err != nil {
return err
}
Expand All @@ -481,7 +475,7 @@ func pruneLogIndex(logPrefix string, tx kv.RwTx, tmpDir string, pruneFrom, prune
}
select {
case <-logEvery.C:
logger.Info(fmt.Sprintf("[%s]", logPrefix), "table", kv.Log, "block", blockNum)
logger.Info(fmt.Sprintf("[%s]", logPrefix), "table", kv.Log, "block", blockNum, "pruneFrom", pruneFrom, "pruneTo", pruneTo)
case <-ctx.Done():
return libcommon.ErrStopped
default:
Expand All @@ -502,6 +496,7 @@ func pruneLogIndex(logPrefix string, tx kv.RwTx, tmpDir string, pruneFrom, prune
break
}
}

if toPrune {
for _, l := range logs {
for _, topic := range l.Topics {
Expand All @@ -513,9 +508,7 @@ func pruneLogIndex(logPrefix string, tx kv.RwTx, tmpDir string, pruneFrom, prune
return err
}
}
if err := pruneLogKeyCollector.Collect(k, nil); err != nil {
return err
}
c.DeleteCurrent()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. err not checked
  2. plz use tx.Delete(table, k). Because seems some version of mdbx has bug when same cursor used for for iteration and DeleteCurrent - we did workaround it by using 2 cursors or tx.Delete, tnx. I'm not sure which versions are affected - because not easy to reproduce.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • tx.DeleteCurrent pattern is used at many places in the code base. Are you also saying all of them need fixing too, then? Because the way i see it, DeleteCurrent can only be used in a for loop like that.
  • What is the performance impact of using tx.Delete here, compared to DeleteCurrent?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DeleteCurrent is not bad - use 1 cursor to iterate and in same time DeleteCurrent - bad in some mdbx versions.

No performance impact (of course it’s there - O(1) vs O(n log n), but mdbx’s methods itself are fast-enough, bottleneck is usually in “touching cold data”, but you touching it by loop anyway).

Don’t need modify other places.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, for now, using tx.Delete in its place

}
}
}
Expand All @@ -526,8 +519,5 @@ func pruneLogIndex(logPrefix string, tx kv.RwTx, tmpDir string, pruneFrom, prune
if err := pruneOldLogChunks(tx, kv.LogAddressIndex, addrs, pruneTo, ctx); err != nil {
return err
}
if err := pruneOldLogChunks(tx, kv.Log, pruneLogKeyCollector, pruneTo, ctx); err != nil {
return err
}
return nil
}
Loading