Commit

Merge pull request #1426 from bnb-chain/master
feat: sync master to develop
ruojunm committed Jun 18, 2024
2 parents 451ef72 + e9144c8 commit 37e3aa3
Showing 10 changed files with 104 additions and 32 deletions.
20 changes: 20 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,25 @@
# Changelog

+## v1.8.0
+BUGFIXES
+* [#1404](https://github.com/bnb-chain/greenfield-storage-provider/pull/1404) fix: use exponential backoff for task retry policy (a generic back-off sketch follows this file's diff)
+* [#1406](https://github.com/bnb-chain/greenfield-storage-provider/pull/1406) fix: change replicate failure error code to 404 when object is not found
+* [#1407](https://github.com/bnb-chain/greenfield-storage-provider/pull/1407) fix: refine error code for bucket migration
+* [#1409](https://github.com/bnb-chain/greenfield-storage-provider/pull/1409) fix: prevent users from selecting a network in the universal endpoint page
+* [#1412](https://github.com/bnb-chain/greenfield-storage-provider/pull/1412) fix: bs bucket migrate event bug
+* [#1414](https://github.com/bnb-chain/greenfield-storage-provider/pull/1414) fix: metric check sp
+* [#1415](https://github.com/bnb-chain/greenfield-storage-provider/pull/1415) fix: check sp health retry
+* [#1422](https://github.com/bnb-chain/greenfield-storage-provider/pull/1422) fix: gvg staking storage size
+
+FEATURES
+* [#1405](https://github.com/bnb-chain/greenfield-storage-provider/pull/1405) feat: bs add bucket status field
+* [#1408](https://github.com/bnb-chain/greenfield-storage-provider/pull/1408) feat: add detailed logs for special customized logs
+* [#1416](https://github.com/bnb-chain/greenfield-storage-provider/pull/1416) perf: logic
+
+DOCS
+* [#1402](https://github.com/bnb-chain/greenfield-storage-provider/pull/1402) docs: move SP cmd/module/api docs from docs.bnbchain.org to the SP repo
+* [#1417](https://github.com/bnb-chain/greenfield-storage-provider/pull/1417) fix: readme
+
## v1.7.0
BUGFIXES
* [#1394](https://github.com/bnb-chain/greenfield-storage-provider/pull/1394) fix: pick new gvg when retry failed replicate piece task
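Aside: PR #1404 in the changelog above switches the task retry policy to exponential back-off. A minimal sketch of such a policy is below; the function name and parameters are illustrative assumptions, not the PR's actual code.

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// retryWithBackoff retries op up to maxAttempts times, doubling the delay
// after each failure and capping it at maxDelay. Hypothetical helper for
// illustration only.
func retryWithBackoff(op func() error, maxAttempts int, base, maxDelay time.Duration) error {
	delay := base
	var err error
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		if err = op(); err == nil {
			return nil
		}
		if attempt == maxAttempts {
			break // no sleep needed after the final attempt
		}
		time.Sleep(delay)
		delay *= 2 // exponential growth
		if delay > maxDelay {
			delay = maxDelay // cap the back-off
		}
	}
	return fmt.Errorf("all %d attempts failed: %w", maxAttempts, err)
}

func main() {
	err := retryWithBackoff(func() error { return errors.New("transient failure") },
		4, 100*time.Millisecond, time.Second)
	fmt.Println(err)
}
```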
63 changes: 41 additions & 22 deletions base/gfspvgmgr/virtual_group_manager.go
@@ -7,6 +7,7 @@ import (
"math/rand"
"net/http"
"sort"
"strconv"
"strings"
"sync"
"time"
@@ -19,6 +20,7 @@ import (
"github.com/bnb-chain/greenfield-storage-provider/core/consensus"
"github.com/bnb-chain/greenfield-storage-provider/core/vgmgr"
"github.com/bnb-chain/greenfield-storage-provider/pkg/log"
"github.com/bnb-chain/greenfield-storage-provider/pkg/metrics"
"github.com/bnb-chain/greenfield-storage-provider/util"
sptypes "github.com/bnb-chain/greenfield/x/sp/types"
virtualgrouptypes "github.com/bnb-chain/greenfield/x/virtualgroup/types"
@@ -30,12 +32,14 @@ const (
VirtualGroupManagerSpace = "VirtualGroupManager"
RefreshMetaInterval = 5 * time.Second
MaxStorageUsageRatio = 0.95
-	DefaultInitialGVGStakingStorageSize = uint64(1) * 1024 * 1024 * 1024 * 1024 // 1TB per GVG, chain side DefaultMaxStoreSizePerFamily is 64 TB
-	additionalGVGStakingStorageSize     = uint64(1) * 1024 * 1024 * 1024 * 512  // 0.5TB
+	DefaultInitialGVGStakingStorageSize = uint64(1) * 1024 * 1024 * 1024 * 256 // 256GB per GVG, chain side DefaultMaxStoreSizePerFamily is 64 TB
+	additionalGVGStakingStorageSize     = uint64(1) * 1024 * 1024 * 1024 * 512 // 0.5TB

-	defaultSPCheckTimeout          = 3 * time.Second
-	defaultSPHealthCheckerInterval = 10 * time.Second
-	httpStatusPath                 = "/status"
+	defaultSPCheckTimeout               = 1 * time.Minute
+	defaultSPHealthCheckerInterval      = 10 * time.Second
+	defaultSPHealthCheckerRetryInterval = 1 * time.Second
+	defaultSPHealthCheckerMaxRetries    = 5
+	httpStatusPath                      = "/status"

emptyGVGSafeDeletePeriod = int64(60) * 60 * 24
)
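Side note on the first change in this hunk: the initial stake per GVG drops from 1 TB to 256 GB. Taking the comment's chain-side DefaultMaxStoreSizePerFamily of 64 TB at face value (an assumption; the on-chain capacity rules may involve more than this one ratio), a quick sketch of how many initially-staked GVGs fit in one family:

```go
package main

import "fmt"

func main() {
	const gb = uint64(1) * 1024 * 1024 * 1024
	maxStorePerFamily := 64 * 1024 * gb // 64 TB, chain-side default quoted in the comment
	oldStake := 1024 * gb               // previous value: 1 TB per GVG
	newStake := 256 * gb                // new value: 256 GB per GVG
	fmt.Println(maxStorePerFamily/oldStake, "GVGs per family at the old stake") // 64
	fmt.Println(maxStorePerFamily/newStake, "GVGs per family at the new stake") // 256
}
```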
@@ -525,6 +529,10 @@ func (vgm *virtualGroupManager) FreezeSPAndGVGs(spID uint32, gvgs []*virtualgrou
vgm.freezeSPPool.FreezeSPAndGVGs(spID, gvgs)
}

+func (vgm *virtualGroupManager) ReleaseAllSP() {
+	vgm.freezeSPPool.ReleaseAllSP()
+}

// releaseSPAndGVGLoop runs periodically to release SP from the freeze pool
func (vgm *virtualGroupManager) releaseSPAndGVGLoop() {
ticker := time.NewTicker(ReleaseSPJobInterval)
@@ -773,7 +781,7 @@ func (checker *HealthChecker) checkAllSPHealth() {

func (checker *HealthChecker) checkSPHealth(sp *sptypes.StorageProvider) bool {
if !sp.IsInService() {
log.CtxInfow(context.Background(), "the sp is not in service,sp is treated as unhealthy", "sp", sp)
log.CtxInfow(context.Background(), "the sp is not in service, sp is treated as unhealthy", "sp", sp)
return false
}

@@ -785,30 +793,41 @@ func (checker *HealthChecker) checkSPHealth(sp *sptypes.StorageProvider) bool {
Transport: &http.Transport{
TLSClientConfig: &tls.Config{MinVersion: tls.VersionTLS12},
},
-		Timeout: defaultSPCheckTimeout * time.Second,
+		Timeout: defaultSPCheckTimeout,
}

// Create an HTTP request to test the validity of the endpoint
urlToCheck := fmt.Sprintf("%s%s", endpoint, httpStatusPath)
-	req, err := http.NewRequestWithContext(ctxTimeout, http.MethodGet, urlToCheck, nil)
-	if err != nil {
-		return false
-	}
+	for attempt := 0; attempt < defaultSPHealthCheckerMaxRetries; attempt++ {
+		start := time.Now()
+		req, err := http.NewRequestWithContext(ctxTimeout, http.MethodGet, urlToCheck, nil)
+		if err != nil {
+			log.CtxErrorw(context.Background(), "failed to create request", "sp", sp, "error", err)
+			return false
+		}

-	resp, err := client.Do(req)
-	if err != nil {
-		log.CtxErrorw(context.Background(), "failed to connect to sp", "sp", sp, "error", err)
-		return false
-	}
-	defer resp.Body.Close()
+		resp, err := client.Do(req)
+		duration := time.Since(start)
+		metrics.SPHealthCheckerTime.WithLabelValues(strconv.Itoa(int(sp.Id))).Observe(duration.Seconds())
+		if err != nil {
+			log.CtxErrorw(context.Background(), "failed to connect to sp", "sp", sp, "error", err, "duration", duration)
+			time.Sleep(defaultSPHealthCheckerRetryInterval)
+			continue
+		}
+		defer resp.Body.Close()

-	if resp.StatusCode != http.StatusOK {
-		log.CtxErrorw(context.Background(), "failed to check sp healthy", "sp", sp, "http_status_code", resp.StatusCode, "resp_body", resp.Body)
-		return false
+		if resp.StatusCode == http.StatusOK {
+			log.CtxInfow(context.Background(), "succeed to check the sp healthy", "sp", sp, "duration", duration)
+			return true
+		} else {
+			metrics.SPHealthCheckerFailureCounter.WithLabelValues(strconv.Itoa(int(sp.Id))).Inc()
+			log.CtxErrorw(context.Background(), "failed to check sp healthy", "sp", sp, "http_status_code", resp.StatusCode, "duration", duration)
+			time.Sleep(defaultSPHealthCheckerRetryInterval)
+		}
	}

-	log.CtxInfow(context.Background(), "succeed to check the sp healthy", "sp", sp)
-	return true
+	log.CtxErrorw(context.Background(), "failed to check sp healthy after retries", "sp", sp)
+	return false
}

func (checker *HealthChecker) Start() {
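A rough consequence of the new constants and retry loop: an SP that never responds can now occupy the checker for several minutes per check. A back-of-the-envelope sketch, assuming each of the 5 attempts runs to the full 1-minute client timeout (the shared ctxTimeout may cut this shorter in practice):

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	timeout := 1 * time.Minute       // defaultSPCheckTimeout
	retryInterval := 1 * time.Second // defaultSPHealthCheckerRetryInterval
	maxRetries := 5                  // defaultSPHealthCheckerMaxRetries
	// Worst case: every attempt times out and is followed by a retry sleep.
	worst := time.Duration(maxRetries) * (timeout + retryInterval)
	fmt.Println(worst) // 5m5s
}
```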
3 changes: 3 additions & 0 deletions core/vgmgr/virtual_group_manager.go
@@ -144,6 +144,9 @@ type VirtualGroupManager interface {
	// SPs in the pool will be skipped when creating a GVG, and GVGs in the pool will not be chosen to seal objects
	// until released
FreezeSPAndGVGs(spID uint32, gvgs []*virtualgrouptypes.GlobalVirtualGroup)
+	// ReleaseAllSP releases all SPs and their related GVGs, in case there is not enough balance to create a new GVG;
+	// the existing GVGs should be used even if they previously failed to serve.
+	ReleaseAllSP()
}

// NewVirtualGroupManager is the virtual group manager init api.
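For context, the freeze pool behind this interface is conceptually a set of temporarily excluded SPs. A hypothetical sketch of the shape ReleaseAllSP could take — an illustration under that assumption, not the repo's gfspvgmgr implementation:

```go
package main

import (
	"fmt"
	"sync"
)

// FreezeSPPool is an illustrative stand-in: a mutex-guarded set of frozen SP IDs.
type FreezeSPPool struct {
	mu     sync.Mutex
	frozen map[uint32]struct{}
}

func NewFreezeSPPool() *FreezeSPPool {
	return &FreezeSPPool{frozen: make(map[uint32]struct{})}
}

func (p *FreezeSPPool) FreezeSP(spID uint32) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.frozen[spID] = struct{}{}
}

// ReleaseAllSP clears the whole pool in one shot, so every SP (and its GVGs)
// becomes selectable again.
func (p *FreezeSPPool) ReleaseAllSP() {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.frozen = make(map[uint32]struct{})
}

func main() {
	pool := NewFreezeSPPool()
	pool.FreezeSP(7)
	pool.ReleaseAllSP()
	fmt.Println(len(pool.frozen)) // 0
}
```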
12 changes: 12 additions & 0 deletions core/vgmgr/virtual_group_manager_mock.go

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions go.mod
@@ -10,7 +10,7 @@ require (
github.com/aliyun/credentials-go v1.3.0
github.com/avast/retry-go/v4 v4.3.1
github.com/aws/aws-sdk-go v1.44.159
-	github.com/bnb-chain/greenfield v1.7.1-0.20240521062200-cc41c389096c
+	github.com/bnb-chain/greenfield v1.8.0
github.com/bnb-chain/greenfield-common/go v0.0.0-20240228080631-2683b0ee669a
github.com/bytedance/gopkg v0.0.0-20221122125632-68358b8ecec6
github.com/cometbft/cometbft v0.38.6
@@ -306,7 +306,7 @@ replace (
github.com/cometbft/cometbft => github.com/bnb-chain/greenfield-cometbft v1.2.1-0.20240408033601-a6b682aa870e
github.com/cometbft/cometbft-db => github.com/bnb-chain/greenfield-cometbft-db v0.8.1-alpha.1
github.com/confio/ics23/go => github.com/cosmos/cosmos-sdk/ics23/go v0.8.0
-	github.com/cosmos/cosmos-sdk => github.com/bnb-chain/greenfield-cosmos-sdk v1.6.1-0.20240419024340-b5c75cfd8110
+	github.com/cosmos/cosmos-sdk => github.com/bnb-chain/greenfield-cosmos-sdk v1.8.0
github.com/cosmos/iavl => github.com/bnb-chain/greenfield-iavl v0.20.1
github.com/forbole/juno/v4 => github.com/bnb-chain/juno/v4 v4.0.0-20240604033531-028f2cc8f76d
github.com/gogo/protobuf => github.com/regen-network/protobuf v1.3.3-alpha.regen.1
8 changes: 4 additions & 4 deletions go.sum
@@ -176,16 +176,16 @@ github.com/bgentry/speakeasy v0.1.1-0.20220910012023-760eaf8b6816/go.mod h1:+zsy
github.com/bits-and-blooms/bitset v1.10.0 h1:ePXTeiPEazB5+opbv5fr8umg2R/1NlzgDsyepwsSr88=
github.com/bits-and-blooms/bitset v1.10.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bmizerany/pat v0.0.0-20170815010413-6226ea591a40/go.mod h1:8rLXio+WjiTceGBHIoTvn60HIbs7Hm7bcHjyrSqYB9c=
-github.com/bnb-chain/greenfield v1.7.1-0.20240521062200-cc41c389096c h1:azKKvzAC/yLthneXB1sH6iPwVVj7sRK8X58PuTU/TsM=
-github.com/bnb-chain/greenfield v1.7.1-0.20240521062200-cc41c389096c/go.mod h1:RkoY1ISUFUMNbw2iR7iX8M+ToqmB8AlcRFKpwCJEX3Q=
+github.com/bnb-chain/greenfield v1.8.0 h1:5E8GQF9bS+ltG2PVXOiDXr6zBNL8aG/QhWYp7C2EgDM=
+github.com/bnb-chain/greenfield v1.8.0/go.mod h1:R4itO5Q7d5wj0L9sAXpbrVZMUrdDyRtSjccW8XOEFvI=
github.com/bnb-chain/greenfield-cometbft v1.2.1-0.20240408033601-a6b682aa870e h1:4ttDy8yBhBUW0gdFyBK0wHMJS5ZtlhBdoYx/O6T6Eqg=
github.com/bnb-chain/greenfield-cometbft v1.2.1-0.20240408033601-a6b682aa870e/go.mod h1:q9/nqW19iXvxyma5XgcZfxL/OkWI9s5e7yX9ecePz8A=
github.com/bnb-chain/greenfield-cometbft-db v0.8.1-alpha.1 h1:XcWulGacHVRiSCx90Q8Y//ajOrLNBQWR/KDB89dy3cU=
github.com/bnb-chain/greenfield-cometbft-db v0.8.1-alpha.1/go.mod h1:ey1CiK4bYo1RBNJLRiVbYr5CMdSxci9S/AZRINLtppI=
github.com/bnb-chain/greenfield-common/go v0.0.0-20240228080631-2683b0ee669a h1:VjUknQkIcqkjYCt1hmfpinM7kToOBuUU+KykrrqFsEM=
github.com/bnb-chain/greenfield-common/go v0.0.0-20240228080631-2683b0ee669a/go.mod h1:K9jK80fbahciC+FAvrch8Qsbw9ZkvVgjfKsqrzPTAVA=
-github.com/bnb-chain/greenfield-cosmos-sdk v1.6.1-0.20240419024340-b5c75cfd8110 h1:max1dH2HkKrNZpL2Jv6xwl+XWHsjJC6Ay+caN17u3CI=
-github.com/bnb-chain/greenfield-cosmos-sdk v1.6.1-0.20240419024340-b5c75cfd8110/go.mod h1:siglWrVkM1+6tj8ZPwzMIITWQh7D8gsKJUk0Suz+ul0=
+github.com/bnb-chain/greenfield-cosmos-sdk v1.8.0 h1:XaHBYnlAJNIEVTr9dXp3jzw12gCoIEL5jHiAMp+PX0s=
+github.com/bnb-chain/greenfield-cosmos-sdk v1.8.0/go.mod h1:2bwmwdXYBISnQoMwgAcZTVGt21lMsHZSeeeMByTvDlQ=
github.com/bnb-chain/greenfield-cosmos-sdk/api v0.0.0-20231206043955-0855e0965bc8 h1:mUMOeNo3K0SZvAhiOHNKW4mmkrhOphBF8tDUyK6e1tY=
github.com/bnb-chain/greenfield-cosmos-sdk/api v0.0.0-20231206043955-0855e0965bc8/go.mod h1:vhsZxXE9tYJeYB5JR4hPhd6Pc/uPf7j1T8IJ7p9FdeM=
github.com/bnb-chain/greenfield-cosmos-sdk/math v0.0.0-20231206043955-0855e0965bc8 h1:1Ud7itq03c4Q9h0kBpw1FYlWKN3kco8cgj59vdd50UQ=
2 changes: 2 additions & 0 deletions modular/manager/manage_task.go
@@ -145,6 +145,8 @@ func (m *ManageModular) pickGVGAndReplicate(ctx context.Context, vgfID uint32, t
gvgMeta, err := m.pickGlobalVirtualGroup(ctx, vgfID, task.GetStorageParams())
log.CtxInfow(ctx, "pick global virtual group", "time_cost", time.Since(startPickGVGTime).Seconds(), "gvg_meta", gvgMeta, "error", err)
if err != nil {
+		// If there is no way to create a new GVG, release all SPs from the freeze pool;
+		// serving with previously frozen SPs is better than not serving requests at all.
+		m.virtualGroupManager.ReleaseAllSP()
return err
}
replicateTask := &gfsptask.GfSpReplicatePieceTask{}
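The flow above is: try to pick a GVG; on failure, unfreeze every SP so the task's next retry can choose from the full SP set. A toy, self-contained restatement of that interplay (all names here are illustrative, not repo code):

```go
package main

import (
	"errors"
	"fmt"
)

// Illustrative only: how ReleaseAllSP interacts with task-level retry.
// Picking fails while every SP is frozen; releasing the pool lets a
// later retry succeed.
func main() {
	frozen := true
	pick := func() error {
		if frozen {
			return errors.New("no GVG available: all SPs frozen")
		}
		return nil
	}
	releaseAllSP := func() { frozen = false }

	for attempt := 1; attempt <= 2; attempt++ {
		if err := pick(); err != nil {
			fmt.Println("attempt", attempt, "failed:", err)
			releaseAllSP() // mirrors m.virtualGroupManager.ReleaseAllSP()
			continue
		}
		fmt.Println("attempt", attempt, "succeeded")
	}
}
```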
16 changes: 16 additions & 0 deletions pkg/metrics/metric_items.go
@@ -61,6 +61,8 @@ var MetricsItems = []prometheus.Collector{
ManagerCounter,
ManagerTime,
GCBlockNumberGauge,
+	SPHealthCheckerTime,
+	SPHealthCheckerFailureCounter,

// workflow metrics category
PerfApprovalTime,
@@ -247,6 +249,20 @@
Name: "gc_block_number",
Help: "Track the next gc block number.",
}, []string{"gc_block_number"})
+	SPHealthCheckerTime = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name: "sp_health_checker_request_time",
+			Help: "Request duration in seconds.",
+		},
+		[]string{"sp_id"},
+	)
+	SPHealthCheckerFailureCounter = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "sp_health_checker_request_counter",
+			Help: "Request failure count.",
+		},
+		[]string{"sp_id"},
+	)
)

// workflow metrics items
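For reference, a standalone sketch showing how these two collectors are exercised, mirroring the calls added in virtual_group_manager.go above. Registration through the repo's MetricsItems slice is replaced by a direct MustRegister here purely to keep the example self-contained:

```go
package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

var (
	spHealthCheckerTime = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name: "sp_health_checker_request_time",
			Help: "Request duration in seconds.",
		},
		[]string{"sp_id"},
	)
	spHealthCheckerFailureCounter = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "sp_health_checker_request_counter",
			Help: "Request failure count.",
		},
		[]string{"sp_id"},
	)
)

func main() {
	prometheus.MustRegister(spHealthCheckerTime, spHealthCheckerFailureCounter)

	// Record one probe: observe its duration, and count the failure.
	duration := 250 * time.Millisecond
	spHealthCheckerTime.WithLabelValues("7").Observe(duration.Seconds())
	spHealthCheckerFailureCounter.WithLabelValues("7").Inc()
}
```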
2 changes: 1 addition & 1 deletion store/sqldb/object_integrity.go
@@ -434,7 +434,7 @@ func (s *SpDBImpl) DeleteReplicatePieceChecksum(objectID uint64, segmentIdx uint
metrics.SPDBTime.WithLabelValues(SPDBSuccessDelReplicatePieceChecksum).Observe(
time.Since(startTime).Seconds())
}()
err = s.db.Where("object_id = ? and segment_idx = ? and redundancy_index = ? ", objectID, segmentIdx, redundancyIdx).Delete(PieceHashTable{}).Error
err = s.db.Where("object_id = ? and segment_index = ? and redundancy_index = ? ", objectID, segmentIdx, redundancyIdx).Delete(PieceHashTable{}).Error
return err
}

6 changes: 3 additions & 3 deletions store/sqldb/object_integrity_test.go
@@ -468,7 +468,7 @@ func TestSpDBImpl_DeleteReplicatePieceChecksumSuccess(t *testing.T) {
)
s, mock := setupDB(t)
mock.ExpectBegin()
mock.ExpectExec("DELETE FROM `piece_hash` WHERE object_id = ? and segment_idx = ? and redundancy_index = ?").
mock.ExpectExec("DELETE FROM `piece_hash` WHERE object_id = ? and segment_index = ? and redundancy_index = ?").
WithArgs(objectID, segmentIdx, redundancyIdx).WillReturnResult(sqlmock.NewResult(1, 1))
mock.ExpectCommit()
err := s.DeleteReplicatePieceChecksum(objectID, segmentIdx, redundancyIdx)
@@ -546,7 +546,7 @@ func TestSpDBImpl_DeleteAllReplicatePieceChecksumSuccess(t *testing.T) {
)
s, mock := setupDB(t)
mock.ExpectBegin()
mock.ExpectExec("DELETE FROM `piece_hash` WHERE object_id = ? and segment_idx = ? and redundancy_index = ?").
mock.ExpectExec("DELETE FROM `piece_hash` WHERE object_id = ? and segment_index = ? and redundancy_index = ?").
WithArgs(objectID, segmentIdx, redundancyIdx).WillReturnResult(sqlmock.NewResult(1, 1))
mock.ExpectCommit()
err := s.DeleteAllReplicatePieceChecksum(objectID, redundancyIdx, pieceCount)
@@ -562,7 +562,7 @@ func TestSpDBImpl_DeleteAllReplicatePieceChecksumFailure(t *testing.T) {
)
s, mock := setupDB(t)
mock.ExpectBegin()
mock.ExpectExec("DELETE FROM `piece_hash` WHERE object_id = ? and segment_idx = ? and redundancy_index = ?").
mock.ExpectExec("DELETE FROM `piece_hash` WHERE object_id = ? and segment_index = ? and redundancy_index = ?").
WithArgs(objectID, segmentIdx, redundancyIdx).WillReturnError(mockDBInternalError)
mock.ExpectRollback()
mock.ExpectCommit()